From 9536d174b05f0d112dde71a4a4ca61ae08f4ae9a Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi Date: Tue, 19 Jul 2022 11:50:29 +0900 Subject: [PATCH v8] Lift initial snapshot limit for logical replication The replication command CREATE_REPLICATION_SLOT tends to fail with "snapshot too large" when many subtransactions are active. This problem stems from SnapBuildInitialSnapshot attempting to generate a snapshot in which all active XIDs, including subxids, are stored within the xip array. This patch mitigates the constraint on the acceptable number of transaction IDs by utilizing the subxid array. Author: Dilip Kumar and Kyotaro Horiguchi --- src/backend/replication/logical/snapbuild.c | 50 +++++++++++++++++---- src/backend/storage/ipc/procarray.c | 18 ++------ src/backend/utils/time/snapmgr.c | 8 ++-- src/include/access/transam.h | 33 ++++++++++++++ src/include/utils/snapshot.h | 12 +++-- 5 files changed, 90 insertions(+), 31 deletions(-) diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 62542827e4..3a15c3cea7 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -565,10 +565,14 @@ Snapshot SnapBuildInitialSnapshot(SnapBuild *builder) { Snapshot snap; + int ntxn; + int xiplen; TransactionId xid; TransactionId safeXid; TransactionId *newxip; + TransactionId *newsubxip; int newxcnt = 0; + int newsubxcnt = 0; Assert(XactIsoLevel == XACT_REPEATABLE_READ); Assert(builder->building_full_snapshot); @@ -610,9 +614,35 @@ SnapBuildInitialSnapshot(SnapBuild *builder) MyProc->xmin = snap->xmin; - /* allocate in transaction context */ - newxip = (TransactionId *) - palloc(sizeof(TransactionId) * GetMaxSnapshotXidCount()); + /* + * Allocate in transaction context + * + * Since we know all the active XIDs, this snapshot won't be + * suboverflowed. However, if the number of XIDs surpasses the XID arrays' + * capacity, we cannot establish this snapshot because we don't know which + * XIDs are top-level. Since the original snapshot is historical, we can't + * reliably identify the top-level XIDs. Thus, we have no choice but fail + * when there are too many active XIDs. + */ + ntxn = TransactionIdDistance(snap->xmin, snap->xmax) - snap->xcnt; + xiplen = GetMaxSnapshotXidCount(); + newxip = (TransactionId *) palloc(sizeof(TransactionId) * xiplen); + + if (ntxn > GetMaxSnapshotXidCount()) + { + /* + * Since we can't identify the top-level XIDs, we can't create a + * suboverflowed snapshot. + */ + if (ntxn > xiplen + GetMaxSnapshotSubxidCount()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("initial slot snapshot too large"))); + + newsubxip = (TransactionId *) + palloc(sizeof(TransactionId) * GetMaxSnapshotSubxidCount()); + } + /* * snapbuild.c builds transactions in an "inverted" manner, which means it @@ -633,21 +663,23 @@ SnapBuildInitialSnapshot(SnapBuild *builder) if (test == NULL) { - if (newxcnt >= GetMaxSnapshotXidCount()) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("initial slot snapshot too large"))); - - newxip[newxcnt++] = xid; + if (newxcnt < xiplen) + newxip[newxcnt++] = xid; + else + newsubxip[newsubxcnt++] = xid; } TransactionIdAdvance(xid); } + Assert(newxcnt + newsubxcnt == ntxn); + /* adjust remaining snapshot fields as needed */ snap->snapshot_type = SNAPSHOT_MVCC; snap->xcnt = newxcnt; snap->xip = newxip; + snap->subxcnt = newsubxcnt; + snap->subxip = newsubxip; return snap; } diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index ea91ce355f..a0f5ba5a36 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -4823,22 +4823,10 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid)); /* - * Calculate how many array slots we'll need. Normally this is cheap; in - * the unusual case where the XIDs cross the wrap point, we do it the hard - * way. + * Calculate how many array slots we'll need. Both ends of the region are + * inclusive. */ - if (to_xid >= from_xid) - nxids = to_xid - from_xid + 1; - else - { - nxids = 1; - next_xid = from_xid; - while (TransactionIdPrecedes(next_xid, to_xid)) - { - nxids++; - TransactionIdAdvance(next_xid); - } - } + nxids = TransactionIdDistance(from_xid, to_xid) + 1; /* * Since only the startup process modifies the head/tail pointers, we diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 7d11ae3478..f7951d8787 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -2310,9 +2310,11 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) /* * If the snapshot contains full subxact data, the fastest way to * check things is just to compare the given XID against both subxact - * XIDs and top-level XIDs. If the snapshot overflowed, we have to - * use pg_subtrans to convert a subxact XID to its parent XID, but - * then we need only look at top-level XIDs not subxacts. + * XIDs and top-level XIDs. Note that there's a case where both arrays + * contain top and sub-transaction's XIDs in a mixed manner. If the + * snapshot overflowed, we have to use pg_subtrans to convert a subxact + * XID to its parent XID, but then we need only look at top-level XIDs + * not subxacts. */ if (!snapshot->suboverflowed) { diff --git a/src/include/access/transam.h b/src/include/access/transam.h index f5af6d3055..b6a3c8abee 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -370,6 +370,39 @@ FullTransactionIdNewer(FullTransactionId a, FullTransactionId b) return b; } + +/* return the distance between the two IDs */ +static inline int +TransactionIdDistance(TransactionId from_xid, TransactionId to_xid) +{ + int nxids; + TransactionId next_xid; + + StaticAssertDecl(sizeof(int) >= sizeof(TransactionId), + "TransactionId exceeds the width of int"); + Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid)); + + /* + * Calculate the distance between the two XIDs. Normally this is cheap; in + * the unusual case where the XIDs cross the wrap point, we do it the hard + * way. + */ + if (to_xid >= from_xid) + nxids = to_xid - from_xid; + else + { + nxids = 0; + next_xid = from_xid; + while (TransactionIdPrecedes(next_xid, to_xid)) + { + nxids++; + TransactionIdAdvance(next_xid); + } + } + + return nxids; +} + #endif /* FRONTEND */ #endif /* TRANSAM_H */ diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index 583a667a40..0de6aed6e0 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -160,8 +160,10 @@ typedef struct SnapshotData /* * For normal MVCC snapshot this contains the all xact IDs that are in * progress, unless the snapshot was taken during recovery in which case - * it's empty. For historic MVCC snapshots, the meaning is inverted, i.e. - * it contains *committed* transactions between xmin and xmax. + * it's empty. In the case of !suboverflowed, there's a situation where + * this contains both top and sub-transaction IDs in a mixed manner. For + * historic MVCC snapshots, the meaning is inverted, i.e. it contains + * *committed* transactions between xmin and xmax. * * note: all ids in xip[] satisfy xmin <= xip[i] < xmax */ @@ -171,8 +173,10 @@ typedef struct SnapshotData /* * For non-historic MVCC snapshots, this contains subxact IDs that are in * progress (and other transactions that are in progress if taken during - * recovery). For historic snapshot it contains *all* xids assigned to the - * replayed transaction, including the toplevel xid. + * recovery). In the case of !suboverflowed, there's a situation where this + * contains both top and sub-transaction IDs in a mixed manner. For + * historic snapshot it contains *all* xids assigned to the replayed + * transaction, including the toplevel xid. * * note: all ids in subxip[] are >= xmin, but we don't bother filtering * out any that are >= xmax -- 2.31.1