diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 3d9088a704..d690774f33 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -107,7 +107,18 @@ static bool TransactionGroupUpdateXidStatus(TransactionId xid, static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn, int pageno); +/* + * Run locally by a backend to establish whether or not it needs to call + * SubTransSetParent for subxid. + */ +bool +TransactionIdsAreOnSameXactPage(TransactionId topxid, TransactionId subxid) +{ + int toppageno = TransactionIdToPage(topxid); + int subpageno = TransactionIdToPage(subxid); + return (toppageno == subpageno); +} /* * TransactionIdSetTreeStatus @@ -133,7 +144,7 @@ static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, * only once, and the status will be set to committed directly. Otherwise * we must * 1. set sub-committed all subxids that are not on the same page as the - * main xid + * main xid (see TransactionIdsAreOnSameXactPage()) * 2. atomically set committed the main xid and the subxids on the same page * 3. go over the first bunch again and set them committed * Note that as far as concurrent checkers are concerned, main transaction diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 66d3548155..922621b4fc 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -177,6 +177,61 @@ SubTransGetTopmostTransaction(TransactionId xid) return previousXid; } +/* + * SubTransGetTopmostTransactionPrecedes + * + * Different form of SubTransGetTopmostTransaction() that minimizes the number + * of iterations required to get the parent or stop if it is earlier than cutoff. + */ +bool +SubTransGetTopmostTransactionPrecedes(TransactionId *xid, TransactionId cutoff_xid, bool standby) +{ + TransactionId parentXid = *xid, + previousXid = *xid; + + /* Can't ask about stuff that might not be around anymore */ + Assert(TransactionIdFollowsOrEquals(cutoff_xid, TransactionXmin)); + Assert(TransactionIdFollowsOrEquals(*xid, cutoff_xid)); + + while (TransactionIdIsValid(parentXid)) + { + previousXid = parentXid; + + /* + * Stop as soon as we are earlier than the cutoff. This saves multiple + * lookups against subtrans when we have a deeply nested subxid with + * a later snapshot with an xmin much higher than TransactionXmin. + */ + if (TransactionIdPrecedes(parentXid, cutoff_xid)) + { + *xid = previousXid; + return true; + } + parentXid = SubTransGetParent(parentXid); + + /* + * By convention the parent xid gets allocated first, so should always + * precede the child xid. Anything else points to a corrupted data + * structure that could lead to an infinite loop, so exit. + */ + if (!TransactionIdPrecedes(parentXid, previousXid)) + elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u", + previousXid, parentXid); + + /* + * subxids always point directly at parent on standby, so we can avoid + * multiple loops in that case. + */ + if (standby) + break; + } + + Assert(TransactionIdIsValid(previousXid)); + + *xid = previousXid; + + return false; +} /* * Initialization of shared memory for SUBTRANS diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 50f092d7eb..5577e5b8df 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -693,8 +693,51 @@ AssignTransactionId(TransactionState s) XactTopFullTransactionId = s->fullTransactionId; if (isSubXact) - SubTransSetParent(XidFromFullTransactionId(s->fullTransactionId), - XidFromFullTransactionId(s->parent->fullTransactionId)); + { + TransactionId subxid = XidFromFullTransactionId(s->fullTransactionId); + + /* + * Subtrans entries are only required in specific circumstances: + * + * 1. When there's no room in PG_PROC, as mentioned above. + * During XactLockTableWait() we sometimes need to know the topxid. + * If there is room in PG_PROC we can get a subxid's topxid direct + * from the procarray if the topxid is still running, using + * GetTopmostTransactionIdFromProcArray(). So we only ever need to + * call SubTransGetTopMostTransaction() if that xact overflowed; + * since that is our current transaction, we know whether or not to + * log the xid for future use. + * This occurs only when large number of subxids are requested by + * app user. + * + * 2. When IsolationIsSerializable() we sometimes need to access topxid + * This occurs only when SERIALIZABLE is requested by app user. + * + * 3. When TransactionIdSetStatus will use a status of SUB_COMMITTED, + * which then requires us to consult subtrans to find parent, which + * is needed to avoid race condition. In this case we ask Clog/Xact + * module if TransactionIdsAreOnSameXactPage(). Since we start a new + * clog page every 32000 xids, this is usually <<1% of subxids. + */ + if (MyProc->subxidStatus.overflowed || + IsolationIsSerializable() || + !TransactionIdsAreOnSameXactPage(GetTopTransactionId(), subxid)) + { + /* + * Insert entries into subtrans for this xid, noting that the entry + * points directly to the topxid, not the immediate parent. This is + * done for two reasons, (1) so it is faster in a long chain of subxids + * (2) so that we don't need to set subxids for unregistered parents. + * This has the downside that anyone waiting for a lock on aborted + * subtransactions would not be released immediately; that may or + * may not be an acceptable compromise. If not acceptable, this + * simple call needs to be replaced with a loop to register the + * parent for the current subxid stack, so we can walk back up it to + * the topxid. + */ + SubTransSetParent(subxid, GetTopTransactionId()); + } + } /* * If it's a top-level transaction, the predicate locking system needs to diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 0555b02a8d..0067fa327f 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -261,6 +261,13 @@ static ProcArrayStruct *procArray; static PGPROC *allProcs; +/* + * Remember the last call to TransactionIdIsInProgress() to avoid need to call + * SubTransGetTopMostTransaction() when the subxid is present in the procarray. + */ +static TransactionId LastCallXidIsInProgressSubXid = InvalidTransactionId; +static TransactionId LastCallXidIsInProgressParentXid = InvalidTransactionId; + /* * Cache to reduce overhead of repeated calls to TransactionIdIsInProgress() */ @@ -1440,6 +1447,8 @@ TransactionIdIsInProgress(TransactionId xid) other_xids = ProcGlobal->xids; other_subxidstates = ProcGlobal->subxidStates; + LastCallXidIsInProgressSubXid = LastCallXidIsInProgressParentXid = InvalidTransactionId; + LWLockAcquire(ProcArrayLock, LW_SHARED); /* @@ -1508,6 +1517,15 @@ TransactionIdIsInProgress(TransactionId xid) { LWLockRelease(ProcArrayLock); xc_by_child_xid_inc(); + + /* + * Remember the parent xid, for use during XactLockTableWait(). + * We do this because it is cheaper than looking up pg_subtrans, + * and also allows us to reduce calls to subtrans. + */ + LastCallXidIsInProgressSubXid = xid; + LastCallXidIsInProgressParentXid = pxid; + return true; } } @@ -1591,7 +1609,11 @@ TransactionIdIsInProgress(TransactionId xid) for (int i = 0; i < nxids; i++) { if (TransactionIdEquals(xids[i], topxid)) + { + LastCallXidIsInProgressSubXid = xid; + LastCallXidIsInProgressParentXid = topxid; return true; + } } } @@ -1599,6 +1621,28 @@ TransactionIdIsInProgress(TransactionId xid) return false; } +/* + * Allow the topmost xid to be accessed from the last call to + * TransactionIdIsInProgress(). Specifically designed for use in + * XactLockTableWait(). + */ +bool +GetTopmostTransactionIdFromProcArray(TransactionId xid, TransactionId *pxid) +{ + bool found = false; + + Assert(TransactionIdIsNormal(xid)); + + if (LastCallXidIsInProgressSubXid == xid) + { + Assert(TransactionIdIsNormal(LastCallXidIsInProgressParentXid)); + *pxid = LastCallXidIsInProgressParentXid; + found = true; + } + + return found; +} + /* * TransactionIdIsActive -- is xid the top-level XID of an active backend? * @@ -2370,27 +2414,34 @@ GetSnapshotData(Snapshot snapshot) * * Again, our own XIDs are not included in the snapshot. */ - if (!suboverflowed) + if (subxidStates[pgxactoff].overflowed) + suboverflowed = true; + + /* + * Include all subxids, whether or not we overflowed. This is + * important because it can reduce the number of accesses to SUBTRANS + * when we search snapshots, which is important for scalability, + * especially in the presence of both overflowed and long running xacts. + * This is true when fraction of subxids held in subxip is a large + * fraction of the total subxids in use. In the case where one or more + * transactions had more subxids in progress than the total size of + * the cache this might not be true, but we consider that case to be + * unlikely, even if it is possible. + */ { + int nsubxids = subxidStates[pgxactoff].count; - if (subxidStates[pgxactoff].overflowed) - suboverflowed = true; - else + if (nsubxids > 0) { - int nsubxids = subxidStates[pgxactoff].count; - - if (nsubxids > 0) - { - int pgprocno = pgprocnos[pgxactoff]; - PGPROC *proc = &allProcs[pgprocno]; + int pgprocno = pgprocnos[pgxactoff]; + PGPROC *proc = &allProcs[pgprocno]; - pg_read_barrier(); /* pairs with GetNewTransactionId */ + pg_read_barrier(); /* pairs with GetNewTransactionId */ - memcpy(snapshot->subxip + subcount, - (void *) proc->subxids.xids, - nsubxids * sizeof(TransactionId)); - subcount += nsubxids; - } + memcpy(snapshot->subxip + subcount, + (void *) proc->subxids.xids, + nsubxids * sizeof(TransactionId)); + subcount += nsubxids; } } } diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 1543da6162..3cb607c285 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -694,6 +694,8 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, for (;;) { + TransactionId pxid = InvalidTransactionId; + Assert(TransactionIdIsValid(xid)); Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny())); @@ -703,6 +705,13 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, LockRelease(&tag, ShareLock, false); + /* + * If a transaction has no lock, it might be a top-level transaction, + * in which case the procarray will show it as not in progress. + * + * If a transaction is a subtransaction, then it could have committed + * or aborted, yet the top-level transaction may still be in progress. + */ if (!TransactionIdIsInProgress(xid)) break; @@ -724,7 +733,27 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, if (!first) pg_usleep(1000L); first = false; - xid = SubTransGetTopmostTransaction(xid); + + /* + * In most cases, we can get the parent xid from our prior call to + * TransactionIdIsInProgress(), except in hot standby. If not, we have + * to ask subtrans for the parent. + */ + if (GetTopmostTransactionIdFromProcArray(xid, &pxid)) + { + /* TESTED-BY src/test/isolation/specs/subx-overflow.spec test3 */ + Assert(TransactionIdIsValid(pxid)); + xid = pxid; + } + else + { + /* + * We can get here if RecoveryInProgress() during the last call to + * TransactionIdIsInProgress(), but we don't Assert that here since + * that would create a race window against the end of recovery. + */ + xid = SubTransGetTopmostTransaction(xid); + } } if (oper != XLTW_None) @@ -745,6 +774,8 @@ ConditionalXactLockTableWait(TransactionId xid) for (;;) { + TransactionId pxid = InvalidTransactionId; + Assert(TransactionIdIsValid(xid)); Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny())); @@ -762,7 +793,15 @@ ConditionalXactLockTableWait(TransactionId xid) if (!first) pg_usleep(1000L); first = false; - xid = SubTransGetTopmostTransaction(xid); + + /* See XactLockTableWait about this case */ + if (GetTopmostTransactionIdFromProcArray(xid, &pxid)) + { + Assert(TransactionIdIsValid(pxid)); + xid = pxid; + } + else + xid = SubTransGetTopmostTransaction(xid); } return true; diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 9b504c9745..ae9dc0dd92 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -639,13 +639,9 @@ CopySnapshot(Snapshot snapshot) newsnap->xip = NULL; /* - * Setup subXID array. Don't bother to copy it if it had overflowed, - * though, because it's not used anywhere in that case. Except if it's a - * snapshot taken during recovery; all the top-level XIDs are in subxip as - * well in that case, so we mustn't lose them. + * Setup subXID array. */ - if (snapshot->subxcnt > 0 && - (!snapshot->suboverflowed || snapshot->takenDuringRecovery)) + if (snapshot->subxcnt > 0) { newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff); memcpy(newsnap->subxip, snapshot->subxip, @@ -1239,8 +1235,10 @@ ExportSnapshot(Snapshot snapshot) snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount()) appendStringInfoString(&buf, "sof:1\n"); else - { appendStringInfoString(&buf, "sof:0\n"); + + /* then unconditionally, since we always include all subxids */ + { appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren); for (i = 0; i < snapshot->subxcnt; i++) appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]); @@ -1491,7 +1489,7 @@ ImportSnapshot(const char *idstr) snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path); - if (!snapshot.suboverflowed) + /* then unconditionally, since we always include all subxids */ { snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path); @@ -1505,11 +1503,6 @@ ImportSnapshot(const char *idstr) for (i = 0; i < xcnt; i++) snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path); } - else - { - snapshot.subxcnt = 0; - snapshot.subxip = NULL; - } snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path); @@ -2285,6 +2278,8 @@ RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc) bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) { + bool have_parent = false; + /* * Make a quick range check to eliminate most XIDs without looking at the * xip arrays. Note that this is OK even if we convert a subxact XID to @@ -2300,80 +2295,66 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) if (TransactionIdFollowsOrEquals(xid, snapshot->xmax)) return true; + /* + * This patch reorders the operations in XidMVCCSnapshot, so as to reduce + * calls to SubTransGetParent to the absolute minimum needed. + * The previous code was neat, but not efficient for the overflow case. + */ +retry_search: + /* * Snapshot information is stored slightly differently in snapshots taken - * during recovery. + * during recovery. xip is empty on standbys. */ if (!snapshot->takenDuringRecovery) { - /* - * If the snapshot contains full subxact data, the fastest way to - * check things is just to compare the given XID against both subxact - * XIDs and top-level XIDs. If the snapshot overflowed, we have to - * use pg_subtrans to convert a subxact XID to its parent XID, but - * then we need only look at top-level XIDs not subxacts. - */ - if (!snapshot->suboverflowed) - { - /* we have full data, so search subxip */ - if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) - return true; - - /* not there, fall through to search xip[] */ - } - else - { - /* - * Snapshot overflowed, so convert xid to top-level. This is safe - * because we eliminated too-old XIDs above. - */ - xid = SubTransGetTopmostTransaction(xid); - - /* - * If xid was indeed a subxact, we might now have an xid < xmin, - * so recheck to avoid an array scan. No point in rechecking - * xmax. - */ - if (TransactionIdPrecedes(xid, snapshot->xmin)) - return false; - } - if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt)) return true; } - else + + /* + * If we have the parent xid, then the xid is not in snapshot + */ + if (have_parent) + return false; + + /* + * Now search subxip, which contains first 64 subxids of each topxid. + */ + if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) + return true; + + if (!have_parent && snapshot->suboverflowed) { + /* TESTED-BY src/test/isolation/specs/subx-overflow.spec test1 and test2 */ + /* - * In recovery we store all xids in the subxact array because it is by - * far the bigger array, and we mostly don't know which xids are - * top-level and which are subxacts. The xip array is empty. + * If we haven't found xid yet, it might be because it is a subxid + * that is not present because we overflowed, but it might also be + * because the xid is not in the snapshot. * - * We start by searching subtrans, if we overflowed. + * It is important we do this step last because it is expensive, + * and if everybody does this then SubTransSLRU glows white hot. + * + * Use SubTransGetTopmostTransactionPrecedes(), which has been + * specially provided to help here. This does two things for us: + * + * 1. On standby, get the parent directly, since in a standby SUBTRANS + * always stores the direct parent only. Doing this avoids + * one lookup of subtrans, since SubTransGetTopmostTransaction() + * always does at least 2 SUBTRANS lookups, the last lookup is + * how the loop knows it has found the parent in normal running. + * + * 2. Stops the iteration to find the parent as soon as we find an + * xid earlier than snapshot->xmin, so we do the minimum lookups. */ - if (snapshot->suboverflowed) - { - /* - * Snapshot overflowed, so convert xid to top-level. This is safe - * because we eliminated too-old XIDs above. - */ - xid = SubTransGetTopmostTransaction(xid); + if (SubTransGetTopmostTransactionPrecedes(&xid, + snapshot->xmin, + snapshot->takenDuringRecovery)) + return false; - /* - * If xid was indeed a subxact, we might now have an xid < xmin, - * so recheck to avoid an array scan. No point in rechecking - * xmax. - */ - if (TransactionIdPrecedes(xid, snapshot->xmin)) - return false; - } - - /* - * We now have either a top-level xid higher than xmin or an - * indeterminate xid. We don't know whether it's top level or subxact - * but it doesn't matter. If it's present, the xid is visible. - */ - if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) - return true; + have_parent = true; + goto retry_search; /* search arrays again, now we have parent */ } return false; diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h index f94e116640..fc4103b769 100644 --- a/src/include/access/subtrans.h +++ b/src/include/access/subtrans.h @@ -17,6 +17,7 @@ extern void SubTransSetParent(TransactionId xid, TransactionId parent); extern TransactionId SubTransGetParent(TransactionId xid); extern TransactionId SubTransGetTopmostTransaction(TransactionId xid); +extern bool SubTransGetTopmostTransactionPrecedes(TransactionId *xid, TransactionId cutoff_xid, bool standby); extern Size SUBTRANSShmemSize(void); extern void SUBTRANSShmemInit(void); diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 775471d2a7..f404db552f 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -301,6 +301,9 @@ extern void AssertTransactionIdInAllowableRange(TransactionId xid); #define AssertTransactionIdInAllowableRange(xid) ((void)true) #endif +/* in transam/clog.c */ +extern bool TransactionIdsAreOnSameXactPage(TransactionId topxid, TransactionId subxid); + /* * Some frontend programs include this header. For compilers that emit static * inline functions even when they're unused, that leads to unsatisfied diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 1b2cfac5ad..d7ad1da6a8 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -52,6 +52,8 @@ extern bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc); extern RunningTransactions GetRunningTransactionData(void); extern bool TransactionIdIsInProgress(TransactionId xid); +extern bool GetTopmostTransactionIdFromProcArray(TransactionId xid, TransactionId *pxid); + extern bool TransactionIdIsActive(TransactionId xid); extern TransactionId GetOldestNonRemovableTransactionId(Relation rel); extern TransactionId GetOldestTransactionIdConsideredRunning(void);