From 2565b8554e321e8ca9a87f36a48f9ab7f86ab247 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 13 Aug 2024 20:01:07 +0300 Subject: [PATCH v6 10/12] Make SnapBuildWaitSnapshot work without xl_running_xacts.xids array SnapBuildWaitSnapshot looped through all the XIDs in the xl_running_xacts, waiting for them to finish. Change it to grab the list of running XIDs from the proc array instead. This removes the last usage of the XIDs array in the xl_running_xacts record, allowing it to be removed in the next commit. When SnapBuildWaitSnapshot() is called with running->nextXid as the 'cutoff' point, the new code should wait for exactly the same set of transactions as before. But when called with initial_xmin_horizon as the 'cutoff', this might wait for more transactions than before: those between running->nextXid and initial_xmin_horizon. For example, imagine that we see a running-xacts record with nextXid 100, and initial_xmin_horizon is 200. Before, we would wait for all XIDs < 100 to complete, and then log the standby snapshot and proceed, but now we will wait for all XIDs < 200. I believe that's a good thing, because we won't actually be able to move to the next state in the snapshot building until all transactions < 200 have completed. The running-xacts snapshot that we logged after waiting up to XID 100 would not be useful to us either, if there are still XIDs between 100 and 200 running. SnapBuildWaitSnapshot() used to do useless work when called in a standby, because in a standby, there are no XID locks and the XactLockTableWait() calls returned immediately, even if the XIDs were in fact still running in the primary. But as the comment says, the waiting isn't necessary for correctness, so that was harmless. In any case, stop doing the futile work on a standby. --- src/backend/replication/logical/snapbuild.c | 50 ++++++++++++++------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 97d278052df..252526ecf91 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -164,7 +164,7 @@ static inline bool SnapBuildXidHasCatalogChanges(SnapBuild *builder, Transaction /* xlog reading helper functions for SnapBuildProcessRunningXacts */ static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running); -static void SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff); +static void SnapBuildWaitSnapshot(TransactionId cutoff); /* serialization functions */ static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn); @@ -1192,14 +1192,17 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn NormalTransactionIdPrecedes(running->oldestRunningXid, builder->initial_xmin_horizon)) { + TransactionId cutoff; + ereport(DEBUG1, (errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low", LSN_FORMAT_ARGS(lsn)), errdetail_internal("initial xmin horizon of %u vs the snapshot's %u", builder->initial_xmin_horizon, running->oldestRunningXid))); - - SnapBuildWaitSnapshot(running, builder->initial_xmin_horizon); + cutoff = builder->initial_xmin_horizon; + TransactionIdRetreat(cutoff); + SnapBuildWaitSnapshot(cutoff); return true; } @@ -1286,7 +1289,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn errdetail("Waiting for transactions (approximately %d) older than %u to end.", running->xcnt, running->nextXid))); - SnapBuildWaitSnapshot(running, running->nextXid); + SnapBuildWaitSnapshot(running->nextXid); } /* @@ -1310,7 +1313,7 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn errdetail("Waiting for transactions (approximately %d) older than %u to end.", running->xcnt, running->nextXid))); - SnapBuildWaitSnapshot(running, running->nextXid); + SnapBuildWaitSnapshot(running->nextXid); } /* @@ -1343,8 +1346,8 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn } /* --- - * Iterate through xids in record, wait for all older than the cutoff to - * finish. Then, if possible, log a new xl_running_xacts record. + * Wait for all transactions older than or equal to the cutoff to finish. + * Then, if possible, log a new xl_running_xacts record. * * This isn't required for the correctness of decoding, but to: * a) allow isolationtester to notice that we're currently waiting for @@ -1354,13 +1357,31 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn * --- */ static void -SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff) +SnapBuildWaitSnapshot(TransactionId cutoff) { - int off; + RunningTransactions running; + + if (RecoveryInProgress()) + { + /* + * During recovery, we have no mechanism for waiting for an XID to + * finish, and we cannot create new running-xacts records either. + */ + return; + } + + running = GetRunningTransactionData(); + + /* + * GetRunningTransactionData returns with XidGenLock and ProcArrayLock + * held, but we don't need them. + */ + LWLockRelease(XidGenLock); + LWLockRelease(ProcArrayLock); - for (off = 0; off < running->xcnt; off++) + for (int i = 0; i < running->xcnt; i++) { - TransactionId xid = running->xids[off]; + TransactionId xid = running->xids[i]; /* * Upper layers should prevent that we ever need to wait on ourselves. @@ -1370,7 +1391,7 @@ SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff) if (TransactionIdIsCurrentTransactionId(xid)) elog(ERROR, "waiting for ourselves"); - if (TransactionIdFollows(xid, cutoff)) + if (TransactionIdFollowsOrEquals(xid, cutoff)) continue; XactLockTableWait(xid, NULL, NULL, XLTW_None); @@ -1382,10 +1403,7 @@ SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff) * wait for bgwriter or checkpointer to log one. During recovery we can't * enforce that, so we'll have to wait. */ - if (!RecoveryInProgress()) - { - LogStandbySnapshot(); - } + LogStandbySnapshot(); } #define SnapBuildOnDiskConstantSize \ -- 2.39.5