From 2e4d580af4a1e6b2cb000d4e9db2c42e40aa4cd2 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot Date: Thu, 11 Jan 2024 13:57:53 +0000 Subject: [PATCH v1] Fix race condition in InvalidatePossiblyObsoleteSlot() In case of an active slot we proceed in 2 steps: - terminate the backend holding the slot - report the slot as obsolete This is racy because between the two we release the mutex on the slot, which means the effective_xmin and effective_catalog_xmin could advance during that time. Holding the mutex longer is not an option (given what we'd to do while holding it) so record the effective_xmin and effective_catalog_xmin instead that was used during the backend termination. --- src/backend/replication/slot.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) 100.0% src/backend/replication/ diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 52da694c79..2e34cca0e8 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1352,6 +1352,9 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, { int last_signaled_pid = 0; bool released_lock = false; + bool terminated = false; + XLogRecPtr initial_effective_xmin; + XLogRecPtr initial_catalog_effective_xmin; for (;;) { @@ -1396,15 +1399,20 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, case RS_INVAL_HORIZON: if (!SlotIsLogical(s)) break; + if (!terminated) + { + initial_effective_xmin = s->effective_xmin; + initial_catalog_effective_xmin = s->effective_catalog_xmin; + } /* invalid DB oid signals a shared relation */ if (dboid != InvalidOid && dboid != s->data.database) break; - if (TransactionIdIsValid(s->effective_xmin) && - TransactionIdPrecedesOrEquals(s->effective_xmin, + if (TransactionIdIsValid(initial_effective_xmin) && + TransactionIdPrecedesOrEquals(initial_effective_xmin, snapshotConflictHorizon)) conflict = cause; - else if (TransactionIdIsValid(s->effective_catalog_xmin) && - TransactionIdPrecedesOrEquals(s->effective_catalog_xmin, + else if (TransactionIdIsValid(initial_catalog_effective_xmin) && + TransactionIdPrecedesOrEquals(initial_catalog_effective_xmin, snapshotConflictHorizon)) conflict = cause; break; @@ -1499,6 +1507,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, (void) kill(active_pid, SIGTERM); last_signaled_pid = active_pid; + terminated = true; } /* Wait until the slot is released. */ -- 2.34.1