From a05d9741c3161a894decd1b88e148c4bbac60b0e Mon Sep 17 00:00:00 2001 From: Josh Curtis Date: Mon, 1 Sep 2025 21:07:52 -0700 Subject: [PATCH v1] Fix race condition when reading `PredXact->SxactGlobalXmin` `DropAllPredicateLocksFromTable`, `PredicateLockPageSplit`, and `CheckTableForSerializableConflictIn` all assume that `!TransactionIdIsValid(PredXact->SxactGlobalXmin)` implies there are no active serializable transactions. This assumption is not true due to a race with `SetNewSxactGlobalXmin`, which first sets `PredXact->SxactGlobalXmin` to `InvalidTransactionId` then iterates over the active serializable transactions to find the new xmin. If `SetNewSxactGlobalXmin` is interrupted before setting a new xmin and other processes check `!TransactionIdIsValid(PredXact->SxactGlobalXmin)` without taking a lock during this time, they will incorrectly assume it is safe to proceed as if there are no concurrent serializable transactions. --- src/backend/storage/lmgr/predicate.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index c1d8511ad17..50e9f62b566 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -2949,12 +2949,14 @@ DropAllPredicateLocksFromTable(Relation relation, bool transfer) /* * Bail out quickly if there are no serializable transactions running. - * It's safe to check this without taking locks because the caller is - * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which - * would matter here can be acquired while that is held. */ + LWLockAcquire(SerializableXactHashLock, LW_SHARED); if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + { + LWLockRelease(SerializableXactHashLock); return; + } + LWLockRelease(SerializableXactHashLock); if (!PredicateLockingNeededForRelation(relation)) return; @@ -3150,16 +3152,14 @@ PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, /* * Bail out quickly if there are no serializable transactions running. - * - * It's safe to do this check without taking any additional locks. Even if - * a serializable transaction starts concurrently, we know it can't take - * any SIREAD locks on the page being split because the caller is holding - * the associated buffer page lock. Memory reordering isn't an issue; the - * memory barrier in the LWLock acquisition guarantees that this read - * occurs while the buffer page lock is held. */ + LWLockAcquire(SerializableXactHashLock, LW_SHARED); if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + { + LWLockRelease(SerializableXactHashLock); return; + } + LWLockRelease(SerializableXactHashLock); if (!PredicateLockingNeededForRelation(relation)) return; @@ -4426,12 +4426,14 @@ CheckTableForSerializableConflictIn(Relation relation) /* * Bail out quickly if there are no serializable transactions running. - * It's safe to check this without taking locks because the caller is - * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which - * would matter here can be acquired while that is held. */ + LWLockAcquire(SerializableXactHashLock, LW_SHARED); if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + { + LWLockRelease(SerializableXactHashLock); return; + } + LWLockRelease(SerializableXactHashLock); if (!SerializationNeededForWrite(relation)) return; -- 2.49.0