From da465aac7e94baca11510b9d734b5ad5367d792e Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Fri, 10 Apr 2026 10:59:13 +0800 Subject: [PATCH v4 2/5] Fix memory ordering in WAIT FOR LSN wakeup mechanism WAIT FOR LSN uses a Dekker-style handshake: the waker stores an LSN position then reads minWaitedLSN; the waiter stores its target into minWaitedLSN then reads the position. Without a barrier between each side's store and load, a CPU may satisfy the load before the store becomes globally visible, causing either side to miss a concurrent update. The result is a missed wakeup: the waiter sleeps until the next unrelated latch event, potentially indefinitely. Fix by embedding the required barriers into the atomic operations on minWaitedLSN: - In updateMinWaitedLSN(), use pg_atomic_write_membarrier_u64() so the waiter's preceding heap update is visible before the new minWaitedLSN value is published. - In WaitLSNWakeup(), use pg_atomic_read_membarrier_u64() in the fast-path check so the waker's preceding position store is globally visible before minWaitedLSN is read. The waiter side is also covered by the barrier semantics already present in GetCurrentLSNForWaitType(): GetWalRcvWriteRecPtr() uses an explicit read barrier (from patch 0001), GetFlushRecPtr() already reads with barrier semantics, and the remaining getters acquire a spinlock, which implies the same ordering. Also call ResetLatch() unconditionally after WaitLatch(). The previous conditional 'if (rc & WL_LATCH_SET)' could skip ResetLatch() on a timeout return, omitting its memory barrier. Without that barrier, the GetCurrentLSNForWaitType() call at the top of the next loop iteration could read a stale position, potentially causing the waiter to exit with WAIT_LSN_RESULT_TIMEOUT even though the target LSN has been reached. 
Reported-by: Andres Freund Discussion: https://postgr.es/m/zqbppucpmkeqecfy4s5kscnru4tbk6khp3ozqz6ad2zijz354k%40w4bdf4z3wqoz Author: Xuneng Zhou --- src/backend/access/transam/xlogwait.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/backend/access/transam/xlogwait.c b/src/backend/access/transam/xlogwait.c index 2e31c0d67d7..6a27183c207 100644 --- a/src/backend/access/transam/xlogwait.c +++ b/src/backend/access/transam/xlogwait.c @@ -92,13 +92,19 @@ StaticAssertDecl(lengthof(WaitLSNWaitEvents) == WAIT_LSN_TYPE_COUNT, "WaitLSNWaitEvents must match WaitLSNType enum"); /* - * Get the current LSN for the specified wait type. + * Get the current LSN for the specified wait type. Provide memory + * barrier semantics before getting the value. */ XLogRecPtr GetCurrentLSNForWaitType(WaitLSNType lsnType) { Assert(lsnType >= 0 && lsnType < WAIT_LSN_TYPE_COUNT); + /* + * All of the cases below provide memory barrier semantics: + * GetWalRcvWriteRecPtr() and GetFlushRecPtr() have explicit barriers, + * while GetXLogReplayRecPtr() and GetWalRcvFlushRecPtr() use spinlocks. + */ switch (lsnType) { case WAIT_LSN_TYPE_STANDBY_REPLAY: @@ -184,7 +190,8 @@ updateMinWaitedLSN(WaitLSNType lsnType) minWaitedLSN = procInfo->waitLSN; } - pg_atomic_write_u64(&waitLSNState->minWaitedLSN[i], minWaitedLSN); + /* Pairs with pg_atomic_read_membarrier_u64() in WaitLSNWakeup(). */ + pg_atomic_write_membarrier_u64(&waitLSNState->minWaitedLSN[i], minWaitedLSN); } /* @@ -325,10 +332,11 @@ WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN) /* * Fast path check. Skip if currentLSN is InvalidXLogRecPtr, which means - * "wake all waiters" (e.g., during promotion when recovery ends). + * "wake all waiters" (e.g., during promotion when recovery ends). Pairs + * with pg_atomic_write_membarrier_u64() in updateMinWaitedLSN(). 
*/ if (XLogRecPtrIsValid(currentLSN) && - pg_atomic_read_u64(&waitLSNState->minWaitedLSN[i]) > currentLSN) + pg_atomic_read_membarrier_u64(&waitLSNState->minWaitedLSN[i]) > currentLSN) return; wakeupWaiters(lsnType, currentLSN); @@ -450,8 +458,7 @@ WaitForLSN(WaitLSNType lsnType, XLogRecPtr targetLSN, int64 timeout) errmsg("terminating connection due to unexpected postmaster exit"), errcontext("while waiting for LSN")); - if (rc & WL_LATCH_SET) - ResetLatch(MyLatch); + ResetLatch(MyLatch); } /* -- 2.51.0