From 8ed72e1bc748f99fbf8b103ae5bd4cf395cb54ef Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Tue, 28 Apr 2026 12:21:21 -0700 Subject: [PATCH v1] Fix race between ProcSignalInit() and EmitProcSignalBarrier(). Previously, ProcSignalInit() read the global barrier generation before publishing its PID into pss_pid. A concurrent EmitProcSignalBarrier() iterates the ProcSignal slots and skips any whose pss_pid is still zero, on the assumption that such a slot will pick up the new generation when it later reads psh_barrierGeneration. But because the joining backend had already read the (older) global generation under its slot's spinlock, it would store a stale value into pss_barrierGeneration and never absorb the just-emitted barrier, with the result that WaitForProcSignalBarrier() would never complete. Publish pss_pid before reading psh_barrierGeneration, with a memory barrier in between so that the store is globally visible first. A concurrent EmitProcSignalBarrier() then either observes the published PID and signals this slot, or completes its generation increment before we load it. Discussion: https://postgr.es/m/CAEze2WgAJmWReDN7Chtba8Er2YBvKCoa0KVN25-1evnTrHsLyA@mail.gmail.com Backpatch-through: --- src/backend/storage/ipc/procsignal.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 264e4c22ca6..b0681ca0ae2 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -188,6 +188,16 @@ ProcSignalInit(const uint8 *cancel_key, int cancel_key_len) /* Clear out any leftover signal reasons */ MemSet(slot->pss_signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t)); + /* + * Publish the PID before reading the global barrier generation to ensure + * that EmitProcSignalBarrier() doesn't skip us while we are grabbing an + * older generation. 
We need a memory barrier here to make sure that the + * update of pss_pid is globally visible before the load of the global + * barrier generation executes. + */ + pg_atomic_write_u32(&slot->pss_pid, MyProcPid); + pg_memory_barrier(); + /* * Initialize barrier state. Since we're a brand-new process, there * shouldn't be any leftover backend-private state that needs to be @@ -207,7 +217,6 @@ ProcSignalInit(const uint8 *cancel_key, int cancel_key_len) if (cancel_key_len > 0) memcpy(slot->pss_cancel_key, cancel_key, cancel_key_len); slot->pss_cancel_key_len = cancel_key_len; - pg_atomic_write_u32(&slot->pss_pid, MyProcPid); SpinLockRelease(&slot->pss_mutex); -- 2.54.0