From 3fa0f830e547160fe1cf6208716c403ec112ff24 Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Fri, 24 Oct 2025 09:11:34 +0800 Subject: [PATCH v1] Fix unconditional walreceiver shutdown during stream-archive transition Commit 3635a0a introduced an unconditional walreceiver shutdown when switching from streaming to archive WAL sources. This causes problems during timeline divergence, when walreceiver enters the WALRCV_WAITING state but remains alive. The unconditional shutdown breaks monitoring: walreceiver gets repeatedly terminated and respawned, causing pg_stat_wal_receiver.status to show 'streaming' instead of 'waiting', masking the underlying replication problem. In the worst case, with synchronous replication, this can lead to an unwritable cluster when the standby reports false readiness. Fix by making the shutdown conditional: only terminate walreceiver when it's actively streaming or attempting to connect (WALRCV_STREAMING, WALRCV_STARTING, or WALRCV_RESTARTING). When it is in the WALRCV_WAITING state, just reset the InstallXLogFileSegmentActive flag to allow archive restoration without killing the process. --- src/backend/access/transam/xlog.c | 14 ++++++++++---- src/backend/access/transam/xlogrecovery.c | 12 +++++++++++- src/include/access/xlog.h | 1 + 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index eceab341255..05d7dfdd963 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -9512,17 +9512,23 @@ GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli) LWLockRelease(ControlFileLock); } -/* Thin wrapper around ShutdownWalRcv(). */ +/* Reset the InstallXLogFileSegmentActive flag without shutting down walreceiver. 
*/ void -XLogShutdownWalRcv(void) +ResetInstallXLogFileSegmentActive(void) { - ShutdownWalRcv(); - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); XLogCtl->InstallXLogFileSegmentActive = false; LWLockRelease(ControlFileLock); } +/* Thin wrapper around ShutdownWalRcv(). */ +void +XLogShutdownWalRcv(void) +{ + ShutdownWalRcv(); + ResetInstallXLogFileSegmentActive(); +} + /* Enable WAL file recycling and preallocation. */ void SetInstallXLogFileSegmentActive(void) diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 52ff4d119e6..62467656e90 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -3687,8 +3687,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, * Before we leave XLOG_FROM_STREAM state, make sure that * walreceiver is not active, so that it won't overwrite * WAL that we restore from archive. + * + * If walreceiver is actively streaming (or attempting to + * connect), we must shut it down. However, if it's already + * in WAITING state (e.g., due to timeline divergence), we + * only need to reset the install flag to allow archive + * restoration, while keeping the process alive for + * monitoring visibility. */ - XLogShutdownWalRcv(); + if (WalRcvStreaming()) + XLogShutdownWalRcv(); + else + ResetInstallXLogFileSegmentActive(); /* * Before we sleep, re-scan for possible new timelines if diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index d12798be3d8..3b88ae93aae 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -268,6 +268,7 @@ extern void SwitchIntoArchiveRecovery(XLogRecPtr EndRecPtr, TimeLineID replayTLI extern void ReachedEndOfBackup(XLogRecPtr EndRecPtr, TimeLineID tli); extern void SetInstallXLogFileSegmentActive(void); extern bool IsInstallXLogFileSegmentActive(void); +extern void ResetInstallXLogFileSegmentActive(void); extern void XLogShutdownWalRcv(void); /* -- 2.51.0