From 79e18b79cc0a037802903d852199d2fb7058df63 Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Fri, 10 Apr 2026 11:07:54 +0800 Subject: [PATCH v5 4/7] Use replay position as floor for WAIT FOR LSN standby_(write|flush) GetCurrentLSNForWaitType() for standby_write and standby_flush modes returned only the walreceiver position, which may lag behind WAL already present on the standby from a base backup, archive restore, or prior streaming. This could cause unnecessary blocking if the target LSN falls between the walreceiver's tracked position and the replay position. Fix by returning the maximum of the walreceiver position and the replay position. WAL up to the replay point is physically on disk regardless of its origin, so there is no reason to wait for the walreceiver to re-receive it. This complements 29e7dbf5e4d, which seeded writtenUpto to receiveStart in RequestXLogStreaming() to fix the most common hang scenario. The getter-level floor handles the remaining edge cases: targets between receiveStart and the replay position, and standbys running with archive recovery only (no walreceiver). Reported-by: Tom Lane Discussion: https://postgr.es/m/1957514.1775526774%40sss.pgh.pa.us Author: Xuneng Zhou --- doc/src/sgml/ref/wait_for.sgml | 29 ++++------ src/backend/access/transam/xlogwait.c | 21 ++++++- src/test/recovery/t/049_wait_for_lsn.pl | 73 +++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 19 deletions(-) diff --git a/doc/src/sgml/ref/wait_for.sgml b/doc/src/sgml/ref/wait_for.sgml index 9ba785ea321..7b403c98dd0 100644 --- a/doc/src/sgml/ref/wait_for.sgml +++ b/doc/src/sgml/ref/wait_for.sgml @@ -105,30 +105,25 @@ WAIT FOR LSN 'lsn' standby_write: Wait for the WAL containing the - LSN to be received from the primary and written to disk on a - standby server, but not yet flushed. This is faster than + LSN to be written to disk on a standby server, but not + necessarily flushed yet. 
This is faster than standby_flush but provides weaker durability guarantees since the data may still be in operating system - buffers. After successful completion, the - written_lsn column in - - pg_stat_wal_receiver will show - a value greater than or equal to the target LSN. This mode can - only be used during recovery. + buffers. This is satisfied by WAL already present on the + standby from a base backup, archive restore, or prior + streaming, as well as WAL newly received from the primary. + This mode can only be used during recovery. standby_flush: Wait for the WAL containing the - LSN to be received from the primary and flushed to disk on a - standby server. This provides a durability guarantee without - waiting for the WAL to be applied. After successful completion, - pg_last_wal_receive_lsn() will return a - value greater than or equal to the target LSN. This value is - also available as the flushed_lsn - column in - pg_stat_wal_receiver. This mode - can only be used during recovery. + LSN to be flushed to disk on a standby server. This provides + a durability guarantee without waiting for the WAL to be + applied. This is satisfied by WAL already present on the + standby from a base backup, archive restore, or prior + streaming, as well as WAL newly received from the primary. + This mode can only be used during recovery. diff --git a/src/backend/access/transam/xlogwait.c b/src/backend/access/transam/xlogwait.c index 6a27183c207..18f78338330 100644 --- a/src/backend/access/transam/xlogwait.c +++ b/src/backend/access/transam/xlogwait.c @@ -111,10 +111,27 @@ GetCurrentLSNForWaitType(WaitLSNType lsnType) return GetXLogReplayRecPtr(NULL); case WAIT_LSN_TYPE_STANDBY_WRITE: - return GetWalRcvWriteRecPtr(); + { + XLogRecPtr recptr = GetWalRcvWriteRecPtr(); + XLogRecPtr replay = GetXLogReplayRecPtr(NULL); + + /* + * Use the replay position as a floor. 
WAL up to the replay + * point is already on disk from a base backup, archive + * restore, or prior streaming, so there is no reason to wait + * for the walreceiver to re-receive it. + */ + return Max(recptr, replay); + } case WAIT_LSN_TYPE_STANDBY_FLUSH: - return GetWalRcvFlushRecPtr(NULL, NULL); + { + XLogRecPtr recptr = GetWalRcvFlushRecPtr(NULL, NULL); + XLogRecPtr replay = GetXLogReplayRecPtr(NULL); + + /* Same floor as standby_write; see comment above. */ + return Max(recptr, replay); + } case WAIT_LSN_TYPE_PRIMARY_FLUSH: return GetFlushRecPtr(NULL); diff --git a/src/test/recovery/t/049_wait_for_lsn.pl b/src/test/recovery/t/049_wait_for_lsn.pl index 0e74175f9eb..26790fda5be 100644 --- a/src/test/recovery/t/049_wait_for_lsn.pl +++ b/src/test/recovery/t/049_wait_for_lsn.pl @@ -674,4 +674,77 @@ for (my $i = 0; $i < 3; $i++) $wait_sessions[$i]->{run}->finish; } +# 9. Archive-only standby tests: verify standby_write/standby_flush work +# without a walreceiver. These exercise the replay-position floor in +# GetCurrentLSNForWaitType(). +# +# We set up a separate primary with archiving and an archive-only standby +# (has_restoring, no has_streaming), so no walreceiver ever starts and the +# shared walreceiver positions (writtenUpto, flushedUpto) stay at their +# zero-initialized values. + +my $arc_primary = PostgreSQL::Test::Cluster->new('arc_primary'); +$arc_primary->init(has_archiving => 1, allows_streaming => 1); +$arc_primary->start; + +$arc_primary->safe_psql('postgres', + "CREATE TABLE arc_test AS SELECT generate_series(1,10) AS a"); + +my $arc_backup_name = 'arc_backup'; +$arc_primary->backup($arc_backup_name); + +# Generate WAL that will be archived and replayed on the standby. +$arc_primary->safe_psql('postgres', + "INSERT INTO arc_test VALUES (generate_series(11, 20))"); +my $arc_target_lsn = + $arc_primary->safe_psql('postgres', "SELECT pg_current_wal_insert_lsn()"); + +# Force WAL to be archived by switching segments, then wait for archiving. 
+my $arc_segment = $arc_primary->safe_psql('postgres', + "SELECT pg_walfile_name(pg_current_wal_lsn())"); +$arc_primary->safe_psql('postgres', "SELECT pg_switch_wal()"); +$arc_primary->poll_query_until('postgres', + qq{SELECT last_archived_wal >= '$arc_segment' FROM pg_stat_archiver}, 't') + or die "Timed out waiting for WAL archiving on arc_primary"; + +# Create an archive-only standby: has_restoring but NOT has_streaming. +# No primary_conninfo means no walreceiver will start. +my $arc_standby = PostgreSQL::Test::Cluster->new('arc_standby'); +$arc_standby->init_from_backup($arc_primary, $arc_backup_name, + has_restoring => 1); +$arc_standby->start; + +# Wait for the standby to replay past our target LSN via archive recovery. +$arc_standby->poll_query_until('postgres', + qq{SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$arc_target_lsn') >= 0} +) or die "Timed out waiting for archive replay on arc_standby"; + +# Sanity: verify no walreceiver is running. +$output = $arc_standby->safe_psql('postgres', + "SELECT count(*) FROM pg_stat_wal_receiver"); +is($output, '0', "arc_standby has no walreceiver"); + +# 9a. Getter fallback: standby_write/standby_flush succeed immediately when +# the target LSN has already been replayed, even though writtenUpto and +# flushedUpto are zero. GetCurrentLSNForWaitType() returns +# Max(walrcv_pos, replay), so replay >= target satisfies the check on the +# first loop iteration without ever sleeping. 
+ +$output = $arc_standby->safe_psql( + 'postgres', qq[ + WAIT FOR LSN '${arc_target_lsn}' + WITH (MODE 'standby_write', timeout '3s', no_throw);]); +ok($output eq "success", + "standby_write succeeds on archive-only standby (getter fallback)"); + +$output = $arc_standby->safe_psql( + 'postgres', qq[ + WAIT FOR LSN '${arc_target_lsn}' + WITH (MODE 'standby_flush', timeout '3s', no_throw);]); +ok($output eq "success", + "standby_flush succeeds on archive-only standby (getter fallback)"); + +$arc_standby->stop; +$arc_primary->stop; + done_testing(); -- 2.39.5 (Apple Git-154)