From 3943d750a8883a2d20ae39ca8caa87fe5bf07971 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy
Date: Fri, 3 Apr 2026 18:55:45 +0000
Subject: [PATCH v9 2/2] Add more tests for XID age slot invalidation

Consume XIDs up to wraparound WARNING limits with max_slot_xid_age
matching vacuum_failsafe_age (1.6B). Verify that autovacuum
invalidates the inactive replication slot (XID-age-based
invalidation), unblocks datfrozenxid advancement, and prevents
wraparound without any intervention.
---
 src/test/recovery/Makefile                |   3 +-
 src/test/recovery/t/019_replslot_limit.pl | 144 ++++++++++++++++++++++
 2 files changed, 146 insertions(+), 1 deletion(-)

diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile
index d41aaaf8ae1..5c3d2c89941 100644
--- a/src/test/recovery/Makefile
+++ b/src/test/recovery/Makefile
@@ -12,7 +12,8 @@
 EXTRA_INSTALL=contrib/pg_prewarm \
 	contrib/pg_stat_statements \
 	contrib/test_decoding \
-	src/test/modules/injection_points
+	src/test/modules/injection_points \
+	src/test/modules/xid_wraparound
 
 subdir = src/test/recovery
 top_builddir = ../../..
diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl
index 8f0540a3c8b..8ac5f1f699d 100644
--- a/src/test/recovery/t/019_replslot_limit.pl
+++ b/src/test/recovery/t/019_replslot_limit.pl
@@ -715,4 +715,148 @@ $primary5->stop;
 # Testcase end: Invalidate logical slot on standby due to max_slot_xid_age GUC
 # =============================================================================
 
+# =================================================================================
+# Testcase start: XID-age-based slot invalidation with autovacuum (production-like)
+
+# Standby sets slot xmin via HS feedback, disconnects, XIDs are consumed.
+# max_slot_xid_age is set to vacuum_failsafe_age (1.6B) so autovacuum
+# invalidates the slot before entering failsafe mode, unblocking
+# datfrozenxid advancement and avoiding XID wraparound without manual
+# VACUUM or downtime.
+
+# Verify that the server log shows an autovacuum worker invalidating the slot
+# because of its XID age.
+#
+# $log_offset is optional: when given, only the part of the log starting at
+# that byte offset is examined; otherwise the whole file is read.
+# $consumed_xids is only used in the diagnostic note.
+sub verify_slot_xid_aged_invalidation_in_server_log
+{
+	my ($node, $slot_name, $max_age, $consumed_xids, $log_offset) = @_;
+
+	my $log = slurp_file($node->logfile, $log_offset);
+
+	my $detail_re =
+	  qr/The slot's (?:catalog )?xmin age of (\d+) exceeds the configured "max_slot_xid_age" of $max_age by (\d+) transactions/;
+
+	# Verify the invalidation was performed by an autovacuum worker
+	like(
+		$log,
+		qr/autovacuum worker\[\d+\] LOG:\s+invalidating obsolete replication slot "\Q$slot_name\E"/,
+		"server log: $slot_name invalidated by autovacuum worker");
+
+	# Verify DETAIL shows the xmin age exceeding max_slot_xid_age
+	like(
+		$log,
+		qr/autovacuum worker\[\d+\] DETAIL:\s+$detail_re/,
+		"server log: DETAIL shows xmin age exceeds max_slot_xid_age $max_age");
+
+	# Report the logged ages.  Take the captures from the match's return list
+	# rather than from $1/$2, which are not reset when a match fails.
+	my ($log_xid_age, $exceeded_by) = $log =~ $detail_re;
+	$log_xid_age //= 'N/A';
+	$exceeded_by //= 'N/A';
+	note "xid_age from server log=$log_xid_age, exceeded_by=$exceeded_by, max_slot_xid_age=$max_age, consumed=$consumed_xids XIDs";
+}
+
+# Wait for the slot to be invalidated (see wait_for_xid_aged_invalidation),
+# verify the server log, and then wait for autovacuum to advance datfrozenxid.
+# Does nothing when $max_age is 0.  $log_offset is optional and is passed on
+# to verify_slot_xid_aged_invalidation_in_server_log().
+sub verify_invalidation_and_recovery
+{
+	my ($node, $slot_name, $max_age, $consumed_xids, $log_offset) = @_;
+
+	return if $max_age == 0;
+
+	wait_for_xid_aged_invalidation($node, $slot_name);
+	ok(1, 'autovacuum invalidated slot due to xid_aged');
+
+	verify_slot_xid_aged_invalidation_in_server_log($node, $slot_name,
+		$max_age, $consumed_xids, $log_offset);
+
+	# Wait for autovacuum to advance datfrozenxid until no database is left
+	# with a datfrozenxid older than 2 billion XIDs.
+	$node->poll_query_until(
+		'postgres', qq[
+		SELECT NOT EXISTS (
+			SELECT 1 FROM pg_database
+			WHERE age(datfrozenxid) > 2000000000
+		);
+	]) or die "Timed out waiting for autovacuum to advance datfrozenxid in all databases";
+}
+
+my $primary6 = PostgreSQL::Test::Cluster->new('primary6');
+$primary6->init(allows_streaming => 'logical');
+
+$max_slot_xid_age = 1600000000;    # matches vacuum_failsafe_age default
+$primary6->append_conf(
+	'postgresql.conf', qq{
+max_slot_xid_age = $max_slot_xid_age
+autovacuum_naptime = 1s
+});
+
+$primary6->start;
+$primary6->safe_psql('postgres', "CREATE EXTENSION xid_wraparound");
+
+$backup_name = 'backup6';
+$primary6->backup($backup_name);
+
+my $standby6 = PostgreSQL::Test::Cluster->new('standby6');
+$standby6->init_from_backup($primary6, $backup_name, has_streaming => 1);
+$standby6->append_conf(
+	'postgresql.conf', q{
+primary_slot_name = 'sb6_slot'
+hot_standby_feedback = on
+wal_receiver_status_interval = 1
+});
+
+$primary6->safe_psql('postgres',
+	"SELECT pg_create_physical_replication_slot('sb6_slot', true)");
+
+$standby6->start;
+
+$primary6->safe_psql('postgres',
+	"CREATE TABLE tab_int6 AS SELECT generate_series(1,10) AS a");
+$primary6->wait_for_catchup($standby6);
+
+$primary6->poll_query_until(
+	'postgres', qq[
+	SELECT xmin IS NOT NULL FROM pg_replication_slots
+	WHERE slot_name = 'sb6_slot';
+]) or die "Timed out waiting for sb6_slot xmin from HS feedback";
+
+# Stop standby; slot xmin persists and holds back datfrozenxid
+$standby6->stop;
+
+# Consume XIDs in 50M chunks; autovacuum (naptime=1s) will invalidate the
+# slot once xmin age exceeds max_slot_xid_age.  Remember where the server
+# log ends now so that only messages logged from here on are checked.
+my $logstart6 = -s $primary6->logfile;
+my $chunk = 50_000_000;
+my $max_xids = 2_200_000_000;
+my $consumed = 0;
+
+while ($consumed < $max_xids)
+{
+	$primary6->safe_psql('postgres', "SELECT consume_xids($chunk)");
+	$consumed += $chunk;
+	my $remaining = $max_xids - $consumed;
+	note "consumed $consumed / $max_xids XIDs ($remaining remaining)";
+}
+
+verify_invalidation_and_recovery($primary6, 'sb6_slot', $max_slot_xid_age,
+	$consumed, $logstart6);
+
+# Consume 1B more XIDs.  Together with the 2.2B consumed above that makes
+# 3.2B, well beyond the 2^31 (~2.1B) XIDs after which xidStopLimit would
+# have stopped XID assignment had datfrozenxid not advanced.
+$primary6->safe_psql('postgres', "SELECT consume_xids(1000000000)");
+ok(1, 'writes succeed after autovacuum invalidated the slot');
+
+$primary6->stop;
+
+# Testcase end: XID-age-based slot invalidation with autovacuum (production-like)
+# ================================================================================
+
 done_testing();
-- 
2.47.3