From bca1159e246287dfd6469b2cc7e53609de25fae7 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot Date: Tue, 20 Feb 2024 08:11:47 +0000 Subject: [PATCH v2] Add regression test for 818fefd8fd --- src/backend/replication/slot.c | 8 ++ .../t/035_standby_logical_decoding.pl | 106 +++++++++++++++++- 2 files changed, 112 insertions(+), 2 deletions(-) 6.9% src/backend/replication/ 93.0% src/test/recovery/t/ diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 2614f98ddd..9d379f154d 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -53,6 +53,7 @@ #include "storage/proc.h" #include "storage/procarray.h" #include "utils/builtins.h" +#include "utils/injection_point.h" /* * Replication slot on-disk data structure. @@ -1658,6 +1659,13 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, last_signaled_pid = active_pid; terminated = true; conflict_prev = conflict; + /* + * This injection needs to be after the kill to ensure that + * the slot is not "active" anymore. It also has to be after + * ReportSlotInvalidation() to ensure the invalidation message + * is reported. + */ + INJECTION_POINT("terminate-process-holding-slot"); } /* Wait until the slot is released. */ diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl index 0710bccc17..4204be60f5 100644 --- a/src/test/recovery/t/035_standby_logical_decoding.pl +++ b/src/test/recovery/t/035_standby_logical_decoding.pl @@ -733,14 +733,116 @@ change_hot_standby_feedback_and_wait_for_xmins(1, 1); ################################################## # Recovery conflict: Invalidate conflicting slots, including in-use slots -# Scenario 6: incorrect wal_level on primary. +# Scenario 6: Ensure race condition described in 818fefd8fd is fixed. +################################################## +SKIP: +{ + skip "Injection points not supported by this build", 1 + if ($ENV{enable_injection_points} ne 'yes'); + + # Get the position to search from in the standby logfile + $logstart = -s $node_standby->logfile; + + # Drop the slots, re-create them, change hot_standby_feedback, + # check xmin and catalog_xmin values, make slot active and reset stat. + reactive_slots_change_hfs_and_wait_for_xmins('pruning_', 'injection_', 0, 1); + + # Create the injection_points extension + $node_primary->safe_psql('testdb', 'CREATE EXTENSION injection_points;'); + + # Wait until the extension has been created on the standby + $node_primary->wait_for_replay_catchup($node_standby); + + # Attach the injection point + $node_standby->safe_psql('testdb', + "SELECT injection_points_attach('terminate-process-holding-slot', 'wait');"); + + # Trigger a conflict and insert an XLOG_RUNNING_XACTS before triggering + # the vacuum. + $node_primary->safe_psql('testdb', qq[CREATE TABLE injection_test(x integer); + DROP TABLE injection_test; + SELECT pg_log_standby_snapshot();]); + + # Now launch the vacuum + wait_until_vacuum_can_remove('', 'CREATE TABLE injection_test2(x integer);', 'pg_class'); + + # Wait until the startup process reaches the injection point + $node_standby->wait_for_event('startup', 'terminate-process-holding-slot'); + + # Note: $node_primary->wait_for_replay_catchup($node_standby) would be + # hanging here due to the injection point, so check the log instead. + + ok( $node_standby->log_contains( + "terminating process .* to release replication slot \"injection_activeslot\"", $logstart), + "terminating process holding the active slot is logged with injection point"); + + # Note that at this point the termination message of the process holding the + # active slot has been emitted. + + # Extract xid_horizon from the logfile + my $log_contents = slurp_file($node_standby->logfile, $logstart); + (my $xid_horizon) = $log_contents =~ m/The slot conflicted with xid horizon ([0-9]*)./ + or die "could not get xid horizon"; + + # Ensure the slot is not active + $node_standby->poll_query_until('testdb', + "SELECT active_pid is NULL from pg_catalog.pg_replication_slots where slot_name = 'injection_activeslot'" + ) or die "injection_activeslot is still active"; + + # Decode changes from the slot to reach LogicalConfirmReceivedLocation() + $node_standby->safe_psql('testdb', + qq[SELECT pg_logical_slot_get_changes('injection_activeslot', NULL, NULL);] + ); + + # Wait until catalog_xmin advances after the xid_horizon. That means that + # no conflict would be reported without the fix in 818fefd8fd. + $node_standby->poll_query_until('testdb', + "SELECT (SELECT catalog_xmin::text::int - $xid_horizon from pg_catalog.pg_replication_slots where slot_name = 'injection_activeslot') > 0" + ) or die "catalog_xmin did not advance"; + + # Get the position to search from in the standby logfile + $logstart = -s $node_standby->logfile; + + # Wakeup the injection point + $node_standby->safe_psql('testdb', "SELECT injection_points_wakeup('terminate-process-holding-slot');"); + + # Wait for the standby to catchup + $node_primary->wait_for_replay_catchup($node_standby); + + # Check invalidation in the logfile for the active slot + ok( $node_standby->log_contains( + "invalidating obsolete replication slot \"injection_activeslot\"", + $logstart), + "activeslot slot invalidation is logged with injection point"); + + # Verify conflict_reason is 'rows_removed' in pg_replication_slots + check_slots_conflict_reason('injection_', 'rows_removed'); + + # Detach from the injection point + $node_standby->safe_psql('testdb', + "SELECT injection_points_detach('terminate-process-holding-slot');"); + + # Turn hot_standby_feedback back on + change_hot_standby_feedback_and_wait_for_xmins(1, 1); +} + +################################################## +# Recovery conflict: Invalidate conflicting slots, including in-use slots +# Scenario 7: incorrect wal_level on primary. ################################################## # get the position to search from in the standby logfile $logstart = -s $node_standby->logfile; # drop the logical slots -drop_logical_slots('pruning_'); +if ($ENV{enable_injection_points} ne 'yes') +{ + drop_logical_slots('pruning_'); +} +else +{ + drop_logical_slots('injection_'); +} # create the logical slots create_logical_slots($node_standby, 'wal_level_'); -- 2.34.1