From 0b76c2e0551601e9dfc08a1f9190b0552d4ca1dd Mon Sep 17 00:00:00 2001 From: Shlok Kyal Date: Mon, 10 Mar 2025 09:19:38 +0530 Subject: [PATCH v7] Restrict copying of invalidated replication slots. Previously, invalidated logical and physical replication slots could be copied using the pg_copy_logical_replication_slot and pg_copy_physical_replication_slot functions. Replication slots that were invalidated for reasons other than WAL removal retained their restart_lsn. This meant that a new slot copied from an invalidated slot could have a restart_lsn pointing to a WAL segment that might have already been removed. This commit restricts the copying of invalidated replication slots. Backpatch to v16, where slots could retain their restart_lsn when invalidated for reasons other than WAL removal. Author: Shlok Kyal Reviewed-by: vignesh C Reviewed-by: Zhijie Hou Reviewed-by: Peter Smith Reviewed-by: Masahiko Sawada Discussion: https://postgr.es/m/CANhcyEU65aH0VYnLiu%3DOhNNxhnhNhwcXBeT-jvRe1OiJTo_Ayg%40mail.gmail.com Backpatch-through: 16 --- doc/src/sgml/func.sgml | 7 +++-- src/backend/replication/slotfuncs.c | 31 ++++++++++++++++++- .../t/035_standby_logical_decoding.pl | 9 ++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index d8b27903593..d76730aeec1 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -29096,7 +29096,9 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset The copied physical slot starts to reserve WAL from the same LSN as the source slot. temporary is optional. If temporary - is omitted, the same value as the source slot is used. + is omitted, the same value as the source slot is used. Copy of an + invalidated slot is not allowed. Copy of an invalidated slot is not + allowed. @@ -29121,7 +29123,8 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset The failover option of the source logical slot is not copied and is set to false by default. This is to avoid the risk of being unable to continue logical replication - after failover to standby where the slot is being synchronized. + after failover to standby where the slot is being synchronized. Copy of + an invalidated slot is not allowed. diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 43c3cc7336b..106edbb0663 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -681,6 +681,13 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot) (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("cannot copy a replication slot that doesn't reserve WAL"))); + /* Cannot copy an invalidated replication slot */ + if (first_slot_contents.data.invalidated != RS_INVAL_NONE) + ereport(ERROR, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot copy invalidated replication slot \"%s\"", + NameStr(*src_name))); + /* Overwrite params from optional arguments */ if (PG_NARGS() >= 3) temporary = PG_GETARG_BOOL(2); @@ -690,7 +697,22 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot) plugin = NameStr(*(PG_GETARG_NAME(3))); } - /* Create new slot and acquire it */ + /* + * Create new slot and acquire it + * + * After slot is created there can be a race condition with function + * InvalidateObsoleteReplicationSlots, when the copied slot appear before + * source slot in array ReplicationSlotCtl->replication_slots. + * + * If just after slot creation, the source slot is invalidated due to + * some operation. The execution of InvalidateObsoleteReplicationSlots will + * wait for this function to release the copied slot. Then the source slot + * will be invalidated. So, the copying of source slot is successful in this + * case. + * + * The wal_status of copied slot is set to lost immediately and hence will + * not have any harm. + */ if (logical_slot) { /* @@ -782,6 +804,13 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot) NameStr(*src_name)), errhint("Retry when the source replication slot's confirmed_flush_lsn is valid."))); + /* Check if source slot became invalidated during the copy operation */ + if (second_slot_contents.data.invalidated != RS_INVAL_NONE) + ereport(ERROR, + errmsg("cannot copy replication slot \"%s\"", + NameStr(*src_name)), + errdetail("The source replication slot was invalidated during the copy operation.")); + /* Install copied values again */ SpinLockAcquire(&MyReplicationSlot->mutex); MyReplicationSlot->effective_xmin = copy_effective_xmin; diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl index aeb79f51e71..4eca17885d6 100644 --- a/src/test/recovery/t/035_standby_logical_decoding.pl +++ b/src/test/recovery/t/035_standby_logical_decoding.pl @@ -583,6 +583,15 @@ check_pg_recvlogical_stderr($handle, "can no longer get changes from replication slot \"vacuum_full_activeslot\"" ); +# Attempt to copy an invalidated logical replication slot +($result, $stdout, $stderr) = $node_standby->psql( + 'postgres', + qq[select pg_copy_logical_replication_slot('vacuum_full_inactiveslot', 'vacuum_full_inactiveslot_copy');], + replication => 'database'); +ok( $stderr =~ + /ERROR: cannot copy invalidated replication slot "vacuum_full_inactiveslot"/, + "invalidated slot cannot be copied"); + # Turn hot_standby_feedback back on change_hot_standby_feedback_and_wait_for_xmins(1, 1); -- 2.34.1