From 599d429193c6810a049cd9a948f29bc10e9bccf2 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Mon, 24 Feb 2025 10:21:12 -0800 Subject: [PATCH v7] Restrict copying of invalidated replication slots. Previously, invalidated logical and physical replication slots could be copied using the pg_copy_logical_replication_slot and pg_copy_physical_replication_slot functions. Replication slots that were invalidated for reasons other than WAL removal retained their restart_lsn. This meant that a new slot copied from an invalidated slot could have a restart_lsn pointing to a WAL segment that might have already been removed. This commit restricts the copying of invalidated replication slots. Backpatch to v16, where slots could retain their restart_lsn when invalidated for reasons other than WAL removal. Author: Shlok Kyal Reviewed-by: vignesh C Reviewed-by: Zhijie Hou Reviewed-by: Peter Smith Reviewed-by: Masahiko Sawada Discussion: https://postgr.es/m/CANhcyEU65aH0VYnLiu%3DOhNNxhnhNhwcXBeT-jvRe1OiJTo_Ayg%40mail.gmail.com Backpatch-through: 16 --- doc/src/sgml/func.sgml | 4 ++- src/backend/replication/slotfuncs.c | 31 ++++++++++++++++++- .../t/035_standby_logical_decoding.pl | 9 ++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 042b225b920..de6d45d0285 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -27259,7 +27259,8 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset The copied physical slot starts to reserve WAL from the same LSN as the source slot. temporary is optional. If temporary - is omitted, the same value as the source slot is used. + is omitted, the same value as the source slot is used. Copy of an + invalidated slot is not allowed. @@ -27281,6 +27282,7 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset from the same LSN as the source logical slot. Both temporary and plugin are optional; if they are omitted, the values of the source slot are used. + Copy of an invalidated slot is not allowed. diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 6035cf48160..5eec788e88a 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -756,6 +756,13 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot) (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("cannot copy a replication slot that doesn't reserve WAL"))); + /* Cannot copy an invalidated replication slot */ + if (first_slot_contents.data.invalidated != RS_INVAL_NONE) + ereport(ERROR, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot copy invalidated replication slot \"%s\"", + NameStr(*src_name))); + /* Overwrite params from optional arguments */ if (PG_NARGS() >= 3) temporary = PG_GETARG_BOOL(2); @@ -765,7 +772,22 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot) plugin = NameStr(*(PG_GETARG_NAME(3))); } - /* Create new slot and acquire it */ + /* + * Create new slot and acquire it + * + * After slot is created there can be a race condition with function + * InvalidateObsoleteReplicationSlots, when the copied slot appear before + * source slot in array ReplicationSlotCtl->replication_slots. + * + * If just after slot creation, the source slot is invalidated due to + * some operation. The execution of InvalidateObsoleteReplicationSlots will + * wait for this function to release the copied slot. Then the source slot + * will be invalidated. So, the copying of source slot is successful in this + * case. + * + * The wal_status of copied slot is set to lost immediately and hence will + * not have any harm. + */ if (logical_slot) { /* @@ -843,6 +865,13 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot) NameStr(*src_name)), errhint("Retry when the source replication slot's confirmed_flush_lsn is valid."))); + /* Check if source slot became invalidated during the copy operation */ + if (second_slot_contents.data.invalidated != RS_INVAL_NONE) + ereport(ERROR, + errmsg("cannot copy replication slot \"%s\"", + NameStr(*src_name)), + errdetail("The source replication slot was invalidated during the copy operation.")); + /* Install copied values again */ SpinLockAcquire(&MyReplicationSlot->mutex); MyReplicationSlot->effective_xmin = copy_effective_xmin; diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl index 8120dfc2132..82ad7ce0c2b 100644 --- a/src/test/recovery/t/035_standby_logical_decoding.pl +++ b/src/test/recovery/t/035_standby_logical_decoding.pl @@ -563,6 +563,15 @@ check_pg_recvlogical_stderr($handle, "can no longer get changes from replication slot \"vacuum_full_activeslot\"" ); +# Attempt to copy an invalidated logical replication slot +($result, $stdout, $stderr) = $node_standby->psql( + 'postgres', + qq[select pg_copy_logical_replication_slot('vacuum_full_inactiveslot', 'vacuum_full_inactiveslot_copy');], + replication => 'database'); +ok( $stderr =~ + /ERROR: cannot copy invalidated replication slot "vacuum_full_inactiveslot"/, + "invalidated slot cannot be copied"); + # Turn hot_standby_feedback back on change_hot_standby_feedback_and_wait_for_xmins(1, 1); -- 2.34.1