From e12031e0bdf098811d2fc99962a04c01a0ee77f9 Mon Sep 17 00:00:00 2001 From: Fabrice Chapuis Date: Sun, 7 Sep 2025 09:37:24 +0200 Subject: [PATCH v1] Fix-failover-slot-issue-when-doing-a-switchover. Add allow_overwrite parameter to pg_create_logical_replication_slot. If allow_overwrite is set to true then existing failover slot on standby will be dropped, then ensure main loop continues after drop and create a new slot. --- src/backend/catalog/system_functions.sql | 1 + src/backend/replication/logical/launcher.c | 2 +- src/backend/replication/logical/slotsync.c | 25 +++++++++++++++++++++- src/backend/replication/slot.c | 6 +++++- src/backend/replication/slotfuncs.c | 9 +++++--- src/backend/replication/walsender.c | 4 ++-- src/include/catalog/pg_proc.dat | 8 +++---- src/include/replication/slot.h | 8 ++++++- 8 files changed, 50 insertions(+), 13 deletions(-) diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql index 566f308..6cd3175 100644 --- a/src/backend/catalog/system_functions.sql +++ b/src/backend/catalog/system_functions.sql @@ -480,6 +480,7 @@ CREATE OR REPLACE FUNCTION pg_create_logical_replication_slot( IN temporary boolean DEFAULT false, IN twophase boolean DEFAULT false, IN failover boolean DEFAULT false, + IN allow_overwrite boolean DEFAULT false, OUT slot_name name, OUT lsn pg_lsn) RETURNS RECORD LANGUAGE INTERNAL diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 37377f7..840741e 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -1455,7 +1455,7 @@ CreateConflictDetectionSlot(void) errmsg("creating replication conflict detection slot")); ReplicationSlotCreate(CONFLICT_DETECTION_SLOT, false, RS_PERSISTENT, false, - false, false); + false, false,false); LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index caa003c..2a8692e 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -626,6 +626,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid) ReplicationSlot *slot; XLogRecPtr latestFlushPtr; bool slot_updated = false; + bool allow_overwrite = false; /* * Make sure that concerned WAL is received and flushed before syncing @@ -656,15 +657,36 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid) SpinLockAcquire(&slot->mutex); synced = slot->data.synced; + allow_overwrite = slot->data.allow_overwrite; SpinLockRelease(&slot->mutex); /* User-created slot with the same name exists, raise ERROR. */ if (!synced) - ereport(ERROR, + { + /* Check if we need to overwrite an existing logical slot */ + if (allow_overwrite) + { + /* Get rid of a replication slot that is no longer wanted */ + ReplicationSlotDrop(remote_slot->name,true); + + /* Get rid of a replication slot that is no longer wanted */ + ereport(WARNING, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("slot \"%s\" already exists" + " on the standby but it will be dropped because " + "flag allow_overwrite is set to true", + remote_slot->name)); + + /* Going back to the main loop after droping the failover slot */ + return false; + } + else + ereport(ERROR, errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("exiting from slot synchronization because same" " name slot \"%s\" already exists on the standby", remote_slot->name)); + } /* * The slot has been synchronized before. @@ -760,6 +782,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid) ReplicationSlotCreate(remote_slot->name, true, RS_TEMPORARY, remote_slot->two_phase, remote_slot->failover, + allow_overwrite, true); /* For shorter lines. */ diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index fd0fdb9..9a85fb0 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -351,7 +351,7 @@ IsSlotForConflictCheck(const char *name) void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, - bool two_phase, bool failover, bool synced) + bool two_phase, bool failover, bool allow_overwrite, bool synced) { ReplicationSlot *slot = NULL; int i; @@ -445,6 +445,7 @@ ReplicationSlotCreate(const char *name, bool db_specific, slot->data.two_phase = two_phase; slot->data.two_phase_at = InvalidXLogRecPtr; slot->data.failover = failover; + slot->data.allow_overwrite = allow_overwrite; slot->data.synced = synced; /* and then data only present in shared memory */ @@ -500,6 +501,9 @@ ReplicationSlotCreate(const char *name, bool db_specific, */ LWLockRelease(ReplicationSlotAllocationLock); + elog(DEBUG1, "logical replication slot %s created with option allow_overwrite to %s", + NameStr(slot->data.name), slot->data.allow_overwrite ? "true" : "false"); + /* Let everybody know we've modified this slot */ ConditionVariableBroadcast(&slot->active_cv); } diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 69f4c61..3f53502 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -41,7 +41,7 @@ create_physical_replication_slot(char *name, bool immediately_reserve, /* acquire replication slot, this will check for conflicting names */ ReplicationSlotCreate(name, false, temporary ? RS_TEMPORARY : RS_PERSISTENT, false, - false, false); + false, false,false); if (immediately_reserve) { @@ -116,7 +116,7 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS) static void create_logical_replication_slot(char *name, char *plugin, bool temporary, bool two_phase, - bool failover, + bool failover, bool allow_overwrite, XLogRecPtr restart_lsn, bool find_startpoint) { @@ -134,7 +134,7 @@ create_logical_replication_slot(char *name, char *plugin, */ ReplicationSlotCreate(name, true, temporary ? RS_TEMPORARY : RS_EPHEMERAL, two_phase, - failover, false); + failover, allow_overwrite, false); /* * Create logical decoding context to find start point or, if we don't @@ -173,6 +173,7 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS) bool temporary = PG_GETARG_BOOL(2); bool two_phase = PG_GETARG_BOOL(3); bool failover = PG_GETARG_BOOL(4); + bool allow_overwrite = PG_GETARG_BOOL(5); Datum result; TupleDesc tupdesc; HeapTuple tuple; @@ -191,6 +192,7 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS) temporary, two_phase, failover, + allow_overwrite, InvalidXLogRecPtr, true); @@ -726,6 +728,7 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot) temporary, false, false, + false, src_restart_lsn, false); } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index e3dce9d..a2db1e3 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1201,7 +1201,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) { ReplicationSlotCreate(cmd->slotname, false, cmd->temporary ? RS_TEMPORARY : RS_PERSISTENT, - false, false, false); + false, false, false, false); if (reserve_wal) { @@ -1232,7 +1232,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) */ ReplicationSlotCreate(cmd->slotname, true, cmd->temporary ? RS_TEMPORARY : RS_EPHEMERAL, - two_phase, failover, false); + two_phase, failover, false, false); /* * Do options check early so that we can bail before calling the diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 118d6da..ee55572 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11498,10 +11498,10 @@ { oid => '3786', descr => 'set up a logical replication slot', proname => 'pg_create_logical_replication_slot', provolatile => 'v', proparallel => 'u', prorettype => 'record', - proargtypes => 'name name bool bool bool', - proallargtypes => '{name,name,bool,bool,bool,name,pg_lsn}', - proargmodes => '{i,i,i,i,i,o,o}', - proargnames => '{slot_name,plugin,temporary,twophase,failover,slot_name,lsn}', + proargtypes => 'name name bool bool bool bool', + proallargtypes => '{name,name,bool,bool,bool,bool,name,pg_lsn}', + proargmodes => '{i,i,i,i,i,i,o,o}', + proargnames => '{slot_name,plugin,temporary,twophase,failover,allow_overwrite,slot_name,lsn}', prosrc => 'pg_create_logical_replication_slot' }, { oid => '4222', descr => 'copy a logical replication slot, changing temporality and plugin', diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index e8fc342..eef5c86 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -141,6 +141,12 @@ typedef struct ReplicationSlotPersistentData * for logical slots on the primary server. */ bool failover; + + /* + * Allow Postgres to drop logical replication slot on standby server to ensure + * creation of new failover slot when remote sync_replication_slots is set to true. + */ + bool allow_overwrite; } ReplicationSlotPersistentData; /* @@ -301,7 +307,7 @@ extern void ReplicationSlotsShmemInit(void); /* management of individual slots */ extern void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, - bool two_phase, bool failover, + bool two_phase, bool failover, bool allow_overwrite, bool synced); extern void ReplicationSlotPersist(void); extern void ReplicationSlotDrop(const char *name, bool nowait); -- 2.47.3