From de4deb80756e464cd0c732c4bc2d415d3c5a5074 Mon Sep 17 00:00:00 2001 From: Ajin Cherian Date: Fri, 3 Oct 2025 19:14:06 +1000 Subject: [PATCH v5] Reset synced slots when a standby is promoted. On promotion, reset any slots which have the 'synced' flag set so that the primary starts with synced flag set false. This ensures consistent behavior across all switchovers. Also handle the possibility of the server crashing before all slots are reset by resetting slots on the primary on a restart. --- doc/src/sgml/system-views.sgml | 3 +- src/backend/access/transam/xlog.c | 19 +++-- src/backend/access/transam/xlogrecovery.c | 9 --- src/backend/replication/slot.c | 72 +++++++++++++++++++ src/include/replication/slot.h | 1 + .../t/040_standby_failover_slots_sync.pl | 6 +- 6 files changed, 92 insertions(+), 18 deletions(-) diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml index 4187191ea74..ff9384127cd 100644 --- a/doc/src/sgml/system-views.sgml +++ b/doc/src/sgml/system-views.sgml @@ -3031,8 +3031,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on - the primary is default false for all slots but may (if leftover from a - promoted standby) also be true. + the primary is false for all slots. diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index eceab341255..02106da3108 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5642,7 +5642,8 @@ StartupXLOG(void) /* * Initialize replication slots, before there's a chance to remove - * required resources. + * required resources. Clear any leftover 'synced' flags on replication + * slots when on the primary. 
*/ StartupReplicationSlots(); @@ -6244,13 +6245,21 @@ StartupXLOG(void) WalSndWakeup(true, true); /* - * If this was a promotion, request an (online) checkpoint now. This isn't - * required for consistency, but the last restartpoint might be far back, - * and in case of a crash, recovering from it might take a longer than is - * appropriate now that we're not in standby mode anymore. + * If this was a promotion, first reset the synced flag for any logical + * slots if it's set. Although the synced flag for logical slots is reset + * on every primary restart, we also need to handle it during promotion + * since existing backend sessions remain active even after promotion, + * and a restart may not happen for some time. + * Then request an (online) checkpoint. The checkpoint isn't required for + * consistency, but the last restartpoint might be far back, and in case + * of a crash, recovery could take longer than desirable now that we're not + * in standby mode anymore. */ if (promoted) + { + ResetSyncedSlots(); RequestCheckpoint(CHECKPOINT_FORCE); + } } /* diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 52ff4d119e6..6e975c12a97 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -1482,15 +1482,6 @@ FinishWalRecovery(void) /* * Shutdown the slot sync worker to drop any temporary slots acquired by * it and to prevent it from keep trying to fetch the failover slots. - * - * We do not update the 'synced' column in 'pg_replication_slots' system - * view from true to false here, as any failed update could leave 'synced' - * column false for some slots. This could cause issues during slot sync - * after restarting the server as a standby. While updating the 'synced' - * column after switching to the new timeline is an option, it does not - * simplify the handling for the 'synced' column. 
Therefore, we retain the - * 'synced' column as true after promotion as it may provide useful - * information about the slot origin. */ ShutDownSlotSync(); diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index fd0fdb96d42..2e9f286ec07 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -852,6 +852,59 @@ restart: LWLockRelease(ReplicationSlotControlLock); } +/* + * ResetSyncedSlots() + * + * Reset the synced flag to false for all replication slots where it is + * currently true. Currently this function is only invoked during promotion. + */ +void +ResetSyncedSlots(void) +{ + int i; + + /* + * Iterate through all replication slot entries and reset synced ones + */ + for (i = 0; i < max_replication_slots; i++) + { + ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i]; + + /* Skip inactive/unused slots */ + if (!s->in_use) + continue; + + /* we're only interested in logical slots */ + if (!SlotIsLogical(s)) + continue; + + /* Check if this slot was marked as synced */ + if (s->data.synced) + { + /* Acquire the slot */ + ReplicationSlotAcquire(NameStr(s->data.name), false, true); + + /* Reset the synced flag under spinlock protection */ + SpinLockAcquire(&s->mutex); + s->data.synced = false; + SpinLockRelease(&s->mutex); + + /* Mark dirty and save outside the spinlock */ + ReplicationSlotMarkDirty(); + ReplicationSlotSave(); + + ereport(DEBUG1, + (errmsg("synced flag reset for replication slot \"%s\"" + " during promotion", + NameStr(s->data.name)))); + + /* Release the slot */ + ReplicationSlotRelease(); + } + } + +} + /* * Permanently drop replication slot identified by the passed in name. */ @@ -2690,6 +2743,25 @@ RestoreSlotFromDisk(const char *name) ReplicationSlotSetInactiveSince(slot, now, false); restored = true; + + /* + * A primary should never have a slot with the 'synced' flag set. + * Even if this server was previously a standby, the flag should + * have been cleared during promotion. 
The only case it may still + * be set is if the server crashed or failed during promotion before + * the flag could be reset. + * In that case, reset it now and mark the slot dirty. + */ + if (!StandbyMode && slot->data.synced) + { + slot->data.synced = false; + slot->just_dirtied = true; + slot->dirty = true; + ereport(DEBUG1, + (errmsg("synced flag reset for replication slot \"%s\"", + NameStr(slot->data.name)))); + } + break; } diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index fe62162cde3..7902d51781d 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -336,6 +336,7 @@ extern int ReplicationSlotIndex(ReplicationSlot *slot); extern bool ReplicationSlotName(int index, Name name); extern void ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char *syncslotname, Size szslot); extern void ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok); +extern void ResetSyncedSlots(void); extern void StartupReplicationSlots(void); extern void CheckPointReplicationSlots(bool is_shutdown); diff --git a/src/test/recovery/t/040_standby_failover_slots_sync.pl b/src/test/recovery/t/040_standby_failover_slots_sync.pl index 2c61c51e914..29a48019eda 100644 --- a/src/test/recovery/t/040_standby_failover_slots_sync.pl +++ b/src/test/recovery/t/040_standby_failover_slots_sync.pl @@ -932,10 +932,12 @@ my $standby1_conninfo = $standby1->connstr . ' dbname=postgres'; $subscriber1->safe_psql('postgres', "ALTER SUBSCRIPTION regress_mysub1 CONNECTION '$standby1_conninfo';"); -# Confirm the synced slot 'lsub1_slot' is retained on the new primary +# Confirm that the synced slots 'lsub1_slot' and 'snap_test_slot' are retained on the new primary +# and the synced flag is cleared on promotion. 
is( $standby1->safe_psql( 'postgres', - q{SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN ('lsub1_slot', 'snap_test_slot') AND synced AND NOT temporary;} + q{SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN ('lsub1_slot', 'snap_test_slot') AND NOT synced AND NOT temporary;} + ), 't', 'synced slot retained on the new primary'); -- 2.47.3