diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 6e2830603ac..d78a13f27ac 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -650,18 +650,15 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, /* * Do not allow users to enable the failover and two_phase options - * together. - * - * See comments atop the similar check in ReplicationSlotCreate() for a - * detailed reason. + * together. See comments atop slotsync.c for details. */ if (opts.twophase && opts.failover) ereport(ERROR, errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot enable both \"%s\" and \"%s\" options at CREATE SUBSCRIPTION", "failover", "two_phase"), - errhint("The \"%s\" option can be enabled after \"%s\" state is ready using ALTER SUBSCRIPTION.", - "failover", "two_phase")); + errhint("Use ALTER SUBSCRIPTION ... SET (failover = true) to enable \"%s\" after two_phase state is ready", + "failover")); /* * If built with appropriate switch, whine when regression-testing @@ -1261,24 +1258,8 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, "failover"))); /* - * Do not allow users to enable the failover when the - * two_phase state is still pending i.e., the replication - * slot’s two_phase option has not yet been finalized. - * - * In most cases, the restriction in - * ReplicationSlotAlter() is sufficient to prevent - * enabling failover for a slot with two_phase enabled. - * However, this additional check is necessary to handle a - * race condition, when a user runs CREATE SUBSCRIPTION - * with two_phase=true, but the slot's two_phase flag - * hasn't been set yet, a concurrent attempt to do ALTER - * SUBSCRIPTION(failover=true) may bypass the check in - * ReplicationSlotAlter(). - * - * For a detailed explanation of why enforcing this - * restriction is important when combining two_phase and - * failover, refer to the comments atop similar check in - * ReplicationSlotCreate(). + * Do not allow users to enable the failover until two_phase + * state reaches READY state. */ if (sub->twophasestate == LOGICALREP_TWOPHASE_STATE_PENDING && opts.failover) diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 73fe3f56af2..011c244ac35 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -42,6 +42,16 @@ * Any standby synchronized slots will be dropped if they no longer need * to be synchronized. See comment atop drop_local_obsolete_slots() for more * details. + * + * Regarding two-phase enabled slots, we need to note that the two_phase_at + * field of a slot is not synchronized. In the absence of two_phase_at, + * logical decoding could incorrectly identify prepared transactions as + * having been replicated to the subscriber after promotion, resulting in + * their omission. Consequently, both two_phase and failover features + * cannot be simultaneously enabled during slot creation. Failover can only + * be enabled once we can ensure that no transactions were prepared prior + * to two_phase_at on the primary. These restrictions are enforced through + * checks in ReplicationSlotCreate() and ReplicationSlotAlter(). *--------------------------------------------------------------------------- */ diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 6b763ae5dc6..1834c86aaac 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -346,13 +346,7 @@ ReplicationSlotCreate(const char *name, bool db_specific, /* * Do not allow users to enable both failover and two_phase for slots. - * - * This is because the two_phase_at field of a slot, which tracks the - * LSN, from which two-phase decoding starts, is not synchronized to - * standby servers. Without two_phase_at, the logical decoding might - * incorrectly identify prepared transaction as already replicated to - * the subscriber after promotion of standby server, causing them to - * be skipped. + * Please see the comments atop slotsync.c for details. * * However, both failover and two_phase enabled slots can be created * during slot synchronization because we need to retain the same @@ -870,18 +864,14 @@ ReplicationSlotAlter(const char *name, bool failover) /* * Do not allow users to enable failover for a two_phase enabled slot - * where slot's restart_lsn is less than two_phase_at. In such cases, - * there is a risk that transactions prepared before two_phase_at exist - * and would be skipped during decoding. - * - * See comments atop the similar check in ReplicationSlotCreate() for a - * detailed reason. + * if there are potentially un-decoded transactions that are prepared + * before two_phase_at. See comments atop slotsync.c for details. */ if (failover && MyReplicationSlot->data.two_phase && MyReplicationSlot->data.restart_lsn < MyReplicationSlot->data.two_phase_at) ereport(ERROR, errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot enable failover for a two-phase enabled replication slot"), + errmsg("cannot enable failover for a two-phase enabled replication slot due to unconsumed changes"), errdetail("The slot need to consume change upto %X/%X to enable failover.", LSN_FORMAT_ARGS(MyReplicationSlot->data.two_phase_at)));