From d4bb208e621c0f47500fd1b4542a5c10ebd2ec59 Mon Sep 17 00:00:00 2001 From: Ajin Cherian Date: Fri, 5 Apr 2024 06:47:18 -0400 Subject: [PATCH v6 1/4] Allow altering of two_phase option of a SUBSCRIPTION This patch allows a user to alter the two_phase option of a subscription, provided no uncommitted prepared transactions are pending on that subscription. Author: Cherian Ajin, Hayato Kuroda --- doc/src/sgml/ref/alter_subscription.sgml | 11 +-- src/backend/access/transam/twophase.c | 43 ++++++++++++ src/backend/commands/subscriptioncmds.c | 62 +++++++++++++---- .../libpqwalreceiver/libpqwalreceiver.c | 7 +- src/backend/replication/logical/launcher.c | 21 ++++++ src/backend/replication/logical/worker.c | 2 +- src/backend/replication/slot.c | 19 +++++- src/backend/replication/walsender.c | 20 ++++-- src/bin/psql/tab-complete.c | 2 +- src/include/access/twophase.h | 3 + src/include/replication/slot.h | 3 +- src/include/replication/walreceiver.h | 5 +- src/include/replication/worker_internal.h | 1 + src/test/regress/expected/subscription.out | 5 +- src/test/regress/sql/subscription.sql | 5 +- src/test/subscription/t/021_twophase.pl | 67 ++++++++++++++++++- 16 files changed, 235 insertions(+), 41 deletions(-) diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml index a78c1c3a47..e69132c39d 100644 --- a/doc/src/sgml/ref/alter_subscription.sgml +++ b/doc/src/sgml/ref/alter_subscription.sgml @@ -68,8 +68,9 @@ ALTER SUBSCRIPTION name RENAME TO < Commands ALTER SUBSCRIPTION ... REFRESH PUBLICATION, ALTER SUBSCRIPTION ... {SET|ADD|DROP} PUBLICATION ... - with refresh option as true and - ALTER SUBSCRIPTION ... SET (failover = on|off) + with refresh option as true, + ALTER SUBSCRIPTION ... SET (failover = on|off) and + ALTER SUBSCRIPTION ... SET (two_phase = on|off) cannot be executed inside a transaction block.
These commands also cannot be executed when the subscription has @@ -228,9 +229,11 @@ ALTER SUBSCRIPTION name RENAME TO < disable_on_error, password_required, run_as_owner, - origin, and - failover. + origin, + failover, and + two_phase. Only a superuser can set password_required = false. + two_phase can be altered only for a disabled subscription. diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 8090ac9fc1..495f99a357 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -2682,3 +2682,46 @@ LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, LWLockRelease(TwoPhaseStateLock); return found; } + +/* + * checkGid: return true if gid matches "pg_gid_%u_%u" and its first number equals subid. + */ +static bool +checkGid(char *gid, Oid subid) +{ + int ret; + Oid subid_written, + xid; + + ret = sscanf(gid, "pg_gid_%u_%u", &subid_written, &xid); + + if (ret != 2 || subid != subid_written) + return false; + + return true; +} + +/* + * LookupGXactBySubid + * Check whether a valid prepared transaction whose GID matches the given subscription exists. + */ +bool +LookupGXactBySubid(Oid subid) +{ + bool found = false; + + LWLockAcquire(TwoPhaseStateLock, LW_SHARED); + for (int i = 0; i < TwoPhaseState->numPrepXacts; i++) + { + GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + + /* Ignore not-yet-valid GIDs.
*/ + if (gxact->valid && checkGid(gxact->gid, subid)) + { + found = true; + break; + } + } + LWLockRelease(TwoPhaseStateLock); + return found; +} diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index e407428dbc..aa8a8e1f84 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -16,6 +16,7 @@ #include "access/htup_details.h" #include "access/table.h" +#include "access/twophase.h" #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/dependency.h" @@ -1143,7 +1144,8 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, { supported_opts = (SUBOPT_SLOT_NAME | SUBOPT_SYNCHRONOUS_COMMIT | SUBOPT_BINARY | - SUBOPT_STREAMING | SUBOPT_DISABLE_ON_ERR | + SUBOPT_STREAMING | SUBOPT_TWOPHASE_COMMIT | + SUBOPT_DISABLE_ON_ERR | SUBOPT_PASSWORD_REQUIRED | SUBOPT_RUN_AS_OWNER | SUBOPT_FAILOVER | SUBOPT_ORIGIN); @@ -1151,6 +1153,47 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, parse_subscription_options(pstate, stmt->options, supported_opts, &opts); + /* Handle a change of the two_phase option */ + if (IsSet(opts.specified_opts, SUBOPT_TWOPHASE_COMMIT)) + { + /* + * two_phase can only be changed for disabled + * subscriptions + */ + if (form->subenabled) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot set %s for enabled subscription", + "two_phase"))); + + /* + * Stop all the subscription workers, just in case. Workers + * may still survive even if the subscription is disabled. + */ + logicalrep_workers_stop(subid); + + /* Refuse to disable two_phase while matching prepared transactions exist */ + if (!opts.twophase && + form->subtwophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED && + LookupGXactBySubid(subid)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot disable two_phase when uncommitted prepared transactions present"))); + + /* + * The changed two_phase option of the slot can't be rolled + * back.
+ */ + PreventInTransactionBlock(isTopLevel, "ALTER SUBSCRIPTION ... SET (two_phase)"); + + /* Change the system catalog accordingly */ + values[Anum_pg_subscription_subtwophasestate - 1] = + CharGetDatum(opts.twophase ? + LOGICALREP_TWOPHASE_STATE_PENDING : + LOGICALREP_TWOPHASE_STATE_DISABLED); + replaces[Anum_pg_subscription_subtwophasestate - 1] = true; + } + if (IsSet(opts.specified_opts, SUBOPT_SLOT_NAME)) { /* @@ -1505,7 +1548,8 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, * doing the database operations we won't be able to rollback altered * slot. */ - if (replaces[Anum_pg_subscription_subfailover - 1]) + if (replaces[Anum_pg_subscription_subtwophasestate - 1] || + replaces[Anum_pg_subscription_subfailover - 1]) { bool must_use_password; char *err; @@ -1525,7 +1569,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, PG_TRY(); { - walrcv_alter_slot(wrconn, sub->slotname, opts.failover); + walrcv_alter_slot(wrconn, sub->slotname, opts.twophase, opts.failover); } PG_FINALLY(); { @@ -1562,7 +1606,6 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) char *subname; char *conninfo; char *slotname; - List *subworkers; ListCell *lc; char originname[NAMEDATALEN]; char *err = NULL; @@ -1672,16 +1715,7 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) * New workers won't be started because we hold an exclusive lock on the * subscription till the end of the transaction.
*/ - LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); - subworkers = logicalrep_workers_find(subid, false); - LWLockRelease(LogicalRepWorkerLock); - foreach(lc, subworkers) - { - LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc); - - logicalrep_worker_stop(w->subid, w->relid); - } - list_free(subworkers); + logicalrep_workers_stop(subid); /* * Remove the no-longer-useful entry in the launcher's table of apply diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index 3c2b1bb496..baef3bdec0 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -80,7 +80,7 @@ static char *libpqrcv_create_slot(WalReceiverConn *conn, CRSSnapshotAction snapshot_action, XLogRecPtr *lsn); static void libpqrcv_alter_slot(WalReceiverConn *conn, const char *slotname, - bool failover); + bool two_phase, bool failover); static pid_t libpqrcv_get_backend_pid(WalReceiverConn *conn); static WalRcvExecResult *libpqrcv_exec(WalReceiverConn *conn, const char *query, @@ -1121,14 +1121,15 @@ libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname, */ static void libpqrcv_alter_slot(WalReceiverConn *conn, const char *slotname, - bool failover) + bool two_phase, bool failover) { StringInfoData cmd; PGresult *res; initStringInfo(&cmd); - appendStringInfo(&cmd, "ALTER_REPLICATION_SLOT %s ( FAILOVER %s )", + appendStringInfo(&cmd, "ALTER_REPLICATION_SLOT %s ( TWO_PHASE %s, FAILOVER %s )", quote_identifier(slotname), + two_phase ? "true" : "false", failover ? 
"true" : "false"); res = libpqrcv_PQexec(conn->streamConn, cmd.data); diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 66070e9131..94b73f3085 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -623,6 +623,27 @@ logicalrep_worker_stop(Oid subid, Oid relid) LWLockRelease(LogicalRepWorkerLock); } +/* + * Stop all the subscription workers. + */ +void +logicalrep_workers_stop(Oid subid) +{ + List *subworkers; + ListCell *lc; + + LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); + subworkers = logicalrep_workers_find(subid, false); + LWLockRelease(LogicalRepWorkerLock); + foreach(lc, subworkers) + { + LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc); + + logicalrep_worker_stop(w->subid, w->relid); + } + list_free(subworkers); +} + /* * Stop the given logical replication parallel apply worker. * diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index b5a80fe3e8..374aa22091 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -3911,7 +3911,7 @@ maybe_reread_subscription(void) /* !slotname should never happen when enabled is true. */ Assert(newsub->slotname); - /* two-phase should not be altered */ + /* two-phase should not be altered while the worker exists */ Assert(newsub->twophasestate == MySubscription->twophasestate); /* diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index cebf44bb0f..621f35ab1e 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -800,8 +800,10 @@ ReplicationSlotDrop(const char *name, bool nowait) * Change the definition of the slot identified by the specified name. 
*/ void -ReplicationSlotAlter(const char *name, bool failover) +ReplicationSlotAlter(const char *name, bool two_phase, bool failover) { + bool update_slot = false; + Assert(MyReplicationSlot == NULL); ReplicationSlotAcquire(name, false); @@ -844,12 +846,27 @@ ReplicationSlotAlter(const char *name, bool failover) errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot enable failover for a temporary replication slot")); + if (MyReplicationSlot->data.two_phase != two_phase) + { + SpinLockAcquire(&MyReplicationSlot->mutex); + MyReplicationSlot->data.two_phase = two_phase; + SpinLockRelease(&MyReplicationSlot->mutex); + + update_slot = true; + } + + if (MyReplicationSlot->data.failover != failover) { SpinLockAcquire(&MyReplicationSlot->mutex); MyReplicationSlot->data.failover = failover; SpinLockRelease(&MyReplicationSlot->mutex); + update_slot = true; + } + + if (update_slot) + { ReplicationSlotMarkDirty(); ReplicationSlotSave(); } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 9bf7c67f37..c45881554b 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1411,14 +1411,25 @@ DropReplicationSlot(DropReplicationSlotCmd *cmd) * Process extra options given to ALTER_REPLICATION_SLOT. 
*/ static void -ParseAlterReplSlotOptions(AlterReplicationSlotCmd *cmd, bool *failover) +ParseAlterReplSlotOptions(AlterReplicationSlotCmd *cmd, + bool *two_phase, bool *failover) { + bool two_phase_given = false; bool failover_given = false; /* Parse options */ foreach_ptr(DefElem, defel, cmd->options) { - if (strcmp(defel->defname, "failover") == 0) + if (strcmp(defel->defname, "two_phase") == 0) + { + if (two_phase_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + two_phase_given = true; + *two_phase = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "failover") == 0) { if (failover_given) ereport(ERROR, @@ -1438,10 +1449,11 @@ ParseAlterReplSlotOptions(AlterReplicationSlotCmd *cmd, bool *failover) static void AlterReplicationSlot(AlterReplicationSlotCmd *cmd) { + bool two_phase = false; bool failover = false; - ParseAlterReplSlotOptions(cmd, &failover); - ReplicationSlotAlter(cmd->slotname, failover); + ParseAlterReplSlotOptions(cmd, &two_phase, &failover); + ReplicationSlotAlter(cmd->slotname, two_phase, failover); } /* diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 6fee3160f0..5ff84301cd 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -1948,7 +1948,7 @@ psql_completion(const char *text, int start, int end) else if (HeadMatches("ALTER", "SUBSCRIPTION", MatchAny) && TailMatches("SET", "(")) COMPLETE_WITH("binary", "disable_on_error", "failover", "origin", "password_required", "run_as_owner", "slot_name", - "streaming", "synchronous_commit"); + "streaming", "synchronous_commit", "two_phase"); /* ALTER SUBSCRIPTION SKIP ( */ else if (HeadMatches("ALTER", "SUBSCRIPTION", MatchAny) && TailMatches("SKIP", "(")) COMPLETE_WITH("lsn"); diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h index 56248c0006..d493ed24c5 100644 --- a/src/include/access/twophase.h +++ b/src/include/access/twophase.h @@ -62,4 +62,7 @@ extern 
void PrepareRedoRemove(TransactionId xid, bool giveWarning); extern void restoreTwoPhaseData(void); extern bool LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, TimestampTz origin_prepare_timestamp); + +extern bool LookupGXactBySubid(Oid subid); + #endif /* TWOPHASE_H */ diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index 7b937d1a0c..2fcb11418f 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -243,7 +243,8 @@ extern void ReplicationSlotCreate(const char *name, bool db_specific, extern void ReplicationSlotPersist(void); extern void ReplicationSlotDrop(const char *name, bool nowait); extern void ReplicationSlotDropAcquired(void); -extern void ReplicationSlotAlter(const char *name, bool failover); +extern void ReplicationSlotAlter(const char *name, bool two_phase, + bool failover); extern void ReplicationSlotAcquire(const char *name, bool nowait); extern void ReplicationSlotRelease(void); diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h index 12f71fa99b..a443f402f5 100644 --- a/src/include/replication/walreceiver.h +++ b/src/include/replication/walreceiver.h @@ -377,6 +377,7 @@ typedef char *(*walrcv_create_slot_fn) (WalReceiverConn *conn, */ typedef void (*walrcv_alter_slot_fn) (WalReceiverConn *conn, const char *slotname, + bool two_phase, bool failover); /* @@ -455,8 +456,8 @@ extern PGDLLIMPORT WalReceiverFunctionsType *WalReceiverFunctions; WalReceiverFunctions->walrcv_send(conn, buffer, nbytes) #define walrcv_create_slot(conn, slotname, temporary, two_phase, failover, snapshot_action, lsn) \ WalReceiverFunctions->walrcv_create_slot(conn, slotname, temporary, two_phase, failover, snapshot_action, lsn) -#define walrcv_alter_slot(conn, slotname, failover) \ - WalReceiverFunctions->walrcv_alter_slot(conn, slotname, failover) +#define walrcv_alter_slot(conn, slotname, two_phase, failover) \ + WalReceiverFunctions->walrcv_alter_slot(conn, slotname, 
two_phase, failover) #define walrcv_get_backend_pid(conn) \ WalReceiverFunctions->walrcv_get_backend_pid(conn) #define walrcv_exec(conn, exec, nRetTypes, retTypes) \ diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h index 515aefd519..d5428263c1 100644 --- a/src/include/replication/worker_internal.h +++ b/src/include/replication/worker_internal.h @@ -246,6 +246,7 @@ extern bool logicalrep_worker_launch(LogicalRepWorkerType wtype, Oid userid, Oid relid, dsm_handle subworker_dsm); extern void logicalrep_worker_stop(Oid subid, Oid relid); +extern void logicalrep_workers_stop(Oid subid); extern void logicalrep_pa_worker_stop(ParallelApplyWorkerInfo *winfo); extern void logicalrep_worker_wakeup(Oid subid, Oid relid); extern void logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker); diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out index 0f2a25cdc1..51fa4b9690 100644 --- a/src/test/regress/expected/subscription.out +++ b/src/test/regress/expected/subscription.out @@ -377,10 +377,7 @@ HINT: To initiate replication, you must manually create the replication slot, e regress_testsub | regress_subscription_user | f | {testpub} | f | off | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 (1 row) ---fail - alter of two_phase option not supported. 
-ALTER SUBSCRIPTION regress_testsub SET (two_phase = false); -ERROR: unrecognized subscription parameter: "two_phase" --- but can alter streaming when two_phase enabled +-- We can alter streaming when two_phase enabled ALTER SUBSCRIPTION regress_testsub SET (streaming = true); \dRs+ List of subscriptions diff --git a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql index 3e5ba4cb8c..a3886d79ca 100644 --- a/src/test/regress/sql/subscription.sql +++ b/src/test/regress/sql/subscription.sql @@ -256,10 +256,7 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUBLICATION testpub WITH (connect = false, two_phase = true); \dRs+ ---fail - alter of two_phase option not supported. -ALTER SUBSCRIPTION regress_testsub SET (two_phase = false); - --- but can alter streaming when two_phase enabled +-- We can alter streaming when two_phase enabled ALTER SUBSCRIPTION regress_testsub SET (streaming = true); \dRs+ diff --git a/src/test/subscription/t/021_twophase.pl b/src/test/subscription/t/021_twophase.pl index 9437cd4c3b..e710f3c4c0 100644 --- a/src/test/subscription/t/021_twophase.pl +++ b/src/test/subscription/t/021_twophase.pl @@ -367,6 +367,71 @@ $result = $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_copy;"); is($result, qq(2), 'replicated data in subscriber table'); +# Disable the subscription and alter it to two_phase = false, +# verify that the altered subscription reflects the two_phase option. 
+ +# Alter subscription two_phase to false +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub_copy DISABLE"); +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub_copy SET (two_phase = false)"); +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub_copy ENABLE"); + +# Wait for subscription startup +$node_subscriber->wait_for_subscription_sync($node_publisher, $appname_copy); + +# Make sure that the two-phase is disabled on the subscriber +$result = $node_subscriber->safe_psql('postgres', + "SELECT subtwophasestate FROM pg_subscription WHERE subname = 'tap_sub_copy';" +); +is($result, qq(d), 'two-phase is disabled'); + +# Now do a prepare on publisher and make sure that it is not replicated. +$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub"); +$node_publisher->safe_psql( + 'postgres', qq{ + BEGIN; + INSERT INTO tab_copy VALUES (100); + PREPARE TRANSACTION 'newgid'; + }); + +# Wait for the subscriber to catch up +$node_publisher->wait_for_catchup($appname_copy); + +# Make sure that there are no prepared transactions on the subscriber +$result = $node_subscriber->safe_psql('postgres', + "SELECT count(*) FROM pg_prepared_xacts;"); +is($result, qq(0), 'transaction is not prepared on subscriber'); + +# Now commit the insert and verify that it IS replicated +$node_publisher->safe_psql('postgres', "COMMIT PREPARED 'newgid';"); + +# Wait for the subscriber to catch up +$node_publisher->wait_for_catchup($appname_copy); + +# Make sure that the committed transaction is replicated.
+$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_copy;"); +is($result, qq(3), 'replicated data in subscriber table'); + +# Alter subscription two_phase back to true +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub_copy DISABLE"); +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub_copy SET (two_phase = true)"); +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub_copy ENABLE"); + +# Wait for subscription startup +$node_subscriber->wait_for_subscription_sync($node_publisher, $appname_copy); + +# Make sure that the two-phase is enabled on the subscriber +$result = $node_subscriber->safe_psql('postgres', + "SELECT subtwophasestate FROM pg_subscription WHERE subname = 'tap_sub_copy';" +); +is($result, qq(e), 'two-phase is enabled'); + $node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub_copy;"); $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_copy;"); @@ -374,8 +439,6 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_copy;"); # check all the cleanup ############################### -$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub"); - $result = $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_subscription"); is($result, qq(0), 'check subscription was dropped on subscriber'); -- 2.43.0