From 021af6a0c3698a2599e9f60723b028f9b6a3f525 Mon Sep 17 00:00:00 2001 From: Ajin Cherian Date: Wed, 6 May 2026 14:59:13 +1000 Subject: [PATCH v3 2/2] Preserve replication origin OIDs during pg_upgrade When pg_upgrade migrates a subscriber, replication origin OIDs (roident) can change across the upgrade. This is a problem because commit-timestamp records embed roident and are copied directly from the old cluster's pg_commit_ts directory, causing spurious "update_origin_differs" conflicts after the upgrade. Fix this by dumping replication origins as global objects via pg_dumpall during binary upgrade, using a new function binary_upgrade_create_replication_origin(oid, name, lsn) to recreate each origin with its preserved roident and remote_lsn. To avoid conflicts with this, CreateSubscription() skips replorigin_create() in binary-upgrade mode since the origin is already created by the time the subscription is restored. --- src/backend/commands/subscriptioncmds.c | 11 +++- src/backend/replication/logical/origin.c | 72 ++++++++++++++++++++++ src/backend/utils/adt/pg_upgrade_support.c | 33 ++++++++++ src/bin/pg_dump/pg_dump.c | 27 ++++---- src/bin/pg_dump/pg_dumpall.c | 61 ++++++++++++++++++ src/bin/pg_upgrade/check.c | 9 ++- src/bin/pg_upgrade/info.c | 9 +++ src/bin/pg_upgrade/pg_upgrade.h | 1 + src/bin/pg_upgrade/t/004_subscription.pl | 38 +++++++++++- src/include/catalog/pg_proc.dat | 4 ++ src/include/replication/origin.h | 2 + 11 files changed, 246 insertions(+), 21 deletions(-) diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 87a1b5c4bc2..677ddab19a9 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -19,6 +19,7 @@ #include "access/table.h" #include "access/twophase.h" #include "access/xact.h" +#include "catalog/binary_upgrade.h" #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/indexing.h" @@ -867,9 +868,15 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, * apply workers initialization, and to handle origin creation dynamically * when tables are added to the subscription. It is not clear whether * preventing creation of origins is worth additional complexity. + * + * In binary-upgrade mode, skip origin creation here. This is required to + * preserve the roident from the old cluster for this subscription. */ - ReplicationOriginNameForLogicalRep(subid, InvalidOid, originname, sizeof(originname)); - replorigin_create(originname); + if (!IsBinaryUpgrade) + { + ReplicationOriginNameForLogicalRep(subid, InvalidOid, originname, sizeof(originname)); + replorigin_create(originname); + } /* * Connect to remote side to execute requested commands and fetch table diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c index c9dfb094c2b..3c5c2246df3 100644 --- a/src/backend/replication/logical/origin.c +++ b/src/backend/replication/logical/origin.c @@ -379,6 +379,78 @@ replorigin_create(const char *roname) return roident; } +/* + * TODO: unify with replorigin_create(). + * + * XXX: Should we put this function in pg_upgrade_support.c, because it's + * usable only while upgrading? + */ +ReplOriginId +replorigin_create_with_reploriginid(ReplOriginId node, const char *roname) +{ + HeapTuple tuple = NULL; + Relation rel; + Datum roname_d; + SysScanDesc scan; + ScanKeyData key; + bool nulls[Natts_pg_replication_origin]; + Datum values[Natts_pg_replication_origin]; + bool collides; + + Assert(node != InvalidReplOriginId); + + /* + * To avoid needing a TOAST table for pg_replication_origin, we limit + * replication origin names to 512 bytes. This should be more than enough + * for all practical use. + */ + if (strlen(roname) > MAX_RONAME_LEN) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("replication origin name is too long"), + errdetail("Replication origin names must be no longer than %d bytes.", + MAX_RONAME_LEN))); + + roname_d = CStringGetTextDatum(roname); + + Assert(IsTransactionState()); + + rel = table_open(ReplicationOriginRelationId, ExclusiveLock); + + Assert(!OidIsValid(rel->rd_rel->reltoastrelid)); + + ScanKeyInit(&key, + Anum_pg_replication_origin_roident, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(node)); + scan = systable_beginscan(rel, ReplicationOriginIdentIndex, + true /* indexOK */ , + SnapshotSelf, + 1, &key); + collides = HeapTupleIsValid(systable_getnext(scan)); + + if (collides) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("replication origin ID %d already exists", node))); + + systable_endscan(scan); + + memset(&nulls, 0, sizeof(nulls)); + + values[Anum_pg_replication_origin_roident - 1] = ObjectIdGetDatum(node); + values[Anum_pg_replication_origin_roname - 1] = roname_d; + + tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); + CatalogTupleInsert(rel, tuple); + CommandCounterIncrement(); + + table_close(rel, ExclusiveLock); + + heap_freetuple(tuple); + return node; +} + /* * Helper function to drop a replication origin. */ diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c index 59c3e7f0146..5358c06e228 100644 --- a/src/backend/utils/adt/pg_upgrade_support.c +++ b/src/backend/utils/adt/pg_upgrade_support.c @@ -445,3 +445,36 @@ binary_upgrade_create_conflict_detection_slot(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +Datum +binary_upgrade_create_replication_origin(PG_FUNCTION_ARGS) +{ + ReplOriginId node; + Name originname; + + CHECK_IS_BINARY_UPGRADE; + + /* NULL node id and origin name are not allowed */ + if (PG_ARGISNULL(0) || + PG_ARGISNULL(1)) + elog(ERROR, "null argument to binary_upgrade_create_replication_origin is not allowed"); + + node = PG_GETARG_OID(0); + originname = PG_GETARG_NAME(1); + + replorigin_create_with_reploriginid(node, NameStr(*originname)); + + if (!PG_ARGISNULL(2)) + { + XLogRecPtr remote_commit = PG_GETARG_LSN(2); + + /* Lock to prevent the replication origin from vanishing */ + LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock); + replorigin_advance(node, remote_commit, InvalidXLogRecPtr, + false /* backward */ , + false /* WAL log */ ); + UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock); + } + + PG_RETURN_VOID(); +} diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 452d0b5e98a..91e17f78b93 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -5673,20 +5673,21 @@ dumpSubscription(Archive *fout, const SubscriptionInfo *subinfo) if (subinfo->suboriginremotelsn) { /* - * Preserve the remote_lsn for the subscriber's replication - * origin. This value is required to start the replication from - * the position before the upgrade. This value will be stale if - * the publisher gets upgraded before the subscriber node. - * However, this shouldn't be a problem as the upgrade of the - * publisher ensures that all the transactions were replicated - * before upgrading it. + * For servers older than the version where replication origins are + * preserved as global objects (via pg_dumpall), we must advance the + * replication origin LSN here per-subscription. From 19.0 onwards + * this is handled globally by dumpReplicationOrigins() in pg_dumpall, + * so we skip it to avoid double-advancing. */ - appendPQExpBufferStr(query, - "\n-- For binary upgrade, must preserve the remote_lsn for the subscriber's replication origin.\n"); - appendPQExpBufferStr(query, - "SELECT pg_catalog.binary_upgrade_replorigin_advance("); - appendStringLiteralAH(query, subinfo->dobj.name, fout); - appendPQExpBuffer(query, ", '%s');\n", subinfo->suboriginremotelsn); + if (fout->remoteVersion < 190000 && subinfo->suboriginremotelsn) + { + appendPQExpBufferStr(query, + "\n-- For binary upgrade, must preserve the remote_lsn for the subscriber's replication origin.\n"); + appendPQExpBufferStr(query, + "SELECT pg_catalog.binary_upgrade_replorigin_advance("); + appendStringLiteralAH(query, subinfo->dobj.name, fout); + appendPQExpBuffer(query, ", '%s');\n", subinfo->suboriginremotelsn); + } } if (subinfo->subenabled) diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index c1f43113c53..811c824ec61 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -25,6 +25,7 @@ #include #include +#include "access/xlogdefs.h" #include "catalog/pg_authid_d.h" #include "common/connect.h" #include "common/file_perm.h" @@ -76,6 +77,7 @@ static void dropDBs(PGconn *conn); static void dumpUserConfig(PGconn *conn, const char *username); static void dumpDatabases(PGconn *conn); static void dumpTimestamp(const char *msg); +static void dumpReplicationOrigins(PGconn *conn); static int runPgDump(const char *dbname, const char *create_opts, char *dbfile); static void buildShSecLabels(PGconn *conn, const char *catalog_name, Oid objectId, @@ -813,6 +815,10 @@ main(int argc, char *argv[]) /* Dump role GUC privileges */ if (server_version >= 150000 && !skip_acls) dumpRoleGUCPrivs(conn); + + /* Dump replication origins */ + if (server_version >= 190000 && binary_upgrade && archDumpFormat == archNull) + dumpReplicationOrigins(conn); } /* Dump tablespaces */ @@ -2339,6 +2345,61 @@ dumpTimestamp(const char *msg) fprintf(OPF, "-- %s %s\n\n", msg, buf); } +static void +dumpReplicationOrigins(PGconn *conn) +{ + PQExpBuffer buf = createPQExpBuffer(); + PGresult *res; + int i_roident; + int i_roname; + int i_remotelsn; + + /* Get replication origins in current database. */ + appendPQExpBufferStr(buf, + "SELECT o.*, os.remote_lsn " + "FROM pg_catalog.pg_replication_origin o " + "LEFT OUTER JOIN pg_catalog.pg_replication_origin_status os ON o.roident = os.local_id "); + + res = executeQueryOrDie(conn, buf->data); + + i_roident = PQfnumber(res, "roident"); + i_roname = PQfnumber(res, "roname"); + i_remotelsn = PQfnumber(res, "remote_lsn"); + + fprintf(OPF, "--\n-- Replication Origins \n--\n\n"); + + for (int i = 0; i < PQntuples(res); i++) + { + ReplOriginId roident; + const char *roname; + + roident = atooid(PQgetvalue(res, i, i_roident)); + roname = PQgetvalue(res, i, i_roname); + + resetPQExpBuffer(buf); + + appendPQExpBufferStr(buf, "\n-- For binary upgrade, must preserve replication origin roident and remote_lsn\n"); + appendPQExpBuffer(buf, + "SELECT pg_catalog.binary_upgrade_create_replication_origin('%u'::pg_catalog.oid, '%s'::pg_catalog.name", + roident, roname); + + if (!PQgetisnull(res, i, i_remotelsn)) + { + appendPQExpBuffer(buf, ", '%s'::pg_catalog.pg_lsn", + PQgetvalue(res, i, i_remotelsn)); + } + else + appendPQExpBufferStr(buf, ", NULL"); + + appendPQExpBuffer(buf, ");\n"); + + fprintf(OPF, "%s", buf->data); + } + + PQclear(res); + destroyPQExpBuffer(buf); +} + /* * read_dumpall_filters - retrieve database identifier patterns from file * diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 5a7afe62eab..6b5e254a9ef 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -2306,8 +2306,7 @@ check_new_cluster_replication_slots(void) * check_new_cluster_subscription_configuration() * * Verify that the max_active_replication_origins configuration specified is - * enough for creating the subscriptions. This is required to create the - * replication origin for each subscription. + * enough for creating all the replication origins. */ static void check_new_cluster_subscription_configuration(void) @@ -2321,7 +2320,7 @@ check_new_cluster_subscription_configuration(void) return; /* Quick return if there are no subscriptions to be migrated. */ - if (old_cluster.nsubs == 0) + if (old_cluster.nrepl_origins == 0) return; prep_status("Checking for new cluster configuration for subscriptions"); @@ -2335,10 +2334,10 @@ check_new_cluster_subscription_configuration(void) pg_fatal("could not determine parameter settings on new cluster"); max_active_replication_origins = atoi(PQgetvalue(res, 0, 0)); - if (old_cluster.nsubs > max_active_replication_origins) + if (old_cluster.nrepl_origins > max_active_replication_origins) pg_fatal("\"max_active_replication_origins\" (%d) must be greater than or equal to the number of " "subscriptions (%d) on the old cluster", - max_active_replication_origins, old_cluster.nsubs); + max_active_replication_origins, old_cluster.nrepl_origins); PQclear(res); PQfinish(conn); diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index 37fff93892f..630f3f06e24 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -843,6 +843,7 @@ get_subscription_info(ClusterInfo *cluster) PGconn *conn; PGresult *res; int i_nsub; + int i_nrepl_origins; int i_retain_dead_tuples; conn = connectToServer(cluster, "template1"); @@ -862,6 +863,14 @@ get_subscription_info(ClusterInfo *cluster) cluster->sub_retain_dead_tuples = (strcmp(PQgetvalue(res, 0, i_retain_dead_tuples), "t") == 0); PQclear(res); + + res = executeQueryOrDie(conn, + "SELECT count(*) AS nrepl_origins " + "FROM pg_catalog.pg_replication_origin"); + i_nrepl_origins = PQfnumber(res, "nrepl_origins"); + cluster->nrepl_origins = atoi(PQgetvalue(res, 0, i_nrepl_origins)); + PQclear(res); + PQfinish(conn); } diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 1d767bbda2d..1d15700a886 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -311,6 +311,7 @@ typedef struct int num_tablespaces; const char *tablespace_suffix; /* directory specification */ int nsubs; /* number of subscriptions */ + int nrepl_origins; /* number of replication origins */ bool sub_retain_dead_tuples; /* whether a subscription enables * retain_dead_tuples. */ } ClusterInfo; diff --git a/src/bin/pg_upgrade/t/004_subscription.pl b/src/bin/pg_upgrade/t/004_subscription.pl index ecaa8e1e075..32b137f339e 100644 --- a/src/bin/pg_upgrade/t/004_subscription.pl +++ b/src/bin/pg_upgrade/t/004_subscription.pl @@ -301,8 +301,30 @@ is($result, qq(t), "Check that the table is in init state"); # Get the replication origin's remote_lsn of the old subscriber my $remote_lsn = $old_sub->safe_psql('postgres', - "SELECT remote_lsn FROM pg_replication_origin_status os, pg_subscription s WHERE os.external_id = 'pg_' || s.oid AND s.subname = 'regress_sub4'" + "SELECT os.remote_lsn + FROM pg_replication_origin_status os + JOIN pg_replication_origin o ON o.roident = os.local_id + JOIN pg_subscription s ON o.roname = 'pg_' || s.oid::text + WHERE s.subname = 'regress_sub4'" ); + +# Get the replication origin OIDs (roident) for all subscriptions, keyed by +# subscription name (which is stable across upgrade, unlike suboid). These +# must be preserved after upgrade. A mismatch would cause spurious +# update_origin_differs conflicts. +my %pre_upgrade_roident; +my $roident_rows = $old_sub->safe_psql('postgres', + "SELECT s.subname, o.roident + FROM pg_subscription s + JOIN pg_replication_origin o ON o.roname = 'pg_' || s.oid::text + ORDER BY s.subname" +); +for my $row (split /\n/, $roident_rows) +{ + my ($subname, $roident) = split /\|/, $row; + $pre_upgrade_roident{$subname} = $roident; +} + # Have the subscription in disabled state before upgrade $old_sub->safe_psql('postgres', "ALTER SUBSCRIPTION regress_sub5 DISABLE"); @@ -378,6 +400,20 @@ regress_sub5|f|f|f), "check that the subscription's running status, failover, and retain_dead_tuples are preserved" ); +# Verify that replication origin OIDs are preserved after upgrade. +my $post_roident_rows = $new_sub->safe_psql('postgres', + "SELECT s.subname, o.roident + FROM pg_subscription s + JOIN pg_replication_origin o ON o.roname = 'pg_' || s.oid::text + ORDER BY s.subname" +); +for my $row (split /\n/, $post_roident_rows) +{ + my ($subname, $roident) = split /\|/, $row; + is($roident, $pre_upgrade_roident{$subname}, + "roident preserved for subscription '$subname' after upgrade"); +} + # Subscription relations should be preserved $result = $new_sub->safe_psql('postgres', "SELECT srrelid, srsubstate FROM pg_subscription_rel ORDER BY srrelid"); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 2f98473b1cb..71fb1e70465 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12028,6 +12028,10 @@ proname => 'binary_upgrade_set_next_pg_subscription_oid', provolatile => 'v', proparallel => 'r', prorettype => 'void', proargtypes => 'oid', prosrc => 'binary_upgrade_set_next_pg_subscription_oid' }, +{ oid => '9161', descr => 'for use by pg_upgrade (replication origin)', + proname => 'binary_upgrade_create_replication_origin', proisstrict => 'f', + provolatile => 'v', proparallel => 'r', prorettype => 'void', + proargtypes => 'oid name pg_lsn', prosrc => 'binary_upgrade_create_replication_origin' }, # conversion functions { oid => '4310', descr => 'internal conversion function for KOI8R to WIN1251', diff --git a/src/include/replication/origin.h b/src/include/replication/origin.h index a69faf6eaaf..ac2fd824469 100644 --- a/src/include/replication/origin.h +++ b/src/include/replication/origin.h @@ -55,6 +55,8 @@ extern PGDLLIMPORT int max_active_replication_origins; /* API for querying & manipulating replication origins */ extern ReplOriginId replorigin_by_name(const char *roname, bool missing_ok); extern ReplOriginId replorigin_create(const char *roname); +extern ReplOriginId replorigin_create_with_reploriginid(ReplOriginId node, + const char *roname); extern void replorigin_drop_by_name(const char *name, bool missing_ok, bool nowait); extern bool replorigin_by_oid(ReplOriginId roident, bool missing_ok, char **roname); -- 2.47.3