Separate catalog_xmin from xmin in walsender hot standby feedback When a standby sends hot standby feedback to a primary that does not use a physical replication slot, ProcessStandbyHSFeedbackMessage() stores min(feedbackCatalogXmin, feedbackXmin) into MyProc->xmin. Because ComputeXidHorizons() treats proc->xmin uniformly for both data and catalog horizons, a very old catalog_xmin from a logical replication slot on the standby incorrectly prevents vacuuming of regular user data tables on the primary. Physical replication slots already handle this correctly by tracking xmin and catalog_xmin in separate slot fields, so slot_catalog_xmin only affects catalog_oldest_nonremovable and not data_oldest_nonremovable. However, many production HA deployments intentionally avoid physical replication slots due to their lifecycle management complexity across failovers -- when a primary fails, physical slots on the old primary are lost and cannot be automatically migrated to the promoted standby. These deployments rely on wal_keep_size or WAL archiving for WAL retention, combined with hot_standby_feedback for visibility horizon management. The no-slot path is therefore a legitimate and widely-used production configuration, not a legacy code path. This commit adds a catalog_xmin field to PGPROC, so that the walsender can track the catalog horizon separately from the data horizon even without a replication slot. ComputeXidHorizons() now accumulates proc->catalog_xmin and applies it only to catalog_oldest_nonremovable and shared_oldest_nonremovable (after saving the _raw value), exactly mirroring how slot_catalog_xmin is handled. GetReplicationHorizons() is updated to include proc_catalog_xmin in the catalog_xmin feedback sent upstream, ensuring correct propagation in cascading standby configurations. 
--- src/include/storage/proc.h | 4 + src/backend/storage/lmgr/proc.c | 2 + src/backend/storage/ipc/procarray.c | 50 ++++++++-- src/backend/replication/walsender.c | 19 ++-- src/test/recovery/t/053_hs_feedback_catalog_xmin.pl | 181 +++++++++++++++++++ 5 files changed, 240 insertions(+), 16 deletions(-) create mode 100644 src/test/recovery/t/053_hs_feedback_catalog_xmin.pl diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 3e1d1fa..d49bd0d 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -250,6 +250,10 @@ typedef struct PGPROC * vacuum must not remove tuples deleted by * xid >= xmin ! */ + TransactionId catalog_xmin; /* catalog xmin from hot standby feedback, + * used separately from xmin so that it only + * affects catalog horizon, not data horizon */ + XidCacheStatus subxidStatus; /* mirrored with * ProcGlobal->subxidStates[i] */ struct XidCache subxids; /* cache for subtransaction XIDs */ diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 1ac2506..258f28c 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -476,6 +476,7 @@ InitProcess(void) MyProc->fpLocalTransactionId = InvalidLocalTransactionId; MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; + MyProc->catalog_xmin = InvalidTransactionId; MyProc->pid = MyProcPid; MyProc->vxid.procNumber = MyProcNumber; MyProc->vxid.lxid = InvalidLocalTransactionId; @@ -678,6 +679,7 @@ InitAuxiliaryProcess(void) MyProc->fpLocalTransactionId = InvalidLocalTransactionId; MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; + MyProc->catalog_xmin = InvalidTransactionId; MyProc->vxid.procNumber = INVALID_PROC_NUMBER; MyProc->vxid.lxid = InvalidLocalTransactionId; MyProc->databaseId = InvalidOid; diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 9299bce..91d2747 100644 --- a/src/backend/storage/ipc/procarray.c +++ 
b/src/backend/storage/ipc/procarray.c @@ -208,6 +208,12 @@ typedef struct ComputeXidHorizonsResult TransactionId slot_xmin; TransactionId slot_catalog_xmin; + /* + * Oldest catalog_xmin accumulated from PGPROC entries (set by walsenders + * without a replication slot via hot standby feedback). + */ + TransactionId proc_catalog_xmin; + /* * Oldest xid that any backend might still consider running. This needs to * include processes running VACUUM, in contrast to the normal visibility @@ -698,6 +704,7 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) proc->vxid.lxid = InvalidLocalTransactionId; proc->xmin = InvalidTransactionId; + proc->catalog_xmin = InvalidTransactionId; /* be sure this is cleared in abort */ proc->delayChkptFlags = 0; @@ -738,6 +745,7 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid) proc->xid = InvalidTransactionId; proc->vxid.lxid = InvalidLocalTransactionId; proc->xmin = InvalidTransactionId; + proc->catalog_xmin = InvalidTransactionId; /* be sure this is cleared in abort */ proc->delayChkptFlags = 0; @@ -923,6 +931,7 @@ ProcArrayClearTransaction(PGPROC *proc) proc->vxid.lxid = InvalidLocalTransactionId; proc->xmin = InvalidTransactionId; + proc->catalog_xmin = InvalidTransactionId; Assert(!(proc->statusFlags & PROC_VACUUM_STATE_MASK)); Assert(!proc->delayChkptFlags); @@ -1680,6 +1689,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) /* inferred after ProcArrayLock is released */ h->catalog_oldest_nonremovable = InvalidTransactionId; + h->proc_catalog_xmin = InvalidTransactionId; LWLockAcquire(ProcArrayLock, LW_SHARED); @@ -1801,6 +1811,20 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) h->data_oldest_nonremovable = TransactionIdOlder(h->data_oldest_nonremovable, xmin); } + + /* + * Accumulate catalog_xmin from PGPROC entries. 
This is set by + * walsenders without a replication slot to separately track the + * catalog horizon from hot standby feedback, so that it only holds + * back catalog table vacuuming, not user data table vacuuming. + */ + { + TransactionId catxmin = UINT32_ACCESS_ONCE(proc->catalog_xmin); + + if (TransactionIdIsValid(catxmin)) + h->proc_catalog_xmin = + TransactionIdOlder(h->proc_catalog_xmin, catxmin); + } } /* @@ -1842,19 +1866,25 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) /* * The only difference between catalog / data horizons is that the slot's - * catalog xmin is applied to the catalog one (so catalogs can be accessed - * for logical decoding). Initialize with data horizon, and then back up - * further if necessary. Have to back up the shared horizon as well, since - * that also can contain catalogs. + * catalog xmin and proc catalog_xmin are applied to the catalog one (so + * catalogs can be accessed for logical decoding). Initialize with data + * horizon, and then back up further if necessary. Have to back up the + * shared horizon as well, since that also can contain catalogs. 
*/ h->shared_oldest_nonremovable_raw = h->shared_oldest_nonremovable; h->shared_oldest_nonremovable = TransactionIdOlder(h->shared_oldest_nonremovable, h->slot_catalog_xmin); + h->shared_oldest_nonremovable = + TransactionIdOlder(h->shared_oldest_nonremovable, + h->proc_catalog_xmin); h->catalog_oldest_nonremovable = h->data_oldest_nonremovable; h->catalog_oldest_nonremovable = TransactionIdOlder(h->catalog_oldest_nonremovable, h->slot_catalog_xmin); + h->catalog_oldest_nonremovable = + TransactionIdOlder(h->catalog_oldest_nonremovable, + h->proc_catalog_xmin); /* * It's possible that slots backed up the horizons further than @@ -1897,6 +1927,9 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) Assert(!TransactionIdIsValid(h->slot_catalog_xmin) || TransactionIdPrecedesOrEquals(h->oldest_considered_running, h->slot_catalog_xmin)); + Assert(!TransactionIdIsValid(h->proc_catalog_xmin) || + TransactionIdPrecedesOrEquals(h->oldest_considered_running, + h->proc_catalog_xmin)); /* update approximate horizons with the computed horizons */ GlobalVisUpdateApply(h); @@ -1991,12 +2024,13 @@ GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin) /* * Don't want to use shared_oldest_nonremovable here, as that contains the - * effect of replication slot's catalog_xmin. We want to send a separate - * feedback for the catalog horizon, so the primary can remove data table - * contents more aggressively. + * effect of replication slot's catalog_xmin and proc catalog_xmin. We want + * to send a separate feedback for the catalog horizon, so the primary can + * remove data table contents more aggressively. 
*/ *xmin = horizons.shared_oldest_nonremovable_raw; - *catalog_xmin = horizons.slot_catalog_xmin; + *catalog_xmin = TransactionIdOlder(horizons.slot_catalog_xmin, + horizons.proc_catalog_xmin); } /* diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 3d4ab92..4a5b74c 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -2610,6 +2610,7 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac SpinLockAcquire(&slot->mutex); MyProc->xmin = InvalidTransactionId; + MyProc->catalog_xmin = InvalidTransactionId; /* * For physical replication we don't need the interlock provided by xmin @@ -2739,6 +2740,7 @@ ProcessStandbyHSFeedbackMessage(void) && !TransactionIdIsNormal(feedbackCatalogXmin)) { MyProc->xmin = InvalidTransactionId; + MyProc->catalog_xmin = InvalidTransactionId; if (MyReplicationSlot != NULL) PhysicalReplicationSlotNewXmin(feedbackXmin, feedbackCatalogXmin); return; @@ -2780,22 +2782,17 @@ ProcessStandbyHSFeedbackMessage(void) * risk already since a VACUUM could already have determined the horizon.) * * If we're using a replication slot we reserve the xmin via that, - * otherwise via the walsender's PGPROC entry. We can only track the - * catalog xmin separately when using a slot, so we store the least of the - * two provided when not using a slot. - * - * XXX: It might make sense to generalize the ephemeral slot concept and - * always use the slot mechanism to handle the feedback xmin. + * otherwise via the walsender's PGPROC entry. With the PGPROC entry we + * track catalog_xmin separately from xmin, so that catalog_xmin only + * affects catalog table horizons and does not hold back vacuuming of + * regular user data tables. */ if (MyReplicationSlot != NULL) /* XXX: persistency configurable? 
*/ PhysicalReplicationSlotNewXmin(feedbackXmin, feedbackCatalogXmin); else { - if (TransactionIdIsNormal(feedbackCatalogXmin) - && TransactionIdPrecedes(feedbackCatalogXmin, feedbackXmin)) - MyProc->xmin = feedbackCatalogXmin; - else - MyProc->xmin = feedbackXmin; + MyProc->xmin = feedbackXmin; + MyProc->catalog_xmin = feedbackCatalogXmin; } } diff --git a/src/test/recovery/t/053_hs_feedback_catalog_xmin.pl b/src/test/recovery/t/053_hs_feedback_catalog_xmin.pl new file mode 100644 index 0000000..1234567 --- /dev/null +++ b/src/test/recovery/t/053_hs_feedback_catalog_xmin.pl @@ -0,0 +1,181 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group +# +# Test that hot standby feedback catalog_xmin does not hold back vacuuming +# of regular user data tables on the primary. +# +# Scenario: primary -> standby (no physical replication slot), with +# hot_standby_feedback = on. The standby has a logical replication slot +# which sets a catalog_xmin. Before the fix, the catalog_xmin would be +# merged into the walsender's MyProc->xmin on the primary, incorrectly +# preventing vacuuming of dead tuples in user data tables. After the fix, +# catalog_xmin is tracked separately via MyProc->catalog_xmin and only +# affects catalog table horizons. 
+ +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize primary - no replication slot will be used for the standby +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 'logical'); +$node_primary->append_conf( + 'postgresql.conf', qq[ +wal_level = logical +max_replication_slots = 10 +max_wal_senders = 10 +hot_standby_feedback = on +autovacuum = off +wal_receiver_status_interval = 1s +]); +$node_primary->start; + +# Take backup and create standby WITHOUT a replication slot +$node_primary->backup('my_backup'); + +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_primary, 'my_backup', + has_streaming => 1); +$node_standby->append_conf( + 'postgresql.conf', qq[ +hot_standby_feedback = on +wal_receiver_status_interval = 1s +]); +$node_standby->start; + +# Wait for standby to connect and start streaming +$node_primary->poll_query_until( + 'postgres', + "SELECT count(*) >= 1 FROM pg_stat_replication WHERE state = 'streaming';") + or die "Timed out waiting for standby to start streaming"; + +# Create a test table on the primary and generate some dead tuples +$node_primary->safe_psql('postgres', qq[ + CREATE TABLE test_data (id int, val text); + INSERT INTO test_data SELECT g, 'initial' FROM generate_series(1, 1000) g; +]); + +# Wait for standby to catch up +$node_primary->wait_for_catchup($node_standby); + +# Record the current xmin state - before creating logical slot on standby, +# the walsender should have no xmin holding back vacuum. +my $xmin_before = $node_primary->safe_psql('postgres', + "SELECT backend_xmin FROM pg_stat_replication WHERE state = 'streaming' LIMIT 1;"); +note "walsender xmin before logical slot: '$xmin_before'"; + +# Create a logical replication slot on the standby. 
This will establish a +# catalog_xmin on the standby, which will be reported back to the primary +# via hot standby feedback. +$node_standby->safe_psql('postgres', qq[ + SELECT pg_create_logical_replication_slot('test_logical_slot', 'test_decoding'); +]); + +# Generate some transactions on the primary so the xmin/catalog_xmin diverge +$node_primary->safe_psql('postgres', qq[ + UPDATE test_data SET val = 'updated' WHERE id <= 500; +]); + +# Wait for standby to catch up and feedback to be applied +$node_primary->wait_for_catchup($node_standby); + +# Wait until hot standby feedback has set the walsender's backend_xmin +# (poll_query_until retries up to the framework's default timeout) +$node_primary->poll_query_until( + 'postgres', qq[ + SELECT backend_xmin IS NOT NULL + FROM pg_stat_replication + WHERE state = 'streaming'; +]) or die "Timed out waiting for hot standby feedback to set backend_xmin"; + +# Now check the key invariant: the walsender's backend_xmin on the primary +# should reflect feedbackXmin (data horizon), NOT the older catalog_xmin. +# +# Get the standby's logical slot catalog_xmin +my $standby_catalog_xmin = $node_standby->safe_psql('postgres', + "SELECT catalog_xmin FROM pg_replication_slots WHERE slot_name = 'test_logical_slot';"); +note "standby logical slot catalog_xmin: $standby_catalog_xmin"; + +# Get the walsender's backend_xmin on the primary +my $walsender_xmin = $node_primary->safe_psql('postgres', + "SELECT backend_xmin FROM pg_stat_replication WHERE state = 'streaming' LIMIT 1;"); +note "primary walsender backend_xmin: $walsender_xmin"; + +# The walsender's xmin should NOT be the catalog_xmin (which is older). +# If they are equal, the old bug is still present - catalog_xmin is leaking +# into data horizon. +# +# Note: we can't just compare them directly because xmin could be the same +# if no new transactions happened. Instead, generate more transactions and +# verify that vacuum can clean up dead tuples in user tables. 
+ +# Generate dead tuples on the primary +$node_primary->safe_psql('postgres', qq[ + UPDATE test_data SET val = 'updated2'; +]); + +# More transactions to advance xmin +$node_primary->safe_psql('postgres', qq[ + BEGIN; + CREATE TABLE dummy_advance AS SELECT generate_series(1, 100) AS x; + DROP TABLE dummy_advance; + COMMIT; +]); + +# Wait for the standby to replay +$node_primary->wait_for_catchup($node_standby); +sleep(2); # Wait for hs feedback + +# Now vacuum the user table - this should be able to clean dead tuples +# because catalog_xmin should NOT hold back data table vacuum +my $dead_before = $node_primary->safe_psql('postgres', + "SELECT n_dead_tup FROM pg_stat_user_tables WHERE relname = 'test_data';"); +note "dead tuples before vacuum: $dead_before"; + +$node_primary->safe_psql('postgres', "VACUUM VERBOSE test_data;"); + +my $dead_after = $node_primary->safe_psql('postgres', + "SELECT n_dead_tup FROM pg_stat_user_tables WHERE relname = 'test_data';"); +note "dead tuples after vacuum: $dead_after"; + +# The dead tuples should have been cleaned up. +# With the old code, catalog_xmin would prevent this. +ok($dead_after < $dead_before, + 'vacuum cleaned dead tuples in user table despite standby catalog_xmin'); + +# Verify that the catalog horizon is still properly protected. +# The walsender's catalog_xmin should still be reflected in the system. +# Check that the standby's logical slot still has its catalog_xmin. 
+my $slot_catalog_xmin_after = $node_standby->safe_psql('postgres', + "SELECT catalog_xmin FROM pg_replication_slots WHERE slot_name = 'test_logical_slot';"); +note "standby slot catalog_xmin after vacuum: $slot_catalog_xmin_after"; +ok($slot_catalog_xmin_after ne '', + 'standby logical slot catalog_xmin is still set after primary vacuum'); + +# Also verify via pg_stat_replication that the walsender xmin is set +# (data protection is still working) +my $final_xmin = $node_primary->safe_psql('postgres', + "SELECT backend_xmin FROM pg_stat_replication WHERE state = 'streaming' LIMIT 1;"); +note "final walsender backend_xmin: $final_xmin"; + +# Additional test: verify that catalog_xmin and xmin are different on the +# primary's walsender (xmin should be newer/larger than catalog_xmin). +# The standby should report both separately and the primary should track +# them independently. +# +# Get the replication horizons: the walsender's proc->xmin should be the +# data horizon (feedbackXmin), not min(feedbackXmin, feedbackCatalogXmin). +# We test this indirectly: if catalog_xmin were merged into xmin, the +# walsender's xmin would be very old (= catalog_xmin), and vacuum would +# not be able to clean dead tuples. Since vacuum DID clean them (tested +# above), the separation is working. + +# Cleanup +$node_standby->safe_psql('postgres', + "SELECT pg_drop_replication_slot('test_logical_slot');"); + +$node_standby->stop; +$node_primary->stop; + +done_testing(); -- 2.39.0