From bf90fe696e3384885f8a993ab03821ab62c80e72 Mon Sep 17 00:00:00 2001 From: Ajin Cherian Date: Mon, 28 Jul 2025 08:00:23 -0400 Subject: [PATCH v5] Fix a deadlock during ALTER SUBSCRIPTION... DROP PUBLICATION When user drops a publication from a subscription, this will result in a publication refresh on the subscriber which will try and drop any pending origins. Meanwhile the apply worker could also be trying to cleanup origins. There could be a deadlock if the order of locking of SubscriptionRelationId, SubscriptionRelRelationId and ReplicationOriginRelationId are not aligned between functions process_syncing_tables_for_apply() and AlterSubscription_refresh(). The fix is to get locks on SubscriptionRelationId, SubscriptionRelRelationId and ReplicationOriginRelationId in that order when dropping tracking origins. --- src/backend/catalog/pg_subscription.c | 22 ++++++++++++++++++++-- src/backend/replication/logical/tablesync.c | 22 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index add51ca..657d6ff 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -331,10 +331,28 @@ UpdateSubscriptionRelState(Oid subid, Oid relid, char state, bool nulls[Natts_pg_subscription_rel]; Datum values[Natts_pg_subscription_rel]; bool replaces[Natts_pg_subscription_rel]; + bool sub_rel_locked, sub_locked; + LOCKTAG tag; - LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock); - rel = table_open(SubscriptionRelRelationId, RowExclusiveLock); + /* + * Check if we already hold locks for SubscriptionRelRelationId + * and SubscriptionRelationId. If we do, we don't need to take + * them again. + */ + sub_rel_locked = CheckRelationOidLockedByMe(SubscriptionRelRelationId, + RowExclusiveLock, true); + + SET_LOCKTAG_OBJECT(tag, InvalidOid, SubscriptionRelationId, subid, 0); + sub_locked = LockHeldByMe(&tag, AccessShareLock); + + if (sub_rel_locked && sub_locked) + rel = table_open(SubscriptionRelRelationId, NoLock); + else + { + LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock); + rel = table_open(SubscriptionRelRelationId, RowExclusiveLock); + } /* Try finding existing mapping. */ tup = SearchSysCacheCopy2(SUBSCRIPTIONRELMAP, diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index e6159ac..6d5024b 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -379,6 +379,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) static HTAB *last_start_times = NULL; ListCell *lc; bool started_tx = false; + Relation rel = NULL; Assert(!IsTransactionState()); @@ -470,7 +471,17 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) * refresh for the subscription where we remove the table * state and its origin and by this time the origin might be * already removed. So passing missing_ok = true. + * + * Lock SubscriptionRelationId with AccessShareLock and + * take RowExclusiveLock on SubscriptionRelRelationId to + * keep the same locking order as refresh from an + * user issued DDL command to prevent any deadlocks. */ + LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid, + 0, AccessShareLock); + if (!rel) + rel = table_open(SubscriptionRelRelationId, RowExclusiveLock); + ReplicationOriginNameForTablesync(MyLogicalRepWorker->subid, rstate->relid, originname, @@ -533,7 +544,14 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) * This is required to avoid any undetected deadlocks * due to any existing lock as deadlock detector won't * be able to detect the waits on the latch. + * + * Also close any tables prior to the commit. */ + if (rel) + { + table_close(rel, NoLock); + rel = NULL; + } CommitTransactionCommand(); pgstat_report_stat(false); } @@ -593,6 +611,10 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) } } + /* Close table if opened */ + if (rel) + table_close(rel, NoLock); + if (started_tx) { CommitTransactionCommand(); -- 1.8.3.1