From 59a06062903b9634becb424c8846e91ef51c920c Mon Sep 17 00:00:00 2001 From: Dilip Kumar Date: Wed, 13 Aug 2025 08:15:05 +0000 Subject: [PATCH v5] Fix DROP SUBSCRIPTION deadlock with new database creation DROP SUBSCRIPTION previously acquired an AccessExclusiveLock on the pg_subscription catalog to prevent the replication launcher from starting a new worker. However, this caused a deadlock. New database creation also need to acquire an AccessShareLock on this same catalog during their initialization phase. This created a lock conflict where DROP SUBSCRIPTION would block this, while simultaneously waiting for a connection to complete for dropping the replication slot. This commit resolves the deadlock by having DROP SUBSCRIPTION acquire a less restrictive RowExclusiveLock on the catalog instead. To address the original concern of orphaned workers, a new check is implemented. The replication worker now takes a shared object lock on the subscription itself. If a worker starts for a subscription that no longer exists, it immediately detects this condition and exits. This ensures that no orphan workers are created without the need for an overly broad AccessExclusiveLock on the system catalog. --- src/backend/commands/subscriptioncmds.c | 8 +++++--- src/backend/replication/logical/worker.c | 7 +++++++ src/bin/pg_upgrade/t/002_pg_upgrade.pl | 2 ++ src/test/regress/expected/subscription.out | 13 +++++++++++++ src/test/regress/sql/subscription.sql | 13 +++++++++++++ 5 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index faa3650d287..4c01d21b2f3 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -1803,10 +1803,12 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) bool must_use_password; /* - * Lock pg_subscription with AccessExclusiveLock to ensure that the - * launcher doesn't restart new worker during dropping the subscription + * The launcher may concurrently start a new worker for this subscription. + * During initialization, the worker checks for subscription validity and + * exits if the subscription has already been dropped. See + * InitializeLogRepWorker. */ - rel = table_open(SubscriptionRelationId, AccessExclusiveLock); + rel = table_open(SubscriptionRelationId, RowExclusiveLock); tup = SearchSysCache2(SUBSCRIPTIONNAME, ObjectIdGetDatum(MyDatabaseId), CStringGetDatum(stmt->subname)); diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 59b6ae7719a..8ed17aa6b69 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -5415,6 +5415,13 @@ InitializeLogRepWorker(void) StartTransactionCommand(); oldctx = MemoryContextSwitchTo(ApplyContext); + /* + * Lock the subscription to prevent it from being concurrently dropped, + * then re-verify its existence. After the initialization, the worker will + * be terminated gracefully if the subscription is dropped. + */ + LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid, 0, + AccessShareLock); MySubscription = GetSubscription(MyLogicalRepWorker->subid, true); if (!MySubscription) { diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 4b5e895809b..284b2ef07e0 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -225,6 +225,8 @@ $oldnode->init(%old_node_params); # Override log_statement=all set by Cluster.pm. This avoids large amounts # of log traffic that slow this test down even more when run under valgrind. $oldnode->append_conf('postgresql.conf', 'log_statement = none'); +$oldnode->append_conf('postgresql.conf', 'wal_level = replica'); +$oldnode->append_conf('postgresql.conf', 'max_wal_senders = 1'); $oldnode->start; my $result; diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out index a98c97f7616..d24ba40f5bf 100644 --- a/src/test/regress/expected/subscription.out +++ b/src/test/regress/expected/subscription.out @@ -441,6 +441,19 @@ HINT: To initiate replication, you must manually create the replication slot, e ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); DROP SUBSCRIPTION regress_testsub; +-- this test verifies that DROP SUBSCRIPTION returns an immediate error +-- and does not hang when it makes connection to a recently created database +-- whose caches are not yet initialized. +CREATE DATABASE regression_db; +CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regression_db' PUBLICATION testpub WITH (connect=false); +WARNING: subscription was created, but is not connected +HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. +DROP SUBSCRIPTION regress_testsub; +ERROR: could not drop replication slot "regress_testsub" on publisher: ERROR: replication slot "regress_testsub" does not exist +-- now set the slot_name to NONE and DROP SUBSCRIPTION, it should work +ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); +DROP SUBSCRIPTION regress_testsub; +DROP DATABASE regression_db; -- let's do some tests with pg_create_subscription rather than superuser SET SESSION AUTHORIZATION regress_subscription_user3; -- fail, not enough privileges diff --git a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql index f0f714fe747..22c7eb4e01a 100644 --- a/src/test/regress/sql/subscription.sql +++ b/src/test/regress/sql/subscription.sql @@ -298,6 +298,19 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); DROP SUBSCRIPTION regress_testsub; +-- this test verifies that DROP SUBSCRIPTION returns an immediate error +-- and does not hang when it makes connection to a recently created database +-- whose caches are not yet initialized. +CREATE DATABASE regression_db; +CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regression_db' PUBLICATION testpub WITH (connect=false); + +DROP SUBSCRIPTION regress_testsub; + +-- now set the slot_name to NONE and DROP SUBSCRIPTION, it should work +ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); +DROP SUBSCRIPTION regress_testsub; +DROP DATABASE regression_db; + -- let's do some tests with pg_create_subscription rather than superuser SET SESSION AUTHORIZATION regress_subscription_user3; -- 2.51.0.rc1.163.g2494970778-goog