From 72414a340b39bc814c4a77b5a6250bae4f85e079 Mon Sep 17 00:00:00 2001 From: Mikhail Nikalayeu Date: Sun, 12 Apr 2026 14:24:02 +0200 Subject: [PATCH v1] Prefer canceling blockers for deadlock-protected repack waits Teach the lock manager to treat selected waits as deadlock-protected, so REPACK CONCURRENTLY can preserve the work done before waiting for the final AccessExclusiveLock set. When deadlock_protected is set and a hard deadlock is found, cancel a blocking lock wait and let the protected backend continue waiting instead of making it the deadlock victim. Keep anti-wraparound autovacuum workers ineligible as cancellation targets, using a conditional ProcArrayLock check for the wraparound flag so deadlock detection does not block on LWLocks. Set this protection around repack's final lock acquisition and clear it through the normal wait/transaction cleanup paths. --- src/backend/commands/repack.c | 8 ++ src/backend/storage/ipc/procarray.c | 3 + src/backend/storage/lmgr/deadlock.c | 103 +++++++++++++- src/backend/storage/lmgr/lock.c | 18 ++- src/backend/storage/lmgr/proc.c | 63 +++++++-- src/include/storage/lock.h | 6 +- src/include/storage/proc.h | 9 +- .../injection_points/expected/repack.out | 130 +++++++++++++++++- .../injection_points/specs/repack.spec | 80 +++++++++++ 9 files changed, 399 insertions(+), 21 deletions(-) diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c index 58e3867246f..cee2aa49c06 100644 --- a/src/backend/commands/repack.c +++ b/src/backend/commands/repack.c @@ -3055,7 +3055,15 @@ rebuild_relation_finish_concurrent(Relation NewHeap, Relation OldHeap, /* * Acquire AccessExclusiveLock on the table, its TOAST relation (if there * is one), all its indexes, so that we can swap the files. + * + * Set deadlock_protected so that if a deadlock occurs while we are + * waiting for these locks, the deadlock detector cancels the blocking + * proc's lock wait instead of ours. 
At this point we have done the + * expensive data copy and index rebuild - aborting now would waste all + * that work. */ + deadlock_protected = true; + LockRelationOid(old_table_oid, AccessExclusiveLock); /* diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 9299bcebbda..1dbbae54017 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -714,6 +714,9 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) LWLockRelease(ProcArrayLock); } } + + /* Clear backend-local deadlock protection flag at end of transaction. */ + deadlock_protected = false; } /* diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c index b8962d875b6..7b53e2fb19a 100644 --- a/src/backend/storage/lmgr/deadlock.c +++ b/src/backend/storage/lmgr/deadlock.c @@ -125,6 +125,15 @@ static int maxPossibleConstraints; static DEADLOCK_INFO *deadlockDetails; static int nDeadlockDetails; +/* + * Per-edge parallel array to deadlockDetails[]: the waiting proc whose wait + * forms the edge at the corresponding index. Unlike deadlockDetails[], these + * pointers reference live PGPROCs and are only valid while all partition + * locks are held (i.e., during DeadLockCheck). Used by deadlock_protected + * resolution in DeadLockCheck to pick a cancellation victim. + */ +static PGPROC **deadlockProcs; + /* PGPROC pointer of any blocking autovacuum worker found */ static PGPROC *blocking_autovacuum_proc = NULL; @@ -148,11 +157,12 @@ InitDeadLockChecking(void) oldcxt = MemoryContextSwitchTo(TopMemoryContext); /* - * FindLockCycle needs at most MaxBackends entries in visitedProcs[] and - * deadlockDetails[]. + * FindLockCycle needs at most MaxBackends entries in visitedProcs[], + * deadlockDetails[], and deadlockProcs[]. 
*/ visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *)); deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO)); + deadlockProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *)); /* * TopoSort needs to consider at most MaxBackends wait-queue entries, and @@ -219,6 +229,9 @@ InitDeadLockChecking(void) DeadLockState DeadLockCheck(PGPROC *proc) { + bool victims_canceled = false; + +retry: /* Initialize to "no constraints" */ nCurConstraints = 0; nPossibleConstraints = 0; @@ -242,6 +255,76 @@ DeadLockCheck(PGPROC *proc) if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges)) elog(FATAL, "deadlock seems to have disappeared"); + /* + * If deadlock_protected is set, try to resolve the deadlock by + * canceling a blocking proc's lock wait, then restarting the whole + * deadlock check. We re-run DeadLockCheckRecurse (not just + * FindLockCycle) after each cancellation so that any remaining cycle + * also gets a chance to be resolved by the ordinary soft-deadlock + * queue-reordering path. + * + * The victim is chosen by walking deadlockProcs[] which FindLockCycle + * just populated with one live PGPROC pointer per edge in the cycle. + * Index 0 is always MyProc (we started the search from it), so we + * start at index 1, which is the direct blocker of MyProc. + * + * We skip any proc running an anti-wraparound autovacuum: canceling that + * would risk XID wraparound, which is strictly worse than losing the + * work the deadlock_protected caller is trying to preserve. XXX: is it true? + * + * If no acceptable victim exists, we fall through to DS_HARD_DEADLOCK + * and the protected proc cancels itself as usual. + */ + if (deadlock_protected) + { + PGPROC *victim = NULL; + + for (int i = 1; i < nDeadlockDetails; i++) + { + PGPROC *w = deadlockProcs[i]; + + if (w == MyProc) + continue; + + /* + * PROC_IS_AUTOVACUUM is safe to check locklessly because it is + * set at process start and never reset. 
For autovac workers, + * avoid waiting for ProcArrayLock here; if we can acquire it + * conditionally, check the mirrored wraparound flag to avoid + * canceling an emergency vacuum. If not, skip this candidate + * conservatively and keep looking for another victim. + */ + if (w->statusFlags & PROC_IS_AUTOVACUUM) + { + uint8 statusFlags; + + if (!LWLockConditionalAcquire(ProcArrayLock, LW_SHARED)) + continue; + + statusFlags = ProcGlobal->statusFlags[w->pgxactoff]; + LWLockRelease(ProcArrayLock); + + if (statusFlags & PROC_VACUUM_FOR_WRAPAROUND) + continue; + } + + Assert(w->waitLock != NULL); + Assert(!dlist_node_is_detached(&w->waitLink)); + victim = w; + break; + } + + if (victim != NULL) + { + RemoveFromWaitQueue(victim, + LockTagHashCode(&(victim->waitLock->tag)), + PROC_WAIT_STATUS_DEADLOCK_CANCEL); + SetLatch(&victim->procLatch); + victims_canceled = true; + goto retry; + } + } + return DS_HARD_DEADLOCK; /* cannot find a non-deadlocked state */ } @@ -272,8 +355,16 @@ DeadLockCheck(PGPROC *proc) ProcLockWakeup(GetLocksMethodTable(lock), lock); } - /* Return code tells caller if we had to escape a deadlock or not */ - if (nWaitOrders > 0) + /* + * Return code tells caller if we had to escape a deadlock or not. + * + * If we canceled any waiter because deadlock_protected was set, report + * DS_HARD_DEADLOCK_CANCELED regardless of whether the remainder was + * resolved by queue rearrangement or needed nothing further. 
+ */ + if (victims_canceled) + return DS_HARD_DEADLOCK_CANCELED; + else if (nWaitOrders > 0) return DS_SOFT_DEADLOCK; else if (blocking_autovacuum_proc != NULL) return DS_BLOCKED_BY_AUTOVACUUM; @@ -590,6 +681,7 @@ FindLockCycleRecurseMember(PGPROC *checkProc, info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; + deadlockProcs[depth] = checkProc; return true; } @@ -679,6 +771,7 @@ FindLockCycleRecurseMember(PGPROC *checkProc, info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; + deadlockProcs[depth] = checkProc; /* * Add this edge to the list of soft edges in the cycle @@ -753,6 +846,7 @@ FindLockCycleRecurseMember(PGPROC *checkProc, info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; + deadlockProcs[depth] = checkProc; /* * Add this edge to the list of soft edges in the cycle @@ -1159,4 +1253,5 @@ RememberSimpleDeadLock(PGPROC *proc1, info->lockmode = proc2->waitLockMode; info->pid = proc2->pid; nDeadlockDetails = 2; + deadlockProcs[0] = deadlockProcs[1] = NULL; } diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index c221fe96889..1a268f05965 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -1245,6 +1245,14 @@ LockAcquireExtended(const LOCKTAG *locktag, DeadLockReport(); /* DeadLockReport() will not return */ } + else if (waitResult == PROC_WAIT_STATUS_DEADLOCK_CANCEL) + { + Assert(!dontWait); + ereport(ERROR, + (errcode(ERRCODE_T_R_DEADLOCK_DETECTED), + errmsg("deadlock detected"), + errdetail("This backend's lock wait was canceled to resolve a deadlock with a protected process."))); + } } else LWLockRelease(partitionLock); @@ -2043,8 +2051,8 @@ waitonlock_error_callback(void *arg) /* * Remove a proc from the wait-queue it is on (caller must know it is on one). 
- * This is only used when the proc has failed to get the lock, so we set its - * waitStatus to PROC_WAIT_STATUS_ERROR. + * This is only used when the proc has failed to get the lock, so caller must + * specify the failure waitStatus to store. * * Appropriate partition lock must be held by caller. Also, caller is * responsible for signaling the proc if needed. @@ -2052,7 +2060,7 @@ waitonlock_error_callback(void *arg) * NB: this does not clean up any locallock object that may exist for the lock. */ void -RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode) +RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode, ProcWaitStatus waitStatus) { LOCK *waitLock = proc->waitLock; PROCLOCK *proclock = proc->waitProcLock; @@ -2065,6 +2073,8 @@ RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode) Assert(waitLock); Assert(!dclist_is_empty(&waitLock->waitProcs)); Assert(0 < lockmethodid && lockmethodid < lengthof(LockMethods)); + Assert(waitStatus == PROC_WAIT_STATUS_ERROR || + waitStatus == PROC_WAIT_STATUS_DEADLOCK_CANCEL); /* Remove proc from lock's wait queue */ dclist_delete_from_thoroughly(&waitLock->waitProcs, &proc->waitLink); @@ -2082,7 +2092,7 @@ RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode) /* Clean up the proc's own state, and pass it the ok/fail signal */ proc->waitLock = NULL; proc->waitProcLock = NULL; - proc->waitStatus = PROC_WAIT_STATUS_ERROR; + proc->waitStatus = waitStatus; /* * Delete the proclock immediately if it represents no already-held locks. diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 1ac25068d62..8afc247bebe 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -92,6 +92,9 @@ static size_t FastPathLockArrayShmemSize; /* Is a deadlock check pending? 
*/ static volatile sig_atomic_t got_deadlock_timeout; +/* See proc.h */ +bool deadlock_protected = false; + static void RemoveProcFromArray(int code, Datum arg); static void ProcKill(int code, Datum arg); static void AuxiliaryProcKill(int code, Datum arg); @@ -825,6 +828,8 @@ LockErrorCleanup(void) AbortStrongLockAcquire(); + deadlock_protected = false; + /* Nothing to do if we weren't waiting for a lock */ lockAwaited = GetAwaitedLock(); if (lockAwaited == NULL) @@ -854,7 +859,7 @@ LockErrorCleanup(void) if (!dlist_node_is_detached(&MyProc->waitLink)) { /* We could not have been granted the lock yet */ - RemoveFromWaitQueue(MyProc, lockAwaited->hashcode); + RemoveFromWaitQueue(MyProc, lockAwaited->hashcode, PROC_WAIT_STATUS_ERROR); } else { @@ -1266,8 +1271,16 @@ JoinWaitQueue(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait) /* * If we detected deadlock, give up without waiting. This must agree with * CheckDeadLock's recovery code. + * + * However, if deadlock_protected is set, skip the early exit and proceed + * to ProcSleep so the full deadlock detector can handle it (by making a + * blocking proc the victim instead of us). + * + * XXX: when we bypass the early-exit here, we lose the ability to react to + * the deadlock immediately: ProcSleep will only run the full detector + * after deadlock_timeout elapses. Should we fix that? 
*/ - if (early_deadlock) + if (early_deadlock && !deadlock_protected) return PROC_WAIT_STATUS_ERROR; /* @@ -1308,8 +1321,10 @@ JoinWaitQueue(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait) * * Result is one of the following: * - * PROC_WAIT_STATUS_OK - lock was granted - * PROC_WAIT_STATUS_ERROR - a deadlock was detected + * PROC_WAIT_STATUS_OK - lock was granted + * PROC_WAIT_STATUS_ERROR - a deadlock was detected + * PROC_WAIT_STATUS_DEADLOCK_CANCEL - this backend was chosen as the + * victim of a protected deadlock */ ProcWaitStatus ProcSleep(LOCALLOCK *locallock) @@ -1345,8 +1360,8 @@ ProcSleep(LOCALLOCK *locallock) /* * Set timer so we can wake up after awhile and check for a deadlock. If a * deadlock is detected, the handler sets MyProc->waitStatus = - * PROC_WAIT_STATUS_ERROR, allowing us to know that we must report failure - * rather than success. + * one of the failure wait statuses, allowing us to know that we must + * report failure rather than success. * * By delaying the check until we've waited for a bit, we can avoid * running the rather expensive deadlock-check code in most cases. @@ -1560,6 +1575,17 @@ ProcSleep(LOCALLOCK *locallock) allow_autovacuum_cancel = false; } + /* + * If the deadlock detector resolved a hard deadlock by canceling a + * blocking proc's lock wait (because we have deadlock_protected set), + * restart the deadlock timeout and continue waiting for the lock. + */ + if (deadlock_state == DS_HARD_DEADLOCK_CANCELED) + { + got_deadlock_timeout = false; + enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout); + } + /* * If awoken after the deadlock check interrupt has run, increment the * lock statistics counters and if log_lock_waits is on, then report @@ -1627,6 +1653,13 @@ ProcSleep(LOCALLOCK *locallock) "Processes holding the lock: %s. 
Wait queue: %s.", lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); } + else if (deadlock_state == DS_HARD_DEADLOCK_CANCELED) + ereport(LOG, + (errmsg("process %d resolved deadlock for %s on %s by canceling a blocking lock wait after %ld.%03d ms", + MyProcPid, modename, buf.data, msecs, usecs), + (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", + "Processes holding the lock: %s. Wait queue: %s.", + lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); if (myWaitStatus == PROC_WAIT_STATUS_WAITING) { @@ -1657,13 +1690,21 @@ ProcSleep(LOCALLOCK *locallock) ereport(LOG, (errmsg("process %d acquired %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs))); + else if (myWaitStatus == PROC_WAIT_STATUS_DEADLOCK_CANCEL) + ereport(LOG, + (errmsg("process %d was chosen as deadlock victim for %s on %s after %ld.%03d ms", + MyProcPid, modename, buf.data, msecs, usecs), + (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", + "Processes holding the lock: %s. Wait queue: %s.", + lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); else { Assert(myWaitStatus == PROC_WAIT_STATUS_ERROR); /* - * Currently, the deadlock checker always kicks its own - * process, which means that we'll only see + * A plain PROC_WAIT_STATUS_ERROR means the deadlock + * checker kicked our own process, which means that we'll + * only see * PROC_WAIT_STATUS_ERROR when deadlock_state == * DS_HARD_DEADLOCK, and there's no need to print * redundant messages. But for completeness and @@ -1870,14 +1911,16 @@ CheckDeadLock(void) * Get this process out of wait state. (Note: we could do this more * efficiently by relying on lockAwaited, but use this coding to * preserve the flexibility to kill some other transaction than the - * one detecting the deadlock.) + * one detecting the deadlock. 
DS_HARD_DEADLOCK_CANCELED above + * already exercises that flexibility: when deadlock_protected is set, + * we cancel a *blocking* proc's lock wait instead of our own.) * * RemoveFromWaitQueue sets MyProc->waitStatus to * PROC_WAIT_STATUS_ERROR, so ProcSleep will report an error after we * return. */ Assert(MyProc->waitLock != NULL); - RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag))); + RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)), PROC_WAIT_STATUS_ERROR); /* * We're done here. Transaction abort caused by the error that diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index ee3cb1dc203..5e13de529c5 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -29,6 +29,7 @@ /* struct PGPROC is declared in proc.h, but must forward-reference it */ typedef struct PGPROC PGPROC; +typedef enum ProcWaitStatus ProcWaitStatus; /* GUC variables */ extern PGDLLIMPORT int max_locks_per_xact; @@ -342,6 +343,9 @@ typedef enum { DS_NO_DEADLOCK, /* no deadlock detected */ DS_SOFT_DEADLOCK, /* deadlock avoided by queue rearrangement */ DS_HARD_DEADLOCK, /* deadlock, no way out but ERROR */ + DS_HARD_DEADLOCK_CANCELED, /* deadlock resolved by canceling a blocking + * proc's lock wait (when + * deadlock_protected is set) */ DS_BLOCKED_BY_AUTOVACUUM, /* no deadlock; queue blocked by autovacuum * worker */ } DeadLockState; @@ -418,7 +422,7 @@ extern void GrantAwaitedLock(void); extern LOCALLOCK *GetAwaitedLock(void); extern void ResetAwaitedLock(void); -extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode); +extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode, ProcWaitStatus waitStatus); extern LockData *GetLockStatusData(void); extern BlockedProcsData *GetBlockerStatusData(int blocked_pid); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 3e1d1fad5f9..ef02639834e 100644 @@ -146,11 +146,12 @@ 
extern PGDLLIMPORT int FastPathLockGroupsPerBackend; #define DELAY_CHKPT_COMPLETE (1<<1) #define DELAY_CHKPT_IN_COMMIT (DELAY_CHKPT_START | 1<<2) -typedef enum +typedef enum ProcWaitStatus { PROC_WAIT_STATUS_OK, PROC_WAIT_STATUS_WAITING, PROC_WAIT_STATUS_ERROR, + PROC_WAIT_STATUS_DEADLOCK_CANCEL, } ProcWaitStatus; /* @@ -543,6 +544,12 @@ extern PGDLLIMPORT int TransactionTimeout; extern PGDLLIMPORT int IdleSessionTimeout; extern PGDLLIMPORT bool log_lock_waits; +/* + * Backend-local flag: when true, the deadlock detector will make a blocking + * proc the victim instead of this backend. + */ +extern PGDLLIMPORT bool deadlock_protected; + #ifdef EXEC_BACKEND extern PGDLLIMPORT PGPROC *AuxiliaryProcs; #endif diff --git a/src/test/modules/injection_points/expected/repack.out b/src/test/modules/injection_points/expected/repack.out index b575e9052ee..ae706125360 100644 --- a/src/test/modules/injection_points/expected/repack.out +++ b/src/test/modules/injection_points/expected/repack.out @@ -1,4 +1,4 @@ -Parsed test spec with 2 sessions +Parsed test spec with 3 sessions starting permutation: wait_before_lock change_existing change_new change_subxact1 change_subxact2 check2 wakeup_before_lock check1 injection_points_attach @@ -111,3 +111,131 @@ injection_points_detach (1 row) + +starting permutation: wait_before_lock begin_txn lock_table s3_wakeup end_txn +injection_points_attach +----------------------- + +(1 row) + +step wait_before_lock: + REPACK (CONCURRENTLY) repack_test USING INDEX repack_test_pkey; + +step begin_txn: + BEGIN; + +step lock_table: + LOCK TABLE repack_test IN ACCESS EXCLUSIVE MODE; + +step s3_wakeup: + SELECT injection_points_wakeup('repack-concurrently-before-lock'); + +injection_points_wakeup +----------------------- + +(1 row) + +step wait_before_lock: <... completed> +step lock_table: <... 
completed> +step end_txn: + COMMIT; + +injection_points_detach +----------------------- + +(1 row) + + +starting permutation: s1_timeout_fast s2_timeout_slow wait_before_lock begin_and_read lock_table s3_wakeup end_txn +injection_points_attach +----------------------- + +(1 row) + +step s1_timeout_fast: + SET deadlock_timeout = '10ms'; + +step s2_timeout_slow: + SET deadlock_timeout = '10s'; + +step wait_before_lock: + REPACK (CONCURRENTLY) repack_test USING INDEX repack_test_pkey; + +step begin_and_read: + BEGIN; + SELECT 1 FROM repack_test LIMIT 1; + +?column? +-------- + 1 +(1 row) + +step lock_table: + LOCK TABLE repack_test IN ACCESS EXCLUSIVE MODE; + +step s3_wakeup: + SELECT injection_points_wakeup('repack-concurrently-before-lock'); + +injection_points_wakeup +----------------------- + +(1 row) + +step wait_before_lock: <... completed> +step lock_table: <... completed> +ERROR: deadlock detected +step end_txn: + COMMIT; + +injection_points_detach +----------------------- + +(1 row) + + +starting permutation: s1_timeout_slow s2_timeout_fast wait_before_lock begin_and_read lock_table s3_wakeup end_txn +injection_points_attach +----------------------- + +(1 row) + +step s1_timeout_slow: + SET deadlock_timeout = '10s'; + +step s2_timeout_fast: + SET deadlock_timeout = '10ms'; + +step wait_before_lock: + REPACK (CONCURRENTLY) repack_test USING INDEX repack_test_pkey; + +step begin_and_read: + BEGIN; + SELECT 1 FROM repack_test LIMIT 1; + +?column? +-------- + 1 +(1 row) + +step lock_table: + LOCK TABLE repack_test IN ACCESS EXCLUSIVE MODE; + +step s3_wakeup: + SELECT injection_points_wakeup('repack-concurrently-before-lock'); + +injection_points_wakeup +----------------------- + +(1 row) + +step lock_table: <... completed> +ERROR: deadlock detected +step wait_before_lock: <... 
completed> +step end_txn: + COMMIT; + +injection_points_detach +----------------------- + +(1 row) + diff --git a/src/test/modules/injection_points/specs/repack.spec b/src/test/modules/injection_points/specs/repack.spec index d727a9b056b..f95f6782355 100644 --- a/src/test/modules/injection_points/specs/repack.spec +++ b/src/test/modules/injection_points/specs/repack.spec @@ -28,6 +28,14 @@ setup SELECT injection_points_set_local(); SELECT injection_points_attach('repack-concurrently-before-lock', 'wait'); } +step s1_timeout_fast +{ + SET deadlock_timeout = '10ms'; +} +step s1_timeout_slow +{ + SET deadlock_timeout = '10s'; +} # Perform the initial load and wait for s2 to do some data changes. step wait_before_lock { @@ -61,6 +69,14 @@ teardown } session s2 +step s2_timeout_fast +{ + SET deadlock_timeout = '10ms'; +} +step s2_timeout_slow +{ + SET deadlock_timeout = '10s'; +} # Change the existing data. UPDATE changes both key and non-key columns. Also # update one row twice to test whether tuple version generated by this session # can be found. @@ -128,6 +144,30 @@ step wakeup_before_lock { SELECT injection_points_wakeup('repack-concurrently-before-lock'); } +# Steps used in lock contention tests. +step begin_txn +{ + BEGIN; +} +step begin_and_read +{ + BEGIN; + SELECT 1 FROM repack_test LIMIT 1; +} +step lock_table +{ + LOCK TABLE repack_test IN ACCESS EXCLUSIVE MODE; +} +step end_txn +{ + COMMIT; +} + +session s3 +step s3_wakeup +{ + SELECT injection_points_wakeup('repack-concurrently-before-lock'); +} # Test if data changes introduced while one session is performing REPACK # CONCURRENTLY find their way into the table. @@ -140,3 +180,43 @@ permutation check2 wakeup_before_lock check1 + +# Test the soft-deadlock path of REPACK CONCURRENTLY's deadlock_protected +# behavior. 
s2 is queued ahead of s1 (REPACK) waiting for AccessExclusiveLock +# but holds no lock on the table itself, so the cycle has only soft edges and +# the deadlock detector resolves it by reordering the wait queue. Both +# sessions complete successfully. +permutation + wait_before_lock + begin_txn + lock_table + s3_wakeup + end_txn + +# Test the hard-deadlock path of REPACK CONCURRENTLY's deadlock_protected +# behavior when REPACK's deadlock detector runs first. s2 holds +# AccessShareLock (from the SELECT) and then requests AccessExclusiveLock, +# creating a hard cycle with s1 (REPACK) that cannot be resolved by queue +# reordering. Because s1 has a much shorter deadlock_timeout and sets +# deadlock_protected before acquiring AccessExclusiveLock, its deadlock check +# runs first, cancels s2's lock wait, and lets REPACK complete. +permutation + s1_timeout_fast + s2_timeout_slow + wait_before_lock + begin_and_read + lock_table(wait_before_lock) + s3_wakeup + end_txn + +# Same hard-deadlock setup, but with s2's deadlock detector forced to run +# first. In this case s2 follows the ordinary hard-deadlock path and cancels +# itself before REPACK's protected deadlock detector has a chance to act. +permutation + s1_timeout_slow + s2_timeout_fast + wait_before_lock(lock_table) + begin_and_read + lock_table + s3_wakeup + end_txn -- 2.43.0