From 7e052a5f3aafa3a9ca1d7fc866c597001251bcc2 Mon Sep 17 00:00:00 2001 From: Mikhail Nikalayeu Date: Fri, 17 Apr 2026 02:09:40 +0200 Subject: [PATCH v2] Detect deadlocks involving declared future lock requests Introduce a mechanism for a backend to declare its intent to acquire a lock at a future point, before actually calling LockAcquire(). The deadlock detector treats each declaration as a hard waits-for edge from the declarer to current holders whose mode conflicts, and to already- queued waiters whose requests would be ordered before the future request. This makes it possible to detect cycles that the existing detector cannot see, because the declarer has not yet queued for its stronger lock. The motivating caller is REPACK (CONCURRENTLY), which holds a ShareUpdateExclusiveLock throughout and later needs AccessExclusiveLock to swap relfilenodes. A concurrent backend that takes an intermediate lock and waits for REPACK to finish can form a cycle that today is invisible until REPACK finally requests its strong lock. Declaring the future AccessExclusiveLock from RangeVarCallbackForRepack surfaces the edge immediately after the initial lookup. Implementation notes: * PGPROC gains a single FutureWaitLock slot (locktag + mode). Only the owning proc writes it; remote readers must hold the partition lock covering the tag. The invariant "slot set implies not yet really waiting for that (tag, mode)" is maintained by clearing the slot inside LockAcquireExtended() at the point the proc attaches its waitLink for that same request. InitProcess, ProcKill, LockErrorCleanup, and LockReleaseAll also clear the slot. * Fast-path holders of relation locks are invisible to the deadlock detector, so CheckDeadLock() now snapshots the currently declared fast-path-relevant future waits, bumps FastPathStrongRelationLocks counters for those tags, and transfers existing fast-path holders into the main lock table before walking the waits-for graph. A shared atomic count lets the common case (no active declarations) skip this work entirely. If a new fast-path-relevant declaration appears between the snapshot and the partition-locked walk, the check restarts with a fresh snapshot. * FindLockCycleRecurseFuture() follows the declared edge to current holders and to earlier queued waiters that JoinWaitQueue() would place ahead of the future request. These edges are not recorded as soft edges: the declarer is not in the wait queue, so reordering cannot break such a cycle. * DeadLockReport() emits "future deadlock detected" and a "will request ... (declared future intent)" detail line when the cycle involves a declared edge. SQLSTATE remains ERRCODE_T_R_DEADLOCK_DETECTED. New isolation permutations under src/test/modules/injection_points cover the no-deadlock case (waiter with no conflicting held lock), two two-backend future-deadlock cycles (held AccessShare + future AccessExclusive / ShareUpdateExclusive), and a three-backend cycle through an unrelated table. 
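For illustration, the intended calling pattern is roughly the following
minimal sketch (not part of the patch: the authoritative caller is
RangeVarCallbackForRepack in the diff below, and rel_oid is a
hypothetical stand-in for the relation OID the caller has resolved):

	LOCKTAG		tag;

	/* Publish the intent before taking the weaker, long-held lock. */
	SET_LOCKTAG_RELATION(tag, MyDatabaseId, rel_oid);
	LockDeclareFutureWait(&tag, AccessExclusiveLock);
	LockRelationOid(rel_oid, ShareUpdateExclusiveLock);

	/* ... long-running work under the weaker lock ... */

	/*
	 * LockAcquireExtended() clears the slot when we finally queue for
	 * (or are immediately granted) the declared lock; an explicit
	 * LockClearFutureWaitSlot() call is needed only to abandon the
	 * declaration early, and the error paths listed above clear it too.
	 */
	LockRelationOid(rel_oid, AccessExclusiveLock);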
--- src/backend/commands/repack.c | 4 +- src/backend/commands/tablecmds.c | 33 ++ src/backend/storage/lmgr/deadlock.c | 195 ++++++++- src/backend/storage/lmgr/lock.c | 410 ++++++++++++++++++ src/backend/storage/lmgr/proc.c | 36 ++ src/include/commands/tablecmds.h | 3 + src/include/storage/lmgr.h | 2 + src/include/storage/lock.h | 33 ++ src/include/storage/proc.h | 9 + .../injection_points/expected/repack.out | 181 +++++++- .../injection_points/specs/repack.spec | 118 +++++ 11 files changed, 1018 insertions(+), 6 deletions(-) diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c index 58e3867246f..8261c58daca 100644 --- a/src/backend/commands/repack.c +++ b/src/backend/commands/repack.c @@ -2326,8 +2326,8 @@ process_single_relation(RepackStmt *stmt, LOCKMODE lockmode, bool isTopLevel, tableOid = RangeVarGetRelidExtended(stmt->relation->relation, lockmode, 0, - RangeVarCallbackMaintainsTable, - NULL); + RangeVarCallbackForRepack, + params); rel = table_open(tableOid, NoLock); /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index eec09ba1ded..b08f57017e3 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -19761,6 +19761,39 @@ RangeVarCallbackMaintainsTable(const RangeVar *relation, relation->relname); } +/* + * Callback to RangeVarGetRelidExtended() for REPACK. For concurrent repack, + * declare the future AccessExclusiveLock before the caller locks the table + * with ShareUpdateExclusiveLock, so deadlock checks can see the pending + * request while the weaker lock is held or awaited. + */ +void +RangeVarCallbackForRepack(const RangeVar *relation, + Oid relId, Oid oldRelId, void *arg) +{ + ClusterParams *params = arg; + LOCKTAG locktag; + Oid dbid; + + RangeVarCallbackMaintainsTable(relation, relId, oldRelId, NULL); + + if ((params->options & CLUOPT_CONCURRENT) == 0) + return; + + if (relId == oldRelId) + return; + + if (OidIsValid(oldRelId)) + LockClearFutureWaitSlot(false); + + if (!OidIsValid(relId)) + return; + + dbid = IsSharedRelation(relId) ? InvalidOid : MyDatabaseId; + SET_LOCKTAG_RELATION(locktag, dbid, relId); + LockDeclareFutureWait(&locktag, AccessExclusiveLock); +} + /* * Callback to RangeVarGetRelidExtended() for TRUNCATE processing. */ diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c index b8962d875b6..a8ea9337e8d 100644 --- a/src/backend/storage/lmgr/deadlock.c +++ b/src/backend/storage/lmgr/deadlock.c @@ -74,6 +74,7 @@ typedef struct LOCKTAG locktag; /* ID of awaited lock object */ LOCKMODE lockmode; /* type of lock we're waiting for */ int pid; /* PID of blocked backend */ + bool is_future; /* was this a declared future-wait edge? */ } DEADLOCK_INFO; @@ -86,6 +87,11 @@ static bool FindLockCycleRecurse(PGPROC *checkProc, int depth, static bool FindLockCycleRecurseMember(PGPROC *checkProc, PGPROC *checkProcLeader, int depth, EDGE *softEdges, int *nSoftEdges); +static bool FindLockCycleRecurseWait(PGPROC *checkProc, + PGPROC *checkProcLeader, + int depth, EDGE *softEdges, int *nSoftEdges); +static bool FindLockCycleRecurseFuture(PGPROC *checkProc, + int depth, EDGE *softEdges, int *nSoftEdges); static bool ExpandConstraints(EDGE *constraints, int nConstraints); static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints, PGPROC **ordering); @@ -504,7 +510,8 @@ FindLockCycleRecurse(PGPROC *checkProc, * If the process is waiting, there is an outgoing waits-for edge to each * process that blocks it. 
*/ - if (!dlist_node_is_detached(&checkProc->waitLink) && + if ((!dlist_node_is_detached(&checkProc->waitLink) || + FutureWaitLockIsSet(&checkProc->futureWaitLock)) && FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges, nSoftEdges)) return true; @@ -538,6 +545,34 @@ FindLockCycleRecurseMember(PGPROC *checkProc, int depth, EDGE *softEdges, /* output argument */ int *nSoftEdges) /* output argument */ +{ + /* Follow outgoing edges from a real lock wait, if any. */ + if (!dlist_node_is_detached(&checkProc->waitLink) && + FindLockCycleRecurseWait(checkProc, checkProcLeader, depth, + softEdges, nSoftEdges)) + return true; + + /* + * A future-wait slot is only useful when another backend is already + * waiting and reaches this proc through the waits-for graph. Do not start + * from our own future slot: until we actually request that lock, there is + * no current wait to break. + */ + if (checkProc != MyProc && + checkProc == checkProcLeader && + FutureWaitLockIsSet(&checkProc->futureWaitLock) && + FindLockCycleRecurseFuture(checkProc, depth, softEdges, nSoftEdges)) + return true; + + return false; +} + +static bool +FindLockCycleRecurseWait(PGPROC *checkProc, + PGPROC *checkProcLeader, + int depth, + EDGE *softEdges, /* output argument */ + int *nSoftEdges) /* output argument */ { PGPROC *proc; LOCK *lock = checkProc->waitLock; @@ -590,6 +625,7 @@ FindLockCycleRecurseMember(PGPROC *checkProc, info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; + info->is_future = false; return true; } @@ -679,6 +715,7 @@ FindLockCycleRecurseMember(PGPROC *checkProc, info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; + info->is_future = false; /* * Add this edge to the list of soft edges in the cycle @@ -753,6 +790,7 @@ FindLockCycleRecurseMember(PGPROC *checkProc, info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; + info->is_future = false; /* * Add this edge to the list of soft edges in the cycle @@ -774,6 +812,149 @@ FindLockCycleRecurseMember(PGPROC *checkProc, return false; } +/* + * Follow a declared future-wait edge as a hard edge. + */ +static bool +FindLockCycleRecurseFuture(PGPROC *checkProc, + int depth, + EDGE *softEdges, /* output argument */ + int *nSoftEdges) /* output argument */ +{ + FutureWaitLock *futureWaitLock = &checkProc->futureWaitLock; + LOCK *lock; + LockMethod lockMethodTable; + uint32 hashcode; + int conflictMask; + LOCKMASK myHeldLocks = 0; + dlist_iter proclock_iter; + dlist_iter proc_iter; + + Assert(FutureWaitLockIsSet(futureWaitLock)); + Assert(checkProc != MyProc); + + /* + * Invariant maintained by LockAcquireExtended(): if a proc is really + * waiting on a lock, its future-wait slot (if any) must not describe the + * same (locktag, lockmode). The slot is cleared at the point we attach + * our waitLink for that exact lock and mode, so the wait walker's + * procLocks scan already subsumes anything this walker would find. + */ + Assert(dlist_node_is_detached(&checkProc->waitLink) || + checkProc->waitLockMode != futureWaitLock->mode || + memcmp(&checkProc->waitLock->tag, &futureWaitLock->locktag, + sizeof(LOCKTAG)) != 0); + + /* + * Look up the shared LOCK object for the declared tag. It is legal for + * the entry not to exist: the hash table only contains LOCK objects that + * currently have at least one holder or requester, and a future-wait + * declaration can be made before any proclock for that tag is created + * (e.g. 
RangeVarCallbackForRepack runs inside RangeVarGetRelidExtended + * *before* the caller has actually taken the initial SUE lock on the + * relation, and no other backend may have any lock on it either). With + * no LOCK object there are no current holders, hence no outgoing edges + * from this future-wait slot, so simply return. + */ + hashcode = LockTagHashCode(&futureWaitLock->locktag); + lock = LockHashLookup(&futureWaitLock->locktag, hashcode); + if (lock == NULL) + return false; + + lockMethodTable = GetLocksMethodTable(lock); + Assert(futureWaitLock->mode > 0 && + futureWaitLock->mode <= lockMethodTable->numLockModes); + conflictMask = lockMethodTable->conflictTab[futureWaitLock->mode]; + + /* + * First, follow edges to current holders of conflicting modes. These + * edges are the future-wait equivalent of the normal hard edges from a + * real waiter to current holders. + */ + dlist_foreach(proclock_iter, &lock->procLocks) + { + PROCLOCK *proclock = dlist_container(PROCLOCK, lockLink, proclock_iter.cur); + PGPROC *proc = proclock->tag.myProc; + PGPROC *leader; + + leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader; + + /* A proc never blocks itself or any other lock group member. */ + if (leader == checkProc) + { + myHeldLocks |= proclock->holdMask; + continue; + } + + if ((proclock->holdMask & conflictMask) != 0) + { + if (FindLockCycleRecurse(proc, depth + 1, + softEdges, nSoftEdges)) + { + DEADLOCK_INFO *info = &deadlockDetails[depth]; + + info->locktag = futureWaitLock->locktag; + info->lockmode = futureWaitLock->mode; + info->pid = checkProc->pid; + info->is_future = true; + + return true; + } + } + } + + /* + * Existing waiters can also become blockers for the future request. A + * real LockAcquireExtended() first checks lock->waitMask, and if there + * are conflicting waiters it calls JoinWaitQueue(). Model the queue + * position JoinWaitQueue() would choose today, and follow only queued + * requests that would be ahead of that position. + * + * Do not record these as soft edges. The future requester is not present + * in lock->waitProcs, so the deadlock detector cannot repair such an edge + * by reordering the current wait queue. + */ + dclist_foreach(proc_iter, &lock->waitProcs) + { + PGPROC *proc = dlist_container(PGPROC, waitLink, proc_iter.cur); + PGPROC *leader; + LOCKMODE waitLockMode = proc->waitLockMode; + + leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader; + + if (leader == checkProc) + continue; + + /* + * If checkProc already holds locks that conflict with this waiter's + * request, JoinWaitQueue() would insert the future request before + * this waiter. We are done scanning the queue after considering + * earlier waiters. 
+		 */
+		if (myHeldLocks != 0 &&
+			(lockMethodTable->conflictTab[waitLockMode] & myHeldLocks) != 0)
+			break;
+
+		if ((LOCKBIT_ON(waitLockMode) & conflictMask) != 0)
+		{
+			if (FindLockCycleRecurse(proc, depth + 1,
+									 softEdges, nSoftEdges))
+			{
+				DEADLOCK_INFO *info = &deadlockDetails[depth];
+
+				info->locktag = futureWaitLock->locktag;
+				info->lockmode = futureWaitLock->mode;
+				info->pid = checkProc->pid;
+				info->is_future = true;
+
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
 /*
  * ExpandConstraints -- expand a list of constraints into a set of
@@ -1078,6 +1259,7 @@ DeadLockReport(void)
 	StringInfoData logbuf;		/* errdetail for server log */
 	StringInfoData locktagbuf;
 	int			i;
+	bool		any_future = false;
 
 	initStringInfo(&clientbuf);
 	initStringInfo(&logbuf);
@@ -1099,12 +1281,15 @@ DeadLockReport(void)
 		resetStringInfo(&locktagbuf);
 		DescribeLockTag(&locktagbuf, &info->locktag);
+		any_future |= info->is_future;
 
 		if (i > 0)
 			appendStringInfoChar(&clientbuf, '\n');
 
 		appendStringInfo(&clientbuf,
-						 _("Process %d waits for %s on %s; blocked by process %d."),
+						 info->is_future
+						 ? _("Process %d will request %s on %s (declared future intent); blocked by process %d.")
+						 : _("Process %d waits for %s on %s; blocked by process %d."),
 						 info->pid,
 						 GetLockmodeName(info->locktag.locktag_lockmethodid,
 										 info->lockmode),
@@ -1132,7 +1317,9 @@ DeadLockReport(void)
 
 	ereport(ERROR,
 			(errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
-			 errmsg("deadlock detected"),
+			 any_future
+			 ? errmsg("future deadlock detected")
+			 : errmsg("deadlock detected"),
 			 errdetail_internal("%s", clientbuf.data),
 			 errdetail_log("%s", logbuf.data),
 			 errhint("See server log for query details.")));
@@ -1154,9 +1341,11 @@ RememberSimpleDeadLock(PGPROC *proc1,
 	info->locktag = lock->tag;
 	info->lockmode = lockmode;
 	info->pid = proc1->pid;
+	info->is_future = false;
 	info++;
 	info->locktag = proc2->waitLock->tag;
 	info->lockmode = proc2->waitLockMode;
 	info->pid = proc2->pid;
+	info->is_future = false;
 	nDeadlockDetails = 2;
 }
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index c221fe96889..12107e07bec 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -314,6 +314,19 @@ typedef struct
 
 static volatile FastPathStrongRelationLockData *FastPathStrongRelationLocks;
 
+/*
+ * Count of active future-wait declarations that could conflict with relation
+ * fast-path locks. This is only a skip hint for CheckDeadLock(); the actual
+ * future-wait locktags are read from PGPROC.futureWaitLock while all lock
+ * partitions are held.
+ */
+typedef struct FutureWaitFastPathCtlData
+{
+	pg_atomic_uint32 count;
+} FutureWaitFastPathCtlData;
+
+static FutureWaitFastPathCtlData *FutureWaitFastPathCtl = NULL;
+
 static void LockManagerShmemRequest(void *arg);
 static void LockManagerShmemInit(void *arg);
 
@@ -488,12 +501,18 @@ LockManagerShmemRequest(void *arg)
 					  .size = sizeof(FastPathStrongRelationLockData),
 					  .ptr = (void **) (void *) &FastPathStrongRelationLocks,
 		);
+
+	ShmemRequestStruct(.name = "Future Wait Fast Path Control",
+					   .size = sizeof(FutureWaitFastPathCtlData),
+					   .ptr = (void **) &FutureWaitFastPathCtl,
+		);
 }
 
 static void
 LockManagerShmemInit(void *arg)
 {
 	SpinLockInit(&FastPathStrongRelationLocks->mutex);
+	pg_atomic_init_u32(&FutureWaitFastPathCtl->count, 0);
 }
 
 /*
@@ -628,6 +647,374 @@ DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2)
 	return false;
 }
 
+/*
+ * LockHashLookup -- look up a LOCK by locktag and precomputed hashcode.
+ *
+ * Returns NULL if no LOCK exists for this tag.
+ * + * Callers MUST hold the partition lock covering hashcode in LW_EXCLUSIVE + * mode, and MUST treat the returned pointer as valid only while that + * partition lock remains held: the LOCK entry can be removed from the hash + * table by LockRelease() under the same partition lock, so dropping the + * lock invalidates the pointer. + * + * Today the only caller is the deadlock detector, which satisfies both + * requirements by acquiring every lock-manager partition lock exclusively + * before walking the wait-for graph, and uses the result to resolve a + * declared future-wait edge's locktag into a live LOCK *. New callers + * from outside that context should be reviewed carefully. + */ +LOCK * +LockHashLookup(const LOCKTAG *locktag, uint32 hashcode) +{ + Assert(LWLockHeldByMeInMode(LockHashPartitionLock(hashcode), + LW_EXCLUSIVE)); + + return (LOCK *) hash_search_with_hash_value(LockMethodLockHash, + locktag, + hashcode, + HASH_FIND, + NULL); +} + +static inline bool +FutureWaitLockMatches(const FutureWaitLock *futureWaitLock, + const LOCKTAG *locktag, LOCKMODE lockmode) +{ + return FutureWaitLockIsSet(futureWaitLock) && + futureWaitLock->mode == lockmode && + memcmp(&futureWaitLock->locktag, locktag, sizeof(LOCKTAG)) == 0; +} + +static inline bool +FutureWaitNeedsFastPathMigration(const FutureWaitLock *futureWaitLock) +{ + return FutureWaitLockIsSet(futureWaitLock) && + ConflictsWithRelationFastPath(&futureWaitLock->locktag, + futureWaitLock->mode); +} + +/* + * LockClearFutureWaitSlotIfMatch -- clear our future-wait slot if it matches + * a lock that we have now acquired or started waiting for. + */ +static void +LockClearFutureWaitSlotIfMatch(const LOCKTAG *locktag, LOCKMODE lockmode, + bool partitionLockHeld) +{ + if (!FutureWaitLockMatches(&MyProc->futureWaitLock, locktag, lockmode)) + return; + + LockClearFutureWaitSlot(partitionLockHeld); +} + +/* + * LockDeclareFutureWait -- publish a hard future waits-for edge. + * + * The caller declares that this backend intends to request lockmode on + * locktag later. The declaration becomes visible to deadlock detection + * before the real lock acquisition happens. + * + * Only one future-wait slot is supported per backend, and parallel workers + * (non-leader lock-group members) cannot declare a future wait. REPACK + * CONCURRENTLY is the only current caller and satisfies both constraints. + */ +void +LockDeclareFutureWait(const LOCKTAG *locktag, LOCKMODE lockmode) +{ + uint32 hashcode; + LWLock *partitionLock; + + Assert(MyProc != NULL); + Assert(MyProc->lockGroupLeader == NULL || + MyProc->lockGroupLeader == MyProc); + Assert(!LockHeldByMe(locktag, lockmode, true)); + Assert(FutureWaitLockIsEmpty(&MyProc->futureWaitLock)); + + hashcode = LockTagHashCode(locktag); + partitionLock = LockHashPartitionLock(hashcode); + + /* + * Bump the fast-path migration hint before publishing the slot. A + * concurrent CheckDeadLock() may see the hint before the slot is visible, + * which is harmless; the reverse ordering would allow it to observe a + * future edge without migrating fast-path holders for that relation. + */ + if (ConflictsWithRelationFastPath(locktag, lockmode)) + pg_atomic_fetch_add_u32(&FutureWaitFastPathCtl->count, 1); + + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + + MyProc->futureWaitLock.locktag = *locktag; + MyProc->futureWaitLock.mode = lockmode; + + LWLockRelease(partitionLock); +} + +/* + * LockClearFutureWaitSlot -- clear this backend's declared future wait, if any. 
+ */ +void +LockClearFutureWaitSlot(bool partitionLockHeld) +{ + LWLock *partitionLock; + LOCKTAG clearedTag; + LOCKMODE clearedMode; + bool cleared = false; + + if (MyProc == NULL || FutureWaitLockIsEmpty(&MyProc->futureWaitLock)) + return; + + partitionLock = + LockHashPartitionLock(LockTagHashCode(&MyProc->futureWaitLock.locktag)); + if (!partitionLockHeld) + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + else + Assert(LWLockHeldByMeInMode(partitionLock, LW_EXCLUSIVE)); + + if (FutureWaitLockIsSet(&MyProc->futureWaitLock)) + { + clearedTag = MyProc->futureWaitLock.locktag; + clearedMode = MyProc->futureWaitLock.mode; + MemSet(&MyProc->futureWaitLock, 0, sizeof(FutureWaitLock)); + cleared = true; + } + + if (!partitionLockHeld) + LWLockRelease(partitionLock); + + if (cleared && ConflictsWithRelationFastPath(&clearedTag, clearedMode)) + { + Assert(pg_atomic_read_u32(&FutureWaitFastPathCtl->count) > 0); + pg_atomic_fetch_sub_u32(&FutureWaitFastPathCtl->count, 1); + } +} + +/* + * FutureWaitLocktagSnapshotContains -- linear membership test over a + * locktag snapshot. + * + * n is bounded by the number of declared future waits (effectively the + * number of concurrent REPACK CONCURRENTLY commands), which in practice + * is 0 or 1, so a linear scan is cheaper than building a hash or keeping + * the array sorted. + */ +static bool +FutureWaitLocktagSnapshotContains(LOCKTAG *locktags, int n, + const LOCKTAG *locktag) +{ + for (int i = 0; i < n; i++) + { + if (memcmp(&locktags[i], locktag, sizeof(LOCKTAG)) == 0) + return true; + } + + return false; +} + +/* + * SnapshotFutureWaitFastPathLocks -- copy the current fast-path-relevant + * future-wait tags while all lock partitions are held. + * + * The caller holds every lock-manager partition lock exclusively, so this + * function must not take any LWLock. palloc is safe here because the + * memory context machinery does not take LWLocks; a palloc failure would + * ereport out of the deadlock check but not deadlock against itself. + */ +static LOCKTAG * +SnapshotFutureWaitFastPathLocks(int *out_count) +{ + LOCKTAG *locktags; + int n = 0; + + *out_count = 0; + + locktags = palloc_array(LOCKTAG, ProcGlobal->allProcCount); + + for (int i = 0; i < ProcGlobal->allProcCount; i++) + { + PGPROC *proc = GetPGProcByNumber(i); + FutureWaitLock *futureWaitLock = &proc->futureWaitLock; + + if (!FutureWaitNeedsFastPathMigration(futureWaitLock)) + continue; + + Assert(LWLockHeldByMeInMode(LockHashPartitionLock( + LockTagHashCode(&futureWaitLock->locktag)), + LW_EXCLUSIVE)); + + if (!FutureWaitLocktagSnapshotContains(locktags, n, + &futureWaitLock->locktag)) + { + Assert(n < ProcGlobal->allProcCount); + locktags[n++] = futureWaitLock->locktag; + } + } + + if (n == 0) + { + pfree(locktags); + return NULL; + } + + *out_count = n; + return locktags; +} + +/* + * MigrateFutureWaitFastPathLocks -- make fast-path holders visible to the + * deadlock detector for currently declared future waits. + * + * Future-wait declarations do not take a real strong lock yet, so they do not + * trigger the normal fast-path transfer performed by LockAcquireExtended(). + * This helper takes a snapshot of current fast-path-relevant future waits, + * installs temporary strong-lock counter bumps for those relation hash + * partitions, and transfers existing fast-path holders of those relations + * into the main lock table. + * + * The returned snapshot must remain active until CheckDeadLock() has finished + * its partition-locked graph walk. 
New fast-path holders cannot appear for + * migrated tags while the temporary strong-lock counter bumps are active. If + * a new future-wait tag appears after this snapshot, CheckDeadLock() detects + * that with FutureWaitFastPathSnapshotCoversCurrentLocks() and retries. + */ +LOCKTAG * +MigrateFutureWaitFastPathLocks(int *out_count) +{ + LockMethod lockMethodTable = LockMethods[DEFAULT_LOCKMETHOD]; + LOCKTAG *locktags; + int n; + + *out_count = 0; + + /* Cheap path: no active declaration can involve fast-path holders. */ + if (pg_atomic_read_u32(&FutureWaitFastPathCtl->count) == 0) + return NULL; + + for (int i = 0; i < NUM_LOCK_PARTITIONS; i++) + LWLockAcquire(LockHashPartitionLockByIndex(i), LW_EXCLUSIVE); + + locktags = SnapshotFutureWaitFastPathLocks(&n); + + for (int i = NUM_LOCK_PARTITIONS; --i >= 0;) + LWLockRelease(LockHashPartitionLockByIndex(i)); + + if (locktags == NULL) + return NULL; + + /* + * Block new fast-path acquisitions for this snapshot before transferring + * existing holders to the main table. + */ + SpinLockAcquire(&FastPathStrongRelationLocks->mutex); + for (int i = 0; i < n; i++) + { + uint32 hashcode = LockTagHashCode(&locktags[i]); + uint32 fasthashcode = FastPathStrongLockHashPartition(hashcode); + + FastPathStrongRelationLocks->count[fasthashcode]++; + } + SpinLockRelease(&FastPathStrongRelationLocks->mutex); + + for (int i = 0; i < n; i++) + { + uint32 hashcode = LockTagHashCode(&locktags[i]); + + if (!FastPathTransferRelationLocks(lockMethodTable, &locktags[i], + hashcode)) + { + SpinLockAcquire(&FastPathStrongRelationLocks->mutex); + for (int j = 0; j < n; j++) + { + uint32 restorehashcode = LockTagHashCode(&locktags[j]); + uint32 restorefasthashcode = + FastPathStrongLockHashPartition(restorehashcode); + + Assert(FastPathStrongRelationLocks->count[restorefasthashcode] + > 0); + FastPathStrongRelationLocks->count[restorefasthashcode]--; + } + SpinLockRelease(&FastPathStrongRelationLocks->mutex); + pfree(locktags); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"), + errhint("You might need to increase \"%s\".", "max_locks_per_transaction"))); + } + } + + *out_count = n; + return locktags; +} + +/* + * FutureWaitFastPathSnapshotCoversCurrentLocks -- does the active snapshot + * cover all fast-path-relevant future waits currently visible to the deadlock + * detector? + * + * Caller must hold all lock partitions. If this returns false, a declaration + * raced with the previous snapshot and CheckDeadLock() must retry migration + * before walking the waits-for graph. + */ +bool +FutureWaitFastPathSnapshotCoversCurrentLocks(LOCKTAG *locktags, int n) +{ + /* + * With no snapshot and no in-progress fast-path-relevant declaration, + * there cannot be anything to validate. If the counter is nonzero, still + * scan: a declaration may have incremented the counter before publishing + * its slot, causing the snapshot to be empty. 
+ */ + if (locktags == NULL && + pg_atomic_read_u32(&FutureWaitFastPathCtl->count) == 0) + return true; + + for (int i = 0; i < ProcGlobal->allProcCount; i++) + { + PGPROC *proc = GetPGProcByNumber(i); + FutureWaitLock *futureWaitLock = &proc->futureWaitLock; + + if (!FutureWaitNeedsFastPathMigration(futureWaitLock)) + continue; + + Assert(LWLockHeldByMeInMode(LockHashPartitionLock( + LockTagHashCode(&futureWaitLock->locktag)), + LW_EXCLUSIVE)); + + if (!FutureWaitLocktagSnapshotContains(locktags, n, + &futureWaitLock->locktag)) + return false; + } + + return true; +} + +/* + * RestoreFutureWaitFastPathSnapshot -- undo the fast-path strong-lock counter + * bumps for a snapshot returned by MigrateFutureWaitFastPathLocks(). + * + * Must be called after CheckDeadLock() has released all partition locks. + */ +void +RestoreFutureWaitFastPathSnapshot(LOCKTAG *locktags, int n) +{ + if (locktags == NULL) + return; + + SpinLockAcquire(&FastPathStrongRelationLocks->mutex); + for (int i = 0; i < n; i++) + { + uint32 hashcode = LockTagHashCode(&locktags[i]); + uint32 fasthashcode = FastPathStrongLockHashPartition(hashcode); + + Assert(FastPathStrongRelationLocks->count[fasthashcode] > 0); + FastPathStrongRelationLocks->count[fasthashcode]--; + } + SpinLockRelease(&FastPathStrongRelationLocks->mutex); + + pfree(locktags); +} + /* * LockHeldByMe -- test whether lock 'locktag' is held by the current * transaction @@ -937,6 +1324,8 @@ LockAcquireExtended(const LOCKTAG *locktag, */ if (locallock->nLocks > 0) { + Assert(!FutureWaitLockMatches(&MyProc->futureWaitLock, locktag, + lockmode)); GrantLockLocal(locallock, owner); if (locallock->lockCleared) return LOCKACQUIRE_ALREADY_CLEAR; @@ -1012,6 +1401,7 @@ LockAcquireExtended(const LOCKTAG *locktag, */ locallock->lock = NULL; locallock->proclock = NULL; + LockClearFutureWaitSlotIfMatch(locktag, lockmode, false); GrantLockLocal(locallock, owner); return LOCKACQUIRE_OK; } @@ -1225,6 +1615,20 @@ LockAcquireExtended(const LOCKTAG *locktag, Assert(!dontWait); PROCLOCK_PRINT("LockAcquire: sleeping on lock", proclock); LOCK_PRINT("LockAcquire: sleeping on lock", lock, lockmode); + + /* + * We have attached waitLink and are about to sleep on exactly the + * (locktag, lockmode) we had declared as a future wait. Remote + * walkers will now see us as a real waiter on this lock, and + * FindLockCycleRecurseWait() will scan lock->procLocks with the same + * conflictMask the future walker would have used. Clearing the + * future slot now, while we still hold the partition lock, keeps + * the invariant "future slot set means not yet really waiting for the + * same lock and mode" and removes duplicate work from the deadlock + * detector. + */ + LockClearFutureWaitSlotIfMatch(locktag, lockmode, true); + LWLockRelease(partitionLock); waitResult = WaitOnLock(locallock, owner); @@ -1247,7 +1651,10 @@ LockAcquireExtended(const LOCKTAG *locktag, } } else + { + LockClearFutureWaitSlotIfMatch(locktag, lockmode, true); LWLockRelease(partitionLock); + } Assert(waitResult == PROC_WAIT_STATUS_OK); /* The lock was granted to us. 
Update the local lock entry accordingly */
@@ -2328,6 +2735,9 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
 	lockMethodTable = LockMethods[lockmethodid];
 
+	if (lockmethodid == DEFAULT_LOCKMETHOD)
+		LockClearFutureWaitSlot(false);
+
 #ifdef LOCK_DEBUG
 	if (*(lockMethodTable->trace_flag))
 		elog(LOG, "LockReleaseAll: lockmethod=%d", lockmethodid);
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 1ac25068d62..b6b9de2d51c 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -494,6 +494,7 @@ InitProcess(void)
 	MyProc->waitLock = NULL;
 	dlist_node_init(&MyProc->waitLink);
 	MyProc->waitProcLock = NULL;
+	MemSet(&MyProc->futureWaitLock, 0, sizeof(FutureWaitLock));
 	pg_atomic_write_u64(&MyProc->waitStart, 0);
 #ifdef USE_ASSERT_CHECKING
 	{
@@ -691,6 +692,7 @@ InitAuxiliaryProcess(void)
 	MyProc->waitLock = NULL;
 	dlist_node_init(&MyProc->waitLink);
 	MyProc->waitProcLock = NULL;
+	MemSet(&MyProc->futureWaitLock, 0, sizeof(FutureWaitLock));
 	pg_atomic_write_u64(&MyProc->waitStart, 0);
 #ifdef USE_ASSERT_CHECKING
 	{
@@ -824,6 +826,7 @@ LockErrorCleanup(void)
 	HOLD_INTERRUPTS();
 
 	AbortStrongLockAcquire();
+	LockClearFutureWaitSlot(false);
 
 	/* Nothing to do if we weren't waiting for a lock */
 	lockAwaited = GetAwaitedLock();
@@ -934,6 +937,7 @@ ProcKill(int code, Datum arg)
 
 	/* Make sure we're out of the sync rep lists */
 	SyncRepCleanupAtProcExit();
+	LockClearFutureWaitSlot(false);
 
 #ifdef USE_ASSERT_CHECKING
 	{
@@ -1824,6 +1828,17 @@ CheckDeadLock(void)
 {
 	int			i;
 	DeadLockState result;
+	LOCKTAG    *futureLocks;
+	int			futureLockCount = 0;
+
+retry:
+	/*
+	 * Migrate fast-path holders for the relations named by currently
+	 * declared future waits. The helper takes and releases the lock
+	 * partition locks internally while taking its snapshot, so it must
+	 * run before this routine freezes the lock table for the deadlock check.
+	 */
+	futureLocks = MigrateFutureWaitFastPathLocks(&futureLockCount);
 
 	/*
 	 * Acquire exclusive lock on the entire shared lock data structures. Must
@@ -1838,6 +1853,25 @@ CheckDeadLock(void)
 	 */
 	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
 		LWLockAcquire(LockHashPartitionLockByIndex(i), LW_EXCLUSIVE);
+	/*
+	 * A future-wait declaration can appear after the migration snapshot but
+	 * before this partition-locked graph walk. If so, release the temporary
+	 * strong-lock counts and retry, so the newly visible future edge cannot
+	 * miss holders that are still in fast-path arrays.
+	 *
+	 * In theory a stream of concurrent declarations could force repeated
+	 * retries, but in practice future-wait declarations are issued only by
+	 * REPACK CONCURRENTLY and are rare, so the loop terminates quickly.
+	 */
+	if (!FutureWaitFastPathSnapshotCoversCurrentLocks(futureLocks,
+													  futureLockCount))
+	{
+		for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
+			LWLockRelease(LockHashPartitionLockByIndex(i));
+		RestoreFutureWaitFastPathSnapshot(futureLocks, futureLockCount);
+		goto retry;
+	}
+
 	/*
 	 * Check to see if we've been awoken by anyone in the interim.
* @@ -1902,6 +1936,8 @@ check_done: for (i = NUM_LOCK_PARTITIONS; --i >= 0;) LWLockRelease(LockHashPartitionLockByIndex(i)); + RestoreFutureWaitFastPathSnapshot(futureLocks, futureLockCount); + return result; } diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index c3d8518cb62..648435eb618 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -102,6 +102,9 @@ extern void AtEOSubXact_on_commit_actions(bool isCommit, extern void RangeVarCallbackMaintainsTable(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg); +extern void RangeVarCallbackForRepack(const RangeVar *relation, + Oid relId, Oid oldRelId, + void *arg); extern void RangeVarCallbackOwnsRelation(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg); diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 2a985ce5e15..b301edafb31 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -51,6 +51,8 @@ extern bool CheckRelationLockedByMe(Relation relation, LOCKMODE lockmode, extern bool CheckRelationOidLockedByMe(Oid relid, LOCKMODE lockmode, bool orstronger); extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode); +extern void LockDeclareFutureWait(const LOCKTAG *locktag, LOCKMODE lockmode); +extern void LockClearFutureWaitSlot(bool partitionLockHeld); extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index ee3cb1dc203..dca5a3d33d6 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -275,6 +275,30 @@ typedef struct LOCALLOCK #define LOCALLOCK_LOCKTAG(llock) ((LockTagType) (llock).tag.lock.locktag_type) +/* + * Declared "future wait" for a lock the backend intends to acquire later. + * + * A process may publish its intent to acquire a particular lock mode on a + * particular locktag before actually calling LockAcquire(). The deadlock + * detector treats a populated slot as a hard waits-for edge from the + * declarer to every current holder whose mode conflicts with .mode. The + * slot is empty when locktag.locktag_lockmethodid == 0. + * + * Grouped in a struct so a future change can turn the slot into a short + * list without touching every reader. + */ +typedef struct FutureWaitLock +{ + LOCKTAG locktag; /* locktag_lockmethodid == 0 when empty */ + LOCKMODE mode; /* mode the proc will eventually request */ +} FutureWaitLock; + +#define FutureWaitLockIsSet(futureWaitLock) \ + ((futureWaitLock)->locktag.locktag_lockmethodid != 0) +#define FutureWaitLockIsEmpty(futureWaitLock) \ + (!FutureWaitLockIsSet(futureWaitLock)) + + /* * These structures hold information passed from lmgr internals to the lock * listing user-level functions (in lockfuncs.c). @@ -419,6 +443,15 @@ extern LOCALLOCK *GetAwaitedLock(void); extern void ResetAwaitedLock(void); extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode); + +extern LOCK *LockHashLookup(const LOCKTAG *locktag, uint32 hashcode); + +/* Future-wait fast-path migration, called by CheckDeadLock(). 
*/ +extern LOCKTAG *MigrateFutureWaitFastPathLocks(int *out_count); +extern bool FutureWaitFastPathSnapshotCoversCurrentLocks(LOCKTAG *locktags, + int n); +extern void RestoreFutureWaitFastPathSnapshot(LOCKTAG *locktags, int n); + extern LockData *GetLockStatusData(void); extern BlockedProcsData *GetBlockerStatusData(int blocked_pid); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 3e1d1fad5f9..66bfce6f8b4 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -314,6 +314,15 @@ typedef struct PGPROC LOCKMASK heldLocks; /* bitmask for lock types already held on this * lock object by this backend */ + /* + * Declared future-wait slot: a lock this proc intends to acquire later. + * Empty when .locktag.locktag_lockmethodid == 0. Publishing, clearing, + * and remote reads are protected by the partition lock of .locktag + * (deadlock walkers hold all partition locks and can safely inspect the + * slot). The owning backend may inspect its own slot locklessly. + */ + FutureWaitLock futureWaitLock; + pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition * started */ diff --git a/src/test/modules/injection_points/expected/repack.out b/src/test/modules/injection_points/expected/repack.out index b575e9052ee..22c051f702e 100644 --- a/src/test/modules/injection_points/expected/repack.out +++ b/src/test/modules/injection_points/expected/repack.out @@ -1,4 +1,4 @@ -Parsed test spec with 2 sessions +Parsed test spec with 4 sessions starting permutation: wait_before_lock change_existing change_new change_subxact1 change_subxact2 check2 wakeup_before_lock check1 injection_points_attach @@ -111,3 +111,182 @@ injection_points_detach (1 row) + +starting permutation: wait_before_lock begin_txn lock_table_ae s3_wakeup end_txn +injection_points_attach +----------------------- + +(1 row) + +step wait_before_lock: + REPACK (CONCURRENTLY) repack_test USING INDEX repack_test_pkey; + +step begin_txn: + BEGIN; + +step lock_table_ae: + LOCK TABLE repack_test IN ACCESS EXCLUSIVE MODE; + +step s3_wakeup: + SELECT injection_points_wakeup('repack-concurrently-before-lock'); + +injection_points_wakeup +----------------------- + +(1 row) + +step wait_before_lock: <... completed> +step lock_table_ae: <... completed> +step end_txn: + COMMIT; + +injection_points_detach +----------------------- + +(1 row) + + +starting permutation: s2_timeout_fast wait_before_lock begin_and_read lock_table_ae end_txn s3_wakeup +injection_points_attach +----------------------- + +(1 row) + +step s2_timeout_fast: + SET deadlock_timeout = '10ms'; + +step wait_before_lock: + REPACK (CONCURRENTLY) repack_test USING INDEX repack_test_pkey; + +step begin_and_read: + BEGIN; + SELECT 1 FROM repack_test LIMIT 1; + +?column? +-------- + 1 +(1 row) + +step lock_table_ae: + LOCK TABLE repack_test IN ACCESS EXCLUSIVE MODE; + +step lock_table_ae: <... completed> +ERROR: future deadlock detected +step end_txn: + COMMIT; + +step s3_wakeup: + SELECT injection_points_wakeup('repack-concurrently-before-lock'); + +injection_points_wakeup +----------------------- + +(1 row) + +step wait_before_lock: <... 
completed> +injection_points_detach +----------------------- + +(1 row) + + +starting permutation: s2_timeout_fast wait_before_lock begin_and_read lock_table_sue end_txn s3_wakeup +injection_points_attach +----------------------- + +(1 row) + +step s2_timeout_fast: + SET deadlock_timeout = '10ms'; + +step wait_before_lock: + REPACK (CONCURRENTLY) repack_test USING INDEX repack_test_pkey; + +step begin_and_read: + BEGIN; + SELECT 1 FROM repack_test LIMIT 1; + +?column? +-------- + 1 +(1 row) + +step lock_table_sue: + LOCK TABLE repack_test IN SHARE UPDATE EXCLUSIVE MODE; + +step lock_table_sue: <... completed> +ERROR: future deadlock detected +step end_txn: + COMMIT; + +step s3_wakeup: + SELECT injection_points_wakeup('repack-concurrently-before-lock'); + +injection_points_wakeup +----------------------- + +(1 row) + +step wait_before_lock: <... completed> +injection_points_detach +----------------------- + +(1 row) + + +starting permutation: s2_timeout_fast wait_before_lock begin_and_read s3_begin_txn s3_lock_table_y_sue s3_lock_table_sue lock_table_y_sue end_txn s4_wakeup s3_end_txn +injection_points_attach +----------------------- + +(1 row) + +step s2_timeout_fast: + SET deadlock_timeout = '10ms'; + +step wait_before_lock: + REPACK (CONCURRENTLY) repack_test USING INDEX repack_test_pkey; + +step begin_and_read: + BEGIN; + SELECT 1 FROM repack_test LIMIT 1; + +?column? +-------- + 1 +(1 row) + +step s3_begin_txn: + BEGIN; + +step s3_lock_table_y_sue: + LOCK TABLE repack_test_y IN SHARE UPDATE EXCLUSIVE MODE; + +step s3_lock_table_sue: + LOCK TABLE repack_test IN SHARE UPDATE EXCLUSIVE MODE; + +step lock_table_y_sue: + LOCK TABLE repack_test_y IN SHARE UPDATE EXCLUSIVE MODE; + +step lock_table_y_sue: <... completed> +ERROR: future deadlock detected +step end_txn: + COMMIT; + +step s4_wakeup: + SELECT injection_points_wakeup('repack-concurrently-before-lock'); + +injection_points_wakeup +----------------------- + +(1 row) + +step wait_before_lock: <... completed> +step s3_lock_table_sue: <... completed> +step s3_end_txn: + COMMIT; + +injection_points_detach +----------------------- + +(1 row) + diff --git a/src/test/modules/injection_points/specs/repack.spec b/src/test/modules/injection_points/specs/repack.spec index d727a9b056b..d214821a2da 100644 --- a/src/test/modules/injection_points/specs/repack.spec +++ b/src/test/modules/injection_points/specs/repack.spec @@ -5,6 +5,7 @@ setup CREATE TABLE repack_test(i int PRIMARY KEY, j int); INSERT INTO repack_test(i, j) VALUES (1, 1), (2, 2), (3, 3), (4, 4); + CREATE TABLE repack_test_y(i int); CREATE TABLE relfilenodes(node oid); @@ -15,6 +16,7 @@ setup teardown { DROP TABLE repack_test; + DROP TABLE repack_test_y; DROP EXTENSION injection_points; DROP TABLE relfilenodes; @@ -61,6 +63,10 @@ teardown } session s2 +step s2_timeout_fast +{ + SET deadlock_timeout = '10ms'; +} # Change the existing data. UPDATE changes both key and non-key columns. Also # update one row twice to test whether tuple version generated by this session # can be found. @@ -128,6 +134,60 @@ step wakeup_before_lock { SELECT injection_points_wakeup('repack-concurrently-before-lock'); } +# Steps used in lock contention tests. 
+step begin_txn +{ + BEGIN; +} +step begin_and_read +{ + BEGIN; + SELECT 1 FROM repack_test LIMIT 1; +} +step lock_table_ae +{ + LOCK TABLE repack_test IN ACCESS EXCLUSIVE MODE; +} +step lock_table_sue +{ + LOCK TABLE repack_test IN SHARE UPDATE EXCLUSIVE MODE; +} +step lock_table_y_sue +{ + LOCK TABLE repack_test_y IN SHARE UPDATE EXCLUSIVE MODE; +} +step end_txn +{ + COMMIT; +} + +session s3 +step s3_begin_txn +{ + BEGIN; +} +step s3_lock_table_y_sue +{ + LOCK TABLE repack_test_y IN SHARE UPDATE EXCLUSIVE MODE; +} +step s3_lock_table_sue +{ + LOCK TABLE repack_test IN SHARE UPDATE EXCLUSIVE MODE; +} +step s3_wakeup +{ + SELECT injection_points_wakeup('repack-concurrently-before-lock'); +} +step s3_end_txn +{ + COMMIT; +} + +session s4 +step s4_wakeup +{ + SELECT injection_points_wakeup('repack-concurrently-before-lock'); +} # Test if data changes introduced while one session is performing REPACK # CONCURRENTLY find their way into the table. @@ -140,3 +200,61 @@ permutation check2 wakeup_before_lock check1 + +# A waiter that does not already hold a conflicting lock on the table is not a +# future deadlock. It waits until REPACK finishes and then acquires its lock. +permutation + wait_before_lock + begin_txn + lock_table_ae + s3_wakeup + end_txn + +# In the deadlock-expecting permutations below, all deadlock detections must +# run while s1 is still parked at the injection point, i.e. before s1 wakes +# up and attempts AccessExclusiveLock. Once s1 attaches waitLink for AEL, +# it clears its future-wait slot, and any cycle from that point on would be +# reported as a plain "deadlock detected" rather than "future deadlock +# detected". To force this ordering, every wakeup step is placed after the +# COMMIT from the session whose 10 ms deadlock_timeout we rely on: the +# framework cannot run that COMMIT until the blocked session has unblocked +# (via its deadlock check), so the wakeup necessarily follows. + +# A waiter that already holds AccessShareLock then waits for AccessExclusiveLock +# behind REPACK's ShareUpdateExclusiveLock. +permutation + s2_timeout_fast + wait_before_lock + begin_and_read + lock_table_ae(*) + end_txn + s3_wakeup + +# Same shape as above, but the waiter requests ShareUpdateExclusiveLock. +permutation + s2_timeout_fast + wait_before_lock + begin_and_read + lock_table_sue(*) + end_txn + s3_wakeup + +# Three-backend future deadlock: +# +# - s1 holds ShareUpdateExclusiveLock on repack_test and has declared a future +# AccessExclusiveLock on it. +# - s2 holds AccessShareLock on repack_test. +# - s3 holds ShareUpdateExclusiveLock on repack_test_y, then waits for +# ShareUpdateExclusiveLock on repack_test behind s1. +# - s2 waits for ShareUpdateExclusiveLock on repack_test_y behind s3. +permutation + s2_timeout_fast + wait_before_lock + begin_and_read + s3_begin_txn + s3_lock_table_y_sue + s3_lock_table_sue + lock_table_y_sue(*) + end_txn + s4_wakeup + s3_end_txn -- 2.43.0