commit 1c8586f390d5942fd95ef1e0efea371128cf06a5
Author: Alexander Korotkov
Date:   Tue Jul 31 00:33:31 2018 +0300

    "Fair" LWLock version 4

    Wait for 15 sequential shared lock holders before switching to "fair"
    mode.  New exclusive lock waiters (including group clear xid and group
    clog update waiters) reset the counter.

    Implementation notes:

    * Bits 24..27 of lock->state (LW_FLAG_USAGE_COUNT_MASK) hold a usage
      counter that saturates at 15.  To make room, LW_VAL_EXCLUSIVE moves
      from bit 24 to bit 23 and LW_LOCK_MASK / LW_SHARED_MASK shrink by one
      bit each.
    * A shared locker that gets the lock with wakeup == false while
      LW_FLAG_HAS_WAITERS is set, LW_FLAG_USAGE_COUNT_LOCK is clear and the
      counter is not yet saturated adds LW_FLAG_USAGE_COUNT_ONE and sets
      LW_FLAG_USAGE_COUNT_LOCK (bit 31).  That fact is remembered in
      held_lwlocks[].countLock and the flag is subtracted again in
      LWLockRelease().
    * Once the counter is saturated and LW_FLAG_HAS_WAITERS is set,
      LWLockAttemptLock() refuses shared requests made with wakeup == false.
    * The counter is cleared by an exclusive acquisition, by a shared
      acquisition with no waiters or with wakeup == true, and by
      LWLockCleanUsageCount(), which callers invoke just before they start
      waiting.

    LWLockAcquireOrWait() initializes its countLock local to false:
    LWLockAttemptLock() only stores into *countLock for LW_SHARED, so an
    LW_EXCLUSIVE caller would otherwise record an indeterminate value in
    held_lwlocks[].countLock, and LWLockRelease() could then subtract
    LW_FLAG_USAGE_COUNT_LOCK from the state word by mistake.

diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 8b7ff5b0c24..eef69b63453 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -479,6 +479,8 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
 	{
 		int			extraWaits = 0;
 
+		LWLockCleanUsageCount(CLogControlLock);
+
 		/* Sleep until the leader updates our XID status. */
 		pgstat_report_wait_start(WAIT_EVENT_CLOG_GROUP_UPDATE);
 		for (;;)
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index bd20497d81a..36a606a7a5a 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -514,6 +514,8 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
 	{
 		int			extraWaits = 0;
 
+		LWLockCleanUsageCount(ProcArrayLock);
+
 		/* Sleep until the leader clears our XID. */
 		pgstat_report_wait_start(WAIT_EVENT_PROCARRAY_GROUP_UPDATE);
 		for (;;)
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index a6fda81feb6..487ece8118e 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -96,16 +96,19 @@
 /* We use the ShmemLock spinlock to protect LWLockCounter */
 extern slock_t *ShmemLock;
 
+#define LW_FLAG_USAGE_COUNT_LOCK	((uint32) 1 << 31)	/* a shared holder bumped the usage count */
 #define LW_FLAG_HAS_WAITERS			((uint32) 1 << 30)
 #define LW_FLAG_RELEASE_OK			((uint32) 1 << 29)
 #define LW_FLAG_LOCKED				((uint32) 1 << 28)
-
-#define LW_VAL_EXCLUSIVE			((uint32) 1 << 24)
+#define LW_FLAG_USAGE_COUNT_ONE		((uint32) 1 << 24)	/* usage count increment */
+#define LW_VAL_EXCLUSIVE			((uint32) 1 << 23)
 #define LW_VAL_SHARED				1
 
-#define LW_LOCK_MASK				((uint32) ((1 << 25)-1))
+#define LW_FLAG_USAGE_COUNT_MASK	(LW_FLAG_LOCKED - LW_FLAG_USAGE_COUNT_ONE)	/* bits 24..27 */
+
+#define LW_LOCK_MASK				((uint32) ((1 << 24)-1))
 /* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
-#define LW_SHARED_MASK				((uint32) ((1 << 24)-1))
+#define LW_SHARED_MASK				((uint32) ((1 << 23)-1))
 
 /*
  * This is indexed by tranche ID and stores the names of all tranches known
@@ -137,6 +140,7 @@ typedef struct LWLockHandle
 {
 	LWLock	   *lock;
 	LWLockMode	mode;
+	bool		countLock;
 } LWLockHandle;
 
 static int	num_held_lwlocks = 0;
@@ -736,7 +740,7 @@ GetLWLockIdentifier(uint32 classId, uint16 eventId)
  * Returns true if the lock isn't free and we need to wait.
  */
 static bool
-LWLockAttemptLock(LWLock *lock, LWLockMode mode)
+LWLockAttemptLock(LWLock *lock, LWLockMode mode, bool wakeup, bool *countLock)
 {
 	uint32		old_state;
 
@@ -752,7 +756,8 @@ LWLockAttemptLock(LWLock *lock, LWLockMode mode)
 	while (true)
 	{
 		uint32		desired_state;
-		bool		lock_free;
+		bool		lock_free,
+					count_lock = false;
 
 		desired_state = old_state;
 
@@ -760,13 +765,36 @@ LWLockAttemptLock(LWLock *lock, LWLockMode mode)
 		{
 			lock_free = (old_state & LW_LOCK_MASK) == 0;
 			if (lock_free)
+			{
 				desired_state += LW_VAL_EXCLUSIVE;
+				Assert((old_state & LW_FLAG_USAGE_COUNT_LOCK) == 0);
+				desired_state &= ~LW_FLAG_USAGE_COUNT_MASK;
+			}
 		}
 		else
 		{
-			lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
+			if (wakeup)
+				lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
+			else
+				lock_free = ((old_state & LW_VAL_EXCLUSIVE) == 0) &&
+							((old_state & (LW_FLAG_HAS_WAITERS | LW_FLAG_USAGE_COUNT_MASK)) !=
+							 (LW_FLAG_HAS_WAITERS | LW_FLAG_USAGE_COUNT_MASK));
 			if (lock_free)
+			{
 				desired_state += LW_VAL_SHARED;
+				if ((old_state & LW_FLAG_HAS_WAITERS) == 0 || wakeup)
+				{
+					desired_state &= ~LW_FLAG_USAGE_COUNT_MASK;
+				}
+				else if ((old_state & LW_FLAG_USAGE_COUNT_LOCK) == 0 &&
+						 (old_state & LW_FLAG_USAGE_COUNT_MASK) != LW_FLAG_USAGE_COUNT_MASK)
+				{
+					desired_state += LW_FLAG_USAGE_COUNT_LOCK +
+						LW_FLAG_USAGE_COUNT_ONE;
+					count_lock = true;
+				}
+
+			}
 		}
 
 		/*
@@ -789,6 +817,11 @@ LWLockAttemptLock(LWLock *lock, LWLockMode mode)
 			if (mode == LW_EXCLUSIVE)
 				lock->owner = MyProc;
 #endif
+			if (mode == LW_SHARED)
+			{
+				*countLock = count_lock;
+			}
+
 			return false;
 		}
 		else
@@ -798,6 +831,12 @@ LWLockAttemptLock(LWLock *lock, LWLockMode mode)
 	pg_unreachable();
 }
 
+void
+LWLockCleanUsageCount(LWLock *lock)
+{
+	pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_USAGE_COUNT_MASK);
+}
+
 /*
  * Lock the LWLock's wait list against concurrent activity.
  *
@@ -978,9 +1017,11 @@ LWLockWakeup(LWLock *lock)
  *
  * NB: Mode can be LW_WAIT_UNTIL_FREE here!
  */
-static void
+static bool
 LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 {
+	bool		first;
+
 	/*
 	 * If we don't have a PGPROC structure, there's no way to wait. This
 	 * should never occur, since MyProc should only be null during shared
@@ -1002,9 +1043,15 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 
 	/* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
 	if (mode == LW_WAIT_UNTIL_FREE)
+	{
+		first = true;
 		proclist_push_head(&lock->waiters, MyProc->pgprocno, lwWaitLink);
+	}
 	else
+	{
+		first = proclist_is_empty(&lock->waiters);
 		proclist_push_tail(&lock->waiters, MyProc->pgprocno, lwWaitLink);
+	}
 
 	/* Can release the mutex now */
 	LWLockWaitListUnlock(lock);
@@ -1013,6 +1060,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 	pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
 #endif
 
+	return first;
 }
 
 /*
@@ -1122,6 +1170,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 {
 	PGPROC	   *proc = MyProc;
 	bool		result = true;
+	bool		countLock = false;
 	int			extraWaits = 0;
 #ifdef LWLOCK_STATS
 	lwlock_stats *lwstats;
@@ -1177,13 +1226,13 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 	 */
 	for (;;)
 	{
-		bool		mustwait;
+		bool		mustwait, first;
 
 		/*
 		 * Try to grab the lock the first time, we're not in the waitqueue
 		 * yet/anymore.
 		 */
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, !result, &countLock);
 
 		if (!mustwait)
 		{
@@ -1203,10 +1252,10 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 		 */
 
 		/* add to the queue */
-		LWLockQueueSelf(lock, mode);
+		first = LWLockQueueSelf(lock, mode);
 
 		/* we're now guaranteed to be woken up if necessary */
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, (!result) || first, &countLock);
 
 		/* ok, grabbed the lock the second time round, need to undo queueing */
 		if (!mustwait)
@@ -1235,6 +1284,8 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 		lwstats->block_count++;
 #endif
 
+		LWLockCleanUsageCount(lock);
+
 		LWLockReportWaitStart(lock);
 		TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
 
@@ -1271,6 +1322,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 
 	/* Add lock to list of locks held by this backend */
 	held_lwlocks[num_held_lwlocks].lock = lock;
+	held_lwlocks[num_held_lwlocks].countLock = countLock;
 	held_lwlocks[num_held_lwlocks++].mode = mode;
 
 	/*
@@ -1292,7 +1344,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 bool
 LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
 {
-	bool		mustwait;
+	bool		mustwait, countLock = false;
 
 	AssertArg(mode == LW_SHARED || mode == LW_EXCLUSIVE);
 
@@ -1310,7 +1362,7 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
 	HOLD_INTERRUPTS();
 
 	/* Check for the lock */
-	mustwait = LWLockAttemptLock(lock, mode);
+	mustwait = LWLockAttemptLock(lock, mode, false, &countLock);
 
 	if (mustwait)
 	{
@@ -1324,6 +1376,7 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
 	{
 		/* Add lock to list of locks held by this backend */
 		held_lwlocks[num_held_lwlocks].lock = lock;
+		held_lwlocks[num_held_lwlocks].countLock = countLock;
 		held_lwlocks[num_held_lwlocks++].mode = mode;
 		TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
 	}
@@ -1348,7 +1401,8 @@ bool
 LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 {
 	PGPROC	   *proc = MyProc;
-	bool		mustwait;
+	bool		mustwait,
+				countLock = false;	/* LWLockAttemptLock() only sets it for LW_SHARED */
 	int			extraWaits = 0;
 #ifdef LWLOCK_STATS
 	lwlock_stats *lwstats;
@@ -1375,13 +1429,13 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 	 * NB: We're using nearly the same twice-in-a-row lock acquisition
 	 * protocol as LWLockAcquire(). Check its comments for details.
 	 */
-	mustwait = LWLockAttemptLock(lock, mode);
+	mustwait = LWLockAttemptLock(lock, mode, true, &countLock);
 
 	if (mustwait)
 	{
 		LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
 
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, true, &countLock);
 
 		if (mustwait)
 		{
@@ -1396,6 +1450,8 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 			lwstats->block_count++;
 #endif
 
+			LWLockCleanUsageCount(lock);
+
 			LWLockReportWaitStart(lock);
 			TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
 
@@ -1452,6 +1508,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 		LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
 		/* Add lock to list of locks held by this backend */
 		held_lwlocks[num_held_lwlocks].lock = lock;
+		held_lwlocks[num_held_lwlocks].countLock = countLock;
 		held_lwlocks[num_held_lwlocks++].mode = mode;
 		TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
 	}
@@ -1611,6 +1668,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
 #ifdef LWLOCK_STATS
 		lwstats->block_count++;
 #endif
+		LWLockCleanUsageCount(lock);
 
 		LWLockReportWaitStart(lock);
 		TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
@@ -1725,8 +1783,10 @@ void
 LWLockRelease(LWLock *lock)
 {
 	LWLockMode	mode;
-	uint32		oldstate;
-	bool		check_waiters;
+	uint32		oldstate,
+				sub;
+	bool		check_waiters,
+				countLock = false;
 	int			i;
 
 	/*
@@ -1735,7 +1795,10 @@ LWLockRelease(LWLock *lock)
 	 */
 	for (i = num_held_lwlocks; --i >= 0;)
 		if (lock == held_lwlocks[i].lock)
+		{
+			countLock = held_lwlocks[i].countLock;
 			break;
+		}
 
 	if (i < 0)
 		elog(ERROR, "lock %s is not held", T_NAME(lock));
@@ -1753,9 +1816,19 @@ LWLockRelease(LWLock *lock)
 	 * others, even if we still have to wakeup other waiters.
 	 */
 	if (mode == LW_EXCLUSIVE)
-		oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
+		sub = LW_VAL_EXCLUSIVE;
 	else
-		oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
+		sub = LW_VAL_SHARED;
+
+	if (countLock)
+		sub += LW_FLAG_USAGE_COUNT_LOCK;
+
+	oldstate = pg_atomic_fetch_sub_u32(&lock->state, sub);
+
+	/* If we bumped the usage count, LW_FLAG_USAGE_COUNT_LOCK must still be set */
+	Assert(!countLock || (oldstate & LW_FLAG_USAGE_COUNT_LOCK));
+
+	oldstate -= sub;
 
 	/* nobody else can have that kind of lock */
 	Assert(!(oldstate & LW_VAL_EXCLUSIVE));
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index c21bfe2f666..7170e7604a5 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -144,6 +144,7 @@ extern bool Trace_lwlocks;
 #endif
 
 extern bool LWLockAcquire(LWLock *lock, LWLockMode mode);
+extern void LWLockCleanUsageCount(LWLock *lock);
 extern bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode);
 extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode);
 extern void LWLockRelease(LWLock *lock);