diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index af24902..69ae046 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -151,7 +151,7 @@ SimpleLruShmemSize(int nslots, int nlsns) sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */ - sz += MAXALIGN(nslots * sizeof(LWLockId)); /* buffer_locks[] */ + sz += MAXALIGN(nslots * sizeof(LWLock *)); /* buffer_locks[] */ if (nlsns > 0) sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */ @@ -161,7 +161,7 @@ SimpleLruShmemSize(int nslots, int nlsns) void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, - LWLockId ctllock, const char *subdir) + LWLock *ctllock, const char *subdir) { SlruShared shared; bool found; @@ -202,8 +202,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, offset += MAXALIGN(nslots * sizeof(int)); shared->page_lru_count = (int *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(int)); - shared->buffer_locks = (LWLockId *) (ptr + offset); - offset += MAXALIGN(nslots * sizeof(LWLockId)); + shared->buffer_locks = (LWLock **) (ptr + offset); + offset += MAXALIGN(nslots * sizeof(LWLock *)); if (nlsns > 0) { diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 048a189..1692dd2 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -449,8 +449,6 @@ typedef struct typedef int InheritableSocket; #endif -typedef struct LWLock LWLock; /* ugly kluge */ - /* * Structure contains all variables passed to exec:ed backends */ @@ -471,7 +469,7 @@ typedef struct slock_t *ShmemLock; VariableCache ShmemVariableCache; Backend *ShmemBackendArray; - LWLock *LWLockArray; + LWLock *MainLWLockArray; slock_t *ProcStructLock; PROC_HDR *ProcGlobal; PGPROC *AuxiliaryProcs; @@ -5580,7 +5578,6 @@ 
PostmasterMarkPIDForWorkerNotify(int pid) * functions. They are marked NON_EXEC_STATIC in their home modules. */ extern slock_t *ShmemLock; -extern LWLock *LWLockArray; extern slock_t *ProcStructLock; extern PGPROC *AuxiliaryProcs; extern PMSignalData *PMSignalState; @@ -5626,7 +5623,7 @@ save_backend_variables(BackendParameters *param, Port *port, param->ShmemVariableCache = ShmemVariableCache; param->ShmemBackendArray = ShmemBackendArray; - param->LWLockArray = LWLockArray; + param->MainLWLockArray = MainLWLockArray; param->ProcStructLock = ProcStructLock; param->ProcGlobal = ProcGlobal; param->AuxiliaryProcs = AuxiliaryProcs; @@ -5854,7 +5851,7 @@ restore_backend_variables(BackendParameters *param, Port *port) ShmemVariableCache = param->ShmemVariableCache; ShmemBackendArray = param->ShmemBackendArray; - LWLockArray = param->LWLockArray; + MainLWLockArray = param->MainLWLockArray; ProcStructLock = param->ProcStructLock; ProcGlobal = param->ProcGlobal; AuxiliaryProcs = param->AuxiliaryProcs; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 081165f..af923ec 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -536,10 +536,10 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, { BufferTag newTag; /* identity of requested block */ uint32 newHash; /* hash value for newTag */ - LWLockId newPartitionLock; /* buffer partition lock for it */ + LWLock *newPartitionLock; /* buffer partition lock for it */ BufferTag oldTag; /* previous identity of selected buffer */ uint32 oldHash; /* hash value for oldTag */ - LWLockId oldPartitionLock; /* buffer partition lock for it */ + LWLock *oldPartitionLock; /* buffer partition lock for it */ BufFlags oldFlags; int buf_id; volatile BufferDesc *buf; @@ -888,7 +888,7 @@ InvalidateBuffer(volatile BufferDesc *buf) { BufferTag oldTag; uint32 oldHash; /* hash value for oldTag */ - LWLockId oldPartitionLock; /* buffer partition lock 
for it */ + LWLock *oldPartitionLock; /* buffer partition lock for it */ BufFlags oldFlags; /* Save the original buffer tag before dropping the spinlock */ diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 536be44..38776e9 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -565,7 +565,7 @@ LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) LOCALLOCK *locallock; LOCK *lock; PROCLOCK *proclock; - LWLockId partitionLock; + LWLock *partitionLock; bool hasWaiters = false; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) @@ -702,7 +702,7 @@ LockAcquireExtended(const LOCKTAG *locktag, bool found; ResourceOwner owner; uint32 hashcode; - LWLockId partitionLock; + LWLock *partitionLock; int status; bool log_lock = false; @@ -1744,7 +1744,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) LOCALLOCK *locallock; LOCK *lock; PROCLOCK *proclock; - LWLockId partitionLock; + LWLock *partitionLock; bool wakeupNeeded; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) @@ -2096,10 +2096,13 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) */ for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++) { - LWLockId partitionLock = FirstLockMgrLock + partition; + LWLock *partitionLock; SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]); PROCLOCK *nextplock; + partitionLock = + &MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + partition].lock; + /* * If the proclock list for this partition is empty, we can skip * acquiring the partition lock. 
This optimization is trickier than @@ -2475,7 +2478,7 @@ static bool FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag, uint32 hashcode) { - LWLockId partitionLock = LockHashPartitionLock(hashcode); + LWLock *partitionLock = LockHashPartitionLock(hashcode); Oid relid = locktag->locktag_field2; uint32 i; @@ -2565,7 +2568,7 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock) LockMethod lockMethodTable = LockMethods[DEFAULT_LOCKMETHOD]; LOCKTAG *locktag = &locallock->tag.lock; PROCLOCK *proclock = NULL; - LWLockId partitionLock = LockHashPartitionLock(locallock->hashcode); + LWLock *partitionLock = LockHashPartitionLock(locallock->hashcode); Oid relid = locktag->locktag_field2; uint32 f; @@ -2671,7 +2674,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) SHM_QUEUE *procLocks; PROCLOCK *proclock; uint32 hashcode; - LWLockId partitionLock; + LWLock *partitionLock; int count = 0; int fast_count = 0; @@ -2883,7 +2886,7 @@ LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc, PROCLOCKTAG proclocktag; uint32 hashcode; uint32 proclock_hashcode; - LWLockId partitionLock; + LWLock *partitionLock; bool wakeupNeeded; hashcode = LockTagHashCode(locktag); @@ -3159,10 +3162,13 @@ PostPrepare_Locks(TransactionId xid) */ for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++) { - LWLockId partitionLock = FirstLockMgrLock + partition; + LWLock *partitionLock; SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]); PROCLOCK *nextplock; + partitionLock = + &MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + partition].lock; + /* * If the proclock list for this partition is empty, we can skip * acquiring the partition lock. This optimization is safer than the @@ -3400,7 +3406,12 @@ GetLockStatusData(void) * Must grab LWLocks in partition-number order to avoid LWLock deadlock. 
*/ for (i = 0; i < NUM_LOCK_PARTITIONS; i++) - LWLockAcquire(FirstLockMgrLock + i, LW_SHARED); + { + LWLock *partitionLock; + + partitionLock = &MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockAcquire(partitionLock, LW_SHARED); + } /* Now we can safely count the number of proclocks */ data->nelements = el + hash_get_num_entries(LockMethodProcLockHash); @@ -3442,7 +3453,12 @@ GetLockStatusData(void) * behavior inside LWLockRelease. */ for (i = NUM_LOCK_PARTITIONS; --i >= 0;) - LWLockRelease(FirstLockMgrLock + i); + { + LWLock *partitionLock; + + partitionLock = &MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockRelease(partitionLock); + } Assert(el == data->nelements); @@ -3477,7 +3493,12 @@ GetRunningTransactionLocks(int *nlocks) * Must grab LWLocks in partition-number order to avoid LWLock deadlock. */ for (i = 0; i < NUM_LOCK_PARTITIONS; i++) - LWLockAcquire(FirstLockMgrLock + i, LW_SHARED); + { + LWLock *partitionLock; + + partitionLock = &MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockAcquire(partitionLock, LW_SHARED); + } /* Now we can safely count the number of proclocks */ els = hash_get_num_entries(LockMethodProcLockHash); @@ -3537,7 +3558,12 @@ GetRunningTransactionLocks(int *nlocks) * behavior inside LWLockRelease. 
*/ for (i = NUM_LOCK_PARTITIONS; --i >= 0;) - LWLockRelease(FirstLockMgrLock + i); + { + LWLock *partitionLock; + + partitionLock = &MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockRelease(partitionLock); + } *nlocks = index; return accessExclusiveLocks; @@ -3673,7 +3699,7 @@ lock_twophase_recover(TransactionId xid, uint16 info, uint32 hashcode; uint32 proclock_hashcode; int partition; - LWLockId partitionLock; + LWLock *partitionLock; LockMethod lockMethodTable; Assert(len == sizeof(TwoPhaseLockRecord)); @@ -4044,7 +4070,7 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait) { PROCLOCK *proclock; uint32 hashcode; - LWLockId partitionLock; + LWLock *partitionLock; hashcode = LockTagHashCode(&tag); diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 4f88d3f..11fff91 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -37,43 +37,12 @@ extern slock_t *ShmemLock; -typedef struct LWLock -{ - slock_t mutex; /* Protects LWLock and queue of PGPROCs */ - bool releaseOK; /* T if ok to release waiters */ - char exclusive; /* # of exclusive holders (0 or 1) */ - int shared; /* # of shared holders (0..MaxBackends) */ - PGPROC *head; /* head of list of waiting PGPROCs */ - PGPROC *tail; /* tail of list of waiting PGPROCs */ - /* tail is undefined when head is NULL */ -} LWLock; - -/* - * All the LWLock structs are allocated as an array in shared memory. - * (LWLockIds are indexes into the array.) We force the array stride to - * be a power of 2, which saves a few cycles in indexing, but more - * importantly also ensures that individual LWLocks don't cross cache line - * boundaries. This reduces cache contention problems, especially on AMD - * Opterons. (Of course, we have to also ensure that the array start - * address is suitably aligned.) - * - * LWLock is between 16 and 32 bytes on all known platforms, so these two - * cases are sufficient. 
- */ -#define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 16 ? 16 : 32) - -typedef union LWLockPadded -{ - LWLock lock; - char pad[LWLOCK_PADDED_SIZE]; -} LWLockPadded; - /* * This points to the array of LWLocks in shared memory. Backends inherit * the pointer by fork from the postmaster (except in the EXEC_BACKEND case, * where we have special measures to pass it down). */ -NON_EXEC_STATIC LWLockPadded *LWLockArray = NULL; +LWLockPadded *MainLWLockArray = NULL; /* @@ -85,7 +54,7 @@ NON_EXEC_STATIC LWLockPadded *LWLockArray = NULL; #define MAX_SIMUL_LWLOCKS 100 static int num_held_lwlocks = 0; -static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS]; +static LWLock *held_lwlocks[MAX_SIMUL_LWLOCKS]; static int lock_addin_request = 0; static bool lock_addin_request_allowed = true; @@ -130,7 +99,7 @@ static void print_lwlock_stats(int code, Datum arg); static void init_lwlock_stats(void) { - int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); + int *LWLockCounter = (int *) ((char *) MainLWLockArray - 2 * sizeof(int)); int numLocks = LWLockCounter[1]; sh_acquire_counts = calloc(numLocks, sizeof(int)); @@ -145,7 +114,7 @@ static void print_lwlock_stats(int code, Datum arg) { int i; - int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); + int *LWLockCounter = (int *) ((char *) MainLWLockArray - 2 * sizeof(int)); int numLocks = LWLockCounter[1]; /* Grab an LWLock to keep different backends from mixing reports */ @@ -180,7 +149,7 @@ NumLWLocks(void) */ /* Predefined LWLocks */ - numLocks = (int) NumFixedLWLocks; + numLocks = NUM_FIXED_LWLOCKS; /* bufmgr.c needs two for each shared buffer */ numLocks += 2 * NBuffers; @@ -276,12 +245,12 @@ CreateLWLocks(void) /* Ensure desired alignment of LWLock array */ ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE; - LWLockArray = (LWLockPadded *) ptr; + MainLWLockArray = (LWLockPadded *) ptr; /* - * Initialize all LWLocks to "unlocked" state + * Initialize all LWLocks in main array to 
"unlocked" state */ - for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++) + for (id = 0, lock = MainLWLockArray; id < numLocks; id++, lock++) { SpinLockInit(&lock->lock.mutex); lock->lock.releaseOK = true; @@ -295,8 +264,8 @@ CreateLWLocks(void) * Initialize the dynamic-allocation counter, which is stored just before * the first LWLock. */ - LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - LWLockCounter[0] = (int) NumFixedLWLocks; + LWLockCounter = (int *) ((char *) MainLWLockArray - 2 * sizeof(int)); + LWLockCounter[0] = NUM_FIXED_LWLOCKS; LWLockCounter[1] = numLocks; } @@ -309,22 +278,22 @@ CreateLWLocks(void) * startup, but it is needed if any user-defined code tries to allocate * LWLocks after startup. */ -LWLockId +LWLock * LWLockAssign(void) { - LWLockId result; + LWLock *result; /* use volatile pointer to prevent code rearrangement */ volatile int *LWLockCounter; - LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); + LWLockCounter = (int *) ((char *) MainLWLockArray - 2 * sizeof(int)); SpinLockAcquire(ShmemLock); if (LWLockCounter[0] >= LWLockCounter[1]) { SpinLockRelease(ShmemLock); - elog(ERROR, "no more LWLockIds available"); + elog(ERROR, "no more LWLocks available"); } - result = (LWLockId) (LWLockCounter[0]++); + result = &MainLWLockArray[LWLockCounter[0]++].lock; SpinLockRelease(ShmemLock); return result; } @@ -338,9 +307,9 @@ LWLockAssign(void) * Side effect: cancel/die interrupts are held off until lock release. 
*/ void -LWLockAcquire(LWLockId lockid, LWLockMode mode) +LWLockAcquire(LWLock *l, LWLockMode mode) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = l; PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; @@ -497,7 +466,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode); /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = lockid; + held_lwlocks[num_held_lwlocks++] = l; /* * Fix the process wait semaphore's count for any absorbed wakeups. @@ -514,9 +483,9 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) * If successful, cancel/die interrupts are held off until lock release. */ bool -LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) +LWLockConditionalAcquire(LWLock *l, LWLockMode mode) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = l; bool mustwait; PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock); @@ -570,7 +539,7 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) else { /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = lockid; + held_lwlocks[num_held_lwlocks++] = l; TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(lockid, mode); } @@ -592,9 +561,9 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) * wake up, observe that their records have already been flushed, and return. 
*/ bool -LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode) +LWLockAcquireOrWait(LWLock *l, LWLockMode mode) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = l; PGPROC *proc = MyProc; bool mustwait; int extraWaits = 0; @@ -714,7 +683,7 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode) else { /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = lockid; + held_lwlocks[num_held_lwlocks++] = l; TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE(lockid, mode); } @@ -725,9 +694,9 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode) * LWLockRelease - release a previously acquired lock */ void -LWLockRelease(LWLockId lockid) +LWLockRelease(LWLock *l) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = l; PGPROC *head; PGPROC *proc; int i; @@ -740,11 +709,11 @@ LWLockRelease(LWLockId lockid) */ for (i = num_held_lwlocks; --i >= 0;) { - if (lockid == held_lwlocks[i]) + if (l == held_lwlocks[i]) break; } if (i < 0) - elog(ERROR, "lock %d is not held", (int) lockid); + elog(ERROR, "lock %p is not held", l); num_held_lwlocks--; for (; i < num_held_lwlocks; i++) held_lwlocks[i] = held_lwlocks[i + 1]; @@ -874,13 +843,13 @@ LWLockReleaseAll(void) * lock is held shared or exclusive. 
*/ bool -LWLockHeldByMe(LWLockId lockid) +LWLockHeldByMe(LWLock *l) { int i; for (i = 0; i < num_held_lwlocks; i++) { - if (held_lwlocks[i] == lockid) + if (held_lwlocks[i] == l) return true; } return false; diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index a8a0e98..0987596 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -241,7 +241,8 @@ #define PredicateLockHashPartition(hashcode) \ ((hashcode) % NUM_PREDICATELOCK_PARTITIONS) #define PredicateLockHashPartitionLock(hashcode) \ - ((LWLockId) (FirstPredicateLockMgrLock + PredicateLockHashPartition(hashcode))) + (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + \ + PredicateLockHashPartition(hashcode)].lock) #define NPREDICATELOCKTARGETENTS() \ mul_size(max_predicate_locks_per_xact, add_size(MaxBackends, max_prepared_xacts)) @@ -383,7 +384,7 @@ static SHM_QUEUE *FinishedSerializableTransactions; */ static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0}; static uint32 ScratchTargetTagHash; -static int ScratchPartitionLock; +static LWLock *ScratchPartitionLock; /* * The local hash table used to determine when to combine multiple fine- @@ -1398,7 +1399,13 @@ GetPredicateLockStatusData(void) * in ascending order, then SerializableXactHashLock. */ for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++) - LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED); + { + LWLock *partitionLock; + + partitionLock = + &MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockAcquire(partitionLock, LW_SHARED); + } LWLockAcquire(SerializableXactHashLock, LW_SHARED); /* Get number of locks and allocate appropriately-sized arrays. 
*/ @@ -1427,7 +1434,13 @@ GetPredicateLockStatusData(void) /* Release locks in reverse order */ LWLockRelease(SerializableXactHashLock); for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--) - LWLockRelease(FirstPredicateLockMgrLock + i); + { + LWLock *partitionLock; + + partitionLock = + &MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockRelease(partitionLock); + } return data; } @@ -1856,7 +1869,7 @@ PageIsPredicateLocked(Relation relation, BlockNumber blkno) { PREDICATELOCKTARGETTAG targettag; uint32 targettaghash; - LWLockId partitionLock; + LWLock *partitionLock; PREDICATELOCKTARGET *target; SET_PREDICATELOCKTARGETTAG_PAGE(targettag, @@ -2089,7 +2102,7 @@ DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag) if (TargetTagIsCoveredBy(oldtargettag, *newtargettag)) { uint32 oldtargettaghash; - LWLockId partitionLock; + LWLock *partitionLock; PREDICATELOCK *rmpredlock PG_USED_FOR_ASSERTS_ONLY; oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag); @@ -2301,7 +2314,7 @@ CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag, PREDICATELOCKTARGET *target; PREDICATELOCKTAG locktag; PREDICATELOCK *lock; - LWLockId partitionLock; + LWLock *partitionLock; bool found; partitionLock = PredicateLockHashPartitionLock(targettaghash); @@ -2599,10 +2612,10 @@ TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag, bool removeOld) { uint32 oldtargettaghash; - LWLockId oldpartitionLock; + LWLock *oldpartitionLock; PREDICATELOCKTARGET *oldtarget; uint32 newtargettaghash; - LWLockId newpartitionLock; + LWLock *newpartitionLock; bool found; bool outOfShmem = false; @@ -2858,7 +2871,13 @@ DropAllPredicateLocksFromTable(Relation relation, bool transfer) /* Acquire locks on all lock partitions */ LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE); for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++) - LWLockAcquire(FirstPredicateLockMgrLock + i, LW_EXCLUSIVE); + { + LWLock *partitionLock; + + partitionLock = + 
&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + } LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE); /* @@ -2996,7 +3015,13 @@ DropAllPredicateLocksFromTable(Relation relation, bool transfer) /* Release locks in reverse order */ LWLockRelease(SerializableXactHashLock); for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--) - LWLockRelease(FirstPredicateLockMgrLock + i); + { + LWLock *partitionLock; + + partitionLock = + &MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockRelease(partitionLock); + } LWLockRelease(SerializablePredicateLockListLock); } @@ -3611,7 +3636,7 @@ ClearOldPredicateLocks(void) PREDICATELOCKTARGET *target; PREDICATELOCKTARGETTAG targettag; uint32 targettaghash; - LWLockId partitionLock; + LWLock *partitionLock; tag = predlock->tag; target = tag.myTarget; @@ -3690,7 +3715,7 @@ ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial, PREDICATELOCKTARGET *target; PREDICATELOCKTARGETTAG targettag; uint32 targettaghash; - LWLockId partitionLock; + LWLock *partitionLock; nextpredlock = (PREDICATELOCK *) SHMQueueNext(&(sxact->predicateLocks), @@ -4068,7 +4093,7 @@ static void CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag) { uint32 targettaghash; - LWLockId partitionLock; + LWLock *partitionLock; PREDICATELOCKTARGET *target; PREDICATELOCK *predlock; PREDICATELOCK *mypredlock = NULL; @@ -4360,7 +4385,13 @@ CheckTableForSerializableConflictIn(Relation relation) LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE); for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++) - LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED); + { + LWLock *partitionLock; + + partitionLock = + &MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockAcquire(partitionLock, LW_SHARED); + } LWLockAcquire(SerializableXactHashLock, LW_SHARED); /* Scan through target list */ @@ -4407,7 +4438,13 @@ CheckTableForSerializableConflictIn(Relation 
relation) /* Release locks in reverse order */ LWLockRelease(SerializableXactHashLock); for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--) - LWLockRelease(FirstPredicateLockMgrLock + i); + { + LWLock *partitionLock; + + partitionLock = + &MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockRelease(partitionLock); + } LWLockRelease(SerializablePredicateLockListLock); } diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 122afb2..a991092 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -189,7 +189,8 @@ InitProcGlobal(void) */ procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC)); ProcGlobal->allProcs = procs; - ProcGlobal->allProcCount = TotalProcs; + /* XXX allProcCount isn't really all of them; it excludes prepared xacts */ + ProcGlobal->allProcCount = MaxBackends + NUM_AUXILIARY_PROCS; if (!procs) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), @@ -663,7 +664,7 @@ IsWaitingForLock(void) void LockErrorCleanup(void) { - LWLockId partitionLock; + LWLock *partitionLock; DisableTimeoutParams timeouts[2]; AbortStrongLockAcquire(); @@ -942,7 +943,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) LOCK *lock = locallock->lock; PROCLOCK *proclock = locallock->proclock; uint32 hashcode = locallock->hashcode; - LWLockId partitionLock = LockHashPartitionLock(hashcode); + LWLock *partitionLock = LockHashPartitionLock(hashcode); PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; @@ -1440,7 +1441,12 @@ CheckDeadLock(void) * interrupts. */ for (i = 0; i < NUM_LOCK_PARTITIONS; i++) - LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE); + { + LWLock *partitionLock; + + partitionLock = &MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + } /* * Check to see if we've been awoken by anyone in the interim. 
@@ -1522,7 +1528,12 @@ CheckDeadLock(void) */ check_done: for (i = NUM_LOCK_PARTITIONS; --i >= 0;) - LWLockRelease(FirstLockMgrLock + i); + { + LWLock *partitionLock; + + partitionLock = &MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + i].lock; + LWLockRelease(partitionLock); + } } diff --git a/src/include/access/slru.h b/src/include/access/slru.h index fc2c503..3c668f5 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -55,7 +55,7 @@ typedef enum */ typedef struct SlruSharedData { - LWLockId ControlLock; + LWLock *ControlLock; /* Number of buffers managed by this SLRU structure */ int num_slots; @@ -69,7 +69,7 @@ typedef struct SlruSharedData bool *page_dirty; int *page_number; int *page_lru_count; - LWLockId *buffer_locks; + LWLock **buffer_locks; /* * Optional array of WAL flush LSNs associated with entries in the SLRU @@ -136,7 +136,7 @@ typedef SlruCtlData *SlruCtl; extern Size SimpleLruShmemSize(int nslots, int nlsns); extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, - LWLockId ctllock, const char *subdir); + LWLock *ctllock, const char *subdir); extern int SimpleLruZeroPage(SlruCtl ctl, int pageno); extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid); diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index f0e5144..bc6353e 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -104,7 +104,8 @@ typedef struct buftag #define BufTableHashPartition(hashcode) \ ((hashcode) % NUM_BUFFER_PARTITIONS) #define BufMappingPartitionLock(hashcode) \ - ((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode))) + (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \ + BufTableHashPartition(hashcode)].lock) /* * BufferDesc -- shared descriptor/state data for a single shared buffer. 
@@ -144,8 +145,8 @@ typedef struct sbufdesc int buf_id; /* buffer's index number (from 0) */ int freeNext; /* link in freelist chain */ - LWLockId io_in_progress_lock; /* to wait for I/O to complete */ - LWLockId content_lock; /* to lock access to buffer contents */ + LWLock *io_in_progress_lock; /* to wait for I/O to complete */ + LWLock *content_lock; /* to lock access to buffer contents */ } BufferDesc; #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 99bd945..f337af0 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -483,8 +483,8 @@ typedef enum #define LockHashPartition(hashcode) \ ((hashcode) % NUM_LOCK_PARTITIONS) #define LockHashPartitionLock(hashcode) \ - ((LWLockId) (FirstLockMgrLock + LockHashPartition(hashcode))) - + (&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + \ + LockHashPartition(hashcode)].lock) /* * function prototypes diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 3e42f6a..c6ffc5c 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -14,10 +14,94 @@ #ifndef LWLOCK_H #define LWLOCK_H +#include "storage/s_lock.h" + +struct PGPROC; + +typedef struct LWLock +{ + slock_t mutex; /* Protects LWLock and queue of PGPROCs */ + bool releaseOK; /* T if ok to release waiters */ + char exclusive; /* # of exclusive holders (0 or 1) */ + int shared; /* # of shared holders (0..MaxBackends) */ + struct PGPROC *head; /* head of list of waiting PGPROCs */ + struct PGPROC *tail; /* tail of list of waiting PGPROCs */ + /* tail is undefined when head is NULL */ +} LWLock; + +/* + * Prior to PostgreSQL 9.4, every lightweight lock in the system was stored + * in a single array. 
For convenience and for compatibility with past + * releases, we still have a main array, but it's now also permissible to + * store LWLocks elsewhere in the main shared memory segment or in a dynamic + * shared memory segment. In the main array, we force the array stride to + * be a power of 2, which saves a few cycles in indexing, but more importantly + * also ensures that individual LWLocks don't cross cache line boundaries. + * This reduces cache contention problems, especially on AMD Opterons. + * (Of course, we have to also ensure that the array start address is suitably + * aligned.) + * + * LWLock is between 16 and 32 bytes on all known platforms, so these two + * cases are sufficient. + */ +#define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 16 ? 16 : 32) + +typedef union LWLockPadded +{ + LWLock lock; + char pad[LWLOCK_PADDED_SIZE]; +} LWLockPadded; +extern LWLockPadded *MainLWLockArray; + +/* + * Some commonly-used locks have predefined positions within MainLWLockArray; + * defining macros here makes it much easier to keep track of these. If you + * add a lock, add it to the end to avoid renumbering the existing locks; + * if you remove a lock, consider leaving a gap in the numbering sequence for + * the benefit of DTrace and other external debugging scripts. 
+ */ +#define BufFreelistLock (&MainLWLockArray[0].lock) +#define ShmemIndexLock (&MainLWLockArray[1].lock) +#define OidGenLock (&MainLWLockArray[2].lock) +#define XidGenLock (&MainLWLockArray[3].lock) +#define ProcArrayLock (&MainLWLockArray[4].lock) +#define SInvalReadLock (&MainLWLockArray[5].lock) +#define SInvalWriteLock (&MainLWLockArray[6].lock) +#define WALBufMappingLock (&MainLWLockArray[7].lock) +#define WALWriteLock (&MainLWLockArray[8].lock) +#define ControlFileLock (&MainLWLockArray[9].lock) +#define CheckpointLock (&MainLWLockArray[10].lock) +#define CLogControlLock (&MainLWLockArray[11].lock) +#define SubtransControlLock (&MainLWLockArray[12].lock) +#define MultiXactGenLock (&MainLWLockArray[13].lock) +#define MultiXactOffsetControlLock (&MainLWLockArray[14].lock) +#define MultiXactMemberControlLock (&MainLWLockArray[15].lock) +#define RelCacheInitLock (&MainLWLockArray[16].lock) +#define CheckpointerCommLock (&MainLWLockArray[17].lock) +#define TwoPhaseStateLock (&MainLWLockArray[18].lock) +#define TablespaceCreateLock (&MainLWLockArray[19].lock) +#define BtreeVacuumLock (&MainLWLockArray[20].lock) +#define AddinShmemInitLock (&MainLWLockArray[21].lock) +#define AutovacuumLock (&MainLWLockArray[22].lock) +#define AutovacuumScheduleLock (&MainLWLockArray[23].lock) +#define SyncScanLock (&MainLWLockArray[24].lock) +#define RelationMappingLock (&MainLWLockArray[25].lock) +#define AsyncCtlLock (&MainLWLockArray[26].lock) +#define AsyncQueueLock (&MainLWLockArray[27].lock) +#define SerializableXactHashLock (&MainLWLockArray[28].lock) +#define SerializableFinishedListLock (&MainLWLockArray[29].lock) +#define SerializablePredicateLockListLock (&MainLWLockArray[30].lock) +#define OldSerXidLock (&MainLWLockArray[31].lock) +#define SyncRepLock (&MainLWLockArray[32].lock) +#define BackgroundWorkerLock (&MainLWLockArray[33].lock) +#define DynamicSharedMemoryControlLock (&MainLWLockArray[34].lock) +#define AutoFileLock (&MainLWLockArray[35].lock) +#define 
NUM_INDIVIDUAL_LWLOCKS 36 + /* * It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS - * here, but we need them to set up enum LWLockId correctly, and having - * this file include lock.h or bufmgr.h would be backwards. + * here, but we need them to figure out offsets within MainLWLockArray, and + * having this file include lock.h or bufmgr.h would be backwards. */ /* Number of partitions of the shared buffer mapping hashtable */ @@ -31,68 +115,14 @@ #define LOG2_NUM_PREDICATELOCK_PARTITIONS 4 #define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS) -/* - * We have a number of predefined LWLocks, plus a bunch of LWLocks that are - * dynamically assigned (e.g., for shared buffers). The LWLock structures - * live in shared memory (since they contain shared data) and are identified - * by values of this enumerated type. We abuse the notion of an enum somewhat - * by allowing values not listed in the enum declaration to be assigned. - * The extra value MaxDynamicLWLock is there to keep the compiler from - * deciding that the enum can be represented as char or short ... - * - * If you remove a lock, please replace it with a placeholder. This retains - * the lock numbering, which is helpful for DTrace and other external - * debugging scripts. 
- */ -typedef enum LWLockId -{ - BufFreelistLock, - ShmemIndexLock, - OidGenLock, - XidGenLock, - ProcArrayLock, - SInvalReadLock, - SInvalWriteLock, - WALBufMappingLock, - WALWriteLock, - ControlFileLock, - CheckpointLock, - CLogControlLock, - SubtransControlLock, - MultiXactGenLock, - MultiXactOffsetControlLock, - MultiXactMemberControlLock, - RelCacheInitLock, - CheckpointerCommLock, - TwoPhaseStateLock, - TablespaceCreateLock, - BtreeVacuumLock, - AddinShmemInitLock, - AutovacuumLock, - AutovacuumScheduleLock, - SyncScanLock, - RelationMappingLock, - AsyncCtlLock, - AsyncQueueLock, - SerializableXactHashLock, - SerializableFinishedListLock, - SerializablePredicateLockListLock, - OldSerXidLock, - SyncRepLock, - BackgroundWorkerLock, - DynamicSharedMemoryControlLock, - AutoFileLock, - /* Individual lock IDs end here */ - FirstBufMappingLock, - FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS, - FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS, - - /* must be last except for MaxDynamicLWLock: */ - NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS, - - MaxDynamicLWLock = 1000000000 -} LWLockId; - +/* Offsets for various chunks of preallocated lwlocks. 
*/ +#define BUFFER_MAPPING_LWLOCK_OFFSET NUM_INDIVIDUAL_LWLOCKS +#define LOCK_MANAGER_LWLOCK_OFFSET \ + (BUFFER_MAPPING_LWLOCK_OFFSET + NUM_BUFFER_PARTITIONS) +#define PREDICATELOCK_MANAGER_LWLOCK_OFFSET \ + (LOCK_MANAGER_LWLOCK_OFFSET + NUM_LOCK_PARTITIONS) +#define NUM_FIXED_LWLOCKS \ + (PREDICATELOCK_MANAGER_LWLOCK_OFFSET + NUM_PREDICATELOCK_PARTITIONS) typedef enum LWLockMode { @@ -108,13 +138,14 @@ typedef enum LWLockMode extern bool Trace_lwlocks; #endif -extern LWLockId LWLockAssign(void); -extern void LWLockAcquire(LWLockId lockid, LWLockMode mode); -extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode); -extern bool LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode); -extern void LWLockRelease(LWLockId lockid); +extern LWLock *LWLockAssign(void); +extern void LWLockInitialize(LWLock *); +extern void LWLockAcquire(LWLock *lock, LWLockMode mode); +extern bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode); +extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode); +extern void LWLockRelease(LWLock *lock); extern void LWLockReleaseAll(void); -extern bool LWLockHeldByMe(LWLockId lockid); +extern bool LWLockHeldByMe(LWLock *lock); extern int NumLWLocks(void); extern Size LWLockShmemSize(void); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 3b04d3c..fb00e79 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -131,7 +131,7 @@ struct PGPROC struct XidCache subxids; /* cache for subtransaction XIDs */ /* Per-backend LWLock. Protects fields below. */ - LWLockId backendLock; /* protects the fields below */ + LWLock *backendLock; /* protects the fields below */ /* Lock manager data, recording fast-path locks taken by this backend. */ uint64 fpLockBits; /* lock modes held for each fast-path slot */