diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 8dc3054..51b24d0 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -105,7 +105,7 @@ typedef struct pgssEntry */ typedef struct pgssSharedState { - LWLockId lock; /* protects hashtable search/modification */ + FlexLockId lock; /* protects hashtable search/modification */ int query_size; /* max query length in bytes */ } pgssSharedState; diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d1e628f..8517b36 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -6199,14 +6199,14 @@ LOG: CleanUpLock: deleting: lock(0xb7acd844) id(24688,24696,0,0,0,1) - trace_lwlocks (boolean) + trace_flexlocks (boolean) - trace_lwlocks configuration parameter + trace_flexlocks configuration parameter - If on, emit information about lightweight lock usage. Lightweight - locks are intended primarily to provide mutual exclusion of access + If on, emit information about FlexLock usage. FlexLocks + are intended primarily to provide mutual exclusion of access to shared-memory data structures. diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index b9dc1d2..98ed0d3 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1724,49 +1724,49 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS procpid, or kilobytes of memory used for an internal sort. - lwlock-acquire - (LWLockId, LWLockMode) - Probe that fires when an LWLock has been acquired. - arg0 is the LWLock's ID. - arg1 is the requested lock mode, either exclusive or shared. + flexlock-acquire + (FlexLockId, FlexLockMode) + Probe that fires when an FlexLock has been acquired. + arg0 is the FlexLock's ID. + arg1 is the requested lock mode. 
- lwlock-release - (LWLockId) - Probe that fires when an LWLock has been released (but note + flexlock-release + (FlexLockId) + Probe that fires when a FlexLock has been released (but note that any released waiters have not yet been awakened). - arg0 is the LWLock's ID. + arg0 is the FlexLock's ID. - lwlock-wait-start - (LWLockId, LWLockMode) - Probe that fires when an LWLock was not immediately available and + flexlock-wait-start + (FlexLockId, FlexLockMode) + Probe that fires when an FlexLock was not immediately available and a server process has begun to wait for the lock to become available. - arg0 is the LWLock's ID. + arg0 is the FlexLock's ID. arg1 is the requested lock mode, either exclusive or shared. - lwlock-wait-done - (LWLockId, LWLockMode) + flexlock-wait-done + (FlexLockId, FlexLockMode) Probe that fires when a server process has been released from its - wait for an LWLock (it does not actually have the lock yet). - arg0 is the LWLock's ID. + wait for an FlexLock (it does not actually have the lock yet). + arg0 is the FlexLock's ID. arg1 is the requested lock mode, either exclusive or shared. - lwlock-condacquire - (LWLockId, LWLockMode) - Probe that fires when an LWLock was successfully acquired when the - caller specified no waiting. - arg0 is the LWLock's ID. + flexlock-condacquire + (FlexLockId, FlexLockMode) + Probe that fires when an FlexLock was successfully acquired when + the caller specified no waiting. + arg0 is the FlexLock's ID. arg1 is the requested lock mode, either exclusive or shared. - lwlock-condacquire-fail - (LWLockId, LWLockMode) - Probe that fires when an LWLock was not successfully acquired when - the caller specified no waiting. - arg0 is the LWLock's ID. + flexlock-condacquire-fail + (FlexLockId, FlexLockMode) + Probe that fires when an FlexLock was not successfully acquired + when the caller specified no waiting. + arg0 is the FlexLock's ID. arg1 is the requested lock mode, either exclusive or shared. 
@@ -1813,11 +1813,11 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS procpid, unsigned int - LWLockId + FlexLockId int - LWLockMode + FlexLockMode int diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index f7caa34..09d5862 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -151,7 +151,7 @@ SimpleLruShmemSize(int nslots, int nlsns) sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */ - sz += MAXALIGN(nslots * sizeof(LWLockId)); /* buffer_locks[] */ + sz += MAXALIGN(nslots * sizeof(FlexLockId)); /* buffer_locks[] */ if (nlsns > 0) sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */ @@ -161,7 +161,7 @@ SimpleLruShmemSize(int nslots, int nlsns) void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, - LWLockId ctllock, const char *subdir) + FlexLockId ctllock, const char *subdir) { SlruShared shared; bool found; @@ -202,8 +202,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, offset += MAXALIGN(nslots * sizeof(int)); shared->page_lru_count = (int *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(int)); - shared->buffer_locks = (LWLockId *) (ptr + offset); - offset += MAXALIGN(nslots * sizeof(LWLockId)); + shared->buffer_locks = (FlexLockId *) (ptr + offset); + offset += MAXALIGN(nslots * sizeof(FlexLockId)); if (nlsns > 0) { diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 477982d..0805f9c 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -113,7 +113,8 @@ int max_prepared_xacts = 0; typedef struct GlobalTransactionData { - PGPROC proc; /* dummy proc */ + GlobalTransaction next; + int pgprocno; /* dummy proc */ BackendId dummyBackendId; /* similar to backend id for backends */ TimestampTz prepared_at; /* time of 
preparation */ XLogRecPtr prepare_lsn; /* XLOG offset of prepare record */ @@ -207,7 +208,8 @@ TwoPhaseShmemInit(void) sizeof(GlobalTransaction) * max_prepared_xacts)); for (i = 0; i < max_prepared_xacts; i++) { - gxacts[i].proc.links.next = (SHM_QUEUE *) TwoPhaseState->freeGXacts; + gxacts[i].pgprocno = PreparedXactProcs[i].pgprocno; + gxacts[i].next = TwoPhaseState->freeGXacts; TwoPhaseState->freeGXacts = &gxacts[i]; /* @@ -243,6 +245,8 @@ MarkAsPreparing(TransactionId xid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid) { GlobalTransaction gxact; + PGPROC *proc; + PGPROC_MINIMAL *proc_minimal; int i; if (strlen(gid) >= GIDSIZE) @@ -274,7 +278,7 @@ MarkAsPreparing(TransactionId xid, const char *gid, TwoPhaseState->numPrepXacts--; TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts]; /* and put it back in the freelist */ - gxact->proc.links.next = (SHM_QUEUE *) TwoPhaseState->freeGXacts; + gxact->next = TwoPhaseState->freeGXacts; TwoPhaseState->freeGXacts = gxact; /* Back up index count too, so we don't miss scanning one */ i--; @@ -302,32 +306,36 @@ MarkAsPreparing(TransactionId xid, const char *gid, errhint("Increase max_prepared_transactions (currently %d).", max_prepared_xacts))); gxact = TwoPhaseState->freeGXacts; - TwoPhaseState->freeGXacts = (GlobalTransaction) gxact->proc.links.next; + TwoPhaseState->freeGXacts = (GlobalTransaction) gxact->next; - /* Initialize it */ - MemSet(&gxact->proc, 0, sizeof(PGPROC)); - SHMQueueElemInit(&(gxact->proc.links)); - gxact->proc.waitStatus = STATUS_OK; + proc = &ProcGlobal->allProcs[gxact->pgprocno]; + proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; + + /* Initialize the PGPROC entry */ + MemSet(proc, 0, sizeof(PGPROC)); + proc->pgprocno = gxact->pgprocno; + SHMQueueElemInit(&(proc->links)); + proc->waitStatus = STATUS_OK; /* We set up the gxact's VXID as InvalidBackendId/XID */ - gxact->proc.lxid = (LocalTransactionId) xid; - gxact->proc.xid = xid; - 
gxact->proc.xmin = InvalidTransactionId; - gxact->proc.pid = 0; - gxact->proc.backendId = InvalidBackendId; - gxact->proc.databaseId = databaseid; - gxact->proc.roleId = owner; - gxact->proc.inCommit = false; - gxact->proc.vacuumFlags = 0; - gxact->proc.lwWaiting = false; - gxact->proc.lwExclusive = false; - gxact->proc.lwWaitLink = NULL; - gxact->proc.waitLock = NULL; - gxact->proc.waitProcLock = NULL; + proc->lxid = (LocalTransactionId) xid; + proc_minimal->xid = xid; + proc_minimal->xmin = InvalidTransactionId; + proc_minimal->inCommit = false; + proc_minimal->vacuumFlags = 0; + proc->pid = 0; + proc->backendId = InvalidBackendId; + proc->databaseId = databaseid; + proc->roleId = owner; + proc->flWaitResult = false; + proc->flWaitMode = false; + proc->flWaitLink = NULL; + proc->waitLock = NULL; + proc->waitProcLock = NULL; for (i = 0; i < NUM_LOCK_PARTITIONS; i++) - SHMQueueInit(&(gxact->proc.myProcLocks[i])); + SHMQueueInit(&(proc->myProcLocks[i])); /* subxid data must be filled later by GXactLoadSubxactData */ - gxact->proc.subxids.overflowed = false; - gxact->proc.subxids.nxids = 0; + proc_minimal->overflowed = false; + proc_minimal->nxids = 0; gxact->prepared_at = prepared_at; /* initialize LSN to 0 (start of WAL) */ @@ -358,17 +366,19 @@ static void GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts, TransactionId *children) { + PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; /* We need no extra lock since the GXACT isn't valid yet */ if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS) { - gxact->proc.subxids.overflowed = true; + proc_minimal->overflowed = true; nsubxacts = PGPROC_MAX_CACHED_SUBXIDS; } if (nsubxacts > 0) { - memcpy(gxact->proc.subxids.xids, children, + memcpy(proc->subxids.xids, children, nsubxacts * sizeof(TransactionId)); - gxact->proc.subxids.nxids = nsubxacts; + proc_minimal->nxids = nsubxacts; } } @@ -389,7 +399,7 @@ MarkAsPrepared(GlobalTransaction 
gxact) * Put it into the global ProcArray so TransactionIdIsInProgress considers * the XID as still running. */ - ProcArrayAdd(&gxact->proc); + ProcArrayAdd(&ProcGlobal->allProcs[gxact->pgprocno]); } /* @@ -406,6 +416,7 @@ LockGXact(const char *gid, Oid user) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno]; /* Ignore not-yet-valid GIDs */ if (!gxact->valid) @@ -436,7 +447,7 @@ LockGXact(const char *gid, Oid user) * there may be some other issues as well. Hence disallow until * someone gets motivated to make it work. */ - if (MyDatabaseId != gxact->proc.databaseId) + if (MyDatabaseId != proc->databaseId) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("prepared transaction belongs to another database"), @@ -483,7 +494,7 @@ RemoveGXact(GlobalTransaction gxact) TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts]; /* and put it back in the freelist */ - gxact->proc.links.next = (SHM_QUEUE *) TwoPhaseState->freeGXacts; + gxact->next = TwoPhaseState->freeGXacts; TwoPhaseState->freeGXacts = gxact; LWLockRelease(TwoPhaseStateLock); @@ -518,8 +529,9 @@ TransactionIdIsPrepared(TransactionId xid) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; - if (gxact->valid && gxact->proc.xid == xid) + if (gxact->valid && proc_minimal->xid == xid) { result = true; break; @@ -642,6 +654,8 @@ pg_prepared_xact(PG_FUNCTION_ARGS) while (status->array != NULL && status->currIdx < status->ngxacts) { GlobalTransaction gxact = &status->array[status->currIdx++]; + PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; Datum values[5]; bool nulls[5]; HeapTuple tuple; @@ -656,11 +670,11 @@ pg_prepared_xact(PG_FUNCTION_ARGS) 
MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); - values[0] = TransactionIdGetDatum(gxact->proc.xid); + values[0] = TransactionIdGetDatum(proc_minimal->xid); values[1] = CStringGetTextDatum(gxact->gid); values[2] = TimestampTzGetDatum(gxact->prepared_at); values[3] = ObjectIdGetDatum(gxact->owner); - values[4] = ObjectIdGetDatum(gxact->proc.databaseId); + values[4] = ObjectIdGetDatum(proc->databaseId); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); @@ -711,10 +725,11 @@ TwoPhaseGetDummyProc(TransactionId xid) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; - if (gxact->proc.xid == xid) + if (proc_minimal->xid == xid) { - result = &gxact->proc; + result = &ProcGlobal->allProcs[gxact->pgprocno]; break; } } @@ -841,7 +856,9 @@ save_state_data(const void *data, uint32 len) void StartPrepare(GlobalTransaction gxact) { - TransactionId xid = gxact->proc.xid; + PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; + TransactionId xid = proc_minimal->xid; TwoPhaseFileHeader hdr; TransactionId *children; RelFileNode *commitrels; @@ -865,7 +882,7 @@ StartPrepare(GlobalTransaction gxact) hdr.magic = TWOPHASE_MAGIC; hdr.total_len = 0; /* EndPrepare will fill this in */ hdr.xid = xid; - hdr.database = gxact->proc.databaseId; + hdr.database = proc->databaseId; hdr.prepared_at = gxact->prepared_at; hdr.owner = gxact->owner; hdr.nsubxacts = xactGetCommittedChildren(&children); @@ -913,7 +930,8 @@ StartPrepare(GlobalTransaction gxact) void EndPrepare(GlobalTransaction gxact) { - TransactionId xid = gxact->proc.xid; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; + TransactionId xid = proc_minimal->xid; TwoPhaseFileHeader *hdr; char path[MAXPGPATH]; XLogRecData 
*record; @@ -1021,7 +1039,7 @@ EndPrepare(GlobalTransaction gxact) */ START_CRIT_SECTION(); - MyProc->inCommit = true; + MyProcMinimal->inCommit = true; gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE, records.head); @@ -1069,7 +1087,7 @@ EndPrepare(GlobalTransaction gxact) * checkpoint starting after this will certainly see the gxact as a * candidate for fsyncing. */ - MyProc->inCommit = false; + MyProcMinimal->inCommit = false; END_CRIT_SECTION(); @@ -1242,6 +1260,8 @@ void FinishPreparedTransaction(const char *gid, bool isCommit) { GlobalTransaction gxact; + PGPROC *proc; + PGPROC_MINIMAL *proc_minimal; TransactionId xid; char *buf; char *bufptr; @@ -1260,7 +1280,9 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * try to commit the same GID at once. */ gxact = LockGXact(gid, GetUserId()); - xid = gxact->proc.xid; + proc = &ProcGlobal->allProcs[gxact->pgprocno]; + proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; + xid = proc_minimal->xid; /* * Read and validate the state file @@ -1309,7 +1331,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) hdr->nsubxacts, children, hdr->nabortrels, abortrels); - ProcArrayRemove(&gxact->proc, latestXid); + ProcArrayRemove(proc, latestXid); /* * In case we fail while running the callbacks, mark the gxact invalid so @@ -1540,10 +1562,11 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; if (gxact->valid && XLByteLE(gxact->prepare_lsn, redo_horizon)) - xids[nxids++] = gxact->proc.xid; + xids[nxids++] = proc_minimal->xid; } LWLockRelease(TwoPhaseStateLock); @@ -1972,7 +1995,7 @@ RecordTransactionCommitPrepared(TransactionId xid, START_CRIT_SECTION(); /* See notes in RecordTransactionCommit */ - MyProc->inCommit = true; + MyProcMinimal->inCommit = true; /* Emit the XLOG commit record */ xlrec.xid = 
xid; @@ -2037,7 +2060,7 @@ RecordTransactionCommitPrepared(TransactionId xid, TransactionIdCommitTree(xid, nchildren, children); /* Checkpoint can proceed now */ - MyProc->inCommit = false; + MyProcMinimal->inCommit = false; END_CRIT_SECTION(); diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 61dcfed..7c986aa 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -54,7 +54,7 @@ GetNewTransactionId(bool isSubXact) if (IsBootstrapProcessingMode()) { Assert(!isSubXact); - MyProc->xid = BootstrapTransactionId; + MyProcMinimal->xid = BootstrapTransactionId; return BootstrapTransactionId; } @@ -208,20 +208,21 @@ GetNewTransactionId(bool isSubXact) * TransactionId and int fetch/store are atomic. */ volatile PGPROC *myproc = MyProc; + volatile PGPROC_MINIMAL *myprocminimal = MyProcMinimal; if (!isSubXact) - myproc->xid = xid; + myprocminimal->xid = xid; else { - int nxids = myproc->subxids.nxids; + int nxids = myprocminimal->nxids; if (nxids < PGPROC_MAX_CACHED_SUBXIDS) { myproc->subxids.xids[nxids] = xid; - myproc->subxids.nxids = nxids + 1; + myprocminimal->nxids = nxids + 1; } else - myproc->subxids.overflowed = true; + myprocminimal->overflowed = true; } } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index c151d3b..21eb404 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -981,7 +981,7 @@ RecordTransactionCommit(void) * bit fuzzy, but it doesn't matter. */ START_CRIT_SECTION(); - MyProc->inCommit = true; + MyProcMinimal->inCommit = true; SetCurrentTransactionStopTimestamp(); @@ -1155,7 +1155,7 @@ RecordTransactionCommit(void) */ if (markXidCommitted) { - MyProc->inCommit = false; + MyProcMinimal->inCommit = false; END_CRIT_SECTION(); } @@ -2248,7 +2248,7 @@ AbortTransaction(void) * Releasing LW locks is critical since we might try to grab them again * while cleaning up! 
*/ - LWLockReleaseAll(); + FlexLockReleaseAll(); /* Clean up buffer I/O and buffer context locks, too */ AbortBufferIO(); @@ -4138,7 +4138,7 @@ AbortSubTransaction(void) * FIXME This may be incorrect --- Are there some locks we should keep? * Buffer locks, for example? I don't think so but I'm not sure. */ - LWLockReleaseAll(); + FlexLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 6bf2421..9ceee91 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -562,13 +562,13 @@ bootstrap_signals(void) * Begin shutdown of an auxiliary process. This is approximately the equivalent * of ShutdownPostgres() in postinit.c. We can't run transactions in an * auxiliary process, so most of the work of AbortTransaction() is not needed, - * but we do need to make sure we've released any LWLocks we are holding. + * but we do need to make sure we've released any flex locks we are holding. * (This is only critical during an error exit.) */ static void ShutdownAuxiliaryProcess(int code, Datum arg) { - LWLockReleaseAll(); + FlexLockReleaseAll(); } /* ---------------------------------------------------------------- diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 32985a4..23556fa 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -40,6 +40,7 @@ #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procarraylock.h" #include "utils/acl.h" #include "utils/attoptcache.h" #include "utils/datum.h" @@ -222,9 +223,9 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) /* * OK, let's do it. First let other backends know I'm in ANALYZE. 
*/ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->vacuumFlags |= PROC_IN_ANALYZE; - LWLockRelease(ProcArrayLock); + ProcArrayLockAcquire(PAL_EXCLUSIVE); + MyProcMinimal->vacuumFlags |= PROC_IN_ANALYZE; + ProcArrayLockRelease(); /* * Do the normal non-recursive ANALYZE. @@ -249,9 +250,9 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) * Reset my PGPROC flag. Note: we need this here, and not in vacuum_rel, * because the vacuum flag is cleared by the end-of-xact code. */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->vacuumFlags &= ~PROC_IN_ANALYZE; - LWLockRelease(ProcArrayLock); + ProcArrayLockAcquire(PAL_EXCLUSIVE); + MyProcMinimal->vacuumFlags &= ~PROC_IN_ANALYZE; + ProcArrayLockRelease(); } /* diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index f42504c..480bf82 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -39,6 +39,7 @@ #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procarraylock.h" #include "utils/acl.h" #include "utils/fmgroids.h" #include "utils/guc.h" @@ -892,11 +893,11 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound) * MyProc->xid/xmin, else OldestXmin might appear to go backwards, * which is probably Not Good. 
*/ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->vacuumFlags |= PROC_IN_VACUUM; + ProcArrayLockAcquire(PAL_EXCLUSIVE); + MyProcMinimal->vacuumFlags |= PROC_IN_VACUUM; if (for_wraparound) - MyProc->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND; - LWLockRelease(ProcArrayLock); + MyProcMinimal->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND; + ProcArrayLockRelease(); } /* diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index cacedab..f33f573 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -176,9 +176,10 @@ BackgroundWriterMain(void) /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry - * about in bgwriter, but we do have LWLocks, buffers, and temp files. + * about in bgwriter, but we do have flex locks, buffers, and temp + * files. */ - LWLockReleaseAll(); + FlexLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index e9ae1e8..49f07a7 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -281,9 +281,10 @@ CheckpointerMain(void) /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry - * about in checkpointer, but we do have LWLocks, buffers, and temp files. + * about in checkpointer, but we do have flex locks, buffers, and temp + * files. 
*/ - LWLockReleaseAll(); + FlexLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 6758083..14b4368 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -109,6 +109,7 @@ #include "postmaster/syslogger.h" #include "replication/walsender.h" #include "storage/fd.h" +#include "storage/flexlock_internals.h" #include "storage/ipc.h" #include "storage/pg_shmem.h" #include "storage/pmsignal.h" @@ -404,8 +405,6 @@ typedef struct typedef int InheritableSocket; #endif -typedef struct LWLock LWLock; /* ugly kluge */ - /* * Structure contains all variables passed to exec:ed backends */ @@ -426,7 +425,7 @@ typedef struct slock_t *ShmemLock; VariableCache ShmemVariableCache; Backend *ShmemBackendArray; - LWLock *LWLockArray; + FlexLock *FlexLockArray; slock_t *ProcStructLock; PROC_HDR *ProcGlobal; PGPROC *AuxiliaryProcs; @@ -4675,7 +4674,6 @@ MaxLivePostmasterChildren(void) * functions */ extern slock_t *ShmemLock; -extern LWLock *LWLockArray; extern slock_t *ProcStructLock; extern PGPROC *AuxiliaryProcs; extern PMSignalData *PMSignalState; @@ -4720,7 +4718,7 @@ save_backend_variables(BackendParameters *param, Port *port, param->ShmemVariableCache = ShmemVariableCache; param->ShmemBackendArray = ShmemBackendArray; - param->LWLockArray = LWLockArray; + param->FlexLockArray = FlexLockArray; param->ProcStructLock = ProcStructLock; param->ProcGlobal = ProcGlobal; param->AuxiliaryProcs = AuxiliaryProcs; @@ -4943,7 +4941,7 @@ restore_backend_variables(BackendParameters *param, Port *port) ShmemVariableCache = param->ShmemVariableCache; ShmemBackendArray = param->ShmemBackendArray; - LWLockArray = param->LWLockArray; + FlexLockArray = param->FlexLockArray; ProcStructLock = param->ProcStructLock; ProcGlobal = param->ProcGlobal; AuxiliaryProcs = param->AuxiliaryProcs; diff --git a/src/backend/postmaster/walwriter.c 
b/src/backend/postmaster/walwriter.c index 157728e..587443d 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -167,9 +167,9 @@ WalWriterMain(void) /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry - * about in walwriter, but we do have LWLocks, and perhaps buffers? + * about in walwriter, but we do have flex locks, and perhaps buffers? */ - LWLockReleaseAll(); + FlexLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index dd2d6ee..dc93b42 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -702,7 +702,7 @@ ProcessStandbyHSFeedbackMessage(void) * safe, and if we're moving it backwards, well, the data is at risk * already since a VACUUM could have just finished calling GetOldestXmin.) */ - MyProc->xmin = reply.xmin; + MyProcMinimal->xmin = reply.xmin; } /* Main loop of walsender process */ diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index e59af33..07356ec 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -141,7 +141,7 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) { BufferTag newTag; /* identity of requested block */ uint32 newHash; /* hash value for newTag */ - LWLockId newPartitionLock; /* buffer partition lock for it */ + FlexLockId newPartitionLock; /* buffer partition lock for it */ int buf_id; /* create a tag so we can lookup the buffer */ @@ -512,10 +512,10 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, { BufferTag newTag; /* identity of requested block */ uint32 newHash; /* hash value for newTag */ - LWLockId newPartitionLock; /* buffer partition lock for it */ + FlexLockId newPartitionLock; /* buffer partition lock for it */ BufferTag 
oldTag; /* previous identity of selected buffer */ uint32 oldHash; /* hash value for oldTag */ - LWLockId oldPartitionLock; /* buffer partition lock for it */ + FlexLockId oldPartitionLock; /* buffer partition lock for it */ BufFlags oldFlags; int buf_id; volatile BufferDesc *buf; @@ -855,7 +855,7 @@ InvalidateBuffer(volatile BufferDesc *buf) { BufferTag oldTag; uint32 oldHash; /* hash value for oldTag */ - LWLockId oldPartitionLock; /* buffer partition lock for it */ + FlexLockId oldPartitionLock; /* buffer partition lock for it */ BufFlags oldFlags; /* Save the original buffer tag before dropping the spinlock */ diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 56c0bd8..a2c570a 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -113,7 +113,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, SUBTRANSShmemSize()); size = add_size(size, TwoPhaseShmemSize()); size = add_size(size, MultiXactShmemSize()); - size = add_size(size, LWLockShmemSize()); + size = add_size(size, FlexLockShmemSize()); size = add_size(size, ProcArrayShmemSize()); size = add_size(size, BackendStatusShmemSize()); size = add_size(size, SInvalShmemSize()); @@ -179,7 +179,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) * needed for InitShmemIndex. 
*/ if (!IsUnderPostmaster) - CreateLWLocks(); + CreateFlexLocks(); /* * Set up shmem.c index hashtable @@ -192,7 +192,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) XLOGShmemInit(); CLOGShmemInit(); SUBTRANSShmemInit(); - TwoPhaseShmemInit(); MultiXactShmemInit(); InitBufferPool(); @@ -213,6 +212,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) InitProcGlobal(); CreateSharedProcArray(); CreateSharedBackendStatus(); + TwoPhaseShmemInit(); /* * Set up shared-inval messaging diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 1a48485..8b6a9ef 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -52,6 +52,7 @@ #include "access/twophase.h" #include "miscadmin.h" #include "storage/procarray.h" +#include "storage/procarraylock.h" #include "storage/spin.h" #include "utils/builtins.h" #include "utils/snapmgr.h" @@ -82,14 +83,17 @@ typedef struct ProcArrayStruct TransactionId lastOverflowedXid; /* - * We declare procs[] as 1 entry because C wants a fixed-size array, but + * We declare pgprocnos[] as 1 entry because C wants a fixed-size array, but * actually it is maxProcs entries long. 
*/ - PGPROC *procs[1]; /* VARIABLE LENGTH ARRAY */ + int pgprocnos[1]; /* VARIABLE LENGTH ARRAY */ } ProcArrayStruct; static ProcArrayStruct *procArray; +static PGPROC *allProcs; +static PGPROC_MINIMAL *allProcs_Minimal; + /* * Bookkeeping for tracking emulated transactions in recovery */ @@ -169,8 +173,8 @@ ProcArrayShmemSize(void) /* Size of the ProcArray structure itself */ #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts) - size = offsetof(ProcArrayStruct, procs); - size = add_size(size, mul_size(sizeof(PGPROC *), PROCARRAY_MAXPROCS)); + size = offsetof(ProcArrayStruct, pgprocnos); + size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS)); /* * During Hot Standby processing we have a data structure called @@ -211,8 +215,8 @@ CreateSharedProcArray(void) /* Create or attach to the ProcArray shared structure */ procArray = (ProcArrayStruct *) ShmemInitStruct("Proc Array", - add_size(offsetof(ProcArrayStruct, procs), - mul_size(sizeof(PGPROC *), + add_size(offsetof(ProcArrayStruct, pgprocnos), + mul_size(sizeof(int), PROCARRAY_MAXPROCS)), &found); @@ -231,6 +235,9 @@ CreateSharedProcArray(void) procArray->lastOverflowedXid = InvalidTransactionId; } + allProcs = ProcGlobal->allProcs; + allProcs_Minimal = ProcGlobal->allProcs_Minimal; + /* Create or attach to the KnownAssignedXids arrays too, if needed */ if (EnableHotStandby) { @@ -253,8 +260,9 @@ void ProcArrayAdd(PGPROC *proc) { ProcArrayStruct *arrayP = procArray; + int index; - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); if (arrayP->numProcs >= arrayP->maxProcs) { @@ -263,16 +271,37 @@ ProcArrayAdd(PGPROC *proc) * fixed supply of PGPROC structs too, and so we should have failed * earlier.) 
*/ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already"))); } - arrayP->procs[arrayP->numProcs] = proc; + /* + * Keep the procs array sorted by (PGPROC *) so that we can utilize + * locality of references much better. This is useful while traversing the + * ProcArray because there is a increased likelyhood of finding the next + * PGPROC structure in the cache. + * + * Since the occurance of adding/removing a proc is much lower than the + * access to the ProcArray itself, the overhead should be marginal + */ + for (index = 0; index < arrayP->numProcs; index++) + { + /* + * If we are the first PGPROC or if we have found our right position in + * the array, break + */ + if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno)) + break; + } + + memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index], + (arrayP->numProcs - index) * sizeof (int)); + arrayP->pgprocnos[index] = proc->pgprocno; arrayP->numProcs++; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -289,6 +318,7 @@ void ProcArrayRemove(PGPROC *proc, TransactionId latestXid) { ProcArrayStruct *arrayP = procArray; + PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[proc->pgprocno]; int index; #ifdef XIDCACHE_DEBUG @@ -297,11 +327,11 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) DisplayXidCache(); #endif - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); if (TransactionIdIsValid(latestXid)) { - Assert(TransactionIdIsValid(proc->xid)); + Assert(TransactionIdIsValid(proc_minimal->xid)); /* Advance global latestCompletedXid while holding the lock */ if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, @@ -311,23 +341,25 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) else { /* Shouldn't be trying to remove a live transaction here */ - Assert(!TransactionIdIsValid(proc->xid)); + 
Assert(!TransactionIdIsValid(proc_minimal->xid)); } for (index = 0; index < arrayP->numProcs; index++) { - if (arrayP->procs[index] == proc) + if (arrayP->pgprocnos[index] == proc->pgprocno) { - arrayP->procs[index] = arrayP->procs[arrayP->numProcs - 1]; - arrayP->procs[arrayP->numProcs - 1] = NULL; /* for debugging */ + /* Keep the PGPROC array sorted. See notes above */ + memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1], + (arrayP->numProcs - index - 1) * sizeof (int)); + arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */ arrayP->numProcs--; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return; } } /* Ooops */ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); elog(LOG, "failed to find proc %p in ProcArray", proc); } @@ -349,56 +381,19 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) { + PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[proc->pgprocno]; + if (TransactionIdIsValid(latestXid)) { - /* - * We must lock ProcArrayLock while clearing proc->xid, so that we do - * not exit the set of "running" transactions while someone else is - * taking a snapshot. See discussion in - * src/backend/access/transam/README. 
- */ - Assert(TransactionIdIsValid(proc->xid)); - - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - - proc->xid = InvalidTransactionId; - proc->lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; - /* must be cleared with xid/xmin: */ - proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; - proc->inCommit = false; /* be sure this is cleared in abort */ - proc->recoveryConflictPending = false; - - /* Clear the subtransaction-XID cache too while holding the lock */ - proc->subxids.nxids = 0; - proc->subxids.overflowed = false; - - /* Also advance global latestCompletedXid while holding the lock */ - if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, - latestXid)) - ShmemVariableCache->latestCompletedXid = latestXid; - - LWLockRelease(ProcArrayLock); + Assert(proc == MyProc); + ProcArrayLockClearTransaction(latestXid); } else - { - /* - * If we have no XID, we don't need to lock, since we won't affect - * anyone else's calculation of a snapshot. We might change their - * estimate of global xmin, but that's OK. 
- */ - Assert(!TransactionIdIsValid(proc->xid)); + proc_minimal->xmin = InvalidTransactionId; - proc->lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; - /* must be cleared with xid/xmin: */ - proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; - proc->inCommit = false; /* be sure this is cleared in abort */ - proc->recoveryConflictPending = false; - - Assert(proc->subxids.nxids == 0); - Assert(proc->subxids.overflowed == false); - } + proc->lxid = InvalidLocalTransactionId; + proc_minimal->inCommit = false; /* be sure this is cleared in abort */ + proc->recoveryConflictPending = false; } @@ -413,24 +408,26 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) void ProcArrayClearTransaction(PGPROC *proc) { + PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[proc->pgprocno]; + /* * We can skip locking ProcArrayLock here, because this action does not * actually change anyone's view of the set of running XIDs: our entry is * duplicate with the gxact that has already been inserted into the * ProcArray. 
*/ - proc->xid = InvalidTransactionId; + proc_minimal->xid = InvalidTransactionId; proc->lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; + proc_minimal->xmin = InvalidTransactionId; proc->recoveryConflictPending = false; /* redundant, but just in case */ - proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; - proc->inCommit = false; + proc_minimal->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; + proc_minimal->inCommit = false; /* Clear the subtransaction-XID cache too */ - proc->subxids.nxids = 0; - proc->subxids.overflowed = false; + proc_minimal->nxids = 0; + proc_minimal->overflowed = false; } /* @@ -528,7 +525,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) /* * Nobody else is running yet, but take locks anyhow */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); /* * KnownAssignedXids is sorted so we cannot just add the xids, we have to @@ -635,7 +632,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); Assert(TransactionIdIsValid(ShmemVariableCache->nextXid)); - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); KnownAssignedXidsDisplay(trace_recovery(DEBUG3)); if (standbyState == STANDBY_SNAPSHOT_READY) @@ -690,7 +687,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid, /* * Uses same locking as transaction commit */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); /* * Remove subxids from known-assigned-xacts. 
@@ -703,7 +700,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid, if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid)) procArray->lastOverflowedXid = max_xid; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -795,7 +792,7 @@ TransactionIdIsInProgress(TransactionId xid) errmsg("out of memory"))); } - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); /* * Now that we have the lock, we can check latestCompletedXid; if the @@ -803,7 +800,7 @@ TransactionIdIsInProgress(TransactionId xid) */ if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xid)) { - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); xc_by_latest_xid_inc(); return true; } @@ -811,7 +808,9 @@ TransactionIdIsInProgress(TransactionId xid) /* No shortcuts, gotta grovel through the array */ for (i = 0; i < arrayP->numProcs; i++) { - volatile PGPROC *proc = arrayP->procs[i]; + int pgprocno = arrayP->pgprocnos[i]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId pxid; /* Ignore my own proc --- dealt with it above */ @@ -819,7 +818,7 @@ TransactionIdIsInProgress(TransactionId xid) continue; /* Fetch xid just once - see GetNewTransactionId */ - pxid = proc->xid; + pxid = proc_minimal->xid; if (!TransactionIdIsValid(pxid)) continue; @@ -829,7 +828,7 @@ TransactionIdIsInProgress(TransactionId xid) */ if (TransactionIdEquals(pxid, xid)) { - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); xc_by_main_xid_inc(); return true; } @@ -844,14 +843,14 @@ TransactionIdIsInProgress(TransactionId xid) /* * Step 2: check the cached child-Xids arrays */ - for (j = proc->subxids.nxids - 1; j >= 0; j--) + for (j = proc_minimal->nxids - 1; j >= 0; j--) { /* Fetch xid just once - see GetNewTransactionId */ TransactionId cxid = proc->subxids.xids[j]; if (TransactionIdEquals(cxid, xid)) { - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); 
xc_by_child_xid_inc(); return true; } @@ -864,7 +863,7 @@ TransactionIdIsInProgress(TransactionId xid) * we hold ProcArrayLock. So we can't miss an Xid that we need to * worry about.) */ - if (proc->subxids.overflowed) + if (proc_minimal->overflowed) xids[nxids++] = pxid; } @@ -879,7 +878,7 @@ TransactionIdIsInProgress(TransactionId xid) if (KnownAssignedXidExists(xid)) { - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); xc_by_known_assigned_inc(); return true; } @@ -895,7 +894,7 @@ TransactionIdIsInProgress(TransactionId xid) nxids = KnownAssignedXidsGet(xids, xid); } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* * If none of the relevant caches overflowed, we know the Xid is not @@ -961,14 +960,17 @@ TransactionIdIsActive(TransactionId xid) if (TransactionIdPrecedes(xid, RecentXmin)) return false; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (i = 0; i < arrayP->numProcs; i++) { - volatile PGPROC *proc = arrayP->procs[i]; + int pgprocno = arrayP->pgprocnos[i]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; + TransactionId pxid; /* Fetch xid just once - see GetNewTransactionId */ - TransactionId pxid = proc->xid; + pxid = proc_minimal->xid; if (!TransactionIdIsValid(pxid)) continue; @@ -983,7 +985,7 @@ TransactionIdIsActive(TransactionId xid) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1046,7 +1048,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) /* Cannot look for individual databases during recovery */ Assert(allDbs || !RecoveryInProgress()); - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); /* * We initialize the MIN() calculation with latestCompletedXid + 1. 
This @@ -1060,9 +1062,11 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; - if (ignoreVacuum && (proc->vacuumFlags & PROC_IN_VACUUM)) + if (ignoreVacuum && (proc_minimal->vacuumFlags & PROC_IN_VACUUM)) continue; if (allDbs || @@ -1070,7 +1074,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) proc->databaseId == 0) /* always include WalSender */ { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId xid = proc->xid; + TransactionId xid = proc_minimal->xid; /* First consider the transaction's own Xid, if any */ if (TransactionIdIsNormal(xid) && @@ -1084,7 +1088,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) * have an Xmin but not (yet) an Xid; conversely, if it has an * Xid, that could determine some not-yet-set Xmin. */ - xid = proc->xmin; /* Fetch just once */ + xid = proc_minimal->xmin; /* Fetch just once */ if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, result)) result = xid; @@ -1099,7 +1103,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) */ TransactionId kaxmin = KnownAssignedXidsGetOldestXmin(); - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); if (TransactionIdIsNormal(kaxmin) && TransactionIdPrecedes(kaxmin, result)) @@ -1110,7 +1114,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) /* * No other information needed, so release the lock immediately. 
*/ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age, @@ -1200,6 +1204,8 @@ GetSnapshotData(Snapshot snapshot) int count = 0; int subcount = 0; bool suboverflowed = false; + static TransactionId *xmins = NULL; + int numProcs; Assert(snapshot != NULL); @@ -1235,11 +1241,20 @@ GetSnapshotData(Snapshot snapshot) errmsg("out of memory"))); } + if (xmins == NULL) + { + xmins = malloc(procArray->maxProcs * sizeof(TransactionId)); + if (xmins == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + } + /* * It is sufficient to get shared lock on ProcArrayLock, even if we are * going to set MyProc->xmin. */ - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); /* xmax is always latestCompletedXid + 1 */ xmax = ShmemVariableCache->latestCompletedXid; @@ -1261,6 +1276,8 @@ GetSnapshotData(Snapshot snapshot) if (!snapshot->takenDuringRecovery) { + int *pgprocnos = arrayP->pgprocnos; + /* * Spin over procArray checking xid, xmin, and subxids. The goal is * to gather all active xids, find the lowest xmin, and try to record @@ -1269,23 +1286,25 @@ GetSnapshotData(Snapshot snapshot) * prepared transaction xids are held in KnownAssignedXids, so these * will be seen without needing to loop through procs here. 
*/ - for (index = 0; index < arrayP->numProcs; index++) + numProcs = arrayP->numProcs; + for (index = 0; index < numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = pgprocnos[index]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId xid; /* Ignore procs running LAZY VACUUM */ - if (proc->vacuumFlags & PROC_IN_VACUUM) + if (proc_minimal->vacuumFlags & PROC_IN_VACUUM) + { + xmins[index] = InvalidTransactionId; continue; + } /* Update globalxmin to be the smallest valid xmin */ - xid = proc->xmin; /* fetch just once */ - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, globalxmin)) - globalxmin = xid; + xmins[index] = proc_minimal->xmin; /* fetch just once */ /* Fetch xid just once - see GetNewTransactionId */ - xid = proc->xid; + xid = proc_minimal->xid; /* * If the transaction has been assigned an xid < xmax we add it to @@ -1300,7 +1319,7 @@ GetSnapshotData(Snapshot snapshot) { if (TransactionIdFollowsOrEquals(xid, xmax)) continue; - if (proc != MyProc) + if (proc_minimal != MyProcMinimal) snapshot->xip[count++] = xid; if (TransactionIdPrecedes(xid, xmin)) xmin = xid; @@ -1321,16 +1340,17 @@ GetSnapshotData(Snapshot snapshot) * * Again, our own XIDs are not included in the snapshot. */ - if (!suboverflowed && proc != MyProc) + if (!suboverflowed && proc_minimal != MyProcMinimal) { - if (proc->subxids.overflowed) + if (proc_minimal->overflowed) suboverflowed = true; else { - int nxids = proc->subxids.nxids; + int nxids = proc_minimal->nxids; if (nxids > 0) { + volatile PGPROC *proc = &allProcs[pgprocno]; memcpy(snapshot->subxip + subcount, (void *) proc->subxids.xids, nxids * sizeof(TransactionId)); @@ -1342,6 +1362,7 @@ GetSnapshotData(Snapshot snapshot) } else { + numProcs = 0; /* * We're in hot standby, so get XIDs from KnownAssignedXids. 
* @@ -1372,16 +1393,23 @@ GetSnapshotData(Snapshot snapshot) suboverflowed = true; } - if (!TransactionIdIsValid(MyProc->xmin)) - MyProc->xmin = TransactionXmin = xmin; - - LWLockRelease(ProcArrayLock); + if (!TransactionIdIsValid(MyProcMinimal->xmin)) + MyProcMinimal->xmin = TransactionXmin = xmin; + ProcArrayLockRelease(); /* * Update globalxmin to include actual process xids. This is a slightly * different way of computing it than GetOldestXmin uses, but should give * the same result. */ + for (index = 0; index < numProcs; index++) + { + TransactionId xid = xmins[index]; + if (TransactionIdIsNormal(xid) && + TransactionIdPrecedes(xid, globalxmin)) + globalxmin = xid; + } + if (TransactionIdPrecedes(xmin, globalxmin)) globalxmin = xmin; @@ -1432,18 +1460,20 @@ ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid) return false; /* Get lock so source xact can't end while we're doing this */ - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId xid; /* Ignore procs running LAZY VACUUM */ - if (proc->vacuumFlags & PROC_IN_VACUUM) + if (proc_minimal->vacuumFlags & PROC_IN_VACUUM) continue; - xid = proc->xid; /* fetch just once */ + xid = proc_minimal->xid; /* fetch just once */ if (xid != sourcexid) continue; @@ -1459,7 +1489,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid) /* * Likewise, let's just make real sure its xmin does cover us. 
*/ - xid = proc->xmin; /* fetch just once */ + xid = proc_minimal->xmin; /* fetch just once */ if (!TransactionIdIsNormal(xid) || !TransactionIdPrecedesOrEquals(xid, xmin)) continue; @@ -1470,13 +1500,13 @@ ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid) * GetSnapshotData first, we'll be overwriting a valid xmin here, * so we don't check that.) */ - MyProc->xmin = TransactionXmin = xmin; + MyProcMinimal->xmin = TransactionXmin = xmin; result = true; break; } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1550,7 +1580,7 @@ GetRunningTransactionData(void) * Ensure that no xids enter or leave the procarray while we obtain * snapshot. */ - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); LWLockAcquire(XidGenLock, LW_SHARED); latestCompletedXid = ShmemVariableCache->latestCompletedXid; @@ -1562,12 +1592,14 @@ GetRunningTransactionData(void) */ for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId xid; int nxids; /* Fetch xid just once - see GetNewTransactionId */ - xid = proc->xid; + xid = proc_minimal->xid; /* * We don't need to store transactions that don't have a TransactionId @@ -1585,7 +1617,7 @@ GetRunningTransactionData(void) * Save subtransaction XIDs. Other backends can't add or remove * entries while we're holding XidGenLock. 
*/ - nxids = proc->subxids.nxids; + nxids = proc_minimal->nxids; if (nxids > 0) { memcpy(&xids[count], (void *) proc->subxids.xids, @@ -1593,7 +1625,7 @@ GetRunningTransactionData(void) count += nxids; subcount += nxids; - if (proc->subxids.overflowed) + if (proc_minimal->overflowed) suboverflowed = true; /* @@ -1611,7 +1643,7 @@ GetRunningTransactionData(void) CurrentRunningXacts->latestCompletedXid = latestCompletedXid; /* We don't release XidGenLock here, the caller is responsible for that */ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid)); Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid)); @@ -1644,7 +1676,7 @@ GetOldestActiveTransactionId(void) Assert(!RecoveryInProgress()); - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); oldestRunningXid = ShmemVariableCache->nextXid; @@ -1653,11 +1685,12 @@ GetOldestActiveTransactionId(void) */ for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = proc->xid; + xid = proc_minimal->xid; if (!TransactionIdIsNormal(xid)) continue; @@ -1672,7 +1705,7 @@ GetOldestActiveTransactionId(void) */ } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return oldestRunningXid; } @@ -1705,20 +1738,22 @@ GetTransactionsInCommit(TransactionId **xids_p) xids = (TransactionId *) palloc(arrayP->maxProcs * sizeof(TransactionId)); nxids = 0; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; + TransactionId pxid; /* Fetch xid just once - see 
GetNewTransactionId */ - TransactionId pxid = proc->xid; + pxid = proc_minimal->xid; - if (proc->inCommit && TransactionIdIsValid(pxid)) + if (proc_minimal->inCommit && TransactionIdIsValid(pxid)) xids[nxids++] = pxid; } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); *xids_p = xids; return nxids; @@ -1740,16 +1775,18 @@ HaveTransactionsInCommit(TransactionId *xids, int nxids) ProcArrayStruct *arrayP = procArray; int index; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; + TransactionId pxid; /* Fetch xid just once - see GetNewTransactionId */ - TransactionId pxid = proc->xid; + pxid = proc_minimal->xid; - if (proc->inCommit && TransactionIdIsValid(pxid)) + if (proc_minimal->inCommit && TransactionIdIsValid(pxid)) { int i; @@ -1766,7 +1803,7 @@ HaveTransactionsInCommit(TransactionId *xids, int nxids) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1788,11 +1825,11 @@ BackendPidGetProc(int pid) if (pid == 0) /* never match dummy PGPROCs */ return NULL; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - PGPROC *proc = arrayP->procs[index]; + PGPROC *proc = &allProcs[arrayP->pgprocnos[index]]; if (proc->pid == pid) { @@ -1801,7 +1838,7 @@ BackendPidGetProc(int pid) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1829,20 +1866,22 @@ BackendXidGetPid(TransactionId xid) if (xid == InvalidTransactionId) /* never match invalid xid */ return 0; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + 
volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; - if (proc->xid == xid) + if (proc_minimal->xid == xid) { result = proc->pid; break; } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1897,22 +1936,24 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, vxids = (VirtualTransactionId *) palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs); - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; if (proc == MyProc) continue; - if (excludeVacuum & proc->vacuumFlags) + if (excludeVacuum & proc_minimal->vacuumFlags) continue; if (allDbs || proc->databaseId == MyDatabaseId) { /* Fetch xmin just once - might change on us */ - TransactionId pxmin = proc->xmin; + TransactionId pxmin = proc_minimal->xmin; if (excludeXmin0 && !TransactionIdIsValid(pxmin)) continue; @@ -1933,7 +1974,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); *nvxids = count; return vxids; @@ -1992,11 +2033,13 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) errmsg("out of memory"))); } - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; /* Exclude prepared transactions */ if (proc->pid == 0) @@ -2006,7 +2049,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) proc->databaseId == dbOid) { /* Fetch xmin just once 
- can't change on us, but good coding */ - TransactionId pxmin = proc->xmin; + TransactionId pxmin = proc_minimal->xmin; /* * We ignore an invalid pxmin because this means that backend has @@ -2025,7 +2068,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* add the terminator */ vxids[count].backendId = InvalidBackendId; @@ -2046,12 +2089,13 @@ CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) int index; pid_t pid = 0; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; VirtualTransactionId procvxid; - PGPROC *proc = arrayP->procs[index]; GET_VXID_FROM_PGPROC(procvxid, *proc); @@ -2072,7 +2116,7 @@ CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return pid; } @@ -2104,7 +2148,9 @@ MinimumActiveBackends(int min) */ for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; /* * Since we're not holding a lock, need to check that the pointer is @@ -2122,10 +2168,10 @@ MinimumActiveBackends(int min) if (proc == MyProc) continue; /* do not count myself */ + if (proc_minimal->xid == InvalidTransactionId) + continue; /* do not count if no XID assigned */ if (proc->pid == 0) continue; /* do not count prepared xacts */ - if (proc->xid == InvalidTransactionId) - continue; /* do not count if no XID assigned */ if (proc->waitLock != NULL) continue; /* do not count if blocked on a lock */ count++; @@ -2146,11 +2192,12 @@ CountDBBackends(Oid databaseid) int count = 0; int index; - LWLockAcquire(ProcArrayLock, 
LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; if (proc->pid == 0) continue; /* do not count prepared xacts */ @@ -2159,7 +2206,7 @@ CountDBBackends(Oid databaseid) count++; } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return count; } @@ -2175,11 +2222,12 @@ CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending) pid_t pid = 0; /* tell all backends to die */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; if (databaseid == InvalidOid || proc->databaseId == databaseid) { @@ -2200,7 +2248,7 @@ CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -2213,11 +2261,12 @@ CountUserBackends(Oid roleid) int count = 0; int index; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; if (proc->pid == 0) continue; /* do not count prepared xacts */ @@ -2225,7 +2274,7 @@ CountUserBackends(Oid roleid) count++; } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return count; } @@ -2273,11 +2322,13 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) *nbackends = *nprepared = 0; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = 
arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; if (proc->databaseId != databaseId) continue; @@ -2291,13 +2342,13 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) else { (*nbackends)++; - if ((proc->vacuumFlags & PROC_IS_AUTOVACUUM) && + if ((proc_minimal->vacuumFlags & PROC_IS_AUTOVACUUM) && nautovacs < MAXAUTOVACPIDS) autovac_pids[nautovacs++] = proc->pid; } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); if (!found) return false; /* no conflicting backends, so done */ @@ -2321,8 +2372,8 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) #define XidCacheRemove(i) \ do { \ - MyProc->subxids.xids[i] = MyProc->subxids.xids[MyProc->subxids.nxids - 1]; \ - MyProc->subxids.nxids--; \ + MyProc->subxids.xids[i] = MyProc->subxids.xids[MyProcMinimal->nxids - 1]; \ + MyProcMinimal->nxids--; \ } while (0) /* @@ -2350,7 +2401,7 @@ XidCacheRemoveRunningXids(TransactionId xid, * to abort subtransactions, but pending closer analysis we'd best be * conservative. */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); /* * Under normal circumstances xid and xids[] will be in increasing order, @@ -2361,7 +2412,7 @@ XidCacheRemoveRunningXids(TransactionId xid, { TransactionId anxid = xids[i]; - for (j = MyProc->subxids.nxids - 1; j >= 0; j--) + for (j = MyProcMinimal->nxids - 1; j >= 0; j--) { if (TransactionIdEquals(MyProc->subxids.xids[j], anxid)) { @@ -2377,11 +2428,11 @@ XidCacheRemoveRunningXids(TransactionId xid, * error during AbortSubTransaction. So instead of Assert, emit a * debug warning. 
*/ - if (j < 0 && !MyProc->subxids.overflowed) + if (j < 0 && !MyProcMinimal->overflowed) elog(WARNING, "did not find subXID %u in MyProc", anxid); } - for (j = MyProc->subxids.nxids - 1; j >= 0; j--) + for (j = MyProcMinimal->nxids - 1; j >= 0; j--) { if (TransactionIdEquals(MyProc->subxids.xids[j], xid)) { @@ -2390,7 +2441,7 @@ XidCacheRemoveRunningXids(TransactionId xid, } } /* Ordinarily we should have found it, unless the cache has overflowed */ - if (j < 0 && !MyProc->subxids.overflowed) + if (j < 0 && !MyProcMinimal->overflowed) elog(WARNING, "did not find subXID %u in MyProc", xid); /* Also advance global latestCompletedXid while holding the lock */ @@ -2398,7 +2449,7 @@ XidCacheRemoveRunningXids(TransactionId xid, latestXid)) ShmemVariableCache->latestCompletedXid = latestXid; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } #ifdef XIDCACHE_DEBUG @@ -2565,7 +2616,7 @@ ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, /* * Uses same locking as transaction commit */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); KnownAssignedXidsRemoveTree(xid, nsubxids, subxids); @@ -2574,7 +2625,7 @@ ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, max_xid)) ShmemVariableCache->latestCompletedXid = max_xid; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -2584,9 +2635,9 @@ ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, void ExpireAllKnownAssignedTransactionIds(void) { - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); KnownAssignedXidsRemovePreceding(InvalidTransactionId); - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -2596,9 +2647,9 @@ ExpireAllKnownAssignedTransactionIds(void) void ExpireOldKnownAssignedTransactionIds(TransactionId xid) { - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); KnownAssignedXidsRemovePreceding(xid); - 
LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } @@ -2820,7 +2871,7 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, { /* must hold lock to compress */ if (!exclusive_lock) - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); KnownAssignedXidsCompress(true); @@ -2828,7 +2879,7 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, /* note: we no longer care about the tail pointer */ if (!exclusive_lock) - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* * If it still won't fit then we're out of memory diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile index e12a854..27eaa97 100644 --- a/src/backend/storage/lmgr/Makefile +++ b/src/backend/storage/lmgr/Makefile @@ -12,7 +12,8 @@ subdir = src/backend/storage/lmgr top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o +OBJS = flexlock.o lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o \ + procarraylock.o predicate.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c index 7e7f6af..4fd7bd7 100644 --- a/src/backend/storage/lmgr/deadlock.c +++ b/src/backend/storage/lmgr/deadlock.c @@ -450,6 +450,7 @@ FindLockCycleRecurse(PGPROC *checkProc, int *nSoftEdges) /* output argument */ { PGPROC *proc; + PGPROC_MINIMAL *proc_minimal; LOCK *lock; PROCLOCK *proclock; SHM_QUEUE *procLocks; @@ -516,6 +517,7 @@ FindLockCycleRecurse(PGPROC *checkProc, while (proclock) { proc = proclock->tag.myProc; + proc_minimal = &ProcGlobal->allProcs_Minimal[proc->pgprocno]; /* A proc never blocks itself */ if (proc != checkProc) @@ -541,7 +543,7 @@ FindLockCycleRecurse(PGPROC *checkProc, * vacuumFlag bit), but we don't do that here to avoid * grabbing ProcArrayLock. 
*/ - if (proc->vacuumFlags & PROC_IS_AUTOVACUUM) + if (proc_minimal->vacuumFlags & PROC_IS_AUTOVACUUM) blocking_autovacuum_proc = proc; /* This proc hard-blocks checkProc */ diff --git a/src/backend/storage/lmgr/flexlock.c b/src/backend/storage/lmgr/flexlock.c new file mode 100644 index 0000000..c88bd24 --- /dev/null +++ b/src/backend/storage/lmgr/flexlock.c @@ -0,0 +1,366 @@ +/*------------------------------------------------------------------------- + * + * flexlock.c + * Low-level routines for managing flex locks. + * + * Flex locks are intended primarily to provide mutual exclusion of access + * to shared-memory data structures. Most, but not all, flex locks are + * lightweight locks (LWLocks). This file contains support routines that + * are used for all types of flex locks, including lwlocks. User-level + * locking should be done with the full lock manager --- which depends on + * LWLocks to protect its shared state. + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/lmgr/flexlock.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "access/clog.h" +#include "access/multixact.h" +#include "access/subtrans.h" +#include "commands/async.h" +#include "storage/flexlock_internals.h" +#include "storage/lwlock.h" +#include "storage/predicate.h" +#include "storage/proc.h" +#include "storage/procarraylock.h" +#include "storage/spin.h" +#include "utils/elog.h" + +/* + * We use this structure to keep track of flex locks held, for release + * during error recovery. The maximum size could be determined at runtime + * if necessary, but it seems unlikely that more than a few locks could + * ever be held simultaneously. 
+ */ +#define MAX_SIMUL_FLEXLOCKS 100 + +int num_held_flexlocks = 0; +FlexLockId held_flexlocks[MAX_SIMUL_FLEXLOCKS]; + +static int lock_addin_request = 0; +static bool lock_addin_request_allowed = true; + +#ifdef LOCK_DEBUG +bool Trace_flexlocks = false; +#endif + +/* + * This points to the array of FlexLocks in shared memory. Backends inherit + * the pointer by fork from the postmaster (except in the EXEC_BACKEND case, + * where we have special measures to pass it down). + */ +FlexLockPadded *FlexLockArray = NULL; + +/* We use the ShmemLock spinlock to protect LWLockAssign */ +extern slock_t *ShmemLock; + +static void FlexLockInit(FlexLock *flex, char locktype); + +/* + * Compute number of FlexLocks to allocate. + */ +int +NumFlexLocks(void) +{ + int numLocks; + + /* + * Possibly this logic should be spread out among the affected modules, + * the same way that shmem space estimation is done. But for now, there + * are few enough users of FlexLocks that we can get away with just keeping + * the knowledge here. + */ + + /* Predefined FlexLocks */ + numLocks = (int) NumFixedFlexLocks; + + /* bufmgr.c needs two for each shared buffer */ + numLocks += 2 * NBuffers; + + /* proc.c needs one for each backend or auxiliary process */ + numLocks += MaxBackends + NUM_AUXILIARY_PROCS; + + /* clog.c needs one per CLOG buffer */ + numLocks += NUM_CLOG_BUFFERS; + + /* subtrans.c needs one per SubTrans buffer */ + numLocks += NUM_SUBTRANS_BUFFERS; + + /* multixact.c needs two SLRU areas */ + numLocks += NUM_MXACTOFFSET_BUFFERS + NUM_MXACTMEMBER_BUFFERS; + + /* async.c needs one per Async buffer */ + numLocks += NUM_ASYNC_BUFFERS; + + /* predicate.c needs one per old serializable xid buffer */ + numLocks += NUM_OLDSERXID_BUFFERS; + + /* + * Add any requested by loadable modules; for backwards-compatibility + * reasons, allocate at least NUM_USER_DEFINED_FLEXLOCKS of them even if + * there are no explicit requests. 
+ */ + lock_addin_request_allowed = false; + numLocks += Max(lock_addin_request, NUM_USER_DEFINED_FLEXLOCKS); + + return numLocks; +} + + +/* + * RequestAddinFlexLocks + * Request that extra FlexLocks be allocated for use by + * a loadable module. + * + * This is only useful if called from the _PG_init hook of a library that + * is loaded into the postmaster via shared_preload_libraries. Once + * shared memory has been allocated, calls will be ignored. (We could + * raise an error, but it seems better to make it a no-op, so that + * libraries containing such calls can be reloaded if needed.) + */ +void +RequestAddinFlexLocks(int n) +{ + if (IsUnderPostmaster || !lock_addin_request_allowed) + return; /* too late */ + lock_addin_request += n; +} + + +/* + * Compute shmem space needed for FlexLocks. + */ +Size +FlexLockShmemSize(void) +{ + Size size; + int numLocks = NumFlexLocks(); + + /* Space for the FlexLock array. */ + size = mul_size(numLocks, FLEX_LOCK_BYTES); + + /* Space for dynamic allocation counter, plus room for alignment. */ + size = add_size(size, 2 * sizeof(int) + FLEX_LOCK_BYTES); + + return size; +} + +/* + * Allocate shmem space for FlexLocks and initialize the locks. + */ +void +CreateFlexLocks(void) +{ + int numLocks = NumFlexLocks(); + Size spaceLocks = FlexLockShmemSize(); + FlexLockPadded *lock; + int *FlexLockCounter; + char *ptr; + int id; + + /* Allocate and zero space */ + ptr = (char *) ShmemAlloc(spaceLocks); + memset(ptr, 0, spaceLocks); + + /* Leave room for dynamic allocation counter */ + ptr += 2 * sizeof(int); + + /* Ensure desired alignment of FlexLock array */ + ptr += FLEX_LOCK_BYTES - ((uintptr_t) ptr) % FLEX_LOCK_BYTES; + + FlexLockArray = (FlexLockPadded *) ptr; + + /* All of the "fixed" FlexLocks are LWLocks - except ProcArrayLock. 
*/ + for (id = 0, lock = FlexLockArray; id < NumFixedFlexLocks; id++, lock++) + { + if (id == ProcArrayLock) + FlexLockInit(&lock->flex, FLEXLOCK_TYPE_PROCARRAYLOCK); + else + FlexLockInit(&lock->flex, FLEXLOCK_TYPE_LWLOCK); + } + + /* + * Initialize the dynamic-allocation counter, which is stored just before + * the first FlexLock. + */ + FlexLockCounter = (int *) ((char *) FlexLockArray - 2 * sizeof(int)); + FlexLockCounter[0] = (int) NumFixedFlexLocks; + FlexLockCounter[1] = numLocks; +} + +/* + * FlexLockAssign - assign a dynamically-allocated FlexLock number + * + * We interlock this using the same spinlock that is used to protect + * ShmemAlloc(). Interlocking is not really necessary during postmaster + * startup, but it is needed if any user-defined code tries to allocate + * LWLocks after startup. + */ +FlexLockId +FlexLockAssign(char locktype) +{ + FlexLockId result; + + /* use volatile pointer to prevent code rearrangement */ + volatile int *FlexLockCounter; + + FlexLockCounter = (int *) ((char *) FlexLockArray - 2 * sizeof(int)); + SpinLockAcquire(ShmemLock); + if (FlexLockCounter[0] >= FlexLockCounter[1]) + { + SpinLockRelease(ShmemLock); + elog(ERROR, "no more FlexLockIds available"); + } + result = (FlexLockId) (FlexLockCounter[0]++); + SpinLockRelease(ShmemLock); + + FlexLockInit(&FlexLockArray[result].flex, locktype); + + return result; +} + +/* + * Initialize a FlexLock. + */ +static void +FlexLockInit(FlexLock *flex, char locktype) +{ + SpinLockInit(&flex->mutex); + flex->releaseOK = true; + flex->locktype = locktype; + /* + * We might need to think a little harder about what should happen here + * if some future type of FlexLock requires more initialization than this. + * For now, this will suffice. + */ +} + +/* + * Remove lock from list of locks held. Usually, but not always, it will + * be the latest-acquired lock; so search array backwards. 
+ */ +void +FlexLockRemember(FlexLockId id) +{ + if (num_held_flexlocks >= MAX_SIMUL_FLEXLOCKS) + elog(PANIC, "too many FlexLocks taken"); + held_flexlocks[num_held_flexlocks++] = id; +} + +/* + * Remove lock from list of locks held. Usually, but not always, it will + * be the latest-acquired lock; so search array backwards. + */ +void +FlexLockForget(FlexLockId id) +{ + int i; + + for (i = num_held_flexlocks; --i >= 0;) + { + if (id == held_flexlocks[i]) + break; + } + if (i < 0) + elog(ERROR, "lock %d is not held", (int) id); + num_held_flexlocks--; + for (; i < num_held_flexlocks; i++) + held_flexlocks[i] = held_flexlocks[i + 1]; +} + +/* + * FlexLockWait - wait until awakened + * + * Since we share the process wait semaphore with the regular lock manager + * and ProcWaitForSignal, and we may need to acquire a FlexLock while one of + * those is pending, it is possible that we get awakened for a reason other + * than being signaled by a FlexLock release. If so, loop back and wait again. + * + * Returns the number of "extra" waits absorbed so that, once we've gotten the + * FlexLock, we can re-increment the sema by the number of additional signals + * received, so that the lock manager or signal manager will see the received + * signal when it next waits. + */ +int +FlexLockWait(FlexLockId id, int mode) +{ + int extraWaits = 0; + + FlexLockDebug("LWLockAcquire", id, "waiting"); + TRACE_POSTGRESQL_FLEXLOCK_WAIT_START(id, mode); + + for (;;) + { + /* "false" means cannot accept cancel/die interrupt here. */ + PGSemaphoreLock(&MyProc->sem, false); + /* + * FLEXTODO: I think we should return this, instead of ignoring it. + * Any non-zero value means "wake up". + */ + if (MyProc->flWaitResult) + break; + extraWaits++; + } + + TRACE_POSTGRESQL_FLEXLOCK_WAIT_DONE(id, mode); + FlexLockDebug("LWLockAcquire", id, "awakened"); + + return extraWaits; +} + +/* + * FlexLockReleaseAll - release all currently-held locks + * + * Used to clean up after ereport(ERROR). 
An important difference between this + * function and retail LWLockRelease calls is that InterruptHoldoffCount is + * unchanged by this operation. This is necessary since InterruptHoldoffCount + * has been set to an appropriate level earlier in error recovery. We could + * decrement it below zero if we allow it to drop for each released lock! + */ +void +FlexLockReleaseAll(void) +{ + while (num_held_flexlocks > 0) + { + FlexLockId id; + FlexLock *flex; + + HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */ + + id = held_flexlocks[num_held_flexlocks - 1]; + flex = &FlexLockArray[id].flex; + if (flex->locktype == FLEXLOCK_TYPE_LWLOCK) + LWLockRelease(id); + else + { + Assert(id == ProcArrayLock); + ProcArrayLockRelease(); + } + } +} + +/* + * FlexLockHeldByMe - test whether my process currently holds a lock + * + * This is meant as debug support only. We do not consider the lock mode. + */ +bool +FlexLockHeldByMe(FlexLockId id) +{ + int i; + + for (i = 0; i < num_held_flexlocks; i++) + { + if (held_flexlocks[i] == id) + return true; + } + return false; +} diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 905502f..edaff09 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -591,7 +591,7 @@ LockAcquireExtended(const LOCKTAG *locktag, bool found; ResourceOwner owner; uint32 hashcode; - LWLockId partitionLock; + FlexLockId partitionLock; int status; bool log_lock = false; @@ -1546,7 +1546,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) LOCALLOCK *locallock; LOCK *lock; PROCLOCK *proclock; - LWLockId partitionLock; + FlexLockId partitionLock; bool wakeupNeeded; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) @@ -1912,7 +1912,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) */ for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++) { - LWLockId partitionLock = FirstLockMgrLock + partition; + FlexLockId partitionLock = 
FirstLockMgrLock + partition; SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]); proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, @@ -2197,7 +2197,7 @@ static bool FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag, uint32 hashcode) { - LWLockId partitionLock = LockHashPartitionLock(hashcode); + FlexLockId partitionLock = LockHashPartitionLock(hashcode); Oid relid = locktag->locktag_field2; uint32 i; @@ -2281,7 +2281,7 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock) LockMethod lockMethodTable = LockMethods[DEFAULT_LOCKMETHOD]; LOCKTAG *locktag = &locallock->tag.lock; PROCLOCK *proclock = NULL; - LWLockId partitionLock = LockHashPartitionLock(locallock->hashcode); + FlexLockId partitionLock = LockHashPartitionLock(locallock->hashcode); Oid relid = locktag->locktag_field2; uint32 f; @@ -2382,7 +2382,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) SHM_QUEUE *procLocks; PROCLOCK *proclock; uint32 hashcode; - LWLockId partitionLock; + FlexLockId partitionLock; int count = 0; int fast_count = 0; @@ -2593,7 +2593,7 @@ LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc, PROCLOCKTAG proclocktag; uint32 hashcode; uint32 proclock_hashcode; - LWLockId partitionLock; + FlexLockId partitionLock; bool wakeupNeeded; hashcode = LockTagHashCode(locktag); @@ -2827,7 +2827,7 @@ PostPrepare_Locks(TransactionId xid) */ for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++) { - LWLockId partitionLock = FirstLockMgrLock + partition; + FlexLockId partitionLock = FirstLockMgrLock + partition; SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]); proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, @@ -3188,9 +3188,10 @@ GetRunningTransactionLocks(int *nlocks) proclock->tag.myLock->tag.locktag_type == LOCKTAG_RELATION) { PGPROC *proc = proclock->tag.myProc; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[proc->pgprocno]; LOCK *lock = proclock->tag.myLock; - 
accessExclusiveLocks[index].xid = proc->xid; + accessExclusiveLocks[index].xid = proc_minimal->xid; accessExclusiveLocks[index].dbOid = lock->tag.locktag_field1; accessExclusiveLocks[index].relOid = lock->tag.locktag_field2; @@ -3342,7 +3343,7 @@ lock_twophase_recover(TransactionId xid, uint16 info, uint32 hashcode; uint32 proclock_hashcode; int partition; - LWLockId partitionLock; + FlexLockId partitionLock; LockMethod lockMethodTable; Assert(len == sizeof(TwoPhaseLockRecord)); diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 079eb29..ce6c931 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -21,74 +21,23 @@ */ #include "postgres.h" -#include "access/clog.h" -#include "access/multixact.h" -#include "access/subtrans.h" -#include "commands/async.h" #include "miscadmin.h" #include "pg_trace.h" +#include "storage/flexlock_internals.h" #include "storage/ipc.h" -#include "storage/predicate.h" #include "storage/proc.h" #include "storage/spin.h" - -/* We use the ShmemLock spinlock to protect LWLockAssign */ -extern slock_t *ShmemLock; - - typedef struct LWLock { - slock_t mutex; /* Protects LWLock and queue of PGPROCs */ - bool releaseOK; /* T if ok to release waiters */ + FlexLock flex; /* common FlexLock infrastructure */ char exclusive; /* # of exclusive holders (0 or 1) */ int shared; /* # of shared holders (0..MaxBackends) */ - PGPROC *head; /* head of list of waiting PGPROCs */ - PGPROC *tail; /* tail of list of waiting PGPROCs */ - /* tail is undefined when head is NULL */ } LWLock; -/* - * All the LWLock structs are allocated as an array in shared memory. - * (LWLockIds are indexes into the array.) We force the array stride to - * be a power of 2, which saves a few cycles in indexing, but more - * importantly also ensures that individual LWLocks don't cross cache line - * boundaries. This reduces cache contention problems, especially on AMD - * Opterons. 
(Of course, we have to also ensure that the array start - * address is suitably aligned.) - * - * LWLock is between 16 and 32 bytes on all known platforms, so these two - * cases are sufficient. - */ -#define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 16 ? 16 : 32) - -typedef union LWLockPadded -{ - LWLock lock; - char pad[LWLOCK_PADDED_SIZE]; -} LWLockPadded; - -/* - * This points to the array of LWLocks in shared memory. Backends inherit - * the pointer by fork from the postmaster (except in the EXEC_BACKEND case, - * where we have special measures to pass it down). - */ -NON_EXEC_STATIC LWLockPadded *LWLockArray = NULL; - - -/* - * We use this structure to keep track of locked LWLocks for release - * during error recovery. The maximum size could be determined at runtime - * if necessary, but it seems unlikely that more than a few locks could - * ever be held simultaneously. - */ -#define MAX_SIMUL_LWLOCKS 100 - -static int num_held_lwlocks = 0; -static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS]; - -static int lock_addin_request = 0; -static bool lock_addin_request_allowed = true; +#define LWLockPointer(lockid) \ + (AssertMacro(FlexLockArray[lockid].flex.locktype == FLEXLOCK_TYPE_LWLOCK), \ + (volatile LWLock *) &FlexLockArray[lockid]) #ifdef LWLOCK_STATS static int counts_for_pid = 0; @@ -98,27 +47,17 @@ static int *block_counts; #endif #ifdef LOCK_DEBUG -bool Trace_lwlocks = false; - inline static void -PRINT_LWDEBUG(const char *where, LWLockId lockid, const volatile LWLock *lock) +PRINT_LWDEBUG(const char *where, FlexLockId lockid, const volatile LWLock *lock) { - if (Trace_lwlocks) + if (Trace_flexlocks) elog(LOG, "%s(%d): excl %d shared %d head %p rOK %d", where, (int) lockid, - (int) lock->exclusive, lock->shared, lock->head, - (int) lock->releaseOK); -} - -inline static void -LOG_LWDEBUG(const char *where, LWLockId lockid, const char *msg) -{ - if (Trace_lwlocks) - elog(LOG, "%s(%d): %s", where, (int) lockid, msg); + (int) lock->exclusive, lock->shared, 
lock->flex.head, + (int) lock->flex.releaseOK); } #else /* not LOCK_DEBUG */ #define PRINT_LWDEBUG(a,b,c) -#define LOG_LWDEBUG(a,b,c) #endif /* LOCK_DEBUG */ #ifdef LWLOCK_STATS @@ -127,8 +66,8 @@ static void print_lwlock_stats(int code, Datum arg) { int i; - int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - int numLocks = LWLockCounter[1]; + int *FlexLockCounter = (int *) ((char *) FlexLockArray - 2 * sizeof(int)); + int numLocks = FlexLockCounter[1]; /* Grab an LWLock to keep different backends from mixing reports */ LWLockAcquire(0, LW_EXCLUSIVE); @@ -145,173 +84,15 @@ print_lwlock_stats(int code, Datum arg) } #endif /* LWLOCK_STATS */ - /* - * Compute number of LWLocks to allocate. + * LWLockAssign - initialize a new lwlock and return its ID */ -int -NumLWLocks(void) -{ - int numLocks; - - /* - * Possibly this logic should be spread out among the affected modules, - * the same way that shmem space estimation is done. But for now, there - * are few enough users of LWLocks that we can get away with just keeping - * the knowledge here. - */ - - /* Predefined LWLocks */ - numLocks = (int) NumFixedLWLocks; - - /* bufmgr.c needs two for each shared buffer */ - numLocks += 2 * NBuffers; - - /* proc.c needs one for each backend or auxiliary process */ - numLocks += MaxBackends + NUM_AUXILIARY_PROCS; - - /* clog.c needs one per CLOG buffer */ - numLocks += NUM_CLOG_BUFFERS; - - /* subtrans.c needs one per SubTrans buffer */ - numLocks += NUM_SUBTRANS_BUFFERS; - - /* multixact.c needs two SLRU areas */ - numLocks += NUM_MXACTOFFSET_BUFFERS + NUM_MXACTMEMBER_BUFFERS; - - /* async.c needs one per Async buffer */ - numLocks += NUM_ASYNC_BUFFERS; - - /* predicate.c needs one per old serializable xid buffer */ - numLocks += NUM_OLDSERXID_BUFFERS; - - /* - * Add any requested by loadable modules; for backwards-compatibility - * reasons, allocate at least NUM_USER_DEFINED_LWLOCKS of them even if - * there are no explicit requests. 
- */ - lock_addin_request_allowed = false; - numLocks += Max(lock_addin_request, NUM_USER_DEFINED_LWLOCKS); - - return numLocks; -} - - -/* - * RequestAddinLWLocks - * Request that extra LWLocks be allocated for use by - * a loadable module. - * - * This is only useful if called from the _PG_init hook of a library that - * is loaded into the postmaster via shared_preload_libraries. Once - * shared memory has been allocated, calls will be ignored. (We could - * raise an error, but it seems better to make it a no-op, so that - * libraries containing such calls can be reloaded if needed.) - */ -void -RequestAddinLWLocks(int n) -{ - if (IsUnderPostmaster || !lock_addin_request_allowed) - return; /* too late */ - lock_addin_request += n; -} - - -/* - * Compute shmem space needed for LWLocks. - */ -Size -LWLockShmemSize(void) -{ - Size size; - int numLocks = NumLWLocks(); - - /* Space for the LWLock array. */ - size = mul_size(numLocks, sizeof(LWLockPadded)); - - /* Space for dynamic allocation counter, plus room for alignment. */ - size = add_size(size, 2 * sizeof(int) + LWLOCK_PADDED_SIZE); - - return size; -} - - -/* - * Allocate shmem space for LWLocks and initialize the locks. 
- */ -void -CreateLWLocks(void) -{ - int numLocks = NumLWLocks(); - Size spaceLocks = LWLockShmemSize(); - LWLockPadded *lock; - int *LWLockCounter; - char *ptr; - int id; - - /* Allocate space */ - ptr = (char *) ShmemAlloc(spaceLocks); - - /* Leave room for dynamic allocation counter */ - ptr += 2 * sizeof(int); - - /* Ensure desired alignment of LWLock array */ - ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE; - - LWLockArray = (LWLockPadded *) ptr; - - /* - * Initialize all LWLocks to "unlocked" state - */ - for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++) - { - SpinLockInit(&lock->lock.mutex); - lock->lock.releaseOK = true; - lock->lock.exclusive = 0; - lock->lock.shared = 0; - lock->lock.head = NULL; - lock->lock.tail = NULL; - } - - /* - * Initialize the dynamic-allocation counter, which is stored just before - * the first LWLock. - */ - LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - LWLockCounter[0] = (int) NumFixedLWLocks; - LWLockCounter[1] = numLocks; -} - - -/* - * LWLockAssign - assign a dynamically-allocated LWLock number - * - * We interlock this using the same spinlock that is used to protect - * ShmemAlloc(). Interlocking is not really necessary during postmaster - * startup, but it is needed if any user-defined code tries to allocate - * LWLocks after startup. 
- */ -LWLockId +FlexLockId LWLockAssign(void) { - LWLockId result; - - /* use volatile pointer to prevent code rearrangement */ - volatile int *LWLockCounter; - - LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - SpinLockAcquire(ShmemLock); - if (LWLockCounter[0] >= LWLockCounter[1]) - { - SpinLockRelease(ShmemLock); - elog(ERROR, "no more LWLockIds available"); - } - result = (LWLockId) (LWLockCounter[0]++); - SpinLockRelease(ShmemLock); - return result; + return FlexLockAssign(FLEXLOCK_TYPE_LWLOCK); } - /* * LWLockAcquire - acquire a lightweight lock in the specified mode * @@ -320,9 +101,9 @@ LWLockAssign(void) * Side effect: cancel/die interrupts are held off until lock release. */ void -LWLockAcquire(LWLockId lockid, LWLockMode mode) +LWLockAcquire(FlexLockId lockid, LWLockMode mode) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = LWLockPointer(lockid); PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; @@ -333,8 +114,8 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) /* Set up local count state first time through in a given process */ if (counts_for_pid != MyProcPid) { - int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - int numLocks = LWLockCounter[1]; + int *FlexLockCounter = (int *) ((char *) FlexLockArray - 2 * sizeof(int)); + int numLocks = FlexLockCounter[1]; sh_acquire_counts = calloc(numLocks, sizeof(int)); ex_acquire_counts = calloc(numLocks, sizeof(int)); @@ -356,10 +137,6 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) */ Assert(!(proc == NULL && IsUnderPostmaster)); - /* Ensure we will have room to remember the lock */ - if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS) - elog(ERROR, "too many LWLocks taken"); - /* * Lock out cancel/die interrupts until we exit the code section protected * by the LWLock. This ensures that interrupts will not interfere with @@ -388,11 +165,11 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) bool mustwait; /* Acquire mutex. 
Time spent holding mutex should be short! */ - SpinLockAcquire(&lock->mutex); + SpinLockAcquire(&lock->flex.mutex); /* If retrying, allow LWLockRelease to release waiters again */ if (retry) - lock->releaseOK = true; + lock->flex.releaseOK = true; /* If I can get the lock, do so quickly. */ if (mode == LW_EXCLUSIVE) @@ -419,72 +196,30 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) if (!mustwait) break; /* got the lock */ - /* - * Add myself to wait queue. - * - * If we don't have a PGPROC structure, there's no way to wait. This - * should never occur, since MyProc should only be null during shared - * memory initialization. - */ - if (proc == NULL) - elog(PANIC, "cannot wait without a PGPROC structure"); - - proc->lwWaiting = true; - proc->lwExclusive = (mode == LW_EXCLUSIVE); - proc->lwWaitLink = NULL; - if (lock->head == NULL) - lock->head = proc; - else - lock->tail->lwWaitLink = proc; - lock->tail = proc; + /* Add myself to wait queue. */ + FlexLockJoinWaitQueue(lock, (int) mode); /* Can release the mutex now */ - SpinLockRelease(&lock->mutex); - - /* - * Wait until awakened. - * - * Since we share the process wait semaphore with the regular lock - * manager and ProcWaitForSignal, and we may need to acquire an LWLock - * while one of those is pending, it is possible that we get awakened - * for a reason other than being signaled by LWLockRelease. If so, - * loop back and wait again. Once we've gotten the LWLock, - * re-increment the sema by the number of additional signals received, - * so that the lock manager or signal manager will see the received - * signal when it next waits. - */ - LOG_LWDEBUG("LWLockAcquire", lockid, "waiting"); + SpinLockRelease(&lock->flex.mutex); + + /* Wait until awakened. */ + extraWaits += FlexLockWait(lockid, mode); #ifdef LWLOCK_STATS block_counts[lockid]++; #endif - TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode); - - for (;;) - { - /* "false" means cannot accept cancel/die interrupt here. 
*/ - PGSemaphoreLock(&proc->sem, false); - if (!proc->lwWaiting) - break; - extraWaits++; - } - - TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode); - - LOG_LWDEBUG("LWLockAcquire", lockid, "awakened"); - /* Now loop back and try to acquire lock again. */ retry = true; } /* We are done updating shared state of the lock itself. */ - SpinLockRelease(&lock->mutex); + SpinLockRelease(&lock->flex.mutex); - TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode); + TRACE_POSTGRESQL_FLEXLOCK_ACQUIRE(lockid, mode); /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = lockid; + FlexLockRemember(lockid); /* * Fix the process wait semaphore's count for any absorbed wakeups. @@ -501,17 +236,13 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) * If successful, cancel/die interrupts are held off until lock release. */ bool -LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) +LWLockConditionalAcquire(FlexLockId lockid, LWLockMode mode) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = LWLockPointer(lockid); bool mustwait; PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock); - /* Ensure we will have room to remember the lock */ - if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS) - elog(ERROR, "too many LWLocks taken"); - /* * Lock out cancel/die interrupts until we exit the code section protected * by the LWLock. This ensures that interrupts will not interfere with @@ -520,7 +251,7 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) HOLD_INTERRUPTS(); /* Acquire mutex. Time spent holding mutex should be short! */ - SpinLockAcquire(&lock->mutex); + SpinLockAcquire(&lock->flex.mutex); /* If I can get the lock, do so quickly. */ if (mode == LW_EXCLUSIVE) @@ -545,20 +276,20 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) } /* We are done updating shared state of the lock itself. 
*/ - SpinLockRelease(&lock->mutex); + SpinLockRelease(&lock->flex.mutex); if (mustwait) { /* Failed to get lock, so release interrupt holdoff */ RESUME_INTERRUPTS(); - LOG_LWDEBUG("LWLockConditionalAcquire", lockid, "failed"); - TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(lockid, mode); + FlexLockDebug("LWLockConditionalAcquire", lockid, "failed"); + TRACE_POSTGRESQL_FLEXLOCK_CONDACQUIRE_FAIL(lockid, mode); } else { /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = lockid; - TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(lockid, mode); + FlexLockRemember(lockid); + TRACE_POSTGRESQL_FLEXLOCK_CONDACQUIRE(lockid, mode); } return !mustwait; @@ -568,32 +299,18 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) * LWLockRelease - release a previously acquired lock */ void -LWLockRelease(LWLockId lockid) +LWLockRelease(FlexLockId lockid) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = LWLockPointer(lockid); PGPROC *head; PGPROC *proc; - int i; PRINT_LWDEBUG("LWLockRelease", lockid, lock); - /* - * Remove lock from list of locks held. Usually, but not always, it will - * be the latest-acquired lock; so search array backwards. - */ - for (i = num_held_lwlocks; --i >= 0;) - { - if (lockid == held_lwlocks[i]) - break; - } - if (i < 0) - elog(ERROR, "lock %d is not held", (int) lockid); - num_held_lwlocks--; - for (; i < num_held_lwlocks; i++) - held_lwlocks[i] = held_lwlocks[i + 1]; + FlexLockForget(lockid); /* Acquire mutex. Time spent holding mutex should be short! */ - SpinLockAcquire(&lock->mutex); + SpinLockAcquire(&lock->flex.mutex); /* Release my hold on lock */ if (lock->exclusive > 0) @@ -610,10 +327,10 @@ LWLockRelease(LWLockId lockid) * if someone has already awakened waiters that haven't yet acquired the * lock. 
*/ - head = lock->head; + head = lock->flex.head; if (head != NULL) { - if (lock->exclusive == 0 && lock->shared == 0 && lock->releaseOK) + if (lock->exclusive == 0 && lock->shared == 0 && lock->flex.releaseOK) { /* * Remove the to-be-awakened PGPROCs from the queue. If the front @@ -621,17 +338,17 @@ LWLockRelease(LWLockId lockid) * as many waiters as want shared access. */ proc = head; - if (!proc->lwExclusive) + if (proc->flWaitMode != LW_EXCLUSIVE) { - while (proc->lwWaitLink != NULL && - !proc->lwWaitLink->lwExclusive) - proc = proc->lwWaitLink; + while (proc->flWaitLink != NULL && + proc->flWaitLink->flWaitMode != LW_EXCLUSIVE) + proc = proc->flWaitLink; } /* proc is now the last PGPROC to be released */ - lock->head = proc->lwWaitLink; - proc->lwWaitLink = NULL; + lock->flex.head = proc->flWaitLink; + proc->flWaitLink = NULL; /* prevent additional wakeups until retryer gets to run */ - lock->releaseOK = false; + lock->flex.releaseOK = false; } else { @@ -641,20 +358,20 @@ LWLockRelease(LWLockId lockid) } /* We are done updating shared state of the lock itself. */ - SpinLockRelease(&lock->mutex); + SpinLockRelease(&lock->flex.mutex); - TRACE_POSTGRESQL_LWLOCK_RELEASE(lockid); + TRACE_POSTGRESQL_FLEXLOCK_RELEASE(lockid); /* * Awaken any waiters I removed from the queue. */ while (head != NULL) { - LOG_LWDEBUG("LWLockRelease", lockid, "release waiter"); + FlexLockDebug("LWLockRelease", lockid, "release waiter"); proc = head; - head = proc->lwWaitLink; - proc->lwWaitLink = NULL; - proc->lwWaiting = false; + head = proc->flWaitLink; + proc->flWaitLink = NULL; + proc->flWaitResult = 1; /* any non-zero value will do */ PGSemaphoreUnlock(&proc->sem); } @@ -664,43 +381,17 @@ LWLockRelease(LWLockId lockid) RESUME_INTERRUPTS(); } - -/* - * LWLockReleaseAll - release all currently-held locks - * - * Used to clean up after ereport(ERROR). 
An important difference between this - * function and retail LWLockRelease calls is that InterruptHoldoffCount is - * unchanged by this operation. This is necessary since InterruptHoldoffCount - * has been set to an appropriate level earlier in error recovery. We could - * decrement it below zero if we allow it to drop for each released lock! - */ -void -LWLockReleaseAll(void) -{ - while (num_held_lwlocks > 0) - { - HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */ - - LWLockRelease(held_lwlocks[num_held_lwlocks - 1]); - } -} - - /* * LWLockHeldByMe - test whether my process currently holds a lock * - * This is meant as debug support only. We do not distinguish whether the - * lock is held shared or exclusive. + * The following convenience routine might not be worthwhile but for the fact + * that we've had a function by this name since long before FlexLocks existed. + * Callers who want to check whether an arbitrary FlexLock (that may or may not + * be an LWLock) is held can use FlexLockHeldByMe directly. 
*/ bool -LWLockHeldByMe(LWLockId lockid) +LWLockHeldByMe(FlexLockId lockid) { - int i; - - for (i = 0; i < num_held_lwlocks; i++) - { - if (held_lwlocks[i] == lockid) - return true; - } - return false; + AssertMacro(FlexLockArray[lockid].flex.locktype == FLEXLOCK_TYPE_LWLOCK); + return FlexLockHeldByMe(lockid); } diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 345f6f5..15978a4 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -239,7 +239,7 @@ #define PredicateLockHashPartition(hashcode) \ ((hashcode) % NUM_PREDICATELOCK_PARTITIONS) #define PredicateLockHashPartitionLock(hashcode) \ - ((LWLockId) (FirstPredicateLockMgrLock + PredicateLockHashPartition(hashcode))) + ((FlexLockId) (FirstPredicateLockMgrLock + PredicateLockHashPartition(hashcode))) #define NPREDICATELOCKTARGETENTS() \ mul_size(max_predicate_locks_per_xact, add_size(MaxBackends, max_prepared_xacts)) @@ -1840,7 +1840,7 @@ PageIsPredicateLocked(Relation relation, BlockNumber blkno) { PREDICATELOCKTARGETTAG targettag; uint32 targettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; PREDICATELOCKTARGET *target; SET_PREDICATELOCKTARGETTAG_PAGE(targettag, @@ -2073,7 +2073,7 @@ DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag) if (TargetTagIsCoveredBy(oldtargettag, *newtargettag)) { uint32 oldtargettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; PREDICATELOCK *rmpredlock; oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag); @@ -2285,7 +2285,7 @@ CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag, PREDICATELOCKTARGET *target; PREDICATELOCKTAG locktag; PREDICATELOCK *lock; - LWLockId partitionLock; + FlexLockId partitionLock; bool found; partitionLock = PredicateLockHashPartitionLock(targettaghash); @@ -2586,10 +2586,10 @@ TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag, bool removeOld) { uint32 oldtargettaghash; - LWLockId 
oldpartitionLock; + FlexLockId oldpartitionLock; PREDICATELOCKTARGET *oldtarget; uint32 newtargettaghash; - LWLockId newpartitionLock; + FlexLockId newpartitionLock; bool found; bool outOfShmem = false; @@ -3578,7 +3578,7 @@ ClearOldPredicateLocks(void) PREDICATELOCKTARGET *target; PREDICATELOCKTARGETTAG targettag; uint32 targettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; tag = predlock->tag; target = tag.myTarget; @@ -3656,7 +3656,7 @@ ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial, PREDICATELOCKTARGET *target; PREDICATELOCKTARGETTAG targettag; uint32 targettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; nextpredlock = (PREDICATELOCK *) SHMQueueNext(&(sxact->predicateLocks), @@ -4034,7 +4034,7 @@ static void CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag) { uint32 targettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; PREDICATELOCKTARGET *target; PREDICATELOCK *predlock; PREDICATELOCK *mypredlock = NULL; diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index eda3a98..edb225a 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -36,6 +36,7 @@ #include #include "access/transam.h" +#include "access/twophase.h" #include "access/xact.h" #include "miscadmin.h" #include "postmaster/autovacuum.h" @@ -45,6 +46,7 @@ #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procarraylock.h" #include "storage/procsignal.h" #include "storage/spin.h" #include "utils/timestamp.h" @@ -57,6 +59,7 @@ bool log_lock_waits = false; /* Pointer to this process's PGPROC struct, if any */ PGPROC *MyProc = NULL; +PGPROC_MINIMAL *MyProcMinimal = NULL; /* * This spinlock protects the freelist of recycled PGPROC structures. 
@@ -70,6 +73,7 @@ NON_EXEC_STATIC slock_t *ProcStructLock = NULL; /* Pointers to shared-memory structures */ PROC_HDR *ProcGlobal = NULL; NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL; +PGPROC *PreparedXactProcs = NULL; /* If we are waiting for a lock, this points to the associated LOCALLOCK */ static LOCALLOCK *lockAwaited = NULL; @@ -106,13 +110,19 @@ ProcGlobalShmemSize(void) /* ProcGlobal */ size = add_size(size, sizeof(PROC_HDR)); - /* AuxiliaryProcs */ - size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC))); /* MyProcs, including autovacuum workers and launcher */ size = add_size(size, mul_size(MaxBackends, sizeof(PGPROC))); + /* AuxiliaryProcs */ + size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC))); + /* Prepared xacts */ + size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGPROC))); /* ProcStructLock */ size = add_size(size, sizeof(slock_t)); + size = add_size(size, mul_size(MaxBackends, sizeof(PGPROC_MINIMAL))); + size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC_MINIMAL))); + size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGPROC_MINIMAL))); + return size; } @@ -157,10 +167,11 @@ void InitProcGlobal(void) { PGPROC *procs; + PGPROC_MINIMAL *procs_minimal; int i, j; bool found; - uint32 TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS; + uint32 TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts; /* Create the ProcGlobal shared structure */ ProcGlobal = (PROC_HDR *) @@ -195,14 +206,38 @@ InitProcGlobal(void) (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); MemSet(procs, 0, TotalProcs * sizeof(PGPROC)); + + /* + * Also allocate a separate array of PROC_MINIMAL structures. We keep this + * out of band of the main PGPROC array to ensure the very heavily accessed + * members of the PGPROC structure are stored contiguously in the memory. + * This provides significant performance benefits, especially on a + * multiprocessor system by improving cache hit ratio. 
+ * + * Note: We separate the members needed by GetSnapshotData since that's the + * most frequently accessed code path. There is one PROC_MINIMAL structure + * for every PGPROC structure. + */ + procs_minimal = (PGPROC_MINIMAL *) ShmemAlloc(TotalProcs * sizeof(PGPROC_MINIMAL)); + MemSet(procs_minimal, 0, TotalProcs * sizeof(PGPROC_MINIMAL)); + ProcGlobal->allProcs_Minimal = procs_minimal; + for (i = 0; i < TotalProcs; i++) { /* Common initialization for all PGPROCs, regardless of type. */ - /* Set up per-PGPROC semaphore, latch, and backendLock */ - PGSemaphoreCreate(&(procs[i].sem)); - InitSharedLatch(&(procs[i].procLatch)); - procs[i].backendLock = LWLockAssign(); + /* + * Set up per-PGPROC semaphore, latch, and backendLock. Prepared + * xact dummy PGPROCs don't need these though - they're never + * associated with a real process + */ + if (i < MaxBackends + NUM_AUXILIARY_PROCS) + { + PGSemaphoreCreate(&(procs[i].sem)); + InitSharedLatch(&(procs[i].procLatch)); + procs[i].backendLock = LWLockAssign(); + } + procs[i].pgprocno = i; /* * Newly created PGPROCs for normal backends or for autovacuum must @@ -234,6 +269,7 @@ InitProcGlobal(void) * auxiliary proceses. 
*/ AuxiliaryProcs = &procs[MaxBackends]; + PreparedXactProcs = &procs[MaxBackends + NUM_AUXILIARY_PROCS]; /* Create ProcStructLock spinlock, too */ ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t)); @@ -296,6 +332,7 @@ InitProcess(void) (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already"))); } + MyProcMinimal = &ProcGlobal->allProcs_Minimal[MyProc->pgprocno]; /* * Now that we have a PGPROC, mark ourselves as an active postmaster @@ -313,21 +350,21 @@ InitProcess(void) SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; MyProc->lxid = InvalidLocalTransactionId; - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; + MyProcMinimal->xid = InvalidTransactionId; + MyProcMinimal->xmin = InvalidTransactionId; MyProc->pid = MyProcPid; /* backendId, databaseId and roleId will be filled in later */ MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; - MyProc->inCommit = false; - MyProc->vacuumFlags = 0; + MyProcMinimal->inCommit = false; + MyProcMinimal->vacuumFlags = 0; /* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */ if (IsAutoVacuumWorkerProcess()) - MyProc->vacuumFlags |= PROC_IS_AUTOVACUUM; - MyProc->lwWaiting = false; - MyProc->lwExclusive = false; - MyProc->lwWaitLink = NULL; + MyProcMinimal->vacuumFlags |= PROC_IS_AUTOVACUUM; + MyProc->flWaitResult = 0; + MyProc->flWaitMode = 0; + MyProc->flWaitLink = NULL; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; #ifdef USE_ASSERT_CHECKING @@ -462,6 +499,7 @@ InitAuxiliaryProcess(void) ((volatile PGPROC *) auxproc)->pid = MyProcPid; MyProc = auxproc; + MyProcMinimal = &ProcGlobal->allProcs_Minimal[auxproc->pgprocno]; SpinLockRelease(ProcStructLock); @@ -472,16 +510,16 @@ InitAuxiliaryProcess(void) SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; MyProc->lxid = InvalidLocalTransactionId; - MyProc->xid = InvalidTransactionId; - MyProc->xmin = 
InvalidTransactionId; + MyProcMinimal->xid = InvalidTransactionId; + MyProcMinimal->xmin = InvalidTransactionId; MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; - MyProc->inCommit = false; - MyProc->vacuumFlags = 0; - MyProc->lwWaiting = false; - MyProc->lwExclusive = false; - MyProc->lwWaitLink = NULL; + MyProcMinimal->inCommit = false; + MyProcMinimal->vacuumFlags = 0; + MyProc->flWaitMode = 0; + MyProc->flWaitResult = 0; + MyProc->flWaitLink = NULL; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; #ifdef USE_ASSERT_CHECKING @@ -607,7 +645,7 @@ IsWaitingForLock(void) void LockWaitCancel(void) { - LWLockId partitionLock; + FlexLockId partitionLock; /* Nothing to do if we weren't waiting for a lock */ if (lockAwaited == NULL) @@ -718,11 +756,11 @@ ProcKill(int code, Datum arg) #endif /* - * Release any LW locks I am holding. There really shouldn't be any, but - * it's cheap to check again before we cut the knees off the LWLock + * Release any flex locks I am holding. There really shouldn't be any, but + * it's cheap to check again before we cut the knees off the flex lock * facility by releasing our PGPROC ... 
*/ - LWLockReleaseAll(); + FlexLockReleaseAll(); /* Release ownership of the process's latch, too */ DisownLatch(&MyProc->procLatch); @@ -779,8 +817,8 @@ AuxiliaryProcKill(int code, Datum arg) Assert(MyProc == auxproc); - /* Release any LW locks I am holding (see notes above) */ - LWLockReleaseAll(); + /* Release any flex locks I am holding (see notes above) */ + FlexLockReleaseAll(); /* Release ownership of the process's latch, too */ DisownLatch(&MyProc->procLatch); @@ -865,7 +903,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) LOCK *lock = locallock->lock; PROCLOCK *proclock = locallock->proclock; uint32 hashcode = locallock->hashcode; - LWLockId partitionLock = LockHashPartitionLock(hashcode); + FlexLockId partitionLock = LockHashPartitionLock(hashcode); PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; @@ -1045,16 +1083,17 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel) { PGPROC *autovac = GetBlockingAutoVacuumPgproc(); + PGPROC_MINIMAL *autovac_minimal = &ProcGlobal->allProcs_Minimal[autovac->pgprocno]; - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); /* * Only do it if the worker is not working to protect against Xid * wraparound. 
*/ if ((autovac != NULL) && - (autovac->vacuumFlags & PROC_IS_AUTOVACUUM) && - !(autovac->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND)) + (autovac_minimal->vacuumFlags & PROC_IS_AUTOVACUUM) && + !(autovac_minimal->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND)) { int pid = autovac->pid; @@ -1062,7 +1101,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) pid); /* don't hold the lock across the kill() syscall */ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* send the autovacuum worker Back to Old Kent Road */ if (kill(pid, SIGINT) < 0) @@ -1074,7 +1113,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) } } else - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* prevent signal from being resent more than once */ allow_autovacuum_cancel = false; diff --git a/src/backend/storage/lmgr/procarraylock.c b/src/backend/storage/lmgr/procarraylock.c new file mode 100644 index 0000000..6aa51f2 --- /dev/null +++ b/src/backend/storage/lmgr/procarraylock.c @@ -0,0 +1,343 @@ +/*------------------------------------------------------------------------- + * + * procarraylock.c + * Lock management for the ProcArray + * + * Because the ProcArray data structure is highly trafficked, it is + * critical that mutual exclusion for ProcArray options be as efficient + * as possible. A particular problem is transaction end (commit or abort) + * which cannot be done in parallel with snapshot acquisition. We + * therefore include some special hacks to deal with this case efficiently. 
+ * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/lmgr/procarraylock.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "miscadmin.h" +#include "pg_trace.h" +#include "access/transam.h" +#include "storage/flexlock_internals.h" +#include "storage/ipc.h" +#include "storage/procarraylock.h" +#include "storage/proc.h" +#include "storage/spin.h" + +typedef struct ProcArrayLockStruct +{ + FlexLock flex; /* common FlexLock infrastructure */ + char exclusive; /* # of exclusive holders (0 or 1) */ + int shared; /* # of shared holders (0..MaxBackends) */ + PGPROC *ending; /* transactions wishing to clear state */ + TransactionId latest_ending_xid; /* latest ending XID */ +} ProcArrayLockStruct; + +/* There is only one ProcArrayLock. */ +#define ProcArrayLockPointer() \ + (AssertMacro(FlexLockArray[ProcArrayLock].flex.locktype == \ + FLEXLOCK_TYPE_PROCARRAYLOCK), \ + (volatile ProcArrayLockStruct *) &FlexLockArray[ProcArrayLock]) + +/* + * ProcArrayLockAcquire - acquire a lightweight lock in the specified mode + * + * If the lock is not available, sleep until it is. + * + * Side effect: cancel/die interrupts are held off until lock release. + */ +void +ProcArrayLockAcquire(ProcArrayLockMode mode) +{ + volatile ProcArrayLockStruct *lock = ProcArrayLockPointer(); + PGPROC *proc = MyProc; + bool retry = false; + int extraWaits = 0; + + /* + * We can't wait if we haven't got a PGPROC. This should only occur + * during bootstrap or shared memory initialization. Put an Assert here + * to catch unsafe coding practices. + */ + Assert(!(proc == NULL && IsUnderPostmaster)); + + /* + * Lock out cancel/die interrupts until we exit the code section protected + * by the ProcArrayLock. 
This ensures that interrupts will not interfere + * with manipulations of data structures in shared memory. + */ + HOLD_INTERRUPTS(); + + /* + * Loop here to try to acquire lock after each time we are signaled by + * ProcArrayLockRelease. See comments in LWLockAcquire for an explanation + * of why do we not attempt to hand off the lock directly. + */ + for (;;) + { + bool mustwait; + + /* Acquire mutex. Time spent holding mutex should be short! */ + SpinLockAcquire(&lock->flex.mutex); + + /* If retrying, allow LWLockRelease to release waiters again */ + if (retry) + lock->flex.releaseOK = true; + + /* If I can get the lock, do so quickly. */ + if (mode == PAL_EXCLUSIVE) + { + if (lock->exclusive == 0 && lock->shared == 0) + { + lock->exclusive++; + mustwait = false; + } + else + mustwait = true; + } + else + { + if (lock->exclusive == 0) + { + lock->shared++; + mustwait = false; + } + else + mustwait = true; + } + + if (!mustwait) + break; /* got the lock */ + + /* Add myself to wait queue. */ + FlexLockJoinWaitQueue(lock, (int) mode); + + /* Can release the mutex now */ + SpinLockRelease(&lock->flex.mutex); + + /* Wait until awakened. */ + extraWaits += FlexLockWait(ProcArrayLock, mode); + + /* Now loop back and try to acquire lock again. */ + retry = true; + } + + /* We are done updating shared state of the lock itself. */ + SpinLockRelease(&lock->flex.mutex); + + TRACE_POSTGRESQL_FLEXLOCK_ACQUIRE(lockid, mode); + + /* Add lock to list of locks held by this backend */ + FlexLockRemember(ProcArrayLock); + + /* + * Fix the process wait semaphore's count for any absorbed wakeups. + */ + while (extraWaits-- > 0) + PGSemaphoreUnlock(&proc->sem); +} + +/* + * ProcArrayLockClearTransaction - safely clear transaction details + * + * This can't be done while ProcArrayLock is held, but it's so fast that + * we can afford to do it while holding the spinlock, rather than acquiring + * and releasing the lock. 
+ */ +void +ProcArrayLockClearTransaction(TransactionId latestXid) +{ + volatile ProcArrayLockStruct *lock = ProcArrayLockPointer(); + PGPROC *proc = MyProc; + int extraWaits = 0; + bool mustwait; + + HOLD_INTERRUPTS(); + + /* Acquire mutex. Time spent holding mutex should be short! */ + SpinLockAcquire(&lock->flex.mutex); + + if (lock->exclusive == 0 && lock->shared == 0) + { + { + volatile PGPROC_MINIMAL *vproc_minimal = &ProcGlobal->allProcs_Minimal[proc->pgprocno]; + /* If there are no lockers, clear the critical PGPROC fields. */ + vproc_minimal->xid = InvalidTransactionId; + vproc_minimal->xmin = InvalidTransactionId; + /* must be cleared with xid/xmin: */ + vproc_minimal->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; + vproc_minimal->nxids = 0; + vproc_minimal->overflowed = false; + } + mustwait = false; + + /* Also advance global latestCompletedXid while holding the lock */ + if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, + latestXid)) + ShmemVariableCache->latestCompletedXid = latestXid; + } + else + { + /* Rats, must wait. */ + proc->flWaitLink = lock->ending; + lock->ending = proc; + if (!TransactionIdIsValid(lock->latest_ending_xid) || + TransactionIdPrecedes(lock->latest_ending_xid, latestXid)) + lock->latest_ending_xid = latestXid; + mustwait = true; + } + + /* Can release the mutex now */ + SpinLockRelease(&lock->flex.mutex); + + /* + * If we were not able to perform the operation immediately, we must wait. + * But we need not retry after being awoken, because the last lock holder + * to release the lock will do the work first, on our behalf. 
+ */ + if (mustwait) + { + extraWaits += FlexLockWait(ProcArrayLock, 2); + while (extraWaits-- > 0) + PGSemaphoreUnlock(&proc->sem); + } + + RESUME_INTERRUPTS(); +} + +/* + * ProcArrayLockRelease - release a previously acquired lock + */ +void +ProcArrayLockRelease(void) +{ + volatile ProcArrayLockStruct *lock = ProcArrayLockPointer(); + PGPROC *head; + PGPROC *ending = NULL; + PGPROC *proc; + + FlexLockForget(ProcArrayLock); + + /* Acquire mutex. Time spent holding mutex should be short! */ + SpinLockAcquire(&lock->flex.mutex); + + /* Release my hold on lock */ + if (lock->exclusive > 0) + lock->exclusive--; + else + { + Assert(lock->shared > 0); + lock->shared--; + } + + /* + * If the lock is now free, but there are some transactions trying to + * end, we must clear the critical PGPROC fields for them, and save a + * list of them so we can wake them up. + */ + if (lock->exclusive == 0 && lock->shared == 0 && lock->ending != NULL) + { + volatile PGPROC *vproc; + + ending = lock->ending; + vproc = ending; + + while (vproc != NULL) + { + volatile PGPROC_MINIMAL *vproc_minimal = &ProcGlobal->allProcs_Minimal[vproc->pgprocno]; + /* If there are no lockers, clear the critical PGPROC fields. */ + vproc_minimal->xid = InvalidTransactionId; + vproc_minimal->xmin = InvalidTransactionId; + /* must be cleared with xid/xmin: */ + vproc_minimal->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; + vproc_minimal->nxids = 0; + vproc_minimal->overflowed = false; + vproc = vproc->flWaitLink; + } + + /* Also advance global latestCompletedXid */ + if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, + lock->latest_ending_xid)) + ShmemVariableCache->latestCompletedXid = lock->latest_ending_xid; + + /* Reset lock state. */ + lock->ending = NULL; + lock->latest_ending_xid = InvalidTransactionId; + } + + /* + * See if I need to awaken any waiters. If I released a non-last shared + * hold, there cannot be anything to do. 
Also, do not awaken any waiters + * if someone has already awakened waiters that haven't yet acquired the + * lock. + */ + head = lock->flex.head; + if (head != NULL) + { + if (lock->exclusive == 0 && lock->shared == 0 && lock->flex.releaseOK) + { + /* + * Remove the to-be-awakened PGPROCs from the queue. If the front + * waiter wants exclusive lock, awaken him only. Otherwise awaken + * as many waiters as want shared access. + */ + proc = head; + if (proc->flWaitMode != LW_EXCLUSIVE) + { + while (proc->flWaitLink != NULL && + proc->flWaitLink->flWaitMode != LW_EXCLUSIVE) + proc = proc->flWaitLink; + } + /* proc is now the last PGPROC to be released */ + lock->flex.head = proc->flWaitLink; + proc->flWaitLink = NULL; + /* prevent additional wakeups until retryer gets to run */ + lock->flex.releaseOK = false; + } + else + { + /* lock is still held, can't awaken anything */ + head = NULL; + } + } + + /* We are done updating shared state of the lock itself. */ + SpinLockRelease(&lock->flex.mutex); + + TRACE_POSTGRESQL_FLEXLOCK_RELEASE(lockid); + + /* + * Awaken any waiters I removed from the queue. + */ + while (head != NULL) + { + FlexLockDebug("LWLockRelease", lockid, "release waiter"); + proc = head; + head = proc->flWaitLink; + proc->flWaitLink = NULL; + proc->flWaitResult = 1; /* any non-zero value will do */ + PGSemaphoreUnlock(&proc->sem); + } + + /* + * Also awaken any processes whose critical PGPROC fields I cleared + */ + while (ending != NULL) + { + FlexLockDebug("LWLockRelease", lockid, "release ending"); + proc = ending; + ending = proc->flWaitLink; + proc->flWaitLink = NULL; + proc->flWaitResult = 1; /* any non-zero value will do */ + PGSemaphoreUnlock(&proc->sem); + } + + /* + * Now okay to allow cancel/die interrupts. 
+ */ + RESUME_INTERRUPTS(); +} diff --git a/src/backend/utils/misc/check_guc b/src/backend/utils/misc/check_guc index 293fb03..1a19e36 100755 --- a/src/backend/utils/misc/check_guc +++ b/src/backend/utils/misc/check_guc @@ -19,7 +19,7 @@ INTENTIONALLY_NOT_INCLUDED="autocommit debug_deadlocks \ is_superuser lc_collate lc_ctype lc_messages lc_monetary lc_numeric lc_time \ pre_auth_delay role seed server_encoding server_version server_version_int \ -session_authorization trace_lock_oidmin trace_lock_table trace_locks trace_lwlocks \ +session_authorization trace_lock_oidmin trace_lock_table trace_locks trace_flexlocks \ trace_notify trace_userlocks transaction_isolation transaction_read_only \ zero_damaged_pages" diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index da7b6d4..52de233 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -59,6 +59,7 @@ #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/bufmgr.h" +#include "storage/flexlock_internals.h" #include "storage/standby.h" #include "storage/fd.h" #include "storage/predicate.h" @@ -1071,12 +1072,12 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, { - {"trace_lwlocks", PGC_SUSET, DEVELOPER_OPTIONS, + {"trace_flexlocks", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("No description available."), NULL, GUC_NOT_IN_SAMPLE }, - &Trace_lwlocks, + &Trace_flexlocks, false, NULL, NULL, NULL }, diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d index 71c5ab0..5b9cfe6 100644 --- a/src/backend/utils/probes.d +++ b/src/backend/utils/probes.d @@ -15,8 +15,8 @@ * in probe definitions, as they cause compilation errors on Mac OS X 10.5. 
*/ #define LocalTransactionId unsigned int -#define LWLockId int -#define LWLockMode int +#define FlexLockId int +#define FlexLockMode int #define LOCKMODE int #define BlockNumber unsigned int #define Oid unsigned int @@ -29,12 +29,12 @@ provider postgresql { probe transaction__commit(LocalTransactionId); probe transaction__abort(LocalTransactionId); - probe lwlock__acquire(LWLockId, LWLockMode); - probe lwlock__release(LWLockId); - probe lwlock__wait__start(LWLockId, LWLockMode); - probe lwlock__wait__done(LWLockId, LWLockMode); - probe lwlock__condacquire(LWLockId, LWLockMode); - probe lwlock__condacquire__fail(LWLockId, LWLockMode); + probe flexlock__acquire(FlexLockId, FlexLockMode); + probe flexlock__release(FlexLockId); + probe flexlock__wait__start(FlexLockId, FlexLockMode); + probe flexlock__wait__done(FlexLockId, FlexLockMode); + probe flexlock__condacquire(FlexLockId, FlexLockMode); + probe flexlock__condacquire__fail(FlexLockId, FlexLockMode); probe lock__wait__start(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE); probe lock__wait__done(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE); diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 50fb780..1f4f5b4 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -577,7 +577,7 @@ static void SnapshotResetXmin(void) { if (RegisteredSnapshots == 0 && ActiveSnapshot == NULL) - MyProc->xmin = InvalidTransactionId; + MyProcMinimal->xmin = InvalidTransactionId; } /* diff --git a/src/include/access/slru.h b/src/include/access/slru.h index e48743f..680a87f 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -55,7 +55,7 @@ typedef enum */ typedef struct SlruSharedData { - LWLockId ControlLock; + FlexLockId ControlLock; /* Number of buffers managed by this SLRU structure */ int num_slots; @@ -69,7 +69,7 @@ typedef struct SlruSharedData bool *page_dirty; int 
*page_number; int *page_lru_count; - LWLockId *buffer_locks; + FlexLockId *buffer_locks; /* * Optional array of WAL flush LSNs associated with entries in the SLRU @@ -136,7 +136,7 @@ typedef SlruCtlData *SlruCtl; extern Size SimpleLruShmemSize(int nslots, int nlsns); extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, - LWLockId ctllock, const char *subdir); + FlexLockId ctllock, const char *subdir); extern int SimpleLruZeroPage(SlruCtl ctl, int pageno); extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid); diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 6c8e312..d3b74db 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -49,9 +49,9 @@ #define SEQ_MINVALUE (-SEQ_MAXVALUE) /* - * Number of spare LWLocks to allocate for user-defined add-on code. + * Number of spare FlexLocks to allocate for user-defined add-on code. */ -#define NUM_USER_DEFINED_LWLOCKS 4 +#define NUM_USER_DEFINED_FLEXLOCKS 4 /* * Define this if you want to allow the lo_import and lo_export SQL diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index b7d4ea5..ac7f665 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -103,7 +103,7 @@ typedef struct buftag #define BufTableHashPartition(hashcode) \ ((hashcode) % NUM_BUFFER_PARTITIONS) #define BufMappingPartitionLock(hashcode) \ - ((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode))) + ((FlexLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode))) /* * BufferDesc -- shared descriptor/state data for a single shared buffer. 
@@ -143,8 +143,8 @@ typedef struct sbufdesc int buf_id; /* buffer's index number (from 0) */ int freeNext; /* link in freelist chain */ - LWLockId io_in_progress_lock; /* to wait for I/O to complete */ - LWLockId content_lock; /* to lock access to buffer contents */ + FlexLockId io_in_progress_lock; /* to wait for I/O to complete */ + FlexLockId content_lock; /* to lock access to buffer contents */ } BufferDesc; #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) diff --git a/src/include/storage/flexlock.h b/src/include/storage/flexlock.h new file mode 100644 index 0000000..612c21a --- /dev/null +++ b/src/include/storage/flexlock.h @@ -0,0 +1,102 @@ +/*------------------------------------------------------------------------- + * + * flexlock.h + * Flex lock manager + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/flexlock.h + * + *------------------------------------------------------------------------- + */ +#ifndef FLEXLOCK_H +#define FLEXLOCK_H + +/* + * It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS + * here, but we need them to set up enum FlexLockId correctly, and having + * this file include lock.h or bufmgr.h would be backwards. + */ + +/* Number of partitions of the shared buffer mapping hashtable */ +#define NUM_BUFFER_PARTITIONS 16 + +/* Number of partitions the shared lock tables are divided into */ +#define LOG2_NUM_LOCK_PARTITIONS 4 +#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS) + +/* Number of partitions the shared predicate lock tables are divided into */ +#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4 +#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS) + +/* + * We have a number of predefined FlexLocks, plus a bunch of locks that are + * dynamically assigned (e.g., for shared buffers). 
The FlexLock structures + * live in shared memory (since they contain shared data) and are identified + * by values of this enumerated type. We abuse the notion of an enum somewhat + * by allowing values not listed in the enum declaration to be assigned. + * The extra value MaxDynamicFlexLock is there to keep the compiler from + * deciding that the enum can be represented as char or short ... + * + * If you remove a lock, please replace it with a placeholder. This retains + * the lock numbering, which is helpful for DTrace and other external + * debugging scripts. + */ +typedef enum FlexLockId +{ + BufFreelistLock, + ShmemIndexLock, + OidGenLock, + XidGenLock, + ProcArrayLock, + SInvalReadLock, + SInvalWriteLock, + WALInsertLock, + WALWriteLock, + ControlFileLock, + CheckpointLock, + CLogControlLock, + SubtransControlLock, + MultiXactGenLock, + MultiXactOffsetControlLock, + MultiXactMemberControlLock, + RelCacheInitLock, + BgWriterCommLock, + TwoPhaseStateLock, + TablespaceCreateLock, + BtreeVacuumLock, + AddinShmemInitLock, + AutovacuumLock, + AutovacuumScheduleLock, + SyncScanLock, + RelationMappingLock, + AsyncCtlLock, + AsyncQueueLock, + SerializableXactHashLock, + SerializableFinishedListLock, + SerializablePredicateLockListLock, + OldSerXidLock, + SyncRepLock, + /* Individual lock IDs end here */ + FirstBufMappingLock, + FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS, + FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS, + + /* must be last except for MaxDynamicFlexLock: */ + NumFixedFlexLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS, + + MaxDynamicFlexLock = 1000000000 +} FlexLockId; + +/* Shared memory setup. */ +extern int NumFlexLocks(void); +extern Size FlexLockShmemSize(void); +extern void RequestAddinFlexLocks(int n); +extern void CreateFlexLocks(void); + +/* Error recovery and debugging support functions. 
*/ +extern void FlexLockReleaseAll(void); +extern bool FlexLockHeldByMe(FlexLockId id); + +#endif /* FLEXLOCK_H */ diff --git a/src/include/storage/flexlock_internals.h b/src/include/storage/flexlock_internals.h new file mode 100644 index 0000000..d1bca45 --- /dev/null +++ b/src/include/storage/flexlock_internals.h @@ -0,0 +1,89 @@ +/*------------------------------------------------------------------------- + * + * flexlock_internals.h + * Flex lock internals. Only files which implement a FlexLock + * type should need to include this. Merging this with flexlock.h + * creates a circular header dependency, but even if it didn't, this + * is cleaner. + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/flexlock_internals.h + * + *------------------------------------------------------------------------- + */ +#ifndef FLEXLOCK_INTERNALS_H +#define FLEXLOCK_INTERNALS_H + +#include "pg_trace.h" +#include "storage/flexlock.h" +#include "storage/proc.h" +#include "storage/s_lock.h" + +/* + * Individual FlexLock implementations each get this many bytes to store + * its state; of course, a given implementation could also allocate additional + * shmem elsewhere, but we provide this many bytes within the array. The + * header fields common to all FlexLock types are included in this number. + * A power of two should probably be chosen, to avoid alignment issues and + * cache line splitting. It might be useful to increase this on systems where + * a cache line is more than 64 bytes in size. 
+ */ +#define FLEX_LOCK_BYTES 64 + +typedef struct FlexLock +{ + char locktype; /* see FLEXLOCK_TYPE_* constants */ + slock_t mutex; /* Protects FlexLock state and wait queues */ + bool releaseOK; /* T if ok to release waiters */ + PGPROC *head; /* head of list of waiting PGPROCs */ + PGPROC *tail; /* tail of list of waiting PGPROCs */ + /* tail is undefined when head is NULL */ +} FlexLock; + +#define FLEXLOCK_TYPE_LWLOCK 'l' +#define FLEXLOCK_TYPE_PROCARRAYLOCK 'p' + +typedef union FlexLockPadded +{ + FlexLock flex; + char pad[FLEX_LOCK_BYTES]; +} FlexLockPadded; + +extern FlexLockPadded *FlexLockArray; + +extern FlexLockId FlexLockAssign(char locktype); +extern void FlexLockRemember(FlexLockId id); +extern void FlexLockForget(FlexLockId id); +extern int FlexLockWait(FlexLockId id, int mode); + +/* + * We must join the wait queue while holding the spinlock, so we define this + * as a macro, for speed. + */ +#define FlexLockJoinWaitQueue(lock, mode) \ + do { \ + Assert(MyProc != NULL); \ + MyProc->flWaitResult = 0; \ + MyProc->flWaitMode = mode; \ + MyProc->flWaitLink = NULL; \ + if (lock->flex.head == NULL) \ + lock->flex.head = MyProc; \ + else \ + lock->flex.tail->flWaitLink = MyProc; \ + lock->flex.tail = MyProc; \ + } while (0) + +#ifdef LOCK_DEBUG +extern bool Trace_flexlocks; +#define FlexLockDebug(where, id, msg) \ + do { \ + if (Trace_flexlocks) \ + elog(LOG, "%s(%d): %s", where, (int) id, msg); \ + } while (0) +#else +#define FlexLockDebug(where, id, msg) +#endif + +#endif /* FLEXLOCK_INTERNALS_H */ diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index e106ad5..ba87db2 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -471,7 +471,7 @@ typedef enum #define LockHashPartition(hashcode) \ ((hashcode) % NUM_LOCK_PARTITIONS) #define LockHashPartitionLock(hashcode) \ - ((LWLockId) (FirstLockMgrLock + LockHashPartition(hashcode))) + ((FlexLockId) (FirstLockMgrLock + LockHashPartition(hashcode))) /* diff --git 
a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 438a48d..f68cddc 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -14,82 +14,7 @@ #ifndef LWLOCK_H #define LWLOCK_H -/* - * It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS - * here, but we need them to set up enum LWLockId correctly, and having - * this file include lock.h or bufmgr.h would be backwards. - */ - -/* Number of partitions of the shared buffer mapping hashtable */ -#define NUM_BUFFER_PARTITIONS 16 - -/* Number of partitions the shared lock tables are divided into */ -#define LOG2_NUM_LOCK_PARTITIONS 4 -#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS) - -/* Number of partitions the shared predicate lock tables are divided into */ -#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4 -#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS) - -/* - * We have a number of predefined LWLocks, plus a bunch of LWLocks that are - * dynamically assigned (e.g., for shared buffers). The LWLock structures - * live in shared memory (since they contain shared data) and are identified - * by values of this enumerated type. We abuse the notion of an enum somewhat - * by allowing values not listed in the enum declaration to be assigned. - * The extra value MaxDynamicLWLock is there to keep the compiler from - * deciding that the enum can be represented as char or short ... - * - * If you remove a lock, please replace it with a placeholder. This retains - * the lock numbering, which is helpful for DTrace and other external - * debugging scripts. 
- */ -typedef enum LWLockId -{ - BufFreelistLock, - ShmemIndexLock, - OidGenLock, - XidGenLock, - ProcArrayLock, - SInvalReadLock, - SInvalWriteLock, - WALInsertLock, - WALWriteLock, - ControlFileLock, - CheckpointLock, - CLogControlLock, - SubtransControlLock, - MultiXactGenLock, - MultiXactOffsetControlLock, - MultiXactMemberControlLock, - RelCacheInitLock, - BgWriterCommLock, - TwoPhaseStateLock, - TablespaceCreateLock, - BtreeVacuumLock, - AddinShmemInitLock, - AutovacuumLock, - AutovacuumScheduleLock, - SyncScanLock, - RelationMappingLock, - AsyncCtlLock, - AsyncQueueLock, - SerializableXactHashLock, - SerializableFinishedListLock, - SerializablePredicateLockListLock, - OldSerXidLock, - SyncRepLock, - /* Individual lock IDs end here */ - FirstBufMappingLock, - FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS, - FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS, - - /* must be last except for MaxDynamicLWLock: */ - NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS, - - MaxDynamicLWLock = 1000000000 -} LWLockId; - +#include "storage/flexlock.h" typedef enum LWLockMode { @@ -97,22 +22,10 @@ typedef enum LWLockMode LW_SHARED } LWLockMode; - -#ifdef LOCK_DEBUG -extern bool Trace_lwlocks; -#endif - -extern LWLockId LWLockAssign(void); -extern void LWLockAcquire(LWLockId lockid, LWLockMode mode); -extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode); -extern void LWLockRelease(LWLockId lockid); -extern void LWLockReleaseAll(void); -extern bool LWLockHeldByMe(LWLockId lockid); - -extern int NumLWLocks(void); -extern Size LWLockShmemSize(void); -extern void CreateLWLocks(void); - -extern void RequestAddinLWLocks(int n); +extern FlexLockId LWLockAssign(void); +extern void LWLockAcquire(FlexLockId lockid, LWLockMode mode); +extern bool LWLockConditionalAcquire(FlexLockId lockid, LWLockMode mode); +extern void LWLockRelease(FlexLockId lockid); +extern bool LWLockHeldByMe(FlexLockId lockid); #endif 
/* LWLOCK_H */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 6e798b1..9f377a8 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -35,8 +35,6 @@ struct XidCache { - bool overflowed; - int nxids; TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; }; @@ -86,27 +84,14 @@ struct PGPROC LocalTransactionId lxid; /* local id of top-level transaction currently * being executed by this proc, if running; * else InvalidLocalTransactionId */ - - TransactionId xid; /* id of top-level transaction currently being - * executed by this proc, if running and XID - * is assigned; else InvalidTransactionId */ - - TransactionId xmin; /* minimal running XID as it was when we were - * starting our xact, excluding LAZY VACUUM: - * vacuum must not remove tuples deleted by - * xid >= xmin ! */ - int pid; /* Backend's process ID; 0 if prepared xact */ + int pgprocno; /* These fields are zero while a backend is still starting up: */ BackendId backendId; /* This backend's backend ID (if assigned) */ Oid databaseId; /* OID of database this backend is using */ Oid roleId; /* OID of role using this backend */ - bool inCommit; /* true if within commit critical section */ - - uint8 vacuumFlags; /* vacuum-related flags, see above */ - /* * While in hot standby mode, shows that a conflict signal has been sent * for the current transaction. Set/cleared while holding ProcArrayLock, @@ -114,10 +99,10 @@ struct PGPROC */ bool recoveryConflictPending; - /* Info about LWLock the process is currently waiting for, if any. */ - bool lwWaiting; /* true if waiting for an LW lock */ - bool lwExclusive; /* true if waiting for exclusive access */ - struct PGPROC *lwWaitLink; /* next waiter for same LW lock */ + /* Info about FlexLock the process is currently waiting for, if any. 
*/ + int flWaitResult; /* result of wait, or 0 if still waiting */ + int flWaitMode; /* lock mode sought */ + struct PGPROC *flWaitLink; /* next waiter for same FlexLock */ /* Info about lock the process is currently waiting for, if any. */ /* waitLock and waitProcLock are NULL if not currently waiting. */ @@ -147,7 +132,7 @@ struct PGPROC struct XidCache subxids; /* cache for subtransaction XIDs */ /* Per-backend LWLock. Protects fields below. */ - LWLockId backendLock; /* protects the fields below */ + FlexLockId backendLock; /* protects the fields below */ /* Lock manager data, recording fast-path locks taken by this backend. */ uint64 fpLockBits; /* lock modes held for each fast-path slot */ @@ -160,7 +145,35 @@ struct PGPROC extern PGDLLIMPORT PGPROC *MyProc; +extern PGDLLIMPORT struct PGPROC_MINIMAL *MyProcMinimal; + +/* + * A minimal part of the PGPROC. We store these members out of the main PGPROC + * structure since they are very heavily accessed members and usually in a loop + * for all active PGPROCs. Storing them in a separate array ensures that these + * members can be very efficiently accessed with minimum cache misses. On a + * large multiprocessor system, this can show a significant performance + * improvement. + */ +struct PGPROC_MINIMAL +{ + TransactionId xid; /* id of top-level transaction currently being + * executed by this proc, if running and XID + * is assigned; else InvalidTransactionId */ + TransactionId xmin; /* minimal running XID as it was when we were + * starting our xact, excluding LAZY VACUUM: + * vacuum must not remove tuples deleted by + * xid >= xmin ! */ + + uint8 vacuumFlags; /* vacuum-related flags, see above */ + bool overflowed; + bool inCommit; /* true if within commit critical section */ + + uint8 nxids; +}; + +typedef struct PGPROC_MINIMAL PGPROC_MINIMAL; /* * There is one ProcGlobal struct for the whole database cluster. 
@@ -169,6 +182,8 @@ typedef struct PROC_HDR { /* Array of PGPROC structures (not including dummies for prepared txns) */ PGPROC *allProcs; + /* Array of PGPROC_MINIMAL structures (not including dummies for prepared txns) */ + PGPROC_MINIMAL *allProcs_Minimal; /* Length of allProcs array */ uint32 allProcCount; /* Head of list of free PGPROC structures */ @@ -186,6 +201,8 @@ typedef struct PROC_HDR extern PROC_HDR *ProcGlobal; +extern PGPROC *PreparedXactProcs; + /* * We set aside some extra PGPROC structures for auxiliary processes, * ie things that aren't full-fledged backends but need shmem access. diff --git a/src/include/storage/procarraylock.h b/src/include/storage/procarraylock.h new file mode 100644 index 0000000..678ca6f --- /dev/null +++ b/src/include/storage/procarraylock.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * procarraylock.h + * Lock management for the ProcArray + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/procarraylock.h + * + *------------------------------------------------------------------------- + */ +#ifndef PROCARRAYLOCK_H +#define PROCARRAYLOCK_H + +#include "storage/flexlock.h" + +typedef enum ProcArrayLockMode +{ + PAL_EXCLUSIVE, + PAL_SHARED +} ProcArrayLockMode; + +extern void ProcArrayLockAcquire(ProcArrayLockMode mode); +extern void ProcArrayLockClearTransaction(TransactionId latestXid); +extern void ProcArrayLockRelease(void); + +#endif /* PROCARRAYLOCK_H */