Re: Startup process on a hot standby crashes with an error "invalid memory alloc request size 1073741824" while replaying "Standby/LOCK" records - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Startup process on a hot standby crashes with an error "invalid memory alloc request size 1073741824" while replaying "Standby/LOCK" records |
Date | |
Msg-id | 2208668.1664927591@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Startup process on a hot standby crashes with an error "invalid memory alloc request size 1073741824" while replaying "Standby/LOCK" records (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: Startup process on a hot standby crashes with an error "invalid memory alloc request size 1073741824" while replaying "Standby/LOCK" records
|
List | pgsql-hackers |
I wrote: > PFA a quick-hack fix that solves this issue by making per-transaction > subsidiary hash tables. That's overkill perhaps; I'm a little worried > about whether this slows down normal cases more than it's worth. > But we ought to do something about this, because aside from the > duplication aspect the current storage of these lists seems mighty > space-inefficient. After further thought, maybe it'd be better to do it as attached, with one long-lived hash table for all the locks. This is a shade less space-efficient than the current code once you account for dynahash overhead, but the per-transaction overhead should be lower than the previous patch since we only need to create/destroy a hash table entry not a whole hash table. regards, tom lane diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 9dab931990..7db86f7885 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -42,7 +42,29 @@ int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; bool log_recovery_conflict_waits = false; -static HTAB *RecoveryLockLists; +/* + * Keep track of all the exclusive locks owned by original transactions. + * For each known exclusive lock, there is a RecoveryLockEntry in the + * RecoveryLockHash hash table. All RecoveryLockEntrys belonging to a + * given XID are chained together so that we can find them easily. + * For each original transaction that is known to have any such locks, + * there is a RecoveryLockXidEntry in the RecoveryLockXidHash hash table, + * which stores the head of the chain of its locks. + */ +typedef struct RecoveryLockEntry +{ + xl_standby_lock key; /* hash key: xid, dbOid, relOid */ + struct RecoveryLockEntry *next; /* chain link */ +} RecoveryLockEntry; + +typedef struct RecoveryLockXidEntry +{ + TransactionId xid; /* hash key -- must be first */ + struct RecoveryLockEntry *head; /* chain head */ +} RecoveryLockXidEntry; + +static HTAB *RecoveryLockHash = NULL; +static HTAB *RecoveryLockXidHash = NULL; /* Flags set by timeout handlers */ static volatile sig_atomic_t got_standby_deadlock_timeout = false; @@ -58,15 +80,6 @@ static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts); static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks); static const char *get_recovery_conflict_desc(ProcSignalReason reason); -/* - * Keep track of all the locks owned by a given transaction. - */ -typedef struct RecoveryLockListsEntry -{ - TransactionId xid; - List *locks; -} RecoveryLockListsEntry; - /* * InitRecoveryTransactionEnvironment * Initialize tracking of our primary's in-progress transactions. @@ -85,16 +98,24 @@ InitRecoveryTransactionEnvironment(void) VirtualTransactionId vxid; HASHCTL hash_ctl; + Assert(RecoveryLockHash == NULL); /* don't run this twice */ + /* - * Initialize the hash table for tracking the list of locks held by each + * Initialize the hash tables for tracking the locks held by each * transaction. */ + hash_ctl.keysize = sizeof(xl_standby_lock); + hash_ctl.entrysize = sizeof(RecoveryLockEntry); + RecoveryLockHash = hash_create("RecoveryLockHash", + 64, + &hash_ctl, + HASH_ELEM | HASH_BLOBS); hash_ctl.keysize = sizeof(TransactionId); - hash_ctl.entrysize = sizeof(RecoveryLockListsEntry); - RecoveryLockLists = hash_create("RecoveryLockLists", - 64, - &hash_ctl, - HASH_ELEM | HASH_BLOBS); + hash_ctl.entrysize = sizeof(RecoveryLockXidEntry); + RecoveryLockXidHash = hash_create("RecoveryLockXidHash", + 64, + &hash_ctl, + HASH_ELEM | HASH_BLOBS); /* * Initialize shared invalidation management for Startup process, being @@ -140,12 +161,12 @@ void ShutdownRecoveryTransactionEnvironment(void) { /* - * Do nothing if RecoveryLockLists is NULL because which means that - * transaction tracking has not been yet initialized or has been already - * shutdowned. This prevents transaction tracking from being shutdowned - * unexpectedly more than once. + * Do nothing if RecoveryLockHash is NULL because that means that + * transaction tracking has not yet been initialized or has already been + * shut down. This makes it safe to have possibly-redundant calls of this + * function during process exit. */ - if (RecoveryLockLists == NULL) + if (RecoveryLockHash == NULL) return; /* Mark all tracked in-progress transactions as finished. */ @@ -154,9 +175,11 @@ ShutdownRecoveryTransactionEnvironment(void) /* Release all locks the tracked transactions were holding */ StandbyReleaseAllLocks(); - /* Destroy the hash table of locks. */ - hash_destroy(RecoveryLockLists); - RecoveryLockLists = NULL; + /* Destroy the lock hash tables. */ + hash_destroy(RecoveryLockHash); + hash_destroy(RecoveryLockXidHash); + RecoveryLockHash = NULL; + RecoveryLockXidHash = NULL; /* Cleanup our VirtualTransaction */ VirtualXactLockTableCleanup(); @@ -932,12 +955,12 @@ StandbyLockTimeoutHandler(void) * We only keep track of AccessExclusiveLocks, which are only ever held by * one transaction on one relation. * - * We keep a hash table of lists of locks in local memory keyed by xid, - * RecoveryLockLists, so we can keep track of the various entries made by - * the Startup process's virtual xid in the shared lock table. - * - * List elements use type xl_standby_lock, since the WAL record type exactly - * matches the information that we need to keep track of. + * We keep a table of known locks in the RecoveryLockHash hash table. + * The point of that table is to let us efficiently de-duplicate locks, + * which is important because checkpoints will re-report the same locks + * already held. There is also a RecoveryLockXidHash table with one entry + * per xid, which allows us to efficiently find all the locks held by a + * given original transaction. * * We use session locks rather than normal locks so we don't need * ResourceOwners. @@ -947,8 +970,9 @@ StandbyLockTimeoutHandler(void) void StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid) { - RecoveryLockListsEntry *entry; - xl_standby_lock *newlock; + RecoveryLockXidEntry *xidentry; + RecoveryLockEntry *lockentry; + xl_standby_lock key; LOCKTAG locktag; bool found; @@ -964,62 +988,79 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid) /* dbOid is InvalidOid when we are locking a shared relation. */ Assert(OidIsValid(relOid)); - /* Create a new list for this xid, if we don't have one already. */ - entry = hash_search(RecoveryLockLists, &xid, HASH_ENTER, &found); + /* Create a hash entry for this xid, if we don't have one already. */ + xidentry = hash_search(RecoveryLockXidHash, &xid, HASH_ENTER, &found); if (!found) { - entry->xid = xid; - entry->locks = NIL; + Assert(xidentry->xid == xid); /* dynahash should have set this */ + xidentry->head = NULL; } - newlock = palloc(sizeof(xl_standby_lock)); - newlock->xid = xid; - newlock->dbOid = dbOid; - newlock->relOid = relOid; - entry->locks = lappend(entry->locks, newlock); + /* Create a hash entry for this lock, unless we have one already. */ + key.xid = xid; + key.dbOid = dbOid; + key.relOid = relOid; + lockentry = hash_search(RecoveryLockHash, &key, HASH_ENTER, &found); + if (!found) + { + /* It's new, so link it into the XID's list ... */ + lockentry->next = xidentry->head; + xidentry->head = lockentry; - SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid); + /* ... and acquire the lock locally. */ + SET_LOCKTAG_RELATION(locktag, dbOid, relOid); - (void) LockAcquire(&locktag, AccessExclusiveLock, true, false); + (void) LockAcquire(&locktag, AccessExclusiveLock, true, false); + } } +/* + * Release all the locks associated with this RecoveryLockXidEntry. + */ static void -StandbyReleaseLockList(List *locks) +StandbyReleaseXidEntryLocks(RecoveryLockXidEntry *xidentry) { - ListCell *lc; + RecoveryLockEntry *entry; + RecoveryLockEntry *next; - foreach(lc, locks) + for (entry = xidentry->head; entry != NULL; entry = next) { - xl_standby_lock *lock = (xl_standby_lock *) lfirst(lc); LOCKTAG locktag; elog(trace_recovery(DEBUG4), "releasing recovery lock: xid %u db %u rel %u", - lock->xid, lock->dbOid, lock->relOid); - SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); + entry->key.xid, entry->key.dbOid, entry->key.relOid); + /* Release the lock ... */ + SET_LOCKTAG_RELATION(locktag, entry->key.dbOid, entry->key.relOid); if (!LockRelease(&locktag, AccessExclusiveLock, true)) { elog(LOG, - "RecoveryLockLists contains entry for lock no longer recorded by lock manager: xid %u database %u relation%u", - lock->xid, lock->dbOid, lock->relOid); + "RecoveryLockHash contains entry for lock no longer recorded by lock manager: xid %u database %u relation%u", + entry->key.xid, entry->key.dbOid, entry->key.relOid); Assert(false); } + /* ... and remove the per-lock hash entry */ + next = entry->next; + hash_search(RecoveryLockHash, entry, HASH_REMOVE, NULL); } - list_free_deep(locks); + xidentry->head = NULL; /* just for paranoia */ } +/* + * Release locks for specific XID, or all locks if it's InvalidXid. + */ static void StandbyReleaseLocks(TransactionId xid) { - RecoveryLockListsEntry *entry; + RecoveryLockXidEntry *entry; if (TransactionIdIsValid(xid)) { - if ((entry = hash_search(RecoveryLockLists, &xid, HASH_FIND, NULL))) + if ((entry = hash_search(RecoveryLockXidHash, &xid, HASH_FIND, NULL))) { - StandbyReleaseLockList(entry->locks); - hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL); + StandbyReleaseXidEntryLocks(entry); + hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL); } } else @@ -1028,7 +1069,7 @@ StandbyReleaseLocks(TransactionId xid) /* * Release locks for a transaction tree, starting at xid down, from - * RecoveryLockLists. + * RecoveryLockXidHash. * * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode, * to remove any AccessExclusiveLocks requested by a transaction. @@ -1051,15 +1092,15 @@ void StandbyReleaseAllLocks(void) { HASH_SEQ_STATUS status; - RecoveryLockListsEntry *entry; + RecoveryLockXidEntry *entry; elog(trace_recovery(DEBUG2), "release all standby locks"); - hash_seq_init(&status, RecoveryLockLists); + hash_seq_init(&status, RecoveryLockXidHash); while ((entry = hash_seq_search(&status))) { - StandbyReleaseLockList(entry->locks); - hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL); + StandbyReleaseXidEntryLocks(entry); + hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL); } } @@ -1072,9 +1113,9 @@ void StandbyReleaseOldLocks(TransactionId oldxid) { HASH_SEQ_STATUS status; - RecoveryLockListsEntry *entry; + RecoveryLockXidEntry *entry; - hash_seq_init(&status, RecoveryLockLists); + hash_seq_init(&status, RecoveryLockXidHash); while ((entry = hash_seq_search(&status))) { Assert(TransactionIdIsValid(entry->xid)); @@ -1088,8 +1129,8 @@ StandbyReleaseOldLocks(TransactionId oldxid) continue; /* Remove all locks and hash table entry. */ - StandbyReleaseLockList(entry->locks); - hash_search(RecoveryLockLists, entry, HASH_REMOVE, NULL); + StandbyReleaseXidEntryLocks(entry); + hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL); } }
pgsql-hackers by date: