From d9f0763b29fdfabe4c6c645e5b9be8febb6174af Mon Sep 17 00:00:00 2001 From: Amul Sul Date: Tue, 11 May 2021 04:07:59 -0400 Subject: [PATCH v27 1/4] Refactor: separate WAL writing code from StartupXLOG(). Introduce a new function, XLogAcceptWrites(), and move the following code from StartupXLOG() into it: 1. UpdateFullPageWrites(), 2. The following block of code that does either CreateEndOfRecoveryRecord() or RequestCheckpoint() or CreateCheckPoint(), 3. The next block of code that runs recovery_end_command, 4. XLogReportParameters(), and 5. CompleteCommitTsInitialization(). XLogAcceptWrites() is called from the place where XLogReportParameters() used to be called in StartupXLOG(). The InRecovery flag is now reset only after the XLogAcceptWrites() call. Because of this, the Assert(!InRecovery) in SetMultiXactIdLimit() has to be removed, since that function is called via TrimMultiXact() before InRecovery is reset. --- src/backend/access/transam/multixact.c | 2 - src/backend/access/transam/xlog.c | 220 ++++++++++++++----------- 2 files changed, 124 insertions(+), 98 deletions(-) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 1f9f1a1fa10..ec742f86b50 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -2290,8 +2290,6 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, if (!MultiXactState->finishedStartup) return; - Assert(!InRecovery); - /* Set limits for offset vacuum. 
*/ needs_offset_vacuum = SetOffsetVacuumLimit(is_startup); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c1d4415a433..8af45ac1a33 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -981,6 +981,9 @@ static void WALInsertLockAcquireExclusive(void); static void WALInsertLockRelease(void); static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); +static bool XLogAcceptWrites(XLogReaderState *xlogreader, XLogRecPtr EndOfLog, + TimeLineID EndOfLogTLI); + /* * Insert an XLOG record represented by an already-constructed chain of data * chunks. This is a low-level routine; to construct the WAL record header @@ -7850,11 +7853,119 @@ StartupXLOG(void) XLogCtl->LogwrtRqst.Flush = EndOfLog; /* - * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE - * record before resource manager writes cleanup WAL records or checkpoint - * record is written. + * Update full_page_writes in shared memory. XLOG_FPW_CHANGE record will be + * written later in XLogAcceptWrites(). */ Insert->fullPageWrites = lastFullPageWrites; + + /* + * Preallocate additional log files, if wanted. + */ + PreallocXlogFiles(EndOfLog); + + /* start the archive_timeout timer and LSN running */ + XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); + XLogCtl->lastSegSwitchLSN = EndOfLog; + + /* also initialize latestCompletedXid, to nextXid - 1 */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ShmemVariableCache->latestCompletedXid = ShmemVariableCache->nextXid; + FullTransactionIdRetreat(&ShmemVariableCache->latestCompletedXid); + LWLockRelease(ProcArrayLock); + + /* + * Start up subtrans, if not already done for hot standby. (commit + * timestamps are started below, if necessary.) + */ + if (standbyState == STANDBY_DISABLED) + StartupSUBTRANS(oldestActiveXID); + + /* + * Perform end of recovery actions for any SLRUs that need it. 
+ */ + TrimCLOG(); + TrimMultiXact(); + + /* Reload shared-memory state for prepared transactions */ + RecoverPreparedTransactions(); + + /* + * Shutdown the recovery environment. This must occur after + * RecoverPreparedTransactions(), see notes for lock_twophase_recover() + */ + if (standbyState != STANDBY_DISABLED) + ShutdownRecoveryTransactionEnvironment(); + + promoted = XLogAcceptWrites(xlogreader, EndOfLog, EndOfLogTLI); + + /* + * Okay, we're officially UP. + */ + InRecovery = false; + + /* Shut down xlogreader */ + if (readFile >= 0) + { + close(readFile); + readFile = -1; + } + XLogReaderFree(xlogreader); + + /* + * All done with end-of-recovery actions. + * + * Now allow backends to write WAL and update the control file status in + * consequence. SharedRecoveryState, that controls if backends can write + * WAL, is updated while holding ControlFileLock to prevent other backends + * to look at an inconsistent state of the control file in shared memory. + * There is still a small window during which backends can write WAL and + * the control file is still referring to a system not in DB_IN_PRODUCTION + * state while looking at the on-disk control file. + * + * Also, we use info_lck to update SharedRecoveryState to ensure that + * there are no race conditions concerning visibility of other recent + * updates to shared memory. + */ + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + ControlFile->state = DB_IN_PRODUCTION; + ControlFile->time = (pg_time_t) time(NULL); + + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE; + SpinLockRelease(&XLogCtl->info_lck); + + UpdateControlFile(); + LWLockRelease(ControlFileLock); + + /* + * If there were cascading standby servers connected to us, nudge any wal + * sender processes to notice that we've been promoted. + */ + WalSndWakeup(); + + /* + * If this was a promotion, request an (online) checkpoint now. 
This + * isn't required for consistency, but the last restartpoint might be far + * back, and in case of a crash, recovering from it might take a longer + * than is appropriate now that we're not in standby mode anymore. + */ + if (promoted) + RequestCheckpoint(CHECKPOINT_FORCE); +} + +static bool +XLogAcceptWrites(XLogReaderState *xlogreader, XLogRecPtr EndOfLog, + TimeLineID EndOfLogTLI) +{ + bool promoted = false; + + /* Only Startup or standalone backend allowed to be here. */ + Assert(AmStartupProcess() || !IsPostmasterEnvironment); + + /* + * Write an XLOG_FPW_CHANGE record before resource manager writes cleanup + * WAL records or checkpoint record is written. + */ LocalSetXLogInsertAllowed(); UpdateFullPageWrites(); LocalXLogInsertAllowed = -1; @@ -7877,15 +7988,20 @@ StartupXLOG(void) */ if (bgwriterLaunched) { + /* bgwriterLaunched is only true in startup process */ + Assert(AmStartupProcess()); + if (LocalPromoteIsTriggered) { - checkPointLoc = ControlFile->checkPoint; + XLogRecord *record; /* * Confirm the last checkpoint is available for us to recover * from if we fail. */ - record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, false); + record = ReadCheckpointRecord(xlogreader, + ControlFile->checkPoint, + 1, false); if (record != NULL) { promoted = true; @@ -7916,6 +8032,8 @@ StartupXLOG(void) if (ArchiveRecoveryRequested) { + Assert(AmStartupProcess()); + /* * And finally, execute the recovery_end_command, if any. */ @@ -7995,57 +8113,6 @@ StartupXLOG(void) } } - /* - * Preallocate additional log files, if wanted. - */ - PreallocXlogFiles(EndOfLog); - - /* - * Okay, we're officially UP. 
- */ - InRecovery = false; - - /* start the archive_timeout timer and LSN running */ - XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); - XLogCtl->lastSegSwitchLSN = EndOfLog; - - /* also initialize latestCompletedXid, to nextXid - 1 */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - ShmemVariableCache->latestCompletedXid = ShmemVariableCache->nextXid; - FullTransactionIdRetreat(&ShmemVariableCache->latestCompletedXid); - LWLockRelease(ProcArrayLock); - - /* - * Start up subtrans, if not already done for hot standby. (commit - * timestamps are started below, if necessary.) - */ - if (standbyState == STANDBY_DISABLED) - StartupSUBTRANS(oldestActiveXID); - - /* - * Perform end of recovery actions for any SLRUs that need it. - */ - TrimCLOG(); - TrimMultiXact(); - - /* Reload shared-memory state for prepared transactions */ - RecoverPreparedTransactions(); - - /* - * Shutdown the recovery environment. This must occur after - * RecoverPreparedTransactions(), see notes for lock_twophase_recover() - */ - if (standbyState != STANDBY_DISABLED) - ShutdownRecoveryTransactionEnvironment(); - - /* Shut down xlogreader */ - if (readFile >= 0) - { - close(readFile); - readFile = -1; - } - XLogReaderFree(xlogreader); - /* * If any of the critical GUCs have changed, log them before we allow * backends to write WAL. @@ -8059,46 +8126,7 @@ StartupXLOG(void) */ CompleteCommitTsInitialization(); - /* - * All done with end-of-recovery actions. - * - * Now allow backends to write WAL and update the control file status in - * consequence. SharedRecoveryState, that controls if backends can write - * WAL, is updated while holding ControlFileLock to prevent other backends - * to look at an inconsistent state of the control file in shared memory. - * There is still a small window during which backends can write WAL and - * the control file is still referring to a system not in DB_IN_PRODUCTION - * state while looking at the on-disk control file. 
- * - * Also, we use info_lck to update SharedRecoveryState to ensure that - * there are no race conditions concerning visibility of other recent - * updates to shared memory. - */ - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - ControlFile->state = DB_IN_PRODUCTION; - ControlFile->time = (pg_time_t) time(NULL); - - SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE; - SpinLockRelease(&XLogCtl->info_lck); - - UpdateControlFile(); - LWLockRelease(ControlFileLock); - - /* - * If there were cascading standby servers connected to us, nudge any wal - * sender processes to notice that we've been promoted. - */ - WalSndWakeup(); - - /* - * If this was a promotion, request an (online) checkpoint now. This - * isn't required for consistency, but the last restartpoint might be far - * back, and in case of a crash, recovering from it might take a longer - * than is appropriate now that we're not in standby mode anymore. - */ - if (promoted) - RequestCheckpoint(CHECKPOINT_FORCE); + return promoted; } /* -- 2.18.0