From 15329d85e26967602e5aedb14e10f31a8631e33c Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Fri, 23 Jul 2021 13:07:56 -0400
Subject: [PATCH v34 1/3] Refactor some end-of-recovery code out of
 StartupXLOG().

Move the code that decides whether to write a checkpoint or an
end-of-recovery record into PerformRecoveryXLogAction().

Also create a new function CleanupAfterArchiveRecovery() to
perform a few tasks that we want to do after we've actually exited
archive recovery but before we start accepting new WAL writes.
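This is mostly code movement to make StartupXLOG() a little bit
shorter and a little bit easier to understand.  Note that the moved
code no longer uses StartupXLOG()'s local variables: it reads the new
SharedArchiveRecoveryRequested flag, PromoteIsTriggered(),
GetLastSegSwitchData() and XLogCtl->replayEndTLI instead.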
---
 src/backend/access/transam/xlog.c | 277 +++++++++++++++++-------------
 1 file changed, 159 insertions(+), 118 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e51a7a749da..cd1d87c14b3 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -637,6 +637,12 @@ typedef struct XLogCtlData
 	 */
 	RecoveryState SharedRecoveryState;
 
+	/*
+	 * SharedArchiveRecoveryRequested indicates whether an archive recovery is
+	 * requested. Protected by info_lck.
+	 */
+	bool		SharedArchiveRecoveryRequested;
+
 	/*
 	 * SharedHotStandbyActive indicates if we allow hot standby queries to be
 	 * run.  Protected by info_lck.
@@ -880,6 +886,7 @@ static MemoryContext walDebugCxt = NULL;
 static void readRecoverySignalFile(void);
 static void validateRecoveryParameters(void);
 static void exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog);
+static void CleanupAfterArchiveRecovery(void);
 static bool recoveryStopsBefore(XLogReaderState *record);
 static bool recoveryStopsAfter(XLogReaderState *record);
 static char *getRecoveryStopReason(void);
@@ -925,6 +932,7 @@ static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
 static XLogRecord *ReadRecord(XLogReaderState *xlogreader,
 							  int emode, bool fetching_ckpt);
 static void CheckRecoveryConsistency(void);
+static bool PerformRecoveryXLogAction(void);
 static XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader,
 										XLogRecPtr RecPtr, int whichChkpt, bool report);
 static bool rescanLatestTimeLine(void);
@@ -5223,6 +5231,7 @@ XLOGShmemInit(void)
 	 */
 	XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
 	XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
+	XLogCtl->SharedArchiveRecoveryRequested = false;
 	XLogCtl->SharedHotStandbyActive = false;
 	XLogCtl->InstallXLogFileSegmentActive = false;
 	XLogCtl->SharedPromoteIsTriggered = false;
@@ -5507,6 +5516,12 @@ readRecoverySignalFile(void)
 		ereport(FATAL,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("standby mode is not supported by single-user servers")));
+
+	/*
+	 * Remember archive recovery request in shared memory state.  A lock is not
+	 * needed since we are the only ones updating this.
+	 */
+	XLogCtl->SharedArchiveRecoveryRequested = ArchiveRecoveryRequested;
 }
 
 static void
@@ -5694,6 +5709,95 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
 			(errmsg("archive recovery complete")));
 }
 
+/*
+ * Perform cleanup actions at the conclusion of archive recovery.
+ */
+static void
+CleanupAfterArchiveRecovery(void)
+{
+	XLogRecPtr EndOfLog;
+
+	/*
+	 * Execute the recovery_end_command, if any.
+	 */
+	if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
+		ExecuteRecoveryCommand(recoveryEndCommand,
+							   "recovery_end_command",
+							   true);
+
+	/*
+	 * We switched to a new timeline. Clean up segments on the old
+	 * timeline.
+	 *
+	 * If there are any higher-numbered segments on the old timeline,
+	 * remove them. They might contain valid WAL, but they might also be
+	 * pre-allocated files containing garbage. In any case, they are not
+	 * part of the new timeline's history so we don't need them.
+	 */
+	(void) GetLastSegSwitchData(&EndOfLog);
+	RemoveNonParentXlogFiles(EndOfLog, ThisTimeLineID);
+
+	/*
+	 * If the switch happened in the middle of a segment, what to do with
+	 * the last, partial segment on the old timeline? If we don't archive
+	 * it, and the server that created the WAL never archives it either
+	 * (e.g. because it was hit by a meteor), it will never make it to the
+	 * archive. That's OK from our point of view, because the new segment
+	 * that we created with the new TLI contains all the WAL from the old
+	 * timeline up to the switch point. But if you later try to do PITR to
+	 * the "missing" WAL on the old timeline, recovery won't find it in
+	 * the archive. It's physically present in the new file with new TLI,
+	 * but recovery won't look there when it's recovering to the older
+	 * timeline. On the other hand, if we archive the partial segment, and
+	 * the original server on that timeline is still running and archives
+	 * the completed version of the same segment later, it will fail. (We
+	 * used to do that in 9.4 and below, and it caused such problems).
+	 *
+	 * As a compromise, we rename the last segment with the .partial
+	 * suffix, and archive it. Archive recovery will never try to read
+	 * .partial segments, so they will normally go unused. But in the odd
+	 * PITR case, the administrator can copy them manually to the pg_wal
+	 * directory (removing the suffix). They can be useful in debugging,
+	 * too.
+	 *
+	 * If a .done or .ready file already exists for the old timeline,
+	 * however, we had already determined that the segment is complete, so
+	 * we can let it be archived normally. (In particular, if it was
+	 * restored from the archive to begin with, it's expected to have a
+	 * .done file).
+	 */
+	if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
+		XLogArchivingActive())
+	{
+		char		origfname[MAXFNAMELEN];
+		XLogSegNo	endLogSegNo;
+		TimeLineID EndOfLogTLI = XLogCtl->replayEndTLI;
+
+		XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
+		XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
+
+		if (!XLogArchiveIsReadyOrDone(origfname))
+		{
+			char		origpath[MAXPGPATH];
+			char		partialfname[MAXFNAMELEN];
+			char		partialpath[MAXPGPATH];
+
+			XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
+			snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
+			snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
+
+			/*
+			 * Make sure there's no .done or .ready file for the .partial
+			 * file.
+			 */
+			XLogArchiveCleanup(partialfname);
+
+			durable_rename(origpath, partialpath, ERROR);
+			XLogArchiveNotify(partialfname);
+		}
+	}
+}
+
 /*
  * Extract timestamp from WAL record.
  *
@@ -7883,127 +7987,13 @@ StartupXLOG(void)
 	UpdateFullPageWrites();
 	LocalXLogInsertAllowed = -1;
 
+	/* Emit checkpoint or end-of-recovery record in XLOG, if required. */
 	if (InRecovery)
-	{
-		/*
-		 * Perform a checkpoint to update all our recovery activity to disk.
-		 *
-		 * Note that we write a shutdown checkpoint rather than an on-line
-		 * one. This is not particularly critical, but since we may be
-		 * assigning a new TLI, using a shutdown checkpoint allows us to have
-		 * the rule that TLI only changes in shutdown checkpoints, which
-		 * allows some extra error checking in xlog_redo.
-		 *
-		 * In promotion, only create a lightweight end-of-recovery record
-		 * instead of a full checkpoint. A checkpoint is requested later,
-		 * after we're fully out of recovery mode and already accepting
-		 * queries.
-		 */
-		if (ArchiveRecoveryRequested && IsUnderPostmaster &&
-			LocalPromoteIsTriggered)
-		{
-			promoted = true;
-
-			/*
-			 * Insert a special WAL record to mark the end of recovery, since
-			 * we aren't doing a checkpoint. That means that the checkpointer
-			 * process may likely be in the middle of a time-smoothed
-			 * restartpoint and could continue to be for minutes after this.
-			 * That sounds strange, but the effect is roughly the same and it
-			 * would be stranger to try to come out of the restartpoint and
-			 * then checkpoint. We request a checkpoint later anyway, just for
-			 * safety.
-			 */
-			CreateEndOfRecoveryRecord();
-		}
-		else
-		{
-			RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
-							  CHECKPOINT_IMMEDIATE |
-							  CHECKPOINT_WAIT);
-		}
-	}
+		promoted = PerformRecoveryXLogAction();
 
+	/* If this is archive recovery, perform post-recovery cleanup actions. */
 	if (ArchiveRecoveryRequested)
-	{
-		/*
-		 * And finally, execute the recovery_end_command, if any.
-		 */
-		if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
-			ExecuteRecoveryCommand(recoveryEndCommand,
-								   "recovery_end_command",
-								   true);
-
-		/*
-		 * We switched to a new timeline. Clean up segments on the old
-		 * timeline.
-		 *
-		 * If there are any higher-numbered segments on the old timeline,
-		 * remove them. They might contain valid WAL, but they might also be
-		 * pre-allocated files containing garbage. In any case, they are not
-		 * part of the new timeline's history so we don't need them.
-		 */
-		RemoveNonParentXlogFiles(EndOfLog, ThisTimeLineID);
-
-		/*
-		 * If the switch happened in the middle of a segment, what to do with
-		 * the last, partial segment on the old timeline? If we don't archive
-		 * it, and the server that created the WAL never archives it either
-		 * (e.g. because it was hit by a meteor), it will never make it to the
-		 * archive. That's OK from our point of view, because the new segment
-		 * that we created with the new TLI contains all the WAL from the old
-		 * timeline up to the switch point. But if you later try to do PITR to
-		 * the "missing" WAL on the old timeline, recovery won't find it in
-		 * the archive. It's physically present in the new file with new TLI,
-		 * but recovery won't look there when it's recovering to the older
-		 * timeline. On the other hand, if we archive the partial segment, and
-		 * the original server on that timeline is still running and archives
-		 * the completed version of the same segment later, it will fail. (We
-		 * used to do that in 9.4 and below, and it caused such problems).
-		 *
-		 * As a compromise, we rename the last segment with the .partial
-		 * suffix, and archive it. Archive recovery will never try to read
-		 * .partial segments, so they will normally go unused. But in the odd
-		 * PITR case, the administrator can copy them manually to the pg_wal
-		 * directory (removing the suffix). They can be useful in debugging,
-		 * too.
-		 *
-		 * If a .done or .ready file already exists for the old timeline,
-		 * however, we had already determined that the segment is complete, so
-		 * we can let it be archived normally. (In particular, if it was
-		 * restored from the archive to begin with, it's expected to have a
-		 * .done file).
-		 */
-		if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
-			XLogArchivingActive())
-		{
-			char		origfname[MAXFNAMELEN];
-			XLogSegNo	endLogSegNo;
-
-			XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
-			XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
-
-			if (!XLogArchiveIsReadyOrDone(origfname))
-			{
-				char		origpath[MAXPGPATH];
-				char		partialfname[MAXFNAMELEN];
-				char		partialpath[MAXPGPATH];
-
-				XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
-				snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
-				snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
-
-				/*
-				 * Make sure there's no .done or .ready file for the .partial
-				 * file.
-				 */
-				XLogArchiveCleanup(partialfname);
-
-				durable_rename(origpath, partialpath, ERROR);
-				XLogArchiveNotify(partialfname);
-			}
-		}
-	}
+		CleanupAfterArchiveRecovery();
 
 	/*
 	 * Preallocate additional log files, if wanted.
@@ -8207,6 +8197,57 @@ CheckRecoveryConsistency(void)
 	}
 }
 
+/*
+ * Perform whatever XLOG actions are necessary at end of REDO.
+ *
+ * The goal here is to make sure that we'll be able to recover properly if
+ * we crash again. If we choose to write a checkpoint, we'll write a shutdown
+ * checkpoint rather than an on-line one. This is not particularly critical,
+ * but since we may be assigning a new TLI, using a shutdown checkpoint allows
+ * us to have the rule that TLI only changes in shutdown checkpoints, which
+ * allows some extra error checking in xlog_redo.
+ */
+static bool
+PerformRecoveryXLogAction(void)
+{
+	bool		promoted = false;
+
+	/*
+	 * In promotion, only create a lightweight end-of-recovery record
+	 * instead of a full checkpoint. A checkpoint is requested later,
+	 * after we're fully out of recovery mode and already accepting
+	 * queries.
+	 *
+	 * NB: This check does not rely on global variables that are valid only in
+	 * the startup process.
+	 */
+	if (((volatile XLogCtlData *) XLogCtl)->SharedArchiveRecoveryRequested &&
+		IsUnderPostmaster && PromoteIsTriggered())
+	{
+		promoted = true;
+
+		/*
+		 * Insert a special WAL record to mark the end of recovery, since
+		 * we aren't doing a checkpoint. That means that the checkpointer
+		 * process may likely be in the middle of a time-smoothed
+		 * restartpoint and could continue to be for minutes after this.
+		 * That sounds strange, but the effect is roughly the same and it
+		 * would be stranger to try to come out of the restartpoint and
+		 * then checkpoint. We request a checkpoint later anyway, just for
+		 * safety.
+		 */
+		CreateEndOfRecoveryRecord();
+	}
+	else
+	{
+		RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
+						  CHECKPOINT_IMMEDIATE |
+						  CHECKPOINT_WAIT);
+	}
+
+	return promoted;
+}
+
 /*
  * Is the system still in recovery?
  *
-- 
2.18.0