From 04e712d73b32b7148fa8fcf019367ea3230050f3 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 20 Sep 2023 16:11:31 -0400 Subject: [PATCH v6 3/3] WIP: Insert XLOG_CHECKPOINT_REDO at the redo point. Merge this into previous commit. --- src/backend/access/transam/xlog.c | 129 ++++++++++++++++++++++-------- 1 file changed, 95 insertions(+), 34 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 026e2fc9da..292646d1cf 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -559,6 +559,16 @@ typedef struct XLogCtlData slock_t info_lck; /* locks shared variables shown above */ } XLogCtlData; +/* + * Classification of XLogRecordInsert operations. + */ +typedef enum +{ + WALINSERT_NORMAL, + WALINSERT_SPECIAL_SWITCH, + WALINSERT_SPECIAL_CHECKPOINT +} WalInsertClass; + static XLogCtlData *XLogCtl = NULL; /* a private copy of XLogCtl->Insert.WALInsertLocks, for convenience */ @@ -739,13 +749,21 @@ XLogInsertRecord(XLogRecData *rdata, bool inserted; XLogRecord *rechdr = (XLogRecord *) rdata->data; uint8 info = rechdr->xl_info & ~XLR_INFO_MASK; - bool isLogSwitch = (rechdr->xl_rmid == RM_XLOG_ID && - info == XLOG_SWITCH); + WalInsertClass class = WALINSERT_NORMAL; XLogRecPtr StartPos; XLogRecPtr EndPos; bool prevDoPageWrites = doPageWrites; TimeLineID insertTLI; + /* Does this record type require special handling? 
*/ + if (rechdr->xl_rmid == RM_XLOG_ID) + { + if (info == XLOG_SWITCH) + class = WALINSERT_SPECIAL_SWITCH; + else if (info == XLOG_CHECKPOINT_REDO) + class = WALINSERT_SPECIAL_CHECKPOINT; + } + /* we assume that all of the record header is in the first chunk */ Assert(rdata->len >= SizeOfXLogRecord); @@ -793,7 +811,7 @@ XLogInsertRecord(XLogRecData *rdata, */ START_CRIT_SECTION(); - if (isLogSwitch) + if (class == WALINSERT_SPECIAL_SWITCH) { /* * In order to insert an XLOG_SWITCH record, we need to hold all of @@ -804,12 +822,27 @@ XLogInsertRecord(XLogRecData *rdata, * Nonetheless, this case is simpler than the normal cases handled * below, which must check for changes in doPageWrites and RedoRecPtr. * Those checks are only needed for records that can contain - * full-pages images, and an XLOG_SWITCH record never does. + * buffer references, and an XLOG_SWITCH record never does. */ Assert(fpw_lsn == InvalidXLogRecPtr); WALInsertLockAcquireExclusive(); inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev); } + else if (class == WALINSERT_SPECIAL_CHECKPOINT) + { + /* + * We need to update both the local and shared copies of RedoRecPtr, which + * means that we need to hold all the WAL insertion locks. However, there + * can't be any buffer references, so as above, we need not check RedoRecPtr + * before inserting the record; we just need to update it afterwards. + */ + Assert(fpw_lsn == InvalidXLogRecPtr); + WALInsertLockAcquireExclusive(); + ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos, + &rechdr->xl_prev); + RedoRecPtr = Insert->RedoRecPtr = StartPos; + inserted = true; + } else { WALInsertLockAcquire(); @@ -876,7 +909,8 @@ XLogInsertRecord(XLogRecData *rdata, * All the record data, including the header, is now ready to be * inserted. Copy the record in the space reserved. 
*/ - CopyXLogRecordToWAL(rechdr->xl_tot_len, isLogSwitch, rdata, + CopyXLogRecordToWAL(rechdr->xl_tot_len, + class == WALINSERT_SPECIAL_SWITCH, rdata, StartPos, EndPos, insertTLI); /* @@ -935,7 +969,7 @@ XLogInsertRecord(XLogRecData *rdata, * padding space that fills the rest of the segment, and perform * end-of-segment actions (eg, notifying archiver). */ - if (isLogSwitch) + if (class == WALINSERT_SPECIAL_SWITCH) { TRACE_POSTGRESQL_WAL_SWITCH(); XLogFlush(EndPos); @@ -6487,6 +6521,8 @@ update_checkpoint_display(int flags, bool restartpoint, bool reset) * All of this mechanism allows us to continue working while we checkpoint. * As a result, timing of actions is critical here and be careful to note that * this function will likely take minutes to execute on a busy system. + * + * XXX FIX ABOVE COMMENTS */ void CreateCheckPoint(int flags) @@ -6609,36 +6645,37 @@ CreateCheckPoint(int flags) checkPoint.fullPageWrites = Insert->fullPageWrites; - /* - * Compute new REDO record ptr = location of next XLOG record. - * - * NB: this is NOT necessarily where the checkpoint record itself will be, - * since other backends may insert more XLOG records while we're off doing - * the buffer flush work. Those XLOG records are logically after the - * checkpoint, even though physically before it. Got that? - */ - freespace = INSERT_FREESPACE(curInsert); - if (freespace == 0) + if (shutdown) { - if (XLogSegmentOffset(curInsert, wal_segment_size) == 0) - curInsert += SizeOfXLogLongPHD; - else - curInsert += SizeOfXLogShortPHD; - } - checkPoint.redo = curInsert; + /* + * Compute new REDO record ptr = location of next XLOG record. + * + * Since this is a shutdown checkpoint, there can't be any concurrent WAL + * insertion. 
+ */ + freespace = INSERT_FREESPACE(curInsert); + if (freespace == 0) + { + if (XLogSegmentOffset(curInsert, wal_segment_size) == 0) + curInsert += SizeOfXLogLongPHD; + else + curInsert += SizeOfXLogShortPHD; + } + checkPoint.redo = curInsert; - /* - * Here we update the shared RedoRecPtr for future XLogInsert calls; this - * must be done while holding all the insertion locks. - * - * Note: if we fail to complete the checkpoint, RedoRecPtr will be left - * pointing past where it really needs to point. This is okay; the only - * consequence is that XLogInsert might back up whole buffers that it - * didn't really need to. We can't postpone advancing RedoRecPtr because - * XLogInserts that happen while we are dumping buffers must assume that - * their buffer changes are not included in the checkpoint. - */ - RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo; + /* + * Here we update the shared RedoRecPtr for future XLogInsert calls; this + * must be done while holding all the insertion locks. + * + * Note: if we fail to complete the checkpoint, RedoRecPtr will be left + * pointing past where it really needs to point. This is okay; the only + * consequence is that XLogInsert might back up whole buffers that it + * didn't really need to. We can't postpone advancing RedoRecPtr because + * XLogInserts that happen while we are dumping buffers must assume that + * their buffer changes are not included in the checkpoint. + */ + RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo; + } /* * Now we can release the WAL insertion locks, allowing other xacts to @@ -6646,6 +6683,30 @@ CreateCheckPoint(int flags) */ WALInsertLockRelease(); + /* + * If this is an online checkpoint, we have not yet determined the redo + * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO record; + * the LSN at which it starts becomes the new redo pointer. 
We don't do this + * for a shutdown checkpoint, because in that case no WAL can be written + * between the redo point and the insertion of the checkpoint record itself, + * so the checkpoint record itself serves to mark the redo point. + */ + if (!shutdown) + { + int dummy = 0; + + /* Record must have payload to avoid assertion failure. */ + XLogBeginInsert(); + XLogRegisterData((char *) &dummy, sizeof(dummy)); + (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO); + + /* + * XLogInsertRecord will have updated RedoRecPtr, but we need to copy that + * into the record that will be inserted when the checkpoint is complete. + */ + checkPoint.redo = RedoRecPtr; + } + /* Update the info_lck-protected copy of RedoRecPtr as well */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->RedoRecPtr = checkPoint.redo; -- 2.37.1 (Apple Git-137.1)