From 08f6818351edce949b6cf37add8f59410d0d4a01 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Mon, 14 Sep 2020 23:20:55 +1200 Subject: [PATCH v11 5/6] WIP: Avoid extra buffer lookup when prefetching WAL blocks. Provide some workspace in decoded WAL records, so that we can remember which buffer we recently found a block cached in, for later use when replaying the record. Provide a new way to look up a recently-known buffer and check if it's still valid and has the right tag. XXX Needs review to figure out if it's safe or steamrolling over subtleties --- src/backend/access/transam/xlog.c | 2 +- src/backend/access/transam/xlogprefetch.c | 6 ++-- src/backend/access/transam/xlogreader.c | 13 ++++++++ src/backend/access/transam/xlogutils.c | 23 ++++++++++--- src/backend/storage/buffer/bufmgr.c | 40 +++++++++++++++++++++++ src/backend/storage/freespace/freespace.c | 3 +- src/include/access/xlogreader.h | 7 ++++ src/include/access/xlogutils.h | 3 +- src/include/storage/bufmgr.h | 2 ++ 9 files changed, 89 insertions(+), 10 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 5f929de671..475abe9e10 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -1452,7 +1452,7 @@ checkXLogConsistency(XLogReaderState *record) * temporary page. */ buf = XLogReadBufferExtended(rnode, forknum, blkno, - RBM_NORMAL_NO_LOG); + RBM_NORMAL_NO_LOG, InvalidBuffer); if (!BufferIsValid(buf)) continue; diff --git a/src/backend/access/transam/xlogprefetch.c b/src/backend/access/transam/xlogprefetch.c index a8149b946c..948a63f25d 100644 --- a/src/backend/access/transam/xlogprefetch.c +++ b/src/backend/access/transam/xlogprefetch.c @@ -624,10 +624,10 @@ XLogPrefetcherScanBlocks(XLogPrefetcher *prefetcher) if (BufferIsValid(prefetch.recent_buffer)) { /* - * It was already cached, so do nothing. Perhaps in future we - * could remember the buffer so that recovery doesn't have to look - * it up again. 
+ * It was already cached, so do nothing. We'll remember the + * buffer, so that recovery can try to avoid looking it up again. */ + block->recent_buffer = prefetch.recent_buffer; pg_atomic_unlocked_add_fetch_u64(&Stats->skip_hit, 1); } else if (prefetch.initiated_io) diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index fb0d80e7c7..9640899ea7 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1651,6 +1651,8 @@ DecodeXLogRecord(XLogReaderState *state, blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0); blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0); + blk->recent_buffer = InvalidBuffer; + COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16)); /* cross-check that the HAS_DATA flag is set iff data_length > 0 */ if (blk->has_data && blk->data_len == 0) @@ -1860,6 +1862,15 @@ err: bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum) +{ + return XLogRecGetRecentBuffer(record, block_id, rnode, forknum, blknum, + NULL); +} + +bool +XLogRecGetRecentBuffer(XLogReaderState *record, uint8 block_id, + RelFileNode *rnode, ForkNumber *forknum, + BlockNumber *blknum, Buffer *recent_buffer) { DecodedBkpBlock *bkpb; @@ -1874,6 +1885,8 @@ XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, *forknum = bkpb->forknum; if (blknum) *blknum = bkpb->blkno; + if (recent_buffer) + *recent_buffer = bkpb->recent_buffer; return true; } diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index db0c801456..8a7eac65cf 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -336,11 +336,13 @@ XLogReadBufferForRedoExtended(XLogReaderState *record, RelFileNode rnode; ForkNumber forknum; BlockNumber blkno; + Buffer recent_buffer; Page page; bool zeromode; bool willinit; - if (!XLogRecGetBlockTag(record, block_id, &rnode, 
&forknum, &blkno)) + if (!XLogRecGetRecentBuffer(record, block_id, &rnode, &forknum, &blkno, + &recent_buffer)) { /* Caller specified a bogus block_id */ elog(PANIC, "failed to locate backup block with ID %d", block_id); @@ -362,7 +364,8 @@ XLogReadBufferForRedoExtended(XLogReaderState *record, { Assert(XLogRecHasBlockImage(record, block_id)); *buf = XLogReadBufferExtended(rnode, forknum, blkno, - get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK); + get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK, + recent_buffer); page = BufferGetPage(*buf); if (!RestoreBlockImage(record, block_id, page)) elog(ERROR, "failed to restore block image"); @@ -391,7 +394,8 @@ XLogReadBufferForRedoExtended(XLogReaderState *record, } else { - *buf = XLogReadBufferExtended(rnode, forknum, blkno, mode); + *buf = XLogReadBufferExtended(rnode, forknum, blkno, mode, + recent_buffer); if (BufferIsValid(*buf)) { if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK) @@ -439,7 +443,8 @@ XLogReadBufferForRedoExtended(XLogReaderState *record, */ Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, - BlockNumber blkno, ReadBufferMode mode) + BlockNumber blkno, ReadBufferMode mode, + Buffer recent_buffer) { BlockNumber lastblock; Buffer buffer; @@ -447,6 +452,15 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, Assert(blkno != P_NEW); + /* Do we have a clue where the buffer might be already? 
*/ + if (BufferIsValid(recent_buffer) && + mode == RBM_NORMAL && + ReadRecentBuffer(rnode, forknum, blkno, recent_buffer)) + { + buffer = recent_buffer; + goto recent_buffer_fast_path; + } + /* Open the relation at smgr level */ smgr = smgropen(rnode, InvalidBackendId); @@ -505,6 +519,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, } } +recent_buffer_fast_path: if (mode == RBM_NORMAL) { /* check that page has been initialized */ diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index a2a963bd5b..c8a755fb09 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -598,6 +598,46 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) } } +/* + * ReadRecentBuffer -- try to refind a buffer that we suspect holds a given + * block + * + * Return true if the buffer is valid, has the correct tag, and we managed + * to pin it. + */ +bool +ReadRecentBuffer(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, + Buffer recent_buffer) +{ + BufferDesc *bufHdr; + BufferTag tag; + + Assert(BufferIsValid(recent_buffer)); + + /* Look up the header by index, and try to pin if shared. */ + if (BufferIsLocal(recent_buffer)) + bufHdr = GetBufferDescriptor(-recent_buffer - 1); + else + { + bufHdr = GetBufferDescriptor(recent_buffer - 1); + ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + if (!PinBuffer(bufHdr, NULL)) + { + /* Not valid, couldn't pin it. */ + UnpinBuffer(bufHdr, true); + return false; + } + } + + /* Does the tag match? */ + INIT_BUFFERTAG(tag, rnode, forkNum, blockNum); + if (BUFFERTAGS_EQUAL(tag, bufHdr->tag)) + return true; + + /* Nope -- this isn't the block we seek. 
*/ + UnpinBuffer(bufHdr, true); + return false; +} /* * ReadBuffer -- a shorthand for ReadBufferExtended, for reading from main diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 6a96126b0c..c998b52c13 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -210,7 +210,8 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, blkno = fsm_logical_to_physical(addr); /* If the page doesn't exist already, extend */ - buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR); + buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, + InvalidBuffer); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buf); diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index ad77c04d0f..84c5fa744b 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -39,6 +39,7 @@ #endif #include "access/xlogrecord.h" +#include "storage/buf.h" /* WALOpenSegment represents a WAL segment being read. */ typedef struct WALOpenSegment @@ -126,6 +127,9 @@ typedef struct ForkNumber forknum; BlockNumber blkno; + /* Workspace for remembering last known buffer holding this block. 
*/ + Buffer recent_buffer; + /* copy of the fork_flags field from the XLogRecordBlockHeader */ uint8 flags; @@ -377,5 +381,8 @@ extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size * extern bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum); +extern bool XLogRecGetRecentBuffer(XLogReaderState *record, uint8 block_id, + RelFileNode *rnode, ForkNumber *forknum, + BlockNumber *blknum, Buffer *recent_buffer); #endif /* XLOGREADER_H */ diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index 374c1b16ce..a0c2b60c57 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -42,7 +42,8 @@ extern XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record, Buffer *buf); extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, - BlockNumber blkno, ReadBufferMode mode); + BlockNumber blkno, ReadBufferMode mode, + Buffer recent_buffer); extern Relation CreateFakeRelcacheEntry(RelFileNode rnode); extern void FreeFakeRelcacheEntry(Relation fakerel); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index ee91b8fa26..c3280b754e 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -176,6 +176,8 @@ extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_r BlockNumber blockNum); extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum); +extern bool ReadRecentBuffer(RelFileNode rnode, ForkNumber forkNum, + BlockNumber blockNum, Buffer recent_buffer); extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, -- 2.20.1