From f670d8135cb220e96eac8037912a92c4a8eb3eb1 Mon Sep 17 00:00:00 2001
From: Kirk Jamison
Date: Fri, 11 Sep 2020 13:00:33 +0000
Subject: [PATCH] Speed up dropping of relation buffers during recovery

---
 src/backend/storage/buffer/bufmgr.c | 182 ++++++++++++++++++++++++++++++------
 src/backend/storage/smgr/smgr.c     |   2 +-
 src/include/storage/bufmgr.h        |   2 +-
 3 files changed, 155 insertions(+), 31 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index a2a963b..bd1d89b 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -70,6 +70,8 @@
 
 #define RELS_BSEARCH_THRESHOLD		20
 
+#define BUF_DROP_FULLSCAN_THRESHOLD	((uint32) (NBuffers / 500))
+
 typedef struct PrivateRefCountEntry
 {
 	Buffer		buffer;
@@ -473,6 +475,8 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   BufferAccessStrategy strategy,
 							   bool *foundPtr);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void DropRelFileNodeBuffersOfFork(RelFileNode rnode, ForkNumber forkNum,
+										 BlockNumber firstDelBlock);
 static void AtProcExit_Buffers(int code, Datum arg);
 static void CheckForBufferLeaks(void);
 static int	rnode_comparator(const void *p1, const void *p2);
@@ -2979,65 +2983,185 @@ BufferGetLSNAtomic(Buffer buffer)
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 					   int nforks, BlockNumber *firstDelBlock)
 {
-	int			i;
-	int			j;
+	int			buf_num;
+	int			fork_num;
+	RelFileNodeBackend rnode;
+	BufferDesc *bufHdr;
+	uint32		buf_state;
+
+	rnode = smgr_reln->smgr_rnode;
 
 	/* If it's a local relation, it's localbuf.c's problem. */
 	if (RelFileNodeBackendIsTemp(rnode))
 	{
 		if (rnode.backend == MyBackendId)
 		{
-			for (j = 0; j < nforks; j++)
-				DropRelFileNodeLocalBuffers(rnode.node, forkNum[j],
-											firstDelBlock[j]);
+			for (fork_num = 0; fork_num < nforks; fork_num++)
+				DropRelFileNodeLocalBuffers(rnode.node, forkNum[fork_num],
+											firstDelBlock[fork_num]);
 		}
 		return;
 	}
 
-	for (i = 0; i < NBuffers; i++)
+	/*
+	 * We only speed up this path during recovery, because that is the only
+	 * time we can rely on a valid cached value for the relation's number of
+	 * blocks.  See the comment in smgrnblocks() in smgr.c.  Otherwise, fall
+	 * back to the usual buffer invalidation process (a scan of the whole
+	 * shared buffer pool).
+	 */
+	if (InRecovery)
 	{
-		BufferDesc *bufHdr = GetBufferDescriptor(i);
-		uint32		buf_state;
+		for (fork_num = 0; fork_num < nforks; fork_num++)
+		{
+			BlockNumber nblocks;
+
+			/* Get the number of blocks for the supplied relation's fork */
+			nblocks = smgrnblocks(smgr_reln, forkNum[fork_num]);
+			Assert(BlockNumberIsValid(nblocks));
+
+			if (nblocks < BUF_DROP_FULLSCAN_THRESHOLD)
+			{
+				BlockNumber block_num;
+
+				for (block_num = firstDelBlock[fork_num]; block_num < nblocks;
+					 block_num++)
+				{
+					uint32		newHash;	/* hash value for newTag */
+					BufferTag	newTag;		/* identity of requested block */
+					LWLock	   *newPartitionLock;	/* buffer partition lock for it */
+					int			buf_id;
+
+					/* create a tag for the block so we can look up its buffer */
+					INIT_BUFFERTAG(newTag, rnode.node, forkNum[fork_num],
+								   block_num);
+
+					/* determine its hash code and partition lock ID */
+					newHash = BufTableHashCode(&newTag);
+					newPartitionLock = BufMappingPartitionLock(newHash);
+
+					/* Check whether the block is in the buffer pool; if not, skip it */
+					LWLockAcquire(newPartitionLock, LW_SHARED);
+					buf_id = BufTableLookup(&newTag, newHash);
+
+					if (buf_id < 0)
+					{
+						LWLockRelease(newPartitionLock);
+						continue;
+					}
+					LWLockRelease(newPartitionLock);
+
+					bufHdr = GetBufferDescriptor(buf_id);
+
+					/*
+					 * We can make this a tad faster by prechecking the buffer
+					 * tag before we attempt to lock the buffer; this saves a
+					 * lot of lock acquisitions in typical cases.  It should
+					 * be safe because the caller must have
+					 * AccessExclusiveLock on the relation, or some other
+					 * reason to be certain that no one is loading new pages
+					 * of the rel into the buffer pool.  (Otherwise we might
+					 * well miss such pages entirely.)  Therefore, while the
+					 * tag might be changing while we look at it, it can't be
+					 * changing *to* a value we care about, only *away* from
+					 * such a value.  So false negatives are impossible, and
+					 * false positives are safe because we'll recheck after
+					 * getting the buffer lock.
+					 *
+					 * We could check forkNum and blockNum as well as the
+					 * rnode, but the incremental win from doing so seems
+					 * small.
+					 */
+					if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+						continue;
+
+					buf_state = LockBufHdr(bufHdr);
+
+					if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
+						bufHdr->tag.forkNum == forkNum[fork_num] &&
+						bufHdr->tag.blockNum >= firstDelBlock[fork_num])
+						InvalidateBuffer(bufHdr);	/* releases spinlock */
+					else
+						UnlockBufHdr(bufHdr, buf_state);
+				}
+			}
+			else
+			{
+				/*
+				 * The fork exceeds the threshold, so fall back to scanning
+				 * the whole shared buffer pool for this fork.
+				 */
+				DropRelFileNodeBuffersOfFork(rnode.node, forkNum[fork_num],
+											 firstDelBlock[fork_num]);
+			}
+		}
+		return;
+	}
+
+	for (buf_num = 0; buf_num < NBuffers; buf_num++)
 	{
+		bufHdr = GetBufferDescriptor(buf_num);
 
-		/*
-		 * We can make this a tad faster by prechecking the buffer tag before
-		 * we attempt to lock the buffer; this saves a lot of lock
-		 * acquisitions in typical cases.  It should be safe because the
-		 * caller must have AccessExclusiveLock on the relation, or some other
-		 * reason to be certain that no one is loading new pages of the rel
-		 * into the buffer pool.  (Otherwise we might well miss such pages
-		 * entirely.)  Therefore, while the tag might be changing while we
-		 * look at it, it can't be changing *to* a value we care about, only
-		 * *away* from such a value.  So false negatives are impossible, and
-		 * false positives are safe because we'll recheck after getting the
-		 * buffer lock.
-		 *
-		 * We could check forkNum and blockNum as well as the rnode, but the
-		 * incremental win from doing so seems small.
-		 */
 		if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
 			continue;
 
 		buf_state = LockBufHdr(bufHdr);
 
-		for (j = 0; j < nforks; j++)
+		for (fork_num = 0; fork_num < nforks; fork_num++)
 		{
 			if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
-				bufHdr->tag.forkNum == forkNum[j] &&
-				bufHdr->tag.blockNum >= firstDelBlock[j])
+				bufHdr->tag.forkNum == forkNum[fork_num] &&
+				bufHdr->tag.blockNum >= firstDelBlock[fork_num])
 			{
 				InvalidateBuffer(bufHdr);	/* releases spinlock */
 				break;
 			}
 		}
-		if (j >= nforks)
+		if (fork_num >= nforks)
 			UnlockBufHdr(bufHdr, buf_state);
 	}
 }
 
+
+/* -----------------------------------------------------------------
+ *		DropRelFileNodeBuffersOfFork
+ *
+ *		This function removes from the buffer pool all pages of the
+ *		specified fork of the given relation whose block numbers are
+ *		greater than or equal to firstDelBlock.
+ * -----------------------------------------------------------------
+ */
+static void
+DropRelFileNodeBuffersOfFork(RelFileNode rnode, ForkNumber forkNum,
+							 BlockNumber firstDelBlock)
+{
+	int			buf_num;
+
+	for (buf_num = 0; buf_num < NBuffers; buf_num++)
+	{
+		BufferDesc *bufHdr = GetBufferDescriptor(buf_num);
+		uint32		buf_state;
+
+		if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode))
+			continue;
+
+		buf_state = LockBufHdr(bufHdr);
+
+		if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
+			bufHdr->tag.forkNum == forkNum &&
+			bufHdr->tag.blockNum >= firstDelBlock)
+			InvalidateBuffer(bufHdr);	/* releases spinlock */
+		else
+			UnlockBufHdr(bufHdr, buf_state);
+	}
+}
+
 /* ---------------------------------------------------------------------
  *		DropRelFileNodesAllBuffers
  *
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index dcc09df..5238c6c 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -583,7 +583,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 	 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
 	 * just drop them without bothering to write the contents.
 	 */
-	DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nforks, nblocks);
+	DropRelFileNodeBuffers(reln, forknum, nforks, nblocks);
 
 	/*
 	 * Send a shared-inval message to force other backends to close any smgr
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index ee91b8f..056f65e 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -203,7 +203,7 @@ extern void FlushOneBuffer(Buffer buffer);
 extern void FlushRelationBuffers(Relation rel);
 extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
 extern void FlushDatabaseBuffers(Oid dbid);
-extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
 								   int nforks, BlockNumber *firstDelBlock);
 extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
-- 
1.8.3.1
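
For a feel of the threshold arithmetic, here is a small standalone sketch (not part of the patch; the NBuffers values are illustrative, roughly shared_buffers of 128MB and 8GB with 8kB pages). It compares the worst-case work done by the targeted per-block lookup path against the full header scan that DropRelFileNodeBuffersOfFork() performs:

#include <stdio.h>

int
main(void)
{
	/* illustrative buffer pool sizes, in 8kB pages (128MB and 8GB) */
	const unsigned pool_sizes[] = {16384, 1048576};

	for (int i = 0; i < 2; i++)
	{
		unsigned	nbuffers = pool_sizes[i];
		unsigned	threshold = nbuffers / 500;	/* BUF_DROP_FULLSCAN_THRESHOLD */

		/*
		 * A fork just below the threshold pays at most one hash-table
		 * lookup per block; a fork at or above it pays one check of
		 * every buffer header, as in the full-scan fallback.
		 */
		unsigned	nblocks = (threshold > 0) ? threshold - 1 : 0;

		printf("NBuffers = %u: threshold = %u, worst optimized case = %u lookups vs %u header checks\n",
			   nbuffers, threshold, nblocks, nbuffers);
	}
	return 0;
}

One consequence of the NBuffers / 500 arithmetic worth noting during review: for pools smaller than 500 buffers the threshold evaluates to zero, so the optimized path never triggers on very small shared_buffers settings and the code always falls back to the full scan there.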