From e59bb976b1804ce6f2a2436b176deb3309bf49ce Mon Sep 17 00:00:00 2001
From: Kirk Jamison
Date: Fri, 11 Sep 2020 13:00:33 +0000
Subject: [PATCH] Speedup dropping of relation buffers during recovery

---
 src/backend/storage/buffer/bufmgr.c | 126 ++++++++++++++++++++++++++++++++++--
 src/backend/storage/smgr/smgr.c     |   9 ++-
 src/include/storage/bufmgr.h        |   2 +-
 3 files changed, 128 insertions(+), 9 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index a2a963b..6494f9f 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -70,6 +70,8 @@
 
 #define RELS_BSEARCH_THRESHOLD		20
 
+#define BUF_DROP_FULLSCAN_THRESHOLD		((uint32) (NBuffers / 500))
+
 typedef struct PrivateRefCountEntry
 {
 	Buffer		buffer;
@@ -473,6 +475,8 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   BufferAccessStrategy strategy,
 							   bool *foundPtr);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void DropRelFileNodeBuffersOfFork(RelFileNode rnode, ForkNumber forkNum,
+										 BlockNumber firstDelBlock);
 static void AtProcExit_Buffers(int code, Datum arg);
 static void CheckForBufferLeaks(void);
 static int	rnode_comparator(const void *p1, const void *p2);
@@ -2972,18 +2976,27 @@ BufferGetLSNAtomic(Buffer buffer)
  *		that no other process could be trying to load more pages of the
  *		relation into buffers.
  *
- *		XXX currently it sequentially searches the buffer pool, should be
- *		changed to more clever ways of searching.  However, this routine
- *		is used only in code paths that aren't very performance-critical,
- *		and we shouldn't slow down the hot paths to make it faster ...
+ *		XXX The relation may have been extended before this call, so the
+ *		optimized path is taken only during recovery, when a reliable cached
+ *		block count is available for the relation.  See the comment in
+ *		smgrnblocks() in smgr.c.  This is safe because the startup process
+ *		is the only process that changes the relation size during recovery.
+ *		If the relation is too large, or if we are not in recovery, fall
+ *		back to the usual invalidation process, which sequentially scans
+ *		the whole buffer pool.
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 					   int nforks, BlockNumber *firstDelBlock)
 {
 	int			i;
 	int			j;
+	RelFileNodeBackend	rnode;
+	BufferDesc *bufHdr;
+	uint32		buf_state;
+
+	rnode = smgr_reln->smgr_rnode;
 
 	/* If it's a local relation, it's localbuf.c's problem. */
 	if (RelFileNodeBackendIsTemp(rnode))
@@ -2997,10 +3010,78 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
 		return;
 	}
 
+	if (InRecovery)
+	{
+		BlockNumber nblocks = 0;
+
+		/* Get the total number of blocks across the supplied relation forks */
+		for (j = 0; j < nforks; j++)
+		{
+			BlockNumber		block = smgrnblocks(smgr_reln, forkNum[j]);
+			nblocks += block;
+		}
+
+		for (j = 0; j < nforks; j++)
+		{
+			/*
+			 * Probe the buffer mapping table block by block only when the total
+			 * number of blocks is below the full-scan threshold, i.e. when the
+			 * relation is small relative to NBuffers; else scan the whole pool.
+			 */
+			if (BlockNumberIsValid(nblocks) &&
+				nblocks < BUF_DROP_FULLSCAN_THRESHOLD)
+			{
+				BlockNumber		cur_blk;
+
+				for (cur_blk = firstDelBlock[j]; cur_blk < nblocks; cur_blk++)
+				{
+					uint32		newHash;		/* hash value for newTag */
+					BufferTag	newTag;			/* identity of requested block */
+					LWLock	   *newPartitionLock;	/* buffer partition lock for it */
+					int			buf_id;
+
+					/* create a tag so we can lookup the buffer */
+					INIT_BUFFERTAG(newTag, rnode.node, forkNum[j], cur_blk);
+
+					/* determine its hash code and partition lock ID */
+					newHash = BufTableHashCode(&newTag);
+					newPartitionLock = BufMappingPartitionLock(newHash);
+
+					/* Check that it is in the buffer pool. If not, do nothing */
+					LWLockAcquire(newPartitionLock, LW_SHARED);
+					buf_id = BufTableLookup(&newTag, newHash);
+					LWLockRelease(newPartitionLock);
+
+					if (buf_id < 0)
+						continue;
+
+					bufHdr = GetBufferDescriptor(buf_id);
+
+					buf_state = LockBufHdr(bufHdr);
+
+					if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
+						bufHdr->tag.forkNum == forkNum[j] &&
+						bufHdr->tag.blockNum == cur_blk)
+						InvalidateBuffer(bufHdr);	/* releases spinlock */
+					else
+						UnlockBufHdr(bufHdr, buf_state);
+				}
+			}
+			else
+			{
+				/*
+				 * Relation is larger than the threshold. Execute full scan of
+				 * buffer pool for each fork.
+				 */
+				DropRelFileNodeBuffersOfFork(rnode.node, forkNum[j],
+											 firstDelBlock[j]);
+			}
+		}
+		return;
+	}
 	for (i = 0; i < NBuffers; i++)
 	{
-		BufferDesc *bufHdr = GetBufferDescriptor(i);
-		uint32		buf_state;
+		bufHdr = GetBufferDescriptor(i);
 
 		/*
 		 * We can make this a tad faster by prechecking the buffer tag before
@@ -3038,6 +3119,37 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
 	}
 }
 
+
+/* -----------------------------------------------------------------
+ *		DropRelFileNodeBuffersOfFork
+ *
+ *		This function removes from the buffer pool the pages for
+ *		the specified relation's fork.
+ * -----------------------------------------------------------------
+ */
+static void
+DropRelFileNodeBuffersOfFork(RelFileNode rnode, ForkNumber forkNum,
+							 BlockNumber firstDelBlock)
+{
+	int			i;
+
+	for (i = 0; i < NBuffers; i++)
+	{
+		BufferDesc *bufHdr = GetBufferDescriptor(i);
+		uint32		buf_state;
+
+		buf_state = LockBufHdr(bufHdr);
+
+		if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
+			bufHdr->tag.forkNum == forkNum &&
+			bufHdr->tag.blockNum >= firstDelBlock)
+			InvalidateBuffer(bufHdr);	/* releases spinlock */
+		else
+			UnlockBufHdr(bufHdr, buf_state);
+	}
+}
+
+
 /* ---------------------------------------------------------------------
  *		DropRelFileNodesAllBuffers
  *
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index dcc09df..450819f 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -474,7 +474,14 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	if (reln->smgr_cached_nblocks[forknum] == blocknum)
 		reln->smgr_cached_nblocks[forknum] = blocknum + 1;
 	else
+	{
+		/*
+		 * DropRelFileNodeBuffers relies on the behavior that cached nblocks
+		 * won't be invalidated by file extension while recovering.
+		 */
+		Assert(!InRecovery);
 		reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
+	}
 }
 
 /*
@@ -583,7 +590,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 	 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
 	 * just drop them without bothering to write the contents.
 	 */
-	DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nforks, nblocks);
+	DropRelFileNodeBuffers(reln, forknum, nforks, nblocks);
 
 	/*
 	 * Send a shared-inval message to force other backends to close any smgr
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index ee91b8f..056f65e 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -203,7 +203,7 @@ extern void FlushOneBuffer(Buffer buffer);
 extern void FlushRelationBuffers(Relation rel);
 extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
 extern void FlushDatabaseBuffers(Oid dbid);
-extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
 								   int nforks, BlockNumber *firstDelBlock);
 extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
-- 
1.8.3.1