From 3da0557b06ef03b190ba1f22522b4cf56fb2a52d Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 28 Mar 2024 21:48:02 +0200 Subject: [PATCH v13-bmr 5/5] Refactor how BufferManagerRelations are used --- contrib/bloom/blutils.c | 8 +- contrib/pg_prewarm/pg_prewarm.c | 7 +- src/backend/access/brin/brin.c | 10 +- src/backend/access/brin/brin_revmap.c | 8 +- src/backend/access/gin/gininsert.c | 9 +- src/backend/access/gin/ginutil.c | 5 +- src/backend/access/gist/gist.c | 5 +- src/backend/access/gist/gistutil.c | 5 +- src/backend/access/hash/hashpage.c | 9 +- src/backend/access/heap/hio.c | 5 +- src/backend/access/heap/visibilitymap.c | 4 +- src/backend/access/nbtree/nbtpage.c | 7 +- src/backend/access/spgist/spgutils.c | 8 +- src/backend/access/transam/xlogutils.c | 10 +- src/backend/commands/sequence.c | 6 +- src/backend/storage/aio/read_stream.c | 32 +- src/backend/storage/buffer/bufmgr.c | 374 +++++++++++----------- src/backend/storage/buffer/localbuf.c | 11 +- src/backend/storage/freespace/freespace.c | 6 +- src/include/storage/buf_internals.h | 3 +- src/include/storage/bufmgr.h | 67 ++-- src/include/storage/read_stream.h | 4 +- 22 files changed, 304 insertions(+), 299 deletions(-) diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 6836129c90d..177c8698278 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -354,8 +354,11 @@ BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple) Buffer BloomNewBuffer(Relation index) { + BufferManagerRelation bmr; Buffer buffer; + InitBMRForRel(&bmr, index, MAIN_FORKNUM, NULL); + /* First, try to get a page from FSM */ for (;;) { @@ -364,7 +367,7 @@ BloomNewBuffer(Relation index) if (blkno == InvalidBlockNumber) break; - buffer = ReadBuffer(index, blkno); + buffer = ReadBufferBMR(&bmr, blkno, RBM_NORMAL); /* * We have to guard against the possibility that someone else already @@ -388,8 +391,7 @@ BloomNewBuffer(Relation index) } /* Must extend the file */ - buffer = 
ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL, - EB_LOCK_FIRST); + buffer = ExtendBufferedRel(&bmr, EB_LOCK_FIRST); return buffer; } diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c index f6269562b54..72ec9378581 100644 --- a/contrib/pg_prewarm/pg_prewarm.c +++ b/contrib/pg_prewarm/pg_prewarm.c @@ -205,6 +205,9 @@ pg_prewarm(PG_FUNCTION_ARGS) { struct pg_prewarm_read_stream_private p; ReadStream *stream; + BufferManagerRelation bmr; + + InitBMRForRel(&bmr, rel, forkNumber, NULL); /* * In buffer mode, we actually pull the data into shared_buffers. @@ -215,9 +218,7 @@ pg_prewarm(PG_FUNCTION_ARGS) p.last_block = last_block; stream = read_stream_begin_relation(READ_STREAM_FULL, - NULL, - BMR_REL(rel), - forkNumber, + &bmr, pg_prewarm_read_stream_next_block, &p, 0); diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 041415a40e7..32b4189314d 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -1088,6 +1088,7 @@ brinbuildCallbackParallel(Relation index, IndexBuildResult * brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) { + BufferManagerRelation bmr; IndexBuildResult *result; double reltuples; double idxtuples; @@ -1108,8 +1109,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) * whole relation will be rolled back. */ - meta = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL, - EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); + InitBMRForRel(&bmr, index, MAIN_FORKNUM, NULL); + meta = ExtendBufferedRel(&bmr, EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); Assert(BufferGetBlockNumber(meta) == BRIN_METAPAGE_BLKNO); brin_metapage_init(BufferGetPage(meta), BrinGetPagesPerRange(index), @@ -1258,11 +1259,12 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) void brinbuildempty(Relation index) { + BufferManagerRelation bmr; Buffer metabuf; /* An empty BRIN index has a metapage only. 
*/ - metabuf = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL, - EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); + InitBMRForRel(&bmr, index, INIT_FORKNUM, NULL); + metabuf = ExtendBufferedRel(&bmr, EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); /* Initialize and xlog metabuffer. */ START_CRIT_SECTION(); diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index 5a9ed40ab64..15746942f55 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -528,6 +528,9 @@ revmap_physical_extend(BrinRevmap *revmap) BlockNumber mapBlk; BlockNumber nblocks; Relation irel = revmap->rm_irel; + BufferManagerRelation bmr; + + InitBMRForRel(&bmr, irel, MAIN_FORKNUM, NULL); /* * Lock the metapage. This locks out concurrent extensions of the revmap, @@ -553,14 +556,13 @@ revmap_physical_extend(BrinRevmap *revmap) nblocks = RelationGetNumberOfBlocks(irel); if (mapBlk < nblocks) { - buf = ReadBuffer(irel, mapBlk); + buf = ReadBufferBMR(&bmr, mapBlk, RBM_NORMAL); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buf); } else { - buf = ExtendBufferedRel(BMR_REL(irel), MAIN_FORKNUM, NULL, - EB_LOCK_FIRST); + buf = ExtendBufferedRel(&bmr, EB_LOCK_FIRST); if (BufferGetBlockNumber(buf) != mapBlk) { /* diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 71f38be90c3..ccddba919f6 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -433,14 +433,15 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) void ginbuildempty(Relation index) { + BufferManagerRelation bmr; Buffer RootBuffer, MetaBuffer; + InitBMRForRel(&bmr, index, INIT_FORKNUM, NULL); + /* An empty GIN index has two pages. 
*/ - MetaBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL, - EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); - RootBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL, - EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); + MetaBuffer = ExtendBufferedRel(&bmr, EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); + RootBuffer = ExtendBufferedRel(&bmr, EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); /* Initialize and xlog metabuffer and root buffer. */ START_CRIT_SECTION(); diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 5747ae6a4ca..0da4a218402 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -299,6 +299,7 @@ gintuple_get_key(GinState *ginstate, IndexTuple tuple, Buffer GinNewBuffer(Relation index) { + BufferManagerRelation bmr; Buffer buffer; /* First, try to get a page from FSM */ @@ -328,8 +329,8 @@ GinNewBuffer(Relation index) } /* Must extend the file */ - buffer = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL, - EB_LOCK_FIRST); + InitBMRForRel(&bmr, index, MAIN_FORKNUM, NULL); + buffer = ExtendBufferedRel(&bmr, EB_LOCK_FIRST); return buffer; } diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index ed4ffa63a77..d1761b73fe2 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -132,11 +132,12 @@ createTempGistContext(void) void gistbuildempty(Relation index) { + BufferManagerRelation bmr; Buffer buffer; /* Initialize the root page */ - buffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL, - EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST); + InitBMRForRel(&bmr, index, INIT_FORKNUM, NULL); + buffer = ExtendBufferedRel(&bmr, EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST); /* Initialize and xlog buffer */ START_CRIT_SECTION(); diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index d2d0b36d4ea..1d8a955b093 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -823,6 +823,7 
@@ gistcheckpage(Relation rel, Buffer buf) Buffer gistNewBuffer(Relation r, Relation heaprel) { + BufferManagerRelation bmr; Buffer buffer; /* First, try to get a page from FSM */ @@ -877,8 +878,8 @@ gistNewBuffer(Relation r, Relation heaprel) } /* Must extend the file */ - buffer = ExtendBufferedRel(BMR_REL(r), MAIN_FORKNUM, NULL, - EB_LOCK_FIRST); + InitBMRForRel(&bmr, r, MAIN_FORKNUM, NULL); + buffer = ExtendBufferedRel(&bmr, EB_LOCK_FIRST); return buffer; } diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index d09c349e28f..bab6b22b0b3 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -197,6 +197,7 @@ _hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag, Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum) { + BufferManagerRelation bmr; BlockNumber nblocks = RelationGetNumberOfBlocksInFork(rel, forkNum); Buffer buf; @@ -206,19 +207,19 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum) elog(ERROR, "access to noncontiguous page in hash index \"%s\"", RelationGetRelationName(rel)); + InitBMRForRel(&bmr, rel, forkNum, NULL); + /* smgr insists we explicitly extend the relation */ if (blkno == nblocks) { - buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL, - EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); + buf = ExtendBufferedRel(&bmr, EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); if (BufferGetBlockNumber(buf) != blkno) elog(ERROR, "unexpected hash relation size: %u, should be %u", BufferGetBlockNumber(buf), blkno); } else { - buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO_AND_LOCK, - NULL); + buf = ReadBufferBMR(&bmr, blkno, RBM_ZERO_AND_LOCK); } /* ref count and lock type are correct */ diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 7c662cdf46e..f17ffe8a64b 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -246,6 +246,7 @@ RelationAddBlocks(Relation relation, 
BulkInsertState bistate, uint32 not_in_fsm_pages; Buffer buffer; Page page; + BufferManagerRelation bmr; /* * Determine by how many pages to try to extend by. @@ -338,8 +339,8 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate, * [auto]vacuum trying to truncate later pages as REL_TRUNCATE_MINIMUM is * way larger. */ - first_block = ExtendBufferedRelBy(BMR_REL(relation), MAIN_FORKNUM, - bistate ? bistate->strategy : NULL, + InitBMRForRel(&bmr, relation, MAIN_FORKNUM, bistate ? bistate->strategy : NULL); + first_block = ExtendBufferedRelBy(&bmr, EB_LOCK_FIRST, extend_by_pages, victim_buffers, diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 1ab6c865e3c..e04752adecd 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -626,9 +626,11 @@ vm_readbuf(Relation rel, BlockNumber blkno, bool extend) static Buffer vm_extend(Relation rel, BlockNumber vm_nblocks) { + BufferManagerRelation bmr; Buffer buf; - buf = ExtendBufferedRelTo(BMR_REL(rel), VISIBILITYMAP_FORKNUM, NULL, + InitBMRForRel(&bmr, rel, VISIBILITYMAP_FORKNUM, NULL); + buf = ExtendBufferedRelTo(&bmr, EB_CREATE_FORK_IF_NEEDED | EB_CLEAR_SIZE_CACHE, vm_nblocks, diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 01bbece6bfd..6f34ed36ad3 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -868,12 +868,15 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) Buffer _bt_allocbuf(Relation rel, Relation heaprel) { + BufferManagerRelation bmr; Buffer buf; BlockNumber blkno; Page page; Assert(heaprel != NULL); + InitBMRForRel(&bmr, rel, MAIN_FORKNUM, NULL); + /* * First see if the FSM knows of any free pages. 
* @@ -903,7 +906,7 @@ _bt_allocbuf(Relation rel, Relation heaprel) blkno = GetFreeIndexPage(rel); if (blkno == InvalidBlockNumber) break; - buf = ReadBuffer(rel, blkno); + buf = ReadBufferBMR(&bmr, blkno, RBM_NORMAL); if (_bt_conditionallockbuf(rel, buf)) { page = BufferGetPage(buf); @@ -975,7 +978,7 @@ _bt_allocbuf(Relation rel, Relation heaprel) * otherwise would make, as we can't use _bt_lockbuf() without introducing * a race. */ - buf = ExtendBufferedRel(BMR_REL(rel), MAIN_FORKNUM, NULL, EB_LOCK_FIRST); + buf = ExtendBufferedRel(&bmr, EB_LOCK_FIRST); if (!RelationUsesLocalBuffers(rel)) VALGRIND_MAKE_MEM_DEFINED(BufferGetPage(buf), BLCKSZ); diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 3f793125f74..528bd6d3bfa 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -374,8 +374,11 @@ initSpGistState(SpGistState *state, Relation index) Buffer SpGistNewBuffer(Relation index) { + BufferManagerRelation bmr; Buffer buffer; + InitBMRForRel(&bmr, index, MAIN_FORKNUM, NULL); + /* First, try to get a page from FSM */ for (;;) { @@ -391,7 +394,7 @@ SpGistNewBuffer(Relation index) if (SpGistBlockIsFixed(blkno)) continue; - buffer = ReadBuffer(index, blkno); + buffer = ReadBufferBMR(&bmr, blkno, RBM_NORMAL); /* * We have to guard against the possibility that someone else already @@ -414,8 +417,7 @@ SpGistNewBuffer(Relation index) ReleaseBuffer(buffer); } - buffer = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL, - EB_LOCK_FIRST); + buffer = ExtendBufferedRel(&bmr, EB_LOCK_FIRST); return buffer; } diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 5295b85fe07..ca055a85bb1 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -472,6 +472,7 @@ XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode, Buffer recent_buffer) { + 
BufferManagerRelation bmr; BlockNumber lastblock; Buffer buffer; SMgrRelation smgr; @@ -502,11 +503,11 @@ XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum, lastblock = smgrnblocks(smgr, forknum); + InitBMRForSMgr(&bmr, smgr, RELPERSISTENCE_PERMANENT, forknum, NULL); if (blkno < lastblock) { /* page exists in file */ - buffer = ReadBufferWithoutRelcache(rlocator, forknum, blkno, - mode, NULL, true); + buffer = ReadBufferBMR(&bmr, blkno, mode); } else { @@ -521,9 +522,8 @@ XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum, /* OK to extend the file */ /* we do this in recovery only - no rel-extension lock needed */ Assert(InRecovery); - buffer = ExtendBufferedRelTo(BMR_SMGR(smgr, RELPERSISTENCE_PERMANENT), - forknum, - NULL, + + buffer = ExtendBufferedRelTo(&bmr, EB_PERFORMING_RECOVERY | EB_SKIP_EXTENSION_LOCK, blkno + 1, diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 46103561c31..ac125407932 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -362,11 +362,13 @@ fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) Page page; sequence_magic *sm; OffsetNumber offnum; + BufferManagerRelation bmr; + + InitBMRForRel(&bmr, rel, forkNum, NULL); /* Initialize first page of relation with special magic number */ - buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL, - EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); + buf = ExtendBufferedRel(&bmr, EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); Assert(BufferGetBlockNumber(buf) == 0); page = BufferGetPage(buf); diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c index 6b4b97372c7..6ee81e5fee9 100644 --- a/src/backend/storage/aio/read_stream.c +++ b/src/backend/storage/aio/read_stream.c @@ -116,6 +116,8 @@ struct ReadStream int16 distance; bool advice_enabled; + BufferManagerRelation bmr; + /* * Sometimes we need to be able to 'unget' a block number to resolve a * flow control 
problem when I/Os are split. @@ -375,9 +377,7 @@ read_stream_look_ahead(ReadStream *stream, bool suppress_advice) */ ReadStream * read_stream_begin_relation(int flags, - BufferAccessStrategy strategy, - BufferManagerRelation bmr, - ForkNumber forknum, + BufferManagerRelation *bmr, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size) @@ -389,23 +389,16 @@ read_stream_begin_relation(int flags, uint32 max_pinned_buffers; Oid tablespace_id; - /* Make sure our bmr's smgr and persistent are populated. */ - if (bmr.smgr == NULL) - { - bmr.smgr = RelationGetSmgr(bmr.rel); - bmr.relpersistence = bmr.rel->rd_rel->relpersistence; - } - /* * Decide how many I/Os we will allow to run at the same time. That * currently means advice to the kernel to tell it that we will soon read. * This number also affects how far we look ahead for opportunities to * start more I/Os. */ - tablespace_id = bmr.smgr->smgr_rlocator.locator.spcOid; + tablespace_id = bmr->smgr->smgr_rlocator.locator.spcOid; if (!OidIsValid(MyDatabaseId) || - (bmr.rel && IsCatalogRelation(bmr.rel)) || - IsCatalogRelationOid(bmr.smgr->smgr_rlocator.locator.relNumber)) + (bmr->rel && IsCatalogRelation(bmr->rel)) || + IsCatalogRelationOid(bmr->smgr->smgr_rlocator.locator.relNumber)) { /* * Avoid circularity while trying to look up tablespace settings or @@ -432,7 +425,7 @@ read_stream_begin_relation(int flags, PG_INT16_MAX - io_combine_limit - 1); /* Don't allow this backend to pin more than its share of buffers. 
*/ - if (SmgrIsTemp(bmr.smgr)) + if (SmgrIsTemp(bmr->smgr)) LimitAdditionalLocalPins(&max_pinned_buffers); else LimitAdditionalPins(&max_pinned_buffers); @@ -493,12 +486,6 @@ read_stream_begin_relation(int flags, stream->per_buffer_data_size = per_buffer_data_size; stream->max_pinned_buffers = max_pinned_buffers; stream->queue_size = queue_size; - - if (!bmr.smgr) - { - bmr.smgr = RelationGetSmgr(bmr.rel); - bmr.relpersistence = bmr.rel->rd_rel->relpersistence; - } stream->callback = callback; stream->callback_private_data = callback_private_data; @@ -517,11 +504,10 @@ read_stream_begin_relation(int flags, * initialize parts of the ReadBuffersOperation objects and leave them * that way, to avoid wasting CPU cycles writing to them for each read. */ + memcpy(&stream->bmr, bmr, sizeof(BufferManagerRelation)); for (int i = 0; i < max_ios; ++i) { - stream->ios[i].op.bmr = bmr; - stream->ios[i].op.forknum = forknum; - stream->ios[i].op.strategy = strategy; + stream->ios[i].op.bmr = &stream->bmr; } return stream; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 7123cbbaa2a..359981b4293 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -479,20 +479,13 @@ ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref) ) -static Buffer ReadBuffer_common(BufferManagerRelation bmr, - ForkNumber forkNum, BlockNumber blockNum, - ReadBufferMode mode, BufferAccessStrategy strategy); -static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation bmr, - ForkNumber fork, - BufferAccessStrategy strategy, +static BlockNumber ExtendBufferedRelCommon(BufferManagerRelation *bmr, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by); -static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, - ForkNumber fork, - BufferAccessStrategy strategy, +static BlockNumber ExtendBufferedRelShared(BufferManagerRelation *bmr, uint32 flags, uint32 extend_by, BlockNumber 
extend_upto, @@ -788,6 +781,7 @@ Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy) { + BufferManagerRelation bmr; Buffer buf; /* @@ -800,8 +794,8 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); - buf = ReadBuffer_common(BMR_REL(reln), - forkNum, blockNum, mode, strategy); + InitBMRForRel(&bmr, reln, forkNum, strategy); + buf = ReadBufferBMR(&bmr, blockNum, mode); return buf; } @@ -822,27 +816,86 @@ ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool permanent) { + BufferManagerRelation bmr; SMgrRelation smgr = smgropen(rlocator, INVALID_PROC_NUMBER); - return ReadBuffer_common(BMR_SMGR(smgr, permanent ? RELPERSISTENCE_PERMANENT : - RELPERSISTENCE_UNLOGGED), - forkNum, blockNum, - mode, strategy); + InitBMRForSMgr(&bmr, smgr, + permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED, + forkNum, + strategy); + + return ReadBufferBMR(&bmr, blockNum, mode); +} + +void +InitBMRForRel(BufferManagerRelation *bmr, Relation rel, ForkNumber forkNum, BufferAccessStrategy strategy) +{ + bool isLocalBuf; + + bmr->rel = rel; + bmr->smgr = RelationGetSmgr(rel); + bmr->relpersistence = rel->rd_rel->relpersistence; + bmr->forkNum = forkNum; + bmr->strategy = strategy; + + /* + * Look up some further information that's needed in ReadBufferBMR + * upfront. 
+ */ + isLocalBuf = bmr->relpersistence == RELPERSISTENCE_TEMP; + if (isLocalBuf) + { + bmr->io_context = IOCONTEXT_NORMAL; + bmr->io_object = IOOBJECT_TEMP_RELATION; + } + else + { + bmr->io_context = IOContextForStrategy(strategy); + bmr->io_object = IOOBJECT_RELATION; + } + + if (pgstat_should_count_relation(rel)) + bmr->pgstat_info = rel->pgstat_info; + else + bmr->pgstat_info = NULL; +} + +void +InitBMRForSMgr(BufferManagerRelation *bmr, struct SMgrRelationData *srel, char relpersistence, ForkNumber forkNum, BufferAccessStrategy strategy) +{ + bool isLocalBuf; + + bmr->rel = NULL; + bmr->smgr = srel; + bmr->relpersistence = relpersistence; + bmr->forkNum = forkNum; + bmr->strategy = strategy; + + isLocalBuf = relpersistence == RELPERSISTENCE_TEMP; + if (isLocalBuf) + { + bmr->io_context = IOCONTEXT_NORMAL; + bmr->io_object = IOOBJECT_TEMP_RELATION; + } + else + { + bmr->io_context = IOContextForStrategy(strategy); + bmr->io_object = IOOBJECT_RELATION; + } + + bmr->pgstat_info = NULL; } /* * Convenience wrapper around ExtendBufferedRelBy() extending by one block. */ Buffer -ExtendBufferedRel(BufferManagerRelation bmr, - ForkNumber forkNum, - BufferAccessStrategy strategy, - uint32 flags) +ExtendBufferedRel(BufferManagerRelation *bmr, uint32 flags) { Buffer buf; uint32 extend_by = 1; - ExtendBufferedRelBy(bmr, forkNum, strategy, flags, extend_by, + ExtendBufferedRelBy(bmr, flags, extend_by, &buf, &extend_by); return buf; @@ -866,25 +919,16 @@ ExtendBufferedRel(BufferManagerRelation bmr, * be empty. 
*/ BlockNumber -ExtendBufferedRelBy(BufferManagerRelation bmr, - ForkNumber fork, - BufferAccessStrategy strategy, +ExtendBufferedRelBy(BufferManagerRelation *bmr, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by) { - Assert((bmr.rel != NULL) != (bmr.smgr != NULL)); - Assert(bmr.smgr == NULL || bmr.relpersistence != 0); + Assert(bmr->smgr != NULL); Assert(extend_by > 0); - if (bmr.smgr == NULL) - { - bmr.smgr = RelationGetSmgr(bmr.rel); - bmr.relpersistence = bmr.rel->rd_rel->relpersistence; - } - - return ExtendBufferedRelCommon(bmr, fork, strategy, flags, + return ExtendBufferedRelCommon(bmr, flags, extend_by, InvalidBlockNumber, buffers, extended_by); } @@ -898,9 +942,7 @@ ExtendBufferedRelBy(BufferManagerRelation bmr, * crash recovery). */ Buffer -ExtendBufferedRelTo(BufferManagerRelation bmr, - ForkNumber fork, - BufferAccessStrategy strategy, +ExtendBufferedRelTo(BufferManagerRelation *bmr, uint32 flags, BlockNumber extend_to, ReadBufferMode mode) @@ -910,33 +952,27 @@ ExtendBufferedRelTo(BufferManagerRelation bmr, Buffer buffer = InvalidBuffer; Buffer buffers[64]; - Assert((bmr.rel != NULL) != (bmr.smgr != NULL)); - Assert(bmr.smgr == NULL || bmr.relpersistence != 0); + Assert(bmr->smgr != NULL); Assert(extend_to != InvalidBlockNumber && extend_to > 0); - if (bmr.smgr == NULL) - { - bmr.smgr = RelationGetSmgr(bmr.rel); - bmr.relpersistence = bmr.rel->rd_rel->relpersistence; - } - /* * If desired, create the file if it doesn't exist. If * smgr_cached_nblocks[fork] is positive then it must exist, no need for * an smgrexists call. 
*/ if ((flags & EB_CREATE_FORK_IF_NEEDED) && - (bmr.smgr->smgr_cached_nblocks[fork] == 0 || - bmr.smgr->smgr_cached_nblocks[fork] == InvalidBlockNumber) && - !smgrexists(bmr.smgr, fork)) + (bmr->smgr->smgr_cached_nblocks[bmr->forkNum] == 0 || + bmr->smgr->smgr_cached_nblocks[bmr->forkNum] == InvalidBlockNumber) && + !smgrexists(bmr->smgr, bmr->forkNum)) { - LockRelationForExtension(bmr.rel, ExclusiveLock); + Assert(bmr->rel); + LockRelationForExtension(bmr->rel, ExclusiveLock); /* recheck, fork might have been created concurrently */ - if (!smgrexists(bmr.smgr, fork)) - smgrcreate(bmr.smgr, fork, flags & EB_PERFORMING_RECOVERY); + if (!smgrexists(bmr->smgr, bmr->forkNum)) + smgrcreate(bmr->smgr, bmr->forkNum, flags & EB_PERFORMING_RECOVERY); - UnlockRelationForExtension(bmr.rel, ExclusiveLock); + UnlockRelationForExtension(bmr->rel, ExclusiveLock); } /* @@ -944,13 +980,13 @@ ExtendBufferedRelTo(BufferManagerRelation bmr, * kernel. */ if (flags & EB_CLEAR_SIZE_CACHE) - bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber; + bmr->smgr->smgr_cached_nblocks[bmr->forkNum] = InvalidBlockNumber; /* * Estimate how many pages we'll need to extend by. This avoids acquiring * unnecessarily many victim buffers. 
*/ - current_size = smgrnblocks(bmr.smgr, fork); + current_size = smgrnblocks(bmr->smgr, bmr->forkNum); /* * Since no-one else can be looking at the page contents yet, there is no @@ -969,7 +1005,7 @@ ExtendBufferedRelTo(BufferManagerRelation bmr, if ((uint64) current_size + num_pages > extend_to) num_pages = extend_to - current_size; - first_block = ExtendBufferedRelCommon(bmr, fork, strategy, flags, + first_block = ExtendBufferedRelCommon(bmr, flags, num_pages, extend_to, buffers, &extended_by); @@ -994,7 +1030,7 @@ ExtendBufferedRelTo(BufferManagerRelation bmr, if (buffer == InvalidBuffer) { Assert(extended_by == 0); - buffer = ReadBuffer_common(bmr, fork, extend_to - 1, mode, strategy); + buffer = ReadBufferBMR(bmr, extend_to - 1, mode); } return buffer; @@ -1047,75 +1083,59 @@ ZeroBuffer(Buffer buffer, ReadBufferMode mode) * zero it. */ static inline Buffer -PinBufferForBlock(BufferManagerRelation bmr, - ForkNumber forkNum, +PinBufferForBlock(BufferManagerRelation *const bmr, BlockNumber blockNum, - BufferAccessStrategy strategy, bool *foundPtr) { BufferDesc *bufHdr; bool isLocalBuf; - IOContext io_context; - IOObject io_object; Assert(blockNum != P_NEW); + Assert(bmr->smgr); - Assert(bmr.smgr); - - isLocalBuf = bmr.relpersistence == RELPERSISTENCE_TEMP; - if (isLocalBuf) - { - io_context = IOCONTEXT_NORMAL; - io_object = IOOBJECT_TEMP_RELATION; - } - else - { - io_context = IOContextForStrategy(strategy); - io_object = IOOBJECT_RELATION; - } - - TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum, - bmr.smgr->smgr_rlocator.locator.spcOid, - bmr.smgr->smgr_rlocator.locator.dbOid, - bmr.smgr->smgr_rlocator.locator.relNumber, - bmr.smgr->smgr_rlocator.backend); + TRACE_POSTGRESQL_BUFFER_READ_START(bmr->forkNum, blockNum, + bmr->smgr->smgr_rlocator.locator.spcOid, + bmr->smgr->smgr_rlocator.locator.dbOid, + bmr->smgr->smgr_rlocator.locator.relNumber, + bmr->smgr->smgr_rlocator.backend); + isLocalBuf = bmr->relpersistence == RELPERSISTENCE_TEMP; if (isLocalBuf) { 
- bufHdr = LocalBufferAlloc(bmr.smgr, forkNum, blockNum, foundPtr); + bufHdr = LocalBufferAlloc(bmr->smgr, bmr->forkNum, blockNum, foundPtr); if (*foundPtr) pgBufferUsage.local_blks_hit++; } else { - bufHdr = BufferAlloc(bmr.smgr, bmr.relpersistence, forkNum, blockNum, - strategy, foundPtr, io_context); + bufHdr = BufferAlloc(bmr->smgr, bmr->relpersistence, bmr->forkNum, blockNum, + bmr->strategy, foundPtr, bmr->io_context); if (*foundPtr) pgBufferUsage.shared_blks_hit++; } - if (bmr.rel) + if (bmr->pgstat_info) { /* * While pgBufferUsage's "read" counter isn't bumped unless we reach * WaitReadBuffers() (so, not for hits, and not for buffers that are * zeroed instead), the per-relation stats always count them. */ - pgstat_count_buffer_read(bmr.rel); + bmr->pgstat_info->counts.blocks_fetched++; if (*foundPtr) - pgstat_count_buffer_hit(bmr.rel); + bmr->pgstat_info->counts.blocks_hit++; } if (*foundPtr) { VacuumPageHit++; - pgstat_count_io_op(io_object, io_context, IOOP_HIT); + pgstat_count_io_op(bmr->io_object, bmr->io_context, IOOP_HIT); if (VacuumCostActive) VacuumCostBalance += VacuumCostPageHit; - TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum, - bmr.smgr->smgr_rlocator.locator.spcOid, - bmr.smgr->smgr_rlocator.locator.dbOid, - bmr.smgr->smgr_rlocator.locator.relNumber, - bmr.smgr->smgr_rlocator.backend, + TRACE_POSTGRESQL_BUFFER_READ_DONE(bmr->forkNum, blockNum, + bmr->smgr->smgr_rlocator.locator.spcOid, + bmr->smgr->smgr_rlocator.locator.dbOid, + bmr->smgr->smgr_rlocator.locator.relNumber, + bmr->smgr->smgr_rlocator.backend, true); } @@ -1125,14 +1145,12 @@ PinBufferForBlock(BufferManagerRelation bmr, /* * ReadBuffer_common -- common logic for all ReadBuffer variants */ -static inline Buffer -ReadBuffer_common(BufferManagerRelation bmr, ForkNumber forkNum, - BlockNumber blockNum, ReadBufferMode mode, - BufferAccessStrategy strategy) +Buffer +ReadBufferBMR(BufferManagerRelation *bmr, + BlockNumber blockNum, ReadBufferMode mode) { - ReadBuffersOperation 
operation; Buffer buffer; - int flags; + bool found; /* * Backward compatibility path, most code should use ExtendBufferedRel() @@ -1151,37 +1169,32 @@ ReadBuffer_common(BufferManagerRelation bmr, ForkNumber forkNum, if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) flags |= EB_LOCK_FIRST; - return ExtendBufferedRel(bmr, forkNum, strategy, flags); + return ExtendBufferedRel(bmr, flags); } + buffer = PinBufferForBlock(bmr, blockNum, &found); if (unlikely(mode == RBM_ZERO_AND_CLEANUP_LOCK || mode == RBM_ZERO_AND_LOCK)) { - bool found; - - if (bmr.smgr == NULL) - { - bmr.smgr = RelationGetSmgr(bmr.rel); - bmr.relpersistence = bmr.rel->rd_rel->relpersistence; - } - - buffer = PinBufferForBlock(bmr, forkNum, blockNum, strategy, &found); ZeroBuffer(buffer, mode); - return buffer; } + else if (!found) + { + ReadBuffersOperation operation; + + if (mode == RBM_ZERO_ON_ERROR) + operation.flags = READ_BUFFERS_ZERO_ON_ERROR; + else + operation.flags = 0; + + operation.bmr = bmr; + operation.buffers = &buffer; + operation.blocknum = blockNum; + operation.nblocks = 1; + operation.io_buffers_len = 1; - if (mode == RBM_ZERO_ON_ERROR) - flags = READ_BUFFERS_ZERO_ON_ERROR; - else - flags = 0; - operation.bmr = bmr; - operation.forknum = forkNum; - operation.strategy = strategy; - if (StartReadBuffer(&operation, - &buffer, - blockNum, - flags)) WaitReadBuffers(&operation); + } return buffer; } @@ -1223,33 +1236,26 @@ StartReadBuffer(ReadBuffersOperation *operation, * and the real I/O happens in WaitReadBuffers(). In future work, true I/O * could be initiated here. 
*/ -inline bool +bool StartReadBuffers(ReadBuffersOperation *operation, Buffer *buffers, BlockNumber blockNum, int *nblocks, int flags) { + BufferManagerRelation *bmr = operation->bmr; int actual_nblocks = *nblocks; int io_buffers_len = 0; Assert(*nblocks > 0); Assert(*nblocks <= MAX_IO_COMBINE_LIMIT); - if (!operation->bmr.smgr) - { - operation->bmr.smgr = RelationGetSmgr(operation->bmr.rel); - operation->bmr.relpersistence = operation->bmr.rel->rd_rel->relpersistence; - } - for (int i = 0; i < actual_nblocks; ++i) { bool found; - buffers[i] = PinBufferForBlock(operation->bmr, - operation->forknum, + buffers[i] = PinBufferForBlock(bmr, blockNum + i, - operation->strategy, &found); if (found) @@ -1294,8 +1300,8 @@ StartReadBuffers(ReadBuffersOperation *operation, * true asynchronous version we might choose to process only one * real I/O at a time in that case. */ - smgrprefetch(operation->bmr.smgr, - operation->forknum, + smgrprefetch(bmr->smgr, + bmr->forkNum, blockNum, operation->io_buffers_len); } @@ -1328,10 +1334,7 @@ WaitReadBuffers(ReadBuffersOperation *operation) Buffer *buffers; int nblocks; BlockNumber blocknum; - ForkNumber forknum; bool isLocalBuf; - IOContext io_context; - IOObject io_object; /* * Currently operations are only allowed to include a read of some range, @@ -1348,19 +1351,6 @@ WaitReadBuffers(ReadBuffersOperation *operation) buffers = &operation->buffers[0]; blocknum = operation->blocknum; - forknum = operation->forknum; - - isLocalBuf = operation->bmr.relpersistence == RELPERSISTENCE_TEMP; - if (isLocalBuf) - { - io_context = IOCONTEXT_NORMAL; - io_object = IOOBJECT_TEMP_RELATION; - } - else - { - io_context = IOContextForStrategy(operation->strategy); - io_object = IOOBJECT_RELATION; - } /* * We count all these blocks as read by this backend. 
This is traditional @@ -1370,6 +1360,7 @@ WaitReadBuffers(ReadBuffersOperation *operation) * count this as a "hit", and we don't have a separate counter for "miss, * but another backend completed the read". */ + isLocalBuf = operation->bmr->relpersistence == RELPERSISTENCE_TEMP; if (isLocalBuf) pgBufferUsage.local_blks_read += nblocks; else @@ -1395,11 +1386,11 @@ WaitReadBuffers(ReadBuffersOperation *operation) * Report this as a 'hit' for this backend, even though it must * have started out as a miss in PinBufferForBlock(). */ - TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + i, - operation->bmr.smgr->smgr_rlocator.locator.spcOid, - operation->bmr.smgr->smgr_rlocator.locator.dbOid, - operation->bmr.smgr->smgr_rlocator.locator.relNumber, - operation->bmr.smgr->smgr_rlocator.backend, + TRACE_POSTGRESQL_BUFFER_READ_DONE(operation->bmr->forkNum, blocknum + i, + operation->bmr->smgr->smgr_rlocator.locator.spcOid, + operation->bmr->smgr->smgr_rlocator.locator.dbOid, + operation->bmr->smgr->smgr_rlocator.locator.relNumber, + operation->bmr->smgr->smgr_rlocator.backend, true); continue; } @@ -1429,9 +1420,9 @@ WaitReadBuffers(ReadBuffersOperation *operation) } io_start = pgstat_prepare_io_time(track_io_timing); - smgrreadv(operation->bmr.smgr, forknum, io_first_block, io_pages, io_buffers_len); - pgstat_count_io_op_time(io_object, io_context, IOOP_READ, io_start, - io_buffers_len); + smgrreadv(operation->bmr->smgr, operation->bmr->forkNum, io_first_block, io_pages, io_buffers_len); + pgstat_count_io_op_time(operation->bmr->io_object, operation->bmr->io_context, + IOOP_READ, io_start, io_buffers_len); /* Verify each block we read, and terminate the I/O. 
*/ for (int j = 0; j < io_buffers_len; ++j) @@ -1460,7 +1451,7 @@ WaitReadBuffers(ReadBuffersOperation *operation) (errcode(ERRCODE_DATA_CORRUPTED), errmsg("invalid page in block %u of relation %s; zeroing out page", io_first_block + j, - relpath(operation->bmr.smgr->smgr_rlocator, forknum)))); + relpath(operation->bmr->smgr->smgr_rlocator, operation->bmr->forkNum)))); memset(bufBlock, 0, BLCKSZ); } else @@ -1468,7 +1459,7 @@ WaitReadBuffers(ReadBuffersOperation *operation) (errcode(ERRCODE_DATA_CORRUPTED), errmsg("invalid page in block %u of relation %s", io_first_block + j, - relpath(operation->bmr.smgr->smgr_rlocator, forknum)))); + relpath(operation->bmr->smgr->smgr_rlocator, operation->bmr->forkNum)))); } /* Terminate I/O and set BM_VALID. */ @@ -1486,11 +1477,11 @@ WaitReadBuffers(ReadBuffersOperation *operation) } /* Report I/Os as completing individually. */ - TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, io_first_block + j, - operation->bmr.smgr->smgr_rlocator.locator.spcOid, - operation->bmr.smgr->smgr_rlocator.locator.dbOid, - operation->bmr.smgr->smgr_rlocator.locator.relNumber, - operation->bmr.smgr->smgr_rlocator.backend, + TRACE_POSTGRESQL_BUFFER_READ_DONE(operation->bmr->forkNum, io_first_block + j, + operation->bmr->smgr->smgr_rlocator.locator.spcOid, + operation->bmr->smgr->smgr_rlocator.locator.dbOid, + operation->bmr->smgr->smgr_rlocator.locator.relNumber, + operation->bmr->smgr->smgr_rlocator.backend, false); } @@ -2061,9 +2052,7 @@ LimitAdditionalPins(uint32 *additional_pins) * avoid duplicating the tracing and relpersistence related logic. 
*/ static BlockNumber -ExtendBufferedRelCommon(BufferManagerRelation bmr, - ForkNumber fork, - BufferAccessStrategy strategy, +ExtendBufferedRelCommon(BufferManagerRelation *bmr, uint32 flags, uint32 extend_by, BlockNumber extend_upto, @@ -2072,28 +2061,28 @@ ExtendBufferedRelCommon(BufferManagerRelation bmr, { BlockNumber first_block; - TRACE_POSTGRESQL_BUFFER_EXTEND_START(fork, - bmr.smgr->smgr_rlocator.locator.spcOid, - bmr.smgr->smgr_rlocator.locator.dbOid, - bmr.smgr->smgr_rlocator.locator.relNumber, - bmr.smgr->smgr_rlocator.backend, + TRACE_POSTGRESQL_BUFFER_EXTEND_START(bmr->forkNum, + bmr->smgr->smgr_rlocator.locator.spcOid, + bmr->smgr->smgr_rlocator.locator.dbOid, + bmr->smgr->smgr_rlocator.locator.relNumber, + bmr->smgr->smgr_rlocator.backend, extend_by); - if (bmr.relpersistence == RELPERSISTENCE_TEMP) - first_block = ExtendBufferedRelLocal(bmr, fork, flags, + if (bmr->relpersistence == RELPERSISTENCE_TEMP) + first_block = ExtendBufferedRelLocal(bmr, flags, extend_by, extend_upto, buffers, &extend_by); else - first_block = ExtendBufferedRelShared(bmr, fork, strategy, flags, + first_block = ExtendBufferedRelShared(bmr, flags, extend_by, extend_upto, buffers, &extend_by); *extended_by = extend_by; - TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(fork, - bmr.smgr->smgr_rlocator.locator.spcOid, - bmr.smgr->smgr_rlocator.locator.dbOid, - bmr.smgr->smgr_rlocator.locator.relNumber, - bmr.smgr->smgr_rlocator.backend, + TRACE_POSTGRESQL_BUFFER_EXTEND_DONE(bmr->forkNum, + bmr->smgr->smgr_rlocator.locator.spcOid, + bmr->smgr->smgr_rlocator.locator.dbOid, + bmr->smgr->smgr_rlocator.locator.relNumber, + bmr->smgr->smgr_rlocator.backend, *extended_by, first_block); @@ -2105,9 +2094,7 @@ ExtendBufferedRelCommon(BufferManagerRelation bmr, * shared buffers. 
*/ static BlockNumber -ExtendBufferedRelShared(BufferManagerRelation bmr, - ForkNumber fork, - BufferAccessStrategy strategy, +ExtendBufferedRelShared(BufferManagerRelation *bmr, uint32 flags, uint32 extend_by, BlockNumber extend_upto, @@ -2115,7 +2102,6 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, uint32 *extended_by) { BlockNumber first_block; - IOContext io_context = IOContextForStrategy(strategy); instr_time io_start; LimitAdditionalPins(&extend_by); @@ -2134,7 +2120,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, { Block buf_block; - buffers[i] = GetVictimBuffer(strategy, io_context); + buffers[i] = GetVictimBuffer(bmr->strategy, bmr->io_context); buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1)); /* new buffers are zero-filled */ @@ -2152,16 +2138,16 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, * we get the lock. */ if (!(flags & EB_SKIP_EXTENSION_LOCK)) - LockRelationForExtension(bmr.rel, ExclusiveLock); + LockRelationForExtension(bmr->rel, ExclusiveLock); /* * If requested, invalidate size cache, so that smgrnblocks asks the * kernel. 
*/ if (flags & EB_CLEAR_SIZE_CACHE) - bmr.smgr->smgr_cached_nblocks[fork] = InvalidBlockNumber; + bmr->smgr->smgr_cached_nblocks[bmr->forkNum] = InvalidBlockNumber; - first_block = smgrnblocks(bmr.smgr, fork); + first_block = smgrnblocks(bmr->smgr, bmr->forkNum); /* * Now that we have the accurate relation size, check if the caller wants @@ -2193,7 +2179,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, if (extend_by == 0) { if (!(flags & EB_SKIP_EXTENSION_LOCK)) - UnlockRelationForExtension(bmr.rel, ExclusiveLock); + UnlockRelationForExtension(bmr->rel, ExclusiveLock); *extended_by = extend_by; return first_block; } @@ -2204,7 +2190,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("cannot extend relation %s beyond %u blocks", - relpath(bmr.smgr->smgr_rlocator, fork), + relpath(bmr->smgr->smgr_rlocator, bmr->forkNum), MaxBlockNumber))); /* @@ -2226,7 +2212,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, ResourceOwnerEnlarge(CurrentResourceOwner); ReservePrivateRefCountEntry(); - InitBufferTag(&tag, &bmr.smgr->smgr_rlocator.locator, fork, first_block + i); + InitBufferTag(&tag, &bmr->smgr->smgr_rlocator.locator, bmr->forkNum, first_block + i); hash = BufTableHashCode(&tag); partition_lock = BufMappingPartitionLock(hash); @@ -2258,7 +2244,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, * Pin the existing buffer before releasing the partition lock, * preventing it from being evicted. 
*/ - valid = PinBuffer(existing_hdr, strategy); + valid = PinBuffer(existing_hdr, bmr->strategy); LWLockRelease(partition_lock); @@ -2275,7 +2261,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, if (valid && !PageIsNew((Page) buf_block)) ereport(ERROR, (errmsg("unexpected data beyond EOF in block %u of relation %s", - existing_hdr->tag.blockNum, relpath(bmr.smgr->smgr_rlocator, fork)), + existing_hdr->tag.blockNum, relpath(bmr->smgr->smgr_rlocator, bmr->forkNum)), errhint("This has been seen to occur with buggy kernels; consider updating your system."))); /* @@ -2309,7 +2295,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, victim_buf_hdr->tag = tag; buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE; - if (bmr.relpersistence == RELPERSISTENCE_PERMANENT || fork == INIT_FORKNUM) + if (bmr->relpersistence == RELPERSISTENCE_PERMANENT || bmr->forkNum == INIT_FORKNUM) buf_state |= BM_PERMANENT; UnlockBufHdr(victim_buf_hdr, buf_state); @@ -2333,7 +2319,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, * * We don't need to set checksum for all-zero pages. */ - smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false); + smgrzeroextend(bmr->smgr, bmr->forkNum, first_block, extend_by, false); /* * Release the file-extension lock; it's now OK for someone else to extend @@ -2343,9 +2329,9 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, * take noticeable time. 
*/ if (!(flags & EB_SKIP_EXTENSION_LOCK)) - UnlockRelationForExtension(bmr.rel, ExclusiveLock); + UnlockRelationForExtension(bmr->rel, ExclusiveLock); - pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_EXTEND, + pgstat_count_io_op_time(IOOBJECT_RELATION, bmr->io_context, IOOP_EXTEND, io_start, extend_by); /* Set BM_VALID, terminate IO, and wake up any waiters */ diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 985a2c7049c..414109c69fd 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -310,8 +310,7 @@ LimitAdditionalLocalPins(uint32 *additional_pins) * temporary buffers. */ BlockNumber -ExtendBufferedRelLocal(BufferManagerRelation bmr, - ForkNumber fork, +ExtendBufferedRelLocal(BufferManagerRelation *bmr, uint32 flags, uint32 extend_by, BlockNumber extend_upto, @@ -340,7 +339,7 @@ ExtendBufferedRelLocal(BufferManagerRelation bmr, MemSet((char *) buf_block, 0, BLCKSZ); } - first_block = smgrnblocks(bmr.smgr, fork); + first_block = smgrnblocks(bmr->smgr, bmr->forkNum); if (extend_upto != InvalidBlockNumber) { @@ -359,7 +358,7 @@ ExtendBufferedRelLocal(BufferManagerRelation bmr, ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("cannot extend relation %s beyond %u blocks", - relpath(bmr.smgr->smgr_rlocator, fork), + relpath(bmr->smgr->smgr_rlocator, bmr->forkNum), MaxBlockNumber))); for (uint32 i = 0; i < extend_by; i++) @@ -376,7 +375,7 @@ ExtendBufferedRelLocal(BufferManagerRelation bmr, /* in case we need to pin an existing buffer below */ ResourceOwnerEnlarge(CurrentResourceOwner); - InitBufferTag(&tag, &bmr.smgr->smgr_rlocator.locator, fork, first_block + i); + InitBufferTag(&tag, &bmr->smgr->smgr_rlocator.locator, bmr->forkNum, first_block + i); hresult = (LocalBufferLookupEnt *) hash_search(LocalBufHash, (void *) &tag, HASH_ENTER, &found); @@ -416,7 +415,7 @@ ExtendBufferedRelLocal(BufferManagerRelation bmr, io_start = 
pgstat_prepare_io_time(track_io_timing); /* actually extend relation */ - smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false); + smgrzeroextend(bmr->smgr, bmr->forkNum, first_block, extend_by, false); pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_EXTEND, io_start, extend_by); diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index bcdb1821938..5dd584217ac 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -605,7 +605,11 @@ fsm_readbuf(Relation rel, FSMAddress addr, bool extend) static Buffer fsm_extend(Relation rel, BlockNumber fsm_nblocks) { - return ExtendBufferedRelTo(BMR_REL(rel), FSM_FORKNUM, NULL, + BufferManagerRelation bmr; + + InitBMRForRel(&bmr, rel, FSM_FORKNUM, NULL); + + return ExtendBufferedRelTo(&bmr, EB_CREATE_FORK_IF_NEEDED | EB_CLEAR_SIZE_CACHE, fsm_nblocks, diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index f190e6e5e46..88c91c4867c 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -451,8 +451,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, BlockNumber blockNum); extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr); -extern BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, - ForkNumber fork, +extern BlockNumber ExtendBufferedRelLocal(BufferManagerRelation *bmr, uint32 flags, uint32 extend_by, BlockNumber extend_upto, diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 241f68c45e1..1c7748019db 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -14,6 +14,7 @@ #ifndef BUFMGR_H #define BUFMGR_H +#include "pgstat.h" #include "port/pg_iovec.h" #include "storage/block.h" #include "storage/buf.h" @@ -92,22 +93,6 @@ typedef enum ExtendBufferedFlags EB_LOCK_TARGET = (1 << 5), } 
ExtendBufferedFlags; -/* - * Some functions identify relations either by relation or smgr + - * relpersistence. Used via the BMR_REL()/BMR_SMGR() macros below. This - * allows us to use the same function for both recovery and normal operation. - */ -typedef struct BufferManagerRelation -{ - Relation rel; - struct SMgrRelationData *smgr; - char relpersistence; -} BufferManagerRelation; - -#define BMR_REL(p_rel) ((BufferManagerRelation){.rel = p_rel}) -#define BMR_SMGR(p_smgr, p_relpersistence) ((BufferManagerRelation){.smgr = p_smgr, .relpersistence = p_relpersistence}) - - /* forward declared, to avoid having to expose buf_internals.h here */ struct WritebackContext; @@ -163,9 +148,36 @@ extern PGDLLIMPORT int32 *LocalRefCount; #define BUFFER_LOCK_SHARE 1 #define BUFFER_LOCK_EXCLUSIVE 2 +/* + * BufferManagerRelation encapsulates what the buffer manager functions like + * ReadBuffer() need to know about a relation. + * + * Initialize with InitBMRForRel() if you have access to the relcache, or + * InitBMRForSMgr() for raw access to the underlying smgr. This allows us to + * use the same functions for both recovery and normal operation. 
+ */ +typedef struct BufferManagerRelation +{ + Relation rel; + struct SMgrRelationData *smgr; + char relpersistence; + ForkNumber forkNum; + /* admittedly 'strategy' isn't really a property of the relation */ + BufferAccessStrategy strategy; + + /* private data initialized by InitBMR functions */ + struct PgStat_TableStatus *pgstat_info; /* statistics collection area */ + IOContext io_context; + IOObject io_object; +} BufferManagerRelation; + +extern void InitBMRForRel(BufferManagerRelation *bmr, Relation rel, ForkNumber forkNum, BufferAccessStrategy strategy); +extern void InitBMRForSMgr(BufferManagerRelation *bmr, struct SMgrRelationData *srel, char relpersistence, ForkNumber forkNum, BufferAccessStrategy strategy); + /* * prototypes for functions in bufmgr.c */ + extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln, ForkNumber forkNum, BlockNumber blockNum); @@ -173,6 +185,13 @@ extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum); extern bool ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer); + +extern Buffer ReadBufferBMR(BufferManagerRelation *bmr, BlockNumber blockNum, ReadBufferMode mode); + +/* + * compatibility and convenience wrappers which call InitBMRForRel or + * InitBMRForSMgr, followed by ReadBufferBMR. + */ extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, @@ -188,9 +207,7 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator, struct ReadBuffersOperation { /* The following members should be set by the caller. */ - BufferManagerRelation bmr; - ForkNumber forknum; - BufferAccessStrategy strategy; + BufferManagerRelation *bmr; /* The following private members should not be accessed directly.
*/ Buffer *buffers; @@ -223,20 +240,14 @@ extern void CheckBufferIsPinnedOnce(Buffer buffer); extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum); -extern Buffer ExtendBufferedRel(BufferManagerRelation bmr, - ForkNumber forkNum, - BufferAccessStrategy strategy, +extern Buffer ExtendBufferedRel(BufferManagerRelation *bmr, uint32 flags); -extern BlockNumber ExtendBufferedRelBy(BufferManagerRelation bmr, - ForkNumber fork, - BufferAccessStrategy strategy, +extern BlockNumber ExtendBufferedRelBy(BufferManagerRelation *bmr, uint32 flags, uint32 extend_by, Buffer *buffers, uint32 *extended_by); -extern Buffer ExtendBufferedRelTo(BufferManagerRelation bmr, - ForkNumber fork, - BufferAccessStrategy strategy, +extern Buffer ExtendBufferedRelTo(BufferManagerRelation *bmr, uint32 flags, BlockNumber extend_to, ReadBufferMode mode); diff --git a/src/include/storage/read_stream.h b/src/include/storage/read_stream.h index f5dbc087b0b..db962699c15 100644 --- a/src/include/storage/read_stream.h +++ b/src/include/storage/read_stream.h @@ -50,9 +50,7 @@ typedef BlockNumber (*ReadStreamBlockNumberCB) (ReadStream *stream, void *per_buffer_data); extern ReadStream *read_stream_begin_relation(int flags, - BufferAccessStrategy strategy, - BufferManagerRelation bmr, - ForkNumber forknum, + BufferManagerRelation *bmr, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size); -- 2.39.2