From b39b6d0081a417375c19d7966bb2d94ced5e8a03 Mon Sep 17 00:00:00 2001 From: dilip kumar Date: Thu, 7 Jul 2022 10:15:23 +0530 Subject: [PATCH v7 1/4] Preliminary refactoring for supporting larger relfilenumber Currently, relfilenumber is of type Oid and can wrap around. As part of a larger patch set, we are making it 64 bits wide to avoid wraparound, which will also make a couple of other things simpler, as explained in the subsequent patches. This patch is only a preliminary refactoring step toward that goal: in BufferTag, instead of embedding a RelFileLocator, we store the tablespace Oid, database Oid, and relfilenumber directly, so that once relNumber in RelFileLocator is changed to 64 bits, the buffer tag's alignment padding will not change. --- contrib/pg_buffercache/pg_buffercache_pages.c | 6 +- contrib/pg_prewarm/autoprewarm.c | 7 +- src/backend/storage/buffer/bufmgr.c | 111 ++++++++++++++++++-------- src/backend/storage/buffer/localbuf.c | 22 +++-- src/include/storage/buf_internals.h | 41 ++++++++-- 5 files changed, 135 insertions(+), 52 deletions(-) diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c index 131bd62..f5eb197 100644 --- a/contrib/pg_buffercache/pg_buffercache_pages.c +++ b/contrib/pg_buffercache/pg_buffercache_pages.c @@ -153,9 +153,9 @@ pg_buffercache_pages(PG_FUNCTION_ARGS) buf_state = LockBufHdr(bufHdr); fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr); - fctx->record[i].relfilenumber = bufHdr->tag.rlocator.relNumber; - fctx->record[i].reltablespace = bufHdr->tag.rlocator.spcOid; - fctx->record[i].reldatabase = bufHdr->tag.rlocator.dbOid; + fctx->record[i].relfilenumber = BufTagGetRelNumber(bufHdr->tag); + fctx->record[i].reltablespace = bufHdr->tag.spcOid; + fctx->record[i].reldatabase = bufHdr->tag.dbOid; fctx->record[i].forknum = bufHdr->tag.forkNum; fctx->record[i].blocknum = bufHdr->tag.blockNum; fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state); diff 
--git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index 13eee4a..cc67aa6 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -631,9 +631,10 @@ apw_dump_now(bool is_bgworker, bool dump_unlogged) if (buf_state & BM_TAG_VALID && ((buf_state & BM_PERMANENT) || dump_unlogged)) { - block_info_array[num_blocks].database = bufHdr->tag.rlocator.dbOid; - block_info_array[num_blocks].tablespace = bufHdr->tag.rlocator.spcOid; - block_info_array[num_blocks].filenumber = bufHdr->tag.rlocator.relNumber; + block_info_array[num_blocks].database = bufHdr->tag.dbOid; + block_info_array[num_blocks].tablespace = bufHdr->tag.spcOid; + block_info_array[num_blocks].filenumber = + BufTagGetRelNumber(bufHdr->tag); block_info_array[num_blocks].forknum = bufHdr->tag.forkNum; block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum; ++num_blocks; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index e4de4b3..0086716 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1647,7 +1647,7 @@ ReleaseAndReadBuffer(Buffer buffer, { bufHdr = GetLocalBufferDescriptor(-buffer - 1); if (bufHdr->tag.blockNum == blockNum && - RelFileLocatorEquals(bufHdr->tag.rlocator, relation->rd_locator) && + BufTagMatchesRelFileLocator(bufHdr->tag, relation->rd_locator) && bufHdr->tag.forkNum == forkNum) return buffer; ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer); @@ -1658,7 +1658,7 @@ ReleaseAndReadBuffer(Buffer buffer, bufHdr = GetBufferDescriptor(buffer - 1); /* we have pin, so it's ok to examine tag without spinlock */ if (bufHdr->tag.blockNum == blockNum && - RelFileLocatorEquals(bufHdr->tag.rlocator, relation->rd_locator) && + BufTagMatchesRelFileLocator(bufHdr->tag, relation->rd_locator) && bufHdr->tag.forkNum == forkNum) return buffer; UnpinBuffer(bufHdr, true); @@ -2000,8 +2000,8 @@ BufferSync(int flags) item = &CkptBufferIds[num_to_scan++]; item->buf_id = 
buf_id; - item->tsId = bufHdr->tag.rlocator.spcOid; - item->relNumber = bufHdr->tag.rlocator.relNumber; + item->tsId = bufHdr->tag.spcOid; + item->relNumber = BufTagGetRelNumber(bufHdr->tag); item->forkNum = bufHdr->tag.forkNum; item->blockNum = bufHdr->tag.blockNum; } @@ -2692,6 +2692,7 @@ PrintBufferLeakWarning(Buffer buffer) char *path; BackendId backend; uint32 buf_state; + RelFileLocator rlocator; Assert(BufferIsValid(buffer)); if (BufferIsLocal(buffer)) @@ -2707,8 +2708,10 @@ PrintBufferLeakWarning(Buffer buffer) backend = InvalidBackendId; } + BufTagGetRelFileLocator(buf->tag, rlocator); + /* theoretically we should lock the bufhdr here */ - path = relpathbackend(buf->tag.rlocator, backend, buf->tag.forkNum); + path = relpathbackend(rlocator, backend, buf->tag.forkNum); buf_state = pg_atomic_read_u32(&buf->state); elog(WARNING, "buffer refcount leak: [%03d] " @@ -2787,7 +2790,7 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, bufHdr = GetBufferDescriptor(buffer - 1); /* pinned, so OK to read tag without spinlock */ - *rlocator = bufHdr->tag.rlocator; + BufTagGetRelFileLocator(bufHdr->tag, *rlocator); *forknum = bufHdr->tag.forkNum; *blknum = bufHdr->tag.blockNum; } @@ -2838,7 +2841,12 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln) /* Find smgr relation for buffer */ if (reln == NULL) - reln = smgropen(buf->tag.rlocator, InvalidBackendId); + { + RelFileLocator rlocator; + + BufTagGetRelFileLocator(buf->tag, rlocator); + reln = smgropen(rlocator, InvalidBackendId); + } TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum, buf->tag.blockNum, @@ -3141,14 +3149,14 @@ DropRelFileLocatorBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum, * We could check forkNum and blockNum as well as the rlocator, but * the incremental win from doing so seems small. 
*/ - if (!RelFileLocatorEquals(bufHdr->tag.rlocator, rlocator.locator)) + if (!BufTagMatchesRelFileLocator(bufHdr->tag, rlocator.locator)) continue; buf_state = LockBufHdr(bufHdr); for (j = 0; j < nforks; j++) { - if (RelFileLocatorEquals(bufHdr->tag.rlocator, rlocator.locator) && + if (BufTagMatchesRelFileLocator(bufHdr->tag, rlocator.locator) && bufHdr->tag.forkNum == forkNum[j] && bufHdr->tag.blockNum >= firstDelBlock[j]) { @@ -3301,7 +3309,7 @@ DropRelFileLocatorsAllBuffers(SMgrRelation *smgr_reln, int nlocators) for (j = 0; j < n; j++) { - if (RelFileLocatorEquals(bufHdr->tag.rlocator, locators[j])) + if (BufTagMatchesRelFileLocator(bufHdr->tag, locators[j])) { rlocator = &locators[j]; break; @@ -3310,7 +3318,10 @@ DropRelFileLocatorsAllBuffers(SMgrRelation *smgr_reln, int nlocators) } else { - rlocator = bsearch((const void *) &(bufHdr->tag.rlocator), + RelFileLocator locator; + + BufTagGetRelFileLocator(bufHdr->tag, locator); + rlocator = bsearch((const void *) &(locator), locators, n, sizeof(RelFileLocator), rlocator_comparator); } @@ -3320,7 +3331,7 @@ DropRelFileLocatorsAllBuffers(SMgrRelation *smgr_reln, int nlocators) continue; buf_state = LockBufHdr(bufHdr); - if (RelFileLocatorEquals(bufHdr->tag.rlocator, (*rlocator))) + if (BufTagMatchesRelFileLocator(bufHdr->tag, (*rlocator))) InvalidateBuffer(bufHdr); /* releases spinlock */ else UnlockBufHdr(bufHdr, buf_state); @@ -3380,7 +3391,7 @@ FindAndDropRelFileLocatorBuffers(RelFileLocator rlocator, ForkNumber forkNum, */ buf_state = LockBufHdr(bufHdr); - if (RelFileLocatorEquals(bufHdr->tag.rlocator, rlocator) && + if (BufTagMatchesRelFileLocator(bufHdr->tag, rlocator) && bufHdr->tag.forkNum == forkNum && bufHdr->tag.blockNum >= firstDelBlock) InvalidateBuffer(bufHdr); /* releases spinlock */ @@ -3419,11 +3430,11 @@ DropDatabaseBuffers(Oid dbid) * As in DropRelFileLocatorBuffers, an unlocked precheck should be * safe and saves some cycles. 
*/ - if (bufHdr->tag.rlocator.dbOid != dbid) + if (bufHdr->tag.dbOid != dbid) continue; buf_state = LockBufHdr(bufHdr); - if (bufHdr->tag.rlocator.dbOid == dbid) + if (bufHdr->tag.dbOid == dbid) InvalidateBuffer(bufHdr); /* releases spinlock */ else UnlockBufHdr(bufHdr, buf_state); @@ -3447,13 +3458,16 @@ PrintBufferDescs(void) { BufferDesc *buf = GetBufferDescriptor(i); Buffer b = BufferDescriptorGetBuffer(buf); + RelFileLocator rlocator; + + BufTagGetRelFileLocator(buf->tag, rlocator); /* theoretically we should lock the bufhdr here */ elog(LOG, "[%02d] (freeNext=%d, rel=%s, " "blockNum=%u, flags=0x%x, refcount=%u %d)", i, buf->freeNext, - relpathbackend(buf->tag.rlocator, InvalidBackendId, buf->tag.forkNum), + relpathbackend(rlocator, InvalidBackendId, buf->tag.forkNum), buf->tag.blockNum, buf->flags, buf->refcount, GetPrivateRefCount(b)); } @@ -3473,12 +3487,16 @@ PrintPinnedBufs(void) if (GetPrivateRefCount(b) > 0) { + RelFileLocator rlocator; + + BufTagGetRelFileLocator(buf->tag, rlocator); + /* theoretically we should lock the bufhdr here */ elog(LOG, "[%02d] (freeNext=%d, rel=%s, " "blockNum=%u, flags=0x%x, refcount=%u %d)", i, buf->freeNext, - relpathperm(buf->tag.rlocator, buf->tag.forkNum), + relpathperm(rlocator, buf->tag.forkNum), buf->tag.blockNum, buf->flags, buf->refcount, GetPrivateRefCount(b)); } @@ -3517,7 +3535,7 @@ FlushRelationBuffers(Relation rel) uint32 buf_state; bufHdr = GetLocalBufferDescriptor(i); - if (RelFileLocatorEquals(bufHdr->tag.rlocator, rel->rd_locator) && + if (BufTagMatchesRelFileLocator(bufHdr->tag, rel->rd_locator) && ((buf_state = pg_atomic_read_u32(&bufHdr->state)) & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY)) { @@ -3564,13 +3582,13 @@ FlushRelationBuffers(Relation rel) * As in DropRelFileLocatorBuffers, an unlocked precheck should be * safe and saves some cycles. 
*/ - if (!RelFileLocatorEquals(bufHdr->tag.rlocator, rel->rd_locator)) + if (!BufTagMatchesRelFileLocator(bufHdr->tag, rel->rd_locator)) continue; ReservePrivateRefCountEntry(); buf_state = LockBufHdr(bufHdr); - if (RelFileLocatorEquals(bufHdr->tag.rlocator, rel->rd_locator) && + if (BufTagMatchesRelFileLocator(bufHdr->tag, rel->rd_locator) && (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY)) { PinBuffer_Locked(bufHdr); @@ -3644,7 +3662,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels) for (j = 0; j < nrels; j++) { - if (RelFileLocatorEquals(bufHdr->tag.rlocator, srels[j].rlocator)) + if (BufTagMatchesRelFileLocator(bufHdr->tag, srels[j].rlocator)) { srelent = &srels[j]; break; @@ -3653,7 +3671,10 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels) } else { - srelent = bsearch((const void *) &(bufHdr->tag.rlocator), + RelFileLocator rlocator; + + BufTagGetRelFileLocator(bufHdr->tag, rlocator); + srelent = bsearch((const void *) &(rlocator), srels, nrels, sizeof(SMgrSortArray), rlocator_comparator); } @@ -3665,7 +3686,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels) ReservePrivateRefCountEntry(); buf_state = LockBufHdr(bufHdr); - if (RelFileLocatorEquals(bufHdr->tag.rlocator, srelent->rlocator) && + if (BufTagMatchesRelFileLocator(bufHdr->tag, srelent->rlocator) && (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY)) { PinBuffer_Locked(bufHdr); @@ -3867,13 +3888,13 @@ FlushDatabaseBuffers(Oid dbid) * As in DropRelFileLocatorBuffers, an unlocked precheck should be * safe and saves some cycles. 
*/ - if (bufHdr->tag.rlocator.dbOid != dbid) + if (bufHdr->tag.dbOid != dbid) continue; ReservePrivateRefCountEntry(); buf_state = LockBufHdr(bufHdr); - if (bufHdr->tag.rlocator.dbOid == dbid && + if (bufHdr->tag.dbOid == dbid && (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY)) { PinBuffer_Locked(bufHdr); @@ -4033,6 +4054,10 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) if (XLogHintBitIsNeeded() && (pg_atomic_read_u32(&bufHdr->state) & BM_PERMANENT)) { + RelFileLocator rlocator; + + BufTagGetRelFileLocator(bufHdr->tag, rlocator); + /* * If we must not write WAL, due to a relfilelocator-specific * condition or being in recovery, don't dirty the page. We can @@ -4041,8 +4066,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) * * See src/backend/storage/page/README for longer discussion. */ - if (RecoveryInProgress() || - RelFileLocatorSkippingWAL(bufHdr->tag.rlocator)) + if (RecoveryInProgress() || RelFileLocatorSkippingWAL(rlocator)) return; /* @@ -4650,8 +4674,10 @@ AbortBufferIO(void) { /* Buffer is pinned, so we can read tag without spinlock */ char *path; + RelFileLocator rlocator; - path = relpathperm(buf->tag.rlocator, buf->tag.forkNum); + BufTagGetRelFileLocator(buf->tag, rlocator); + path = relpathperm(rlocator, buf->tag.forkNum); ereport(WARNING, (errcode(ERRCODE_IO_ERROR), errmsg("could not write block %u of %s", @@ -4675,7 +4701,11 @@ shared_buffer_write_error_callback(void *arg) /* Buffer is pinned, so we can read the tag without locking the spinlock */ if (bufHdr != NULL) { - char *path = relpathperm(bufHdr->tag.rlocator, bufHdr->tag.forkNum); + char *path; + RelFileLocator rlocator; + + BufTagGetRelFileLocator(bufHdr->tag, rlocator); + path = relpathperm(rlocator, bufHdr->tag.forkNum); errcontext("writing block %u of relation %s", bufHdr->tag.blockNum, path); @@ -4693,8 +4723,11 @@ local_buffer_write_error_callback(void *arg) if (bufHdr != NULL) { - char *path = relpathbackend(bufHdr->tag.rlocator, MyBackendId, - 
bufHdr->tag.forkNum); + char *path; + RelFileLocator rlocator; + + BufTagGetRelFileLocator(bufHdr->tag, rlocator); + path = relpathbackend(rlocator, MyBackendId, bufHdr->tag.forkNum); errcontext("writing block %u of relation %s", bufHdr->tag.blockNum, path); @@ -4788,8 +4821,13 @@ static inline int buffertag_comparator(const BufferTag *ba, const BufferTag *bb) { int ret; + RelFileLocator rlocatora; + RelFileLocator rlocatorb; + + BufTagGetRelFileLocator(*ba, rlocatora); + BufTagGetRelFileLocator(*bb, rlocatorb); - ret = rlocator_comparator(&ba->rlocator, &bb->rlocator); + ret = rlocator_comparator(&rlocatora, &rlocatorb); if (ret != 0) return ret; @@ -4946,10 +4984,12 @@ IssuePendingWritebacks(WritebackContext *context) SMgrRelation reln; int ahead; BufferTag tag; + RelFileLocator currlocator; Size nblocks = 1; cur = &context->pending_writebacks[i]; tag = cur->tag; + BufTagGetRelFileLocator(tag, currlocator); /* * Peek ahead, into following writeback requests, to see if they can @@ -4957,10 +4997,13 @@ IssuePendingWritebacks(WritebackContext *context) */ for (ahead = 0; i + ahead + 1 < context->nr_pending; ahead++) { + RelFileLocator nextrlocator; + next = &context->pending_writebacks[i + ahead + 1]; + BufTagGetRelFileLocator(next->tag, nextrlocator); /* different file, stop */ - if (!RelFileLocatorEquals(cur->tag.rlocator, next->tag.rlocator) || + if (!RelFileLocatorEquals(currlocator, nextrlocator) || cur->tag.forkNum != next->tag.forkNum) break; @@ -4979,7 +5022,7 @@ IssuePendingWritebacks(WritebackContext *context) i += ahead; /* and finally tell the kernel to write the data to storage */ - reln = smgropen(tag.rlocator, InvalidBackendId); + reln = smgropen(currlocator, InvalidBackendId); smgrwriteback(reln, tag.forkNum, tag.blockNum, nblocks); } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 41a0807..76e8556 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -213,9 
+213,12 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, { SMgrRelation oreln; Page localpage = (char *) LocalBufHdrGetBlock(bufHdr); + RelFileLocator rlocator; + + BufTagGetRelFileLocator(bufHdr->tag, rlocator); /* Find smgr relation for buffer */ - oreln = smgropen(bufHdr->tag.rlocator, MyBackendId); + oreln = smgropen(rlocator, MyBackendId); PageSetChecksumInplace(localpage, bufHdr->tag.blockNum); @@ -337,16 +340,22 @@ DropRelFileLocatorLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, buf_state = pg_atomic_read_u32(&bufHdr->state); if ((buf_state & BM_TAG_VALID) && - RelFileLocatorEquals(bufHdr->tag.rlocator, rlocator) && + BufTagMatchesRelFileLocator(bufHdr->tag, rlocator) && bufHdr->tag.forkNum == forkNum && bufHdr->tag.blockNum >= firstDelBlock) { if (LocalRefCount[i] != 0) + { + RelFileLocator rlocator; + + BufTagGetRelFileLocator(bufHdr->tag, rlocator); elog(ERROR, "block %u of %s is still referenced (local %u)", bufHdr->tag.blockNum, - relpathbackend(bufHdr->tag.rlocator, MyBackendId, + relpathbackend(rlocator, MyBackendId, bufHdr->tag.forkNum), LocalRefCount[i]); + } + /* Remove entry from hashtable */ hresult = (LocalBufferLookupEnt *) hash_search(LocalBufHash, (void *) &bufHdr->tag, @@ -383,12 +392,15 @@ DropRelFileLocatorAllLocalBuffers(RelFileLocator rlocator) buf_state = pg_atomic_read_u32(&bufHdr->state); if ((buf_state & BM_TAG_VALID) && - RelFileLocatorEquals(bufHdr->tag.rlocator, rlocator)) + BufTagMatchesRelFileLocator(bufHdr->tag, rlocator)) { + RelFileLocator rlocator; + + BufTagGetRelFileLocator(bufHdr->tag, rlocator); if (LocalRefCount[i] != 0) elog(ERROR, "block %u of %s is still referenced (local %u)", bufHdr->tag.blockNum, - relpathbackend(bufHdr->tag.rlocator, MyBackendId, + relpathbackend(rlocator, MyBackendId, bufHdr->tag.forkNum), LocalRefCount[i]); /* Remove entry from hashtable */ diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index aded5e8..4c36d55 
100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -90,34 +90,61 @@ */ typedef struct buftag { - RelFileLocator rlocator; /* physical relation identifier */ - ForkNumber forkNum; + Oid spcOid; /* tablespace oid */ + Oid dbOid; /* database oid */ + RelFileNumber relNumber; /* relation file number */ + ForkNumber forkNum; /* fork number */ BlockNumber blockNum; /* blknum relative to begin of reln */ } BufferTag; +#define BufTagGetRelNumber(a) ((a).relNumber) + +#define BufTagSetRelNumber(a, relnumber) \ +( \ + (a).relNumber = (relnumber) \ +) + +#define BufTagGetRelFileLocator(a, locator) \ +do { \ + (locator).spcOid = (a).spcOid; \ + (locator).dbOid = (a).dbOid; \ + (locator).relNumber = (a).relNumber; \ +} while(0) + #define CLEAR_BUFFERTAG(a) \ ( \ - (a).rlocator.spcOid = InvalidOid, \ - (a).rlocator.dbOid = InvalidOid, \ - (a).rlocator.relNumber = InvalidRelFileNumber, \ + (a).spcOid = InvalidOid, \ + (a).dbOid = InvalidOid, \ + BufTagSetRelNumber(a, InvalidRelFileNumber), \ (a).forkNum = InvalidForkNumber, \ (a).blockNum = InvalidBlockNumber \ ) #define INIT_BUFFERTAG(a,xx_rlocator,xx_forkNum,xx_blockNum) \ ( \ - (a).rlocator = (xx_rlocator), \ + (a).spcOid = (xx_rlocator).spcOid, \ + (a).dbOid = (xx_rlocator).dbOid, \ + BufTagSetRelNumber(a, (xx_rlocator).relNumber), \ (a).forkNum = (xx_forkNum), \ (a).blockNum = (xx_blockNum) \ ) #define BUFFERTAGS_EQUAL(a,b) \ ( \ - RelFileLocatorEquals((a).rlocator, (b).rlocator) && \ + (a).spcOid == (b).spcOid && \ + (a).dbOid == (b).dbOid && \ + (a).relNumber == (b).relNumber && \ (a).blockNum == (b).blockNum && \ (a).forkNum == (b).forkNum \ ) +#define BufTagMatchesRelFileLocator(a, locator) \ +( \ + (a).spcOid == (locator).spcOid && \ + (a).dbOid == (locator).dbOid && \ + (a).relNumber == (locator).relNumber \ +) + /* * The shared buffer mapping table is partitioned to reduce contention. * To determine which partition lock a given tag requires, compute the tag's -- 1.8.3.1