From 1fe17e10e402a87d7a907c37b65849ba763e5926 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan
Date: Sun, 22 Mar 2026 02:22:06 -0400
Subject: [PATCH v21 07/15] heapam: Optimize pin transfers during index scans.

Add an xs_lastinblock flag to IndexFetchHeapData that tracks whether the
current TID is the last one on its heap block within the current batch.
When it is, heapam_index_fetch_tuple can transfer its buffer pin to the
slot (via ExecStorePinnedBufferHeapTuple) instead of incrementing the pin
count, saving a pair of IncrBufferRefCount/ReleaseBuffer calls.

This optimization is not used for index-only scans because all-visible
items can be skipped, which would break block deduplication symmetry
between the scan and the read stream (besides, the performance of this
code path can only matter when many heap fetches are required, which is
hopefully rare during index-only scans).

Also add an explicit ExecClearTuple to the block-switch path in
heapam_index_fetch_tuple_impl to release the pin transferred to the slot
on the previous call (just before calling ReleaseBuffer as part of moving
on to the scan's next block).  This fixes a performance problem where the
code path in question triggered GetPrivateRefCountEntrySlow calls more
often than one would hope.  The underlying issue was that the slot kept
holding its pin even after we decided to release the buffer and move on:
ExecStoreBufferHeapTuple would first fail to hit the backend-local cache
when releasing the old pin (because we had just pinned and locked the new
buffer), causing a cache miss inside IncrBufferRefCount.

Author: Peter Geoghegan
Suggested-by: Andres Freund
Reviewed-By: Andres Freund
Discussion: https://postgr.es/m/CAH2-Wz=D4Lru9BkvqaRnFRPDaZbfTOdWcxw13zyG6GVFTtz_vw@mail.gmail.com
---
 src/include/access/heapam.h                |  3 +
 src/backend/access/heap/heapam_indexscan.c | 73 +++++++++++++++++++++-
 2 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index bbe39a577..57c5c6b5f 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -132,6 +132,9 @@ typedef struct IndexFetchHeapData
 	Buffer		xs_vmbuffer;	/* visibility map buffer */
 	int			xs_vm_items;	/* # items to resolve visibility info for */
 
+	/* Plain index scan xs_lastinblock optimization */
+	bool		xs_lastinblock; /* last TID on this block in current batch? */
+
 } IndexFetchHeapData;
 
 /*
diff --git a/src/backend/access/heap/heapam_indexscan.c b/src/backend/access/heap/heapam_indexscan.c
index 900876e18..ec2afbdb4 100644
--- a/src/backend/access/heap/heapam_indexscan.c
+++ b/src/backend/access/heap/heapam_indexscan.c
@@ -90,6 +90,9 @@ heapam_index_fetch_begin(IndexScanDesc scan, uint32 flags)
 	Assert(hscan->xs_vmbuffer == InvalidBuffer);
 	hscan->xs_vm_items = 1;
 
+	/* xs_lastinblock optimization state */
+	Assert(!hscan->xs_lastinblock);
+
 	/* Resolve which getnext_slot implementation to use for this scan */
 	if (scan->indexRelation->rd_indam->amgetbatch != NULL)
 	{
@@ -485,6 +488,14 @@ heapam_index_fetch_tuple_impl(Relation rel,
 		/* Remember this buffer's block number for next time */
 		hscan->xs_blk = ItemPointerGetBlockNumber(tid);
 
+		/*
+		 * Drop the xs_blk pin independently held by the slot (if any) now,
+		 * before calling ReleaseBuffer.  This avoids expensive calls to
+		 * GetPrivateRefCountEntrySlow caused by ExecStoreBufferHeapTuple
+		 * failing to hit the backend's cache for the release of the old pin.
+		 */
+		ExecClearTuple(slot);
+
 		if (BufferIsValid(hscan->xs_cbuf))
 			ReleaseBuffer(hscan->xs_cbuf);
 
@@ -522,7 +533,33 @@ heapam_index_fetch_tuple_impl(Relation rel,
 			*heap_continue = !IsMVCCLikeSnapshot(snapshot);
 
 		slot->tts_tableOid = RelationGetRelid(rel);
-		ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
+
+		/*
+		 * If this is the last TID on the current heap block within the batch,
+		 * transfer our buffer pin to the slot rather than having the slot
+		 * increment the pin count.  This saves a pair of IncrBufferRefCount
+		 * and ReleaseBuffer calls, since the caller would just release its
+		 * pin on xs_cbuf when switching to the next block anyway.
+		 *
+		 * We can only do this when heap_continue is false, since otherwise
+		 * the caller will need xs_cbuf to remain valid for the next call.
+		 */
+		if (hscan->xs_lastinblock && !*heap_continue)
+		{
+			ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot,
+										   hscan->xs_cbuf);
+			hscan->xs_cbuf = InvalidBuffer;
+			hscan->xs_blk = InvalidBlockNumber;
+
+			/*
+			 * Note: the pin now owned by the slot is expected to be released
+			 * on the next call here, via an explicit ExecClearTuple.  This
+			 * avoids churn in the backend's private refcount cache.
+			 */
+		}
+		else
+			ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot,
+									 hscan->xs_cbuf);
 	}
 	else
 	{
@@ -835,10 +872,42 @@ heapam_index_return_scanpos_tid(IndexScanDesc scan, IndexFetchHeapData *hscan,
 
 	if (all_visible == NULL)
 	{
+		int			nextItem;
+		bool		hasNext;
+
 		/*
 		 * Plain index scan.
+		 *
+		 * Determine if the next item in the current scan direction is on a
+		 * different heap block.  When it is, heapam_index_fetch_tuple_impl
+		 * can transfer its buffer pin to the slot instead of incrementing the
+		 * pin count, saving a pair of IncrBufferRefCount/ReleaseBuffer calls.
+		 *
+		 * Note: We cannot do this for index-only scans because all-visible
+		 * items are skipped by both the scan and the read stream callback.  It
+		 * doesn't seem worth the trouble of reasoning about these issues,
+		 * since the optimization only helps when heap fetches are required.
+		 *
+		 * Note: We deliberately don't consider the batch after scanBatch,
+		 * because doing so would add complexity for little benefit.  It's
+		 * okay if xs_lastinblock is spuriously set to false.
 		 */
 		Assert(!scan->xs_want_itup);
+
+		if (ScanDirectionIsForward(direction))
+		{
+			nextItem = scanPos->item + 1;
+			hasNext = (nextItem <= scanBatch->lastItem);
+		}
+		else
+		{
+			nextItem = scanPos->item - 1;
+			hasNext = (nextItem >= scanBatch->firstItem);
+		}
+
+		hscan->xs_lastinblock = hasNext &&
+			ItemPointerGetBlockNumber(&scanBatch->items[nextItem].tableTid) !=
+			ItemPointerGetBlockNumber(&scan->xs_heaptid);
+
 		return &scan->xs_heaptid;
 	}
 
@@ -847,7 +916,7 @@ heapam_index_return_scanpos_tid(IndexScanDesc scan, IndexFetchHeapData *hscan,
 	 *
 	 * Also set xs_itup, which caller also needs.
 	 */
-	Assert(scan->xs_want_itup);
+	Assert(scan->xs_want_itup && !hscan->xs_lastinblock);
 	scan->xs_itup = (IndexTuple) (scanBatch->currTuples +
 								  scanBatch->items[scanPos->item].tupleOffset);
 
-- 
2.53.0