From 1b2cf67560cf24fd6daf87c4ee7cb03ae605babd Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sun, 22 Mar 2026 02:22:06 -0400 Subject: [PATCH v23 4/8] heapam: Optimize pin transfers during index scans. Add an xs_lastinblock flag to IndexFetchHeapData that tracks whether the current TID is the last one on its heap block within the current batch. When it is, heapam_index_fetch_tuple can transfer its buffer pin to the slot (via ExecStorePinnedBufferHeapTuple) instead of incrementing the pin count, saving a pair of IncrBufferRefCount/ReleaseBuffer calls. This optimization is not used for index-only scans because all-visible items can be skipped, which would break block deduplication symmetry between the scan and the read stream (besides, the performance of this code path can only matter when many heap fetches are required, which is hopefully rare during index-only scans). Also add an explicit ExecClearTuple to the block-switch path in heapam_index_fetch_tuple to release the pin transferred to the slot on the previous call (just before calling ReleaseBuffer as part of moving on to the scan's next block). This fixes a performance problem where the code path in question triggers GetPrivateRefCountEntrySlow calls more often than one would hope. The underlying issue is that the pin in the slot is still held, even if we decide to release the buffer and move on: ExecStoreBufferHeapTuple will first fail to hit the backend-local cache for the release of the old pin (because we just pinned and locked the new buffer), causing a cache miss inside IncrBufferRefCount. 
Author: Peter Geoghegan Suggested-by: Andres Freund Reviewed-By: Andres Freund Discussion: https://postgr.es/m/CAH2-Wz=D4Lru9BkvqaRnFRPDaZbfTOdWcxw13zyG6GVFTtz_vw@mail.gmail.com --- src/include/access/heapam.h | 3 + src/backend/access/heap/heapam_indexscan.c | 73 +++++++++++++++++++++- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 79976f50f..9281d8645 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -131,6 +131,9 @@ typedef struct IndexFetchHeapData /* For visibility map checks (index-only scans and on-access pruning) */ Buffer xs_vmbuffer; /* visibility map buffer */ int xs_vm_items; /* # items to resolve visibility info for */ + + /* Plain index scan xs_lastinblock optimization */ + bool xs_lastinblock; /* last TID on this block in current batch? */ } IndexFetchHeapData; /* Result codes for HeapTupleSatisfiesVacuum */ diff --git a/src/backend/access/heap/heapam_indexscan.c b/src/backend/access/heap/heapam_indexscan.c index 5ff9d8bea..b6e57f8f8 100644 --- a/src/backend/access/heap/heapam_indexscan.c +++ b/src/backend/access/heap/heapam_indexscan.c @@ -126,6 +126,9 @@ heapam_index_fetch_begin(IndexScanDesc scan, uint32 flags) Assert(hscan->xs_vmbuffer == InvalidBuffer); hscan->xs_vm_items = 1; + /* xs_lastinblock optimization state */ + Assert(!hscan->xs_lastinblock); + /* Resolve which xs_getnext_slot implementation to use for this scan */ if (scan->indexRelation->rd_indam->amgetbatch != NULL) { @@ -435,6 +438,14 @@ heapam_index_fetch_tuple(Relation rel, /* Remember this buffer's block number for next time */ hscan->xs_blk = ItemPointerGetBlockNumber(tid); + /* + * Drop the xs_blk pin independently held by the slot (if any) now, + * before calling ReleaseBuffer. This avoids expensive calls to + * GetPrivateRefCountEntrySlow caused by ExecStoreBufferHeapTuple + * failing to hit the backend's cache for the release of the old pin. 
+ */ + ExecClearTuple(slot); + if (BufferIsValid(hscan->xs_cbuf)) ReleaseBuffer(hscan->xs_cbuf); @@ -471,7 +482,33 @@ heapam_index_fetch_tuple(Relation rel, *heap_continue = !IsMVCCLikeSnapshot(snapshot); slot->tts_tableOid = RelationGetRelid(rel); - ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf); + + /* + * If this is the last TID on the current heap block within the batch, + * transfer our buffer pin to the slot rather than having the slot + * increment the pin count. This saves a pair of IncrBufferRefCount + * and ReleaseBuffer calls, since the caller would just release its + * pin on xs_cbuf when switching to the next block anyway. + * + * We can only do this when heap_continue is false, since otherwise + * the caller will need xs_cbuf to remain valid for the next call. + */ + if (hscan->xs_lastinblock && !*heap_continue) + { + ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, + hscan->xs_cbuf); + hscan->xs_cbuf = InvalidBuffer; + hscan->xs_blk = InvalidBlockNumber; + + /* + * Note: the pin now owned by the slot is expected to be released + * on the next call here, via an explicit ExecClearTuple. This + * avoids churn in the backend's private refcount cache. + */ + } + else + ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, + hscan->xs_cbuf); } else { @@ -659,10 +696,42 @@ heapam_index_return_scanpos_tid(IndexScanDesc scan, IndexFetchHeapData *hscan, if (all_visible == NULL) { + int nextItem; + bool hasNext; + /* * Plain index scan. + * + * Determine if the next item in the current scan direction is on a + * different heap block. When it is, heapam_index_fetch_tuple can + * transfer its buffer pin to the slot instead of incrementing the pin + * count, saving a pair of IncrBufferRefCount/ReleaseBuffer calls. + * + * Note: We cannot do this for index-only scans because all-visible + * items are skipped by both the scan and the read stream callback. 
It + * doesn't seem worth the trouble of reasoning about these issues, + * since the optimization only helps when heap fetches are required. + * + * Note: We deliberately don't consider the batch after scanBatch, + * because doing so would add complexity for little benefit. It's + * okay if xs_lastinblock is spuriously set to false. */ Assert(!scan->xs_want_itup); + if (ScanDirectionIsForward(direction)) + { + nextItem = scanPos->item + 1; + hasNext = (nextItem <= scanBatch->lastItem); + } + else + { + nextItem = scanPos->item - 1; + hasNext = (nextItem >= scanBatch->firstItem); + } + + hscan->xs_lastinblock = hasNext && + ItemPointerGetBlockNumber(&scanBatch->items[nextItem].tableTid) != + ItemPointerGetBlockNumber(&scan->xs_heaptid); + return &scan->xs_heaptid; } @@ -671,7 +740,7 @@ heapam_index_return_scanpos_tid(IndexScanDesc scan, IndexFetchHeapData *hscan, * * Also set xs_itup, which caller also needs. */ - Assert(scan->xs_want_itup); + Assert(scan->xs_want_itup && !hscan->xs_lastinblock); scan->xs_itup = (IndexTuple) (scanBatch->currTuples + scanBatch->items[scanPos->item].tupleOffset); -- 2.53.0