From 7cec29fbbee315b45e95275bfd1c77a2a13bf4c3 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sun, 22 Mar 2026 02:22:06 -0400 Subject: [PATCH v17 05/18] Optimize heap buffer pin transfer for last-in-block index items. Add an xs_lastinblock flag to IndexFetchHeapData that tracks whether the current TID is the last one on its heap block within the current batch. When it is, heapam_index_fetch_tuple can transfer its buffer pin to the slot (via ExecStorePinnedBufferHeapTuple) instead of incrementing the pin count, saving a pair of IncrBufferRefCount/ReleaseBuffer calls. heapam_index_return_scanpos_tid now computes xs_lastinblock for plain index scans by peeking at the next item in the scan direction; the flag is left false for the final item of a batch, which has no next item to peek at. This optimization is not used for index-only scans because all-visible items can be skipped, which would break block deduplication symmetry between the scan and the read stream. An explicit ExecClearTuple is added to the block-switch path in heapam_index_fetch_tuple to release the pin that was transferred to the slot on the previous call. Author: Peter Geoghegan Reviewed-By: Andres Freund Discussion: https://postgr.es/m/CAH2-Wz=D4Lru9BkvqaRnFRPDaZbfTOdWcxw13zyG6GVFTtz_vw@mail.gmail.com --- src/include/access/heapam.h | 1 + src/backend/access/heap/heapam_handler.c | 70 +++++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 6236b84b4..e2f842db9 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -133,6 +133,7 @@ typedef struct IndexFetchHeapData /* For index-only scans that must access the visibility map */ Buffer xs_vmbuffer; /* visibility map buffer */ int xs_vm_items; /* # items to resolve visibility info for */ + bool xs_lastinblock; /* last TID on this block in current batch? 
*/ } IndexFetchHeapData; diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index a40f7fe32..d9b1cece4 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -184,6 +184,12 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, /* Remember this buffer's block number for next time */ hscan->xs_blk = ItemPointerGetBlockNumber(tid); + /* + * Drop the xs_blk pin independently held by slot (if any) now. See + * comments around ExecStorePinnedBufferHeapTuple call below. + */ + ExecClearTuple(slot); + if (BufferIsValid(hscan->xs_cbuf)) ReleaseBuffer(hscan->xs_cbuf); @@ -220,7 +226,33 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, *heap_continue = !IsMVCCLikeSnapshot(snapshot); slot->tts_tableOid = RelationGetRelid(scan->rel); - ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf); + + /* + * If this is the last TID on the current heap block within the batch, + * transfer our buffer pin to the slot rather than having the slot + * increment the pin count. This saves a pair of IncrBufferRefCount + * and ReleaseBuffer calls, since the caller would just release its + * pin on xs_cbuf when switching to the next block anyway. + * + * We can only do this when heap_continue is false, since otherwise + * the caller will need xs_cbuf to remain valid for the next call. + */ + if (hscan->xs_lastinblock && !*heap_continue) + { + ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, + hscan->xs_cbuf); + hscan->xs_cbuf = InvalidBuffer; + hscan->xs_blk = InvalidBlockNumber; + + /* + * Note: the pin now owned by the slot is expected to be released + * on the next call here, via an explicit ExecClearTuple. This + * avoids churn in the backend's private refcount cache. 
+ */ + } + else + ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, + hscan->xs_cbuf); } else { @@ -359,7 +391,43 @@ heapam_index_return_scanpos_tid(IndexScanDesc scan, IndexFetchHeapData *hscan, scan->xs_heaptid = scanBatch->items[scanPos->item].tableTid; if (all_visible == NULL) + { + int nextItem; + bool hasNext; + + /* + * Plain index scan. + * + * Determine if the next item in the current scan direction is on a + * different heap block. When it is, heapam_index_fetch_tuple can + * transfer its buffer pin to the slot instead of incrementing the pin + * count, saving a pair of IncrBufferRefCount/ReleaseBuffer calls. + * + * Note: We cannot do this for index-only scans because all-visible + * items are skipped by both the scan and the read stream callback. + * Skipped items can break the block deduplication symmetry between + * the stream and the scan: the stream deduplicates consecutive + * non-all-visible items by block, but after invalidating xs_blk the + * scan would try to re-fetch a block that the stream already returned + * and deduplicated away. + */ + if (ScanDirectionIsForward(direction)) + { + nextItem = scanPos->item + 1; + hasNext = (nextItem <= scanBatch->lastItem); + } + else + { + nextItem = scanPos->item - 1; + hasNext = (nextItem >= scanBatch->firstItem); + } + + hscan->xs_lastinblock = hasNext && + ItemPointerGetBlockNumber(&scanBatch->items[nextItem].tableTid) != + ItemPointerGetBlockNumber(&scan->xs_heaptid); + return &scan->xs_heaptid; + } /* * Index-only scan. -- 2.53.0