From 3c6ef1cd01fa177c2d2439dfd8dcec5392f5b881 Mon Sep 17 00:00:00 2001 From: Julien Tachoires Date: Tue, 2 Dec 2025 10:40:55 +0100 Subject: [PATCH 2/7] Pass the number of ScanKeys to scan_rescan() The number of ScanKeys passed to the table AM API routine scan_rescan() was not specified, forcing the table AM to keep in memory the initial number of ScanKeys passed via scan_begin(). Currenlty, there isn't any real use of the ScanKeys during a table scan, so, this is not an issue, but it could become a blocking point in the future if we want to implement quals push down - as ScanKeys - to the table AM. Due to runtime keys evaluation, this number of ScanKeys can vary between the initial call to scan_begin() and a potential further call to scan_rescan(). table_rescan() is modified in order to reflect the changes on scan_rescan(). table_beginscan_parallel() signature is slightly modified in order to pass eventual ScanKeys and their numbers to scan_begin(). We also rename the variable "key" to "keys" in multiple places in order to make clear that this is an array of ScanKeys. --- src/backend/access/brin/brin.c | 3 +- src/backend/access/gin/gininsert.c | 3 +- src/backend/access/heap/heapam.c | 22 +++++++----- src/backend/access/nbtree/nbtsort.c | 3 +- src/backend/access/table/tableam.c | 9 ++--- src/backend/executor/execReplication.c | 4 +-- src/backend/executor/nodeBitmapHeapscan.c | 2 +- src/backend/executor/nodeSamplescan.c | 2 +- src/backend/executor/nodeSeqscan.c | 5 +-- src/backend/executor/nodeTidscan.c | 2 +- src/include/access/heapam.h | 7 ++-- src/include/access/tableam.h | 41 ++++++++++++----------- 12 files changed, 59 insertions(+), 44 deletions(-) diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index cb3331921cb..0a43f67f919 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -2842,7 +2842,8 @@ _brin_parallel_scan_and_build(BrinBuildState *state, indexInfo->ii_Concurrent = brinshared->isconcurrent; scan = table_beginscan_parallel(heap, - ParallelTableScanFromBrinShared(brinshared)); + ParallelTableScanFromBrinShared(brinshared), + 0, NULL); reltuples = table_index_build_scan(heap, index, indexInfo, true, true, brinbuildCallbackParallel, state, scan); diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index f87c60a230c..9f7a55f3647 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -2058,7 +2058,8 @@ _gin_parallel_scan_and_build(GinBuildState *state, indexInfo->ii_Concurrent = ginshared->isconcurrent; scan = table_beginscan_parallel(heap, - ParallelTableScanFromGinBuildShared(ginshared)); + ParallelTableScanFromGinBuildShared(ginshared), + 0, NULL); reltuples = table_index_build_scan(heap, index, indexInfo, true, progress, ginBuildCallbackParallel, state, scan); diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 1d9f9efa4fd..25bc941a815 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -346,10 +346,13 @@ bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, /* ---------------- * initscan - scan code common to heap_beginscan and heap_rescan + * + * Note: in order to pass on the ScanKeys to the scan, this function takes as + * arguments the number of ScanKeys and one array of ScanKeys. * ---------------- */ static void -initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) +initscan(HeapScanDesc scan, int nkeys, ScanKey keys, bool keep_startblock) { ParallelBlockTableScanDesc bpscan = NULL; bool allow_strat; @@ -471,10 +474,13 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) /* page-at-a-time fields are always invalid when not rs_inited */ /* - * copy the scan key, if appropriate + * copy the scan keys, if appropriate */ - if (key != NULL && scan->rs_base.rs_nkeys > 0) - memcpy(scan->rs_base.rs_key, key, scan->rs_base.rs_nkeys * sizeof(ScanKeyData)); + if (keys != NULL && nkeys > 0) + { + scan->rs_base.rs_nkeys = nkeys; + memcpy(scan->rs_base.rs_key, keys, nkeys * sizeof(ScanKeyData)); + } /* * Currently, we only have a stats counter for sequential heap scans (but @@ -1127,7 +1133,7 @@ continue_page: TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, + int nkeys, ScanKey keys, ParallelTableScanDesc parallel_scan, uint32 flags) { @@ -1228,7 +1234,7 @@ heap_beginscan(Relation relation, Snapshot snapshot, else scan->rs_base.rs_key = NULL; - initscan(scan, key, false); + initscan(scan, nkeys, keys, false); scan->rs_read_stream = NULL; @@ -1280,7 +1286,7 @@ heap_beginscan(Relation relation, Snapshot snapshot, } void -heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, +heap_rescan(TableScanDesc sscan, int nkeys, ScanKey keys, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode) { HeapScanDesc scan = (HeapScanDesc) sscan; @@ -1329,7 +1335,7 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, /* * reinitialize scan descriptor */ - initscan(scan, key, true); + initscan(scan, nkeys, keys, true); } void diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 454adaee7dc..9c757a1ee95 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -1925,7 +1925,8 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, indexInfo = BuildIndexInfo(btspool->index); indexInfo->ii_Concurrent = btshared->isconcurrent; scan = table_beginscan_parallel(btspool->heap, - ParallelTableScanFromBTShared(btshared)); + ParallelTableScanFromBTShared(btshared), + 0, NULL); reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo, true, progress, _bt_build_callback, &buildstate, scan); diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 1e099febdc8..0efa564f7b2 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -110,14 +110,14 @@ table_slot_create(Relation relation, List **reglist) */ TableScanDesc -table_beginscan_catalog(Relation relation, int nkeys, ScanKeyData *key) +table_beginscan_catalog(Relation relation, int nkeys, ScanKeyData *keys) { uint32 flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE | SO_TEMP_SNAPSHOT; Oid relid = RelationGetRelid(relation); Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); - return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key, + return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, keys, NULL, flags); } @@ -163,7 +163,8 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, } TableScanDesc -table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) +table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan, + int nkeys, ScanKeyData *keys) { Snapshot snapshot; uint32 flags = SO_TYPE_SEQSCAN | @@ -184,7 +185,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) snapshot = SnapshotAny; } - return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL, + return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, keys, pscan, flags); } diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index def32774c90..b7d45daa55a 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -388,7 +388,7 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, retry: found = false; - table_rescan(scan, NULL); + table_rescan(scan, 0, NULL); /* Try to find the tuple */ while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot)) @@ -604,7 +604,7 @@ RelationFindDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot, scan = table_beginscan(rel, SnapshotAny, 0, NULL); scanslot = table_slot_create(rel, NULL); - table_rescan(scan, NULL); + table_rescan(scan, 0, NULL); /* Try to find the tuple */ while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot)) diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index bf24f3d7fe0..fb778e0ae3b 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -239,7 +239,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) tbm_end_iterate(&scan->st.rs_tbmiterator); /* rescan to release any page pin */ - table_rescan(node->ss.ss_currentScanDesc, NULL); + table_rescan(node->ss.ss_currentScanDesc, 0, NULL); } /* release bitmaps and buffers if any */ diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index 6b3db7548ed..a7e172d83a4 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -301,7 +301,7 @@ tablesample_init(SampleScanState *scanstate) } else { - table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL, + table_rescan_set_params(scanstate->ss.ss_currentScanDesc, 0, NULL, scanstate->use_bulkread, allow_sync, scanstate->use_pagemode); diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 94047d29430..454d4ee0499 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -326,6 +326,7 @@ ExecReScanSeqScan(SeqScanState *node) if (scan != NULL) table_rescan(scan, /* scan desc */ + 0, /* number of new scan keys */ NULL); /* new scan keys */ ExecScanReScan((ScanState *) node); @@ -374,7 +375,7 @@ ExecSeqScanInitializeDSM(SeqScanState *node, estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, 0, NULL); } /* ---------------------------------------------------------------- @@ -407,5 +408,5 @@ ExecSeqScanInitializeWorker(SeqScanState *node, pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, 0, NULL); } diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index d50c6600358..0ab70d891c4 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -465,7 +465,7 @@ ExecReScanTidScan(TidScanState *node) /* not really necessary, but seems good form */ if (node->ss.ss_currentScanDesc) - table_rescan(node->ss.ss_currentScanDesc, NULL); + table_rescan(node->ss.ss_currentScanDesc, 0, NULL); ExecScanReScan(&node->ss); } diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 632c4332a8c..7dd5e0bcd78 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -325,14 +325,15 @@ typedef struct PruneFreezeResult extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, + int nkeys, ScanKey keys, ParallelTableScanDesc parallel_scan, uint32 flags); extern void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks); extern void heap_prepare_pagescan(TableScanDesc sscan); -extern void heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, - bool allow_strat, bool allow_sync, bool allow_pagemode); +extern void heap_rescan(TableScanDesc sscan, int nkeys, ScanKey keys, + bool set_params, bool allow_strat, bool allow_sync, + bool allow_pagemode); extern void heap_endscan(TableScanDesc sscan); extern HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction); extern bool heap_getnextslot(TableScanDesc sscan, diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 2fa790b6bf5..848aa114de1 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -326,7 +326,7 @@ typedef struct TableAmRoutine */ TableScanDesc (*scan_begin) (Relation rel, Snapshot snapshot, - int nkeys, ScanKeyData *key, + int nkeys, ScanKeyData *keys, ParallelTableScanDesc pscan, uint32 flags); @@ -340,9 +340,10 @@ typedef struct TableAmRoutine * Restart relation scan. If set_params is set to true, allow_{strat, * sync, pagemode} (see scan_begin) changes should be taken into account. */ - void (*scan_rescan) (TableScanDesc scan, ScanKeyData *key, - bool set_params, bool allow_strat, - bool allow_sync, bool allow_pagemode); + void (*scan_rescan) (TableScanDesc scan, int nkeys, + ScanKeyData *keys, bool set_params, + bool allow_strat, bool allow_sync, + bool allow_pagemode); /* * Return next tuple from `scan`, store in slot. @@ -874,12 +875,12 @@ extern TupleTableSlot *table_slot_create(Relation relation, List **reglist); */ static inline TableScanDesc table_beginscan(Relation rel, Snapshot snapshot, - int nkeys, ScanKeyData *key) + int nkeys, ScanKeyData *keys) { uint32 flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, keys, NULL, flags); } /* @@ -887,7 +888,7 @@ table_beginscan(Relation rel, Snapshot snapshot, * snapshot appropriate for scanning catalog relations. */ extern TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, - ScanKeyData *key); + ScanKeyData *keys); /* * Like table_beginscan(), but table_beginscan_strat() offers an extended API @@ -898,7 +899,7 @@ extern TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, */ static inline TableScanDesc table_beginscan_strat(Relation rel, Snapshot snapshot, - int nkeys, ScanKeyData *key, + int nkeys, ScanKeyData *keys, bool allow_strat, bool allow_sync) { uint32 flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE; @@ -908,7 +909,7 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, if (allow_sync) flags |= SO_ALLOW_SYNC; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, keys, NULL, flags); } /* @@ -919,11 +920,11 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, */ static inline TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, - int nkeys, ScanKeyData *key) + int nkeys, ScanKeyData *keys) { uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, keys, NULL, flags); } @@ -936,7 +937,7 @@ table_beginscan_bm(Relation rel, Snapshot snapshot, */ static inline TableScanDesc table_beginscan_sampling(Relation rel, Snapshot snapshot, - int nkeys, ScanKeyData *key, + int nkeys, ScanKeyData *keys, bool allow_strat, bool allow_sync, bool allow_pagemode) { @@ -949,7 +950,7 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot, if (allow_pagemode) flags |= SO_ALLOW_PAGEMODE; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); + return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, keys, NULL, flags); } /* @@ -991,9 +992,9 @@ table_endscan(TableScanDesc scan) * Restart a relation scan. */ static inline void -table_rescan(TableScanDesc scan, ScanKeyData *key) +table_rescan(TableScanDesc scan, int nkeys, ScanKeyData *keys) { - scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false); + scan->rs_rd->rd_tableam->scan_rescan(scan, nkeys, keys, false, false, false, false); } /* @@ -1005,10 +1006,10 @@ table_rescan(TableScanDesc scan, ScanKeyData *key) * previously selected startblock will be kept. */ static inline void -table_rescan_set_params(TableScanDesc scan, ScanKeyData *key, +table_rescan_set_params(TableScanDesc scan, int nkeys, ScanKeyData *keys, bool allow_strat, bool allow_sync, bool allow_pagemode) { - scan->rs_rd->rd_tableam->scan_rescan(scan, key, true, + scan->rs_rd->rd_tableam->scan_rescan(scan, nkeys, keys, true, allow_strat, allow_sync, allow_pagemode); } @@ -1073,7 +1074,8 @@ table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid, /* Ensure table_beginscan_tidrange() was used. */ Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0); - sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false); + sscan->rs_rd->rd_tableam->scan_rescan(sscan, 0, NULL, false, false, false, + false); sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); } @@ -1128,7 +1130,8 @@ extern void table_parallelscan_initialize(Relation rel, * Caller must hold a suitable lock on the relation. */ extern TableScanDesc table_beginscan_parallel(Relation relation, - ParallelTableScanDesc pscan); + ParallelTableScanDesc pscan, + int nkeys, ScanKeyData *keys); /* * Begin a parallel tid range scan. `pscan` needs to have been initialized -- 2.39.5