From 7d33449cc51f2713213093208f98720ef3adf3ad Mon Sep 17 00:00:00 2001 From: Julien Tachoires Date: Mon, 25 Aug 2025 17:01:57 +0200 Subject: [PATCH 1/6] Pass the number of ScanKeys to scan_rescan() The number of ScanKeys passed to the table AM API routine scan_rescan() was not specified, forcing the table AM to keep in memory the initial number of ScanKeys passed via scan_begin(). Currently, the ScanKeys are not really used during a table scan, so this is not an issue, but it could become a blocking point in the future if we want to implement qual pushdown (as ScanKeys) to the table AM. Due to runtime key evaluation, the number of ScanKeys can vary between the initial call to scan_begin() and a potential later call to scan_rescan(). table_rescan() is modified to reflect the change to scan_rescan(). The table_beginscan_parallel() signature is slightly modified in order to pass any ScanKeys and their count to scan_begin(). table_rescan_set_params() now takes the number of ScanKeys as a new argument. 
--- src/backend/access/brin/brin.c | 3 ++- src/backend/access/gin/gininsert.c | 3 ++- src/backend/access/heap/heapam.c | 2 +- src/backend/access/nbtree/nbtsort.c | 3 ++- src/backend/access/table/tableam.c | 5 +++-- src/backend/executor/execReplication.c | 4 ++-- src/backend/executor/nodeBitmapHeapscan.c | 2 +- src/backend/executor/nodeSamplescan.c | 2 +- src/backend/executor/nodeSeqscan.c | 5 +++-- src/backend/executor/nodeTidscan.c | 2 +- src/include/access/heapam.h | 5 +++-- src/include/access/tableam.h | 24 +++++++++++++---------- 12 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 7ff7467e462..5995bd1243e 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -2828,7 +2828,8 @@ _brin_parallel_scan_and_build(BrinBuildState *state, indexInfo->ii_Concurrent = brinshared->isconcurrent; scan = table_beginscan_parallel(heap, - ParallelTableScanFromBrinShared(brinshared)); + ParallelTableScanFromBrinShared(brinshared), + 0, NULL); reltuples = table_index_build_scan(heap, index, indexInfo, true, true, brinbuildCallbackParallel, state, scan); diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index e9d4b27427e..deaa42cffa4 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -2030,7 +2030,8 @@ _gin_parallel_scan_and_build(GinBuildState *state, indexInfo->ii_Concurrent = ginshared->isconcurrent; scan = table_beginscan_parallel(heap, - ParallelTableScanFromGinBuildShared(ginshared)); + ParallelTableScanFromGinBuildShared(ginshared), + 0, NULL); reltuples = table_index_build_scan(heap, index, indexInfo, true, progress, ginBuildCallbackParallel, state, scan); diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 7491cc3cb93..a5c74d8948e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1251,7 +1251,7 @@ 
heap_beginscan(Relation relation, Snapshot snapshot, } void -heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, +heap_rescan(TableScanDesc sscan, int nkeys, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode) { HeapScanDesc scan = (HeapScanDesc) sscan; diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 8828a7a8f89..d576ba3a762 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -1927,7 +1927,8 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, indexInfo = BuildIndexInfo(btspool->index); indexInfo->ii_Concurrent = btshared->isconcurrent; scan = table_beginscan_parallel(btspool->heap, - ParallelTableScanFromBTShared(btshared)); + ParallelTableScanFromBTShared(btshared), + 0, NULL); reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo, true, progress, _bt_build_callback, &buildstate, scan); diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index a56c5eceb14..46bed1614f0 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -163,7 +163,8 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, } TableScanDesc -table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) +table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan, + int nkeys, struct ScanKeyData *key) { Snapshot snapshot; uint32 flags = SO_TYPE_SEQSCAN | @@ -184,7 +185,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) snapshot = SnapshotAny; } - return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL, + return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key, pscan, flags); } diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index b409d4ecbf5..1b0c97243a5 100644 --- a/src/backend/executor/execReplication.c +++ 
b/src/backend/executor/execReplication.c @@ -388,7 +388,7 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, retry: found = false; - table_rescan(scan, NULL); + table_rescan(scan, 0, NULL); /* Try to find the tuple */ while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot)) @@ -604,7 +604,7 @@ RelationFindDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot, scan = table_beginscan(rel, SnapshotAny, 0, NULL); scanslot = table_slot_create(rel, NULL); - table_rescan(scan, NULL); + table_rescan(scan, 0, NULL); /* Try to find the tuple */ while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot)) diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index bf24f3d7fe0..fb778e0ae3b 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -239,7 +239,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) tbm_end_iterate(&scan->st.rs_tbmiterator); /* rescan to release any page pin */ - table_rescan(node->ss.ss_currentScanDesc, NULL); + table_rescan(node->ss.ss_currentScanDesc, 0, NULL); } /* release bitmaps and buffers if any */ diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index 6b3db7548ed..a7e172d83a4 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -301,7 +301,7 @@ tablesample_init(SampleScanState *scanstate) } else { - table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL, + table_rescan_set_params(scanstate->ss.ss_currentScanDesc, 0, NULL, scanstate->use_bulkread, allow_sync, scanstate->use_pagemode); diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 94047d29430..c89aa6c6616 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -326,6 +326,7 @@ ExecReScanSeqScan(SeqScanState *node) if (scan != NULL) table_rescan(scan, /* scan desc */ + 0, /* number of 
scan keys */ NULL); /* new scan keys */ ExecScanReScan((ScanState *) node); @@ -374,7 +375,7 @@ ExecSeqScanInitializeDSM(SeqScanState *node, estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, 0, NULL); } /* ---------------------------------------------------------------- @@ -407,5 +408,5 @@ ExecSeqScanInitializeWorker(SeqScanState *node, pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, 0, NULL); } diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 5e56e29a15f..6b37d1fcb74 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -454,7 +454,7 @@ ExecReScanTidScan(TidScanState *node) /* not really necessary, but seems good form */ if (node->ss.ss_currentScanDesc) - table_rescan(node->ss.ss_currentScanDesc, NULL); + table_rescan(node->ss.ss_currentScanDesc, 0, NULL); ExecScanReScan(&node->ss); } diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index a2bd5a897f8..252f5e661c1 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -293,8 +293,9 @@ extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, extern void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks); extern void heap_prepare_pagescan(TableScanDesc sscan); -extern void heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, - bool allow_strat, bool allow_sync, bool allow_pagemode); +extern void heap_rescan(TableScanDesc sscan, int nkeys, ScanKey key, + bool set_params, bool allow_strat, bool allow_sync, + bool allow_pagemode); extern void 
heap_endscan(TableScanDesc sscan); extern HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction); extern bool heap_getnextslot(TableScanDesc sscan, diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 1c9e802a6b1..6fa0fa55a33 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -334,9 +334,10 @@ typedef struct TableAmRoutine * Restart relation scan. If set_params is set to true, allow_{strat, * sync, pagemode} (see scan_begin) changes should be taken into account. */ - void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key, - bool set_params, bool allow_strat, - bool allow_sync, bool allow_pagemode); + void (*scan_rescan) (TableScanDesc scan, int nkeys, + struct ScanKeyData *key, bool set_params, + bool allow_strat, bool allow_sync, + bool allow_pagemode); /* * Return next tuple from `scan`, store in slot. @@ -985,10 +986,10 @@ table_endscan(TableScanDesc scan) * Restart a relation scan. */ static inline void -table_rescan(TableScanDesc scan, - struct ScanKeyData *key) +table_rescan(TableScanDesc scan, int nkeys, struct ScanKeyData *key) { - scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false); + scan->rs_rd->rd_tableam->scan_rescan(scan, nkeys, key, false, false, false, + false); } /* @@ -1000,10 +1001,10 @@ table_rescan(TableScanDesc scan, * previously selected startblock will be kept. */ static inline void -table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key, +table_rescan_set_params(TableScanDesc scan, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync, bool allow_pagemode) { - scan->rs_rd->rd_tableam->scan_rescan(scan, key, true, + scan->rs_rd->rd_tableam->scan_rescan(scan, nkeys, key, true, allow_strat, allow_sync, allow_pagemode); } @@ -1068,7 +1069,8 @@ table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid, /* Ensure table_beginscan_tidrange() was used. 
*/ Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0); - sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false); + sscan->rs_rd->rd_tableam->scan_rescan(sscan, 0, NULL, false, false, false, + false); sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); } @@ -1123,7 +1125,9 @@ extern void table_parallelscan_initialize(Relation rel, * Caller must hold a suitable lock on the relation. */ extern TableScanDesc table_beginscan_parallel(Relation relation, - ParallelTableScanDesc pscan); + ParallelTableScanDesc pscan, + int nkeys, + struct ScanKeyData *key); /* * Restart a parallel scan. Call this in the leader process. Caller is -- 2.39.5