From 3a6b08fc3219afd79dc81a5219e6a543d67036f6 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 2 Mar 2026 16:31:33 -0500 Subject: [PATCH v38 10/12] Pass down information on table modification to scan node Pass down information to sequential scan, index [only] scan, bitmap table scan, sample scan, and TID range scan nodes on whether or not the query modifies the relation being scanned. A later commit will use this information to update the VM during on-access pruning only if the relation is not modified by the query. Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Andrey Borodin Reviewed-by: Chao Li Discussion: https://postgr.es/m/4379FDA3-9446-4E2C-9C15-32EFE8D4F31B%40yandex-team.ru --- src/backend/access/heap/heapam_handler.c | 1 + src/backend/executor/nodeBitmapHeapscan.c | 9 ++++++- src/backend/executor/nodeIndexonlyscan.c | 25 +++++++++++++++--- src/backend/executor/nodeIndexscan.c | 32 ++++++++++++++++++++--- src/backend/executor/nodeSamplescan.c | 8 +++++- src/backend/executor/nodeSeqscan.c | 26 +++++++++++++++--- src/backend/executor/nodeTidrangescan.c | 24 ++++++++++++++--- src/include/access/heapam.h | 6 +++++ src/include/access/tableam.h | 3 +++ 9 files changed, 119 insertions(+), 15 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 6122603d11e..d35b688d751 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -86,6 +86,7 @@ heapam_index_fetch_begin(Relation rel, uint32 flags) hscan->xs_base.rel = rel; hscan->xs_cbuf = InvalidBuffer; hscan->xs_vmbuffer = InvalidBuffer; + hscan->modifies_base_rel = !(flags & SO_HINT_REL_READ_ONLY); return &hscan->xs_base; } diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 620fc7e259a..a5ab5e2b37f 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -104,11 +104,18 @@ 
BitmapTableScanSetup(BitmapHeapScanState *node) */ if (!node->ss.ss_currentScanDesc) { + uint32 flags = 0; + + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + node->ss.ps.state->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + node->ss.ss_currentScanDesc = table_beginscan_bm(node->ss.ss_currentRelation, node->ss.ps.state->es_snapshot, 0, - NULL, 0); + NULL, + flags); } node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator; diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index aefb792ee6e..6d7a32c1cb8 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -84,6 +84,12 @@ IndexOnlyNext(IndexOnlyScanState *node) if (scandesc == NULL) { + uint32 flags = 0; + + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + estate->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + /* * We reach here if the index only scan is not parallel, or if we're * serially executing an index only scan that was planned to be @@ -94,7 +100,8 @@ IndexOnlyNext(IndexOnlyScanState *node) estate->es_snapshot, &node->ioss_Instrument, node->ioss_NumScanKeys, - node->ioss_NumOrderByKeys, 0); + node->ioss_NumOrderByKeys, + flags); node->ioss_ScanDesc = scandesc; @@ -761,6 +768,7 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, ParallelIndexScanDesc piscan; bool instrument = node->ss.ps.instrument != NULL; bool parallel_aware = node->ss.ps.plan->parallel_aware; + uint32 flags = 0; if (!instrument && !parallel_aware) { @@ -782,13 +790,18 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, return; } + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + estate->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + node->ioss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->ioss_RelationDesc, &node->ioss_Instrument, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys, - piscan, 0); + piscan, flags); + 
node->ioss_ScanDesc->xs_want_itup = true; node->ioss_VMBuffer = InvalidBuffer; @@ -829,6 +842,7 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, ParallelIndexScanDesc piscan; bool instrument = node->ss.ps.instrument != NULL; bool parallel_aware = node->ss.ps.plan->parallel_aware; + uint32 flags = 0; if (!instrument && !parallel_aware) { @@ -848,13 +862,18 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, return; } + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + node->ss.ps.state->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + node->ioss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->ioss_RelationDesc, &node->ioss_Instrument, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys, - piscan, 0); + piscan, flags); + node->ioss_ScanDesc->xs_want_itup = true; /* diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 477cd4fcf99..52b7fc46593 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -102,6 +102,12 @@ IndexNext(IndexScanState *node) if (scandesc == NULL) { + uint32 flags = 0; + + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + estate->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + /* * We reach here if the index scan is not parallel, or if we're * serially executing an index scan that was planned to be parallel. 
@@ -111,7 +117,8 @@ IndexNext(IndexScanState *node) estate->es_snapshot, &node->iss_Instrument, node->iss_NumScanKeys, - node->iss_NumOrderByKeys, 0); + node->iss_NumOrderByKeys, + flags); node->iss_ScanDesc = scandesc; @@ -198,6 +205,12 @@ IndexNextWithReorder(IndexScanState *node) if (scandesc == NULL) { + uint32 flags = 0; + + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + estate->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + /* * We reach here if the index scan is not parallel, or if we're * serially executing an index scan that was planned to be parallel. @@ -207,7 +220,8 @@ IndexNextWithReorder(IndexScanState *node) estate->es_snapshot, &node->iss_Instrument, node->iss_NumScanKeys, - node->iss_NumOrderByKeys, 0); + node->iss_NumOrderByKeys, + flags); node->iss_ScanDesc = scandesc; @@ -1696,6 +1710,7 @@ ExecIndexScanInitializeDSM(IndexScanState *node, ParallelIndexScanDesc piscan; bool instrument = node->ss.ps.instrument != NULL; bool parallel_aware = node->ss.ps.plan->parallel_aware; + uint32 flags = 0; if (!instrument && !parallel_aware) { @@ -1717,13 +1732,17 @@ ExecIndexScanInitializeDSM(IndexScanState *node, return; } + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + estate->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + node->iss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->iss_RelationDesc, &node->iss_Instrument, node->iss_NumScanKeys, node->iss_NumOrderByKeys, - piscan, 0); + piscan, flags); /* * If no run-time keys to calculate or they are ready, go ahead and pass @@ -1762,6 +1781,7 @@ ExecIndexScanInitializeWorker(IndexScanState *node, ParallelIndexScanDesc piscan; bool instrument = node->ss.ps.instrument != NULL; bool parallel_aware = node->ss.ps.plan->parallel_aware; + uint32 flags = 0; if (!instrument && !parallel_aware) { @@ -1781,13 +1801,17 @@ ExecIndexScanInitializeWorker(IndexScanState *node, return; } + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + 
node->ss.ps.state->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + node->iss_ScanDesc = index_beginscan_parallel(node->ss.ss_currentRelation, node->iss_RelationDesc, &node->iss_Instrument, node->iss_NumScanKeys, node->iss_NumOrderByKeys, - piscan, 0); + piscan, flags); /* * If no run-time keys to calculate or they are ready, go ahead and pass diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index 47660baf2fa..62eff19bc4f 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -291,13 +291,19 @@ tablesample_init(SampleScanState *scanstate) /* Now we can create or reset the HeapScanDesc */ if (scanstate->ss.ss_currentScanDesc == NULL) { + uint32 flags = 0; + + if (!bms_is_member(((Scan *) scanstate->ss.ps.plan)->scanrelid, + scanstate->ss.ps.state->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + scanstate->ss.ss_currentScanDesc = table_beginscan_sampling(scanstate->ss.ss_currentRelation, scanstate->ss.ps.state->es_snapshot, 0, NULL, scanstate->use_bulkread, allow_sync, - scanstate->use_pagemode, 0); + scanstate->use_pagemode, flags); } else { diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index d9d7ec0516a..65349ea9c54 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -65,13 +65,20 @@ SeqNext(SeqScanState *node) if (scandesc == NULL) { + uint32 flags = 0; + + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + estate->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + /* * We reach here if the scan is not parallel, or if we're serially * executing a scan that was planned to be parallel. 
*/ scandesc = table_beginscan(node->ss.ss_currentRelation, estate->es_snapshot, - 0, NULL, 0); + 0, NULL, flags); + node->ss.ss_currentScanDesc = scandesc; } @@ -367,14 +374,20 @@ ExecSeqScanInitializeDSM(SeqScanState *node, { EState *estate = node->ss.ps.state; ParallelTableScanDesc pscan; + uint32 flags = 0; pscan = shm_toc_allocate(pcxt->toc, node->pscan_len); table_parallelscan_initialize(node->ss.ss_currentRelation, pscan, estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + estate->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan, 0); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, + flags); } /* ---------------------------------------------------------------- @@ -404,8 +417,15 @@ ExecSeqScanInitializeWorker(SeqScanState *node, ParallelWorkerContext *pwcxt) { ParallelTableScanDesc pscan; + uint32 flags = 0; + + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + node->ss.ps.state->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan, 0); + table_beginscan_parallel(node->ss.ss_currentRelation, + pscan, + flags); } diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c index 461edb8893b..7fbdf401734 100644 --- a/src/backend/executor/nodeTidrangescan.c +++ b/src/backend/executor/nodeTidrangescan.c @@ -242,10 +242,16 @@ TidRangeNext(TidRangeScanState *node) if (scandesc == NULL) { + uint32 flags = 0; + + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + estate->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation, estate->es_snapshot, &node->trss_mintid, - 
&node->trss_maxtid, 0); + &node->trss_maxtid, flags); node->ss.ss_currentScanDesc = scandesc; } else @@ -451,15 +457,21 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt) { EState *estate = node->ss.ps.state; ParallelTableScanDesc pscan; + uint32 flags = 0; pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen); table_parallelscan_initialize(node->ss.ss_currentRelation, pscan, estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); + + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + estate->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + node->ss.ss_currentScanDesc = table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, - pscan, 0); + pscan, flags); } /* ---------------------------------------------------------------- @@ -489,9 +501,15 @@ ExecTidRangeScanInitializeWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt) { ParallelTableScanDesc pscan; + uint32 flags = 0; pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); + + if (!bms_is_member(((Scan *) node->ss.ps.plan)->scanrelid, + node->ss.ps.state->es_modified_relids)) + flags |= SO_HINT_REL_READ_ONLY; + node->ss.ss_currentScanDesc = table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, - pscan, 0); + pscan, flags); } diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index c2621dc2fac..978ea90ffa2 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -131,6 +131,12 @@ typedef struct IndexFetchHeapData /* Current heap block's corresponding page in the visibility map */ Buffer xs_vmbuffer; + + /* + * Some optimizations can only be performed if the query does not modify + * the underlying relation. Track that here. 
+ */ + bool modifies_base_rel; } IndexFetchHeapData; /* Result codes for HeapTupleSatisfiesVacuum */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index f1065e30638..57ce94a386f 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -63,6 +63,9 @@ typedef enum ScanOptions /* unregister snapshot at scan end? */ SO_TEMP_SNAPSHOT = 1 << 9, + + /* set if the query doesn't modify the relation being scanned */ + SO_HINT_REL_READ_ONLY = 1 << 10, } ScanOptions; /* -- 2.43.0