From 72d8bbd24c379a487dcdca633e62ae5b539b836e Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Thu, 22 Jan 2026 13:07:13 -0500 Subject: [PATCH v10 06/11] Use ExecSetTupleBound hint during index scans. This gives index scans a way to avoid using a read stream during certain kinds of queries that are very unlikely to benefit from prefetching: queries whose plan involves a LIMIT node that consumes tuples from an index scan (or index-only scan) node. Testing has shown this to be particularly important with nested loop joins with a LIMIT on an inner index scan. This is typical of nested loop anti-joins and nested loop semi-joins. XXX This is still very much a WIP. Author: Peter Geoghegan Reviewed-By: Tomas Vondra --- src/include/access/relscan.h | 2 ++ src/include/nodes/execnodes.h | 4 +++ src/backend/access/heap/heapam_handler.c | 5 +++ src/backend/access/index/genam.c | 2 ++ src/backend/executor/execProcnode.c | 44 ++++++++++++++++++++++++ src/backend/executor/nodeIndexonlyscan.c | 10 ++++++ src/backend/executor/nodeIndexscan.c | 13 +++++++ 7 files changed, 80 insertions(+) diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 27d64a363..e33b66633 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -368,6 +368,8 @@ typedef struct IndexScanDescData /* parallel index scan information, in shared memory */ struct ParallelIndexScanDescData *parallel_scan; + + int64 tuples_needed; } IndexScanDescData; /* Generic structure for parallel scans */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 793b1a3c6..4693b0fd7 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1696,6 +1696,7 @@ typedef struct * ScanDesc index scan descriptor * Instrument local index scan instrumentation * SharedInfo parallel worker instrumentation (no leader entry) + * TuplesNeeded tuple bound, see ExecSetTupleBound * * ReorderQueue tuples that need reordering due to re-check * 
ReachedEnd have we fetched all tuples from index already? @@ -1724,6 +1725,7 @@ typedef struct IndexScanState struct IndexScanDescData *iss_ScanDesc; IndexScanInstrumentation iss_Instrument; SharedIndexScanInstrumentation *iss_SharedInfo; + int64 iss_TuplesNeeded; /* These are needed for re-checking ORDER BY expr ordering */ pairingheap *iss_ReorderQueue; @@ -1752,6 +1754,7 @@ typedef struct IndexScanState * ScanDesc index scan descriptor * Instrument local index scan instrumentation * SharedInfo parallel worker instrumentation (no leader entry) + * TuplesNeeded tuple bound, see ExecSetTupleBound * TableSlot slot for holding tuples fetched from the table * PscanLen size of parallel index-only scan descriptor * NameCStringAttNums attnums of name typed columns to pad to NAMEDATALEN @@ -1774,6 +1777,7 @@ typedef struct IndexOnlyScanState struct IndexScanDescData *ioss_ScanDesc; IndexScanInstrumentation ioss_Instrument; SharedIndexScanInstrumentation *ioss_SharedInfo; + int64 ioss_TuplesNeeded; TupleTableSlot *ioss_TableSlot; Size ioss_PscanLen; AttrNumber *ioss_NameCStringAttNums; diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 8ce749873..1f7338a37 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -522,9 +522,14 @@ heapam_batch_getnext(IndexScanDesc scan, ScanDirection direction, * haven't done any heap fetches yet. We don't want to waste any * cycles on allocating a read stream until we have a demonstrated * need for perform heap fetches. + * + * Also avoid prefetching when the core executor passes the scan a + * tuples_needed hint that indicates that the scan is likely to end + * before long. 
*/ if (!hscan->xs_read_stream && priorBatch && scan->MVCCScan && hscan->xs_blk != InvalidBlockNumber && /* for index-only scans */ + (scan->tuples_needed == -1 || scan->tuples_needed > 10) && io_method != IOMETHOD_SYNC && enable_indexscan_prefetch) { Assert(!batchringbuf->prefetchPos.valid); diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 18dccd3c9..c56a9f052 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -126,6 +126,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->xs_hitup = NULL; scan->xs_hitupdesc = NULL; + scan->tuples_needed = -1; /* no limit */ + return scan; } diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 7e40b8525..8ae8fbebe 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -72,6 +72,7 @@ */ #include "postgres.h" +#include "access/relscan.h" #include "executor/executor.h" #include "executor/nodeAgg.h" #include "executor/nodeAppend.h" @@ -977,6 +978,49 @@ ExecSetTupleBound(int64 tuples_needed, PlanState *child_node) ExecSetTupleBound(tuples_needed, outerPlanState(child_node)); } + else if (IsA(child_node, IndexScanState)) + { + /* + * If it is an IndexScan, save the tuples_needed in the state so it + * can be propagated to the IndexScanDesc when the scan is started. + * + * Note: As with Sort, the index scan node is responsible for reacting + * properly to changes to this parameter. 
+ */ + IndexScanState *isstate = (IndexScanState *) child_node; + + isstate->iss_TuplesNeeded = tuples_needed; + + /* If scan already started, update the IndexScanDesc too */ + if (isstate->iss_ScanDesc) + isstate->iss_ScanDesc->tuples_needed = tuples_needed; + } + else if (IsA(child_node, IndexOnlyScanState)) + { + /* Same comments as for IndexScan */ + IndexOnlyScanState *iosstate = (IndexOnlyScanState *) child_node; + + iosstate->ioss_TuplesNeeded = tuples_needed; + + /* If scan already started, update the IndexScanDesc too */ + if (iosstate->ioss_ScanDesc) + iosstate->ioss_ScanDesc->tuples_needed = tuples_needed; + } + else if (IsA(child_node, NestLoopState)) + { + /* + * For NestLoop joins where each outer tuple produces at most one + * output tuple, we can propagate the bound to the outer child + */ + NestLoopState *nlstate = (NestLoopState *) child_node; + JoinType jointype = nlstate->js.jointype; + + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || + nlstate->js.single_match) + { + ExecSetTupleBound(tuples_needed, outerPlanState(child_node)); + } + } /* * In principle we could descend through any plan node type that is diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index ff3e8f302..8a068634b 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -96,6 +96,9 @@ IndexOnlyNext(IndexOnlyScanState *node) node->ioss_ScanDesc = scandesc; Assert(node->ioss_ScanDesc->xs_want_itup); + /* Pass down any tuple bound */ + scandesc->tuples_needed = node->ioss_TuplesNeeded; + /* * If no run-time keys to calculate or they are ready, go ahead and * pass the scankeys to the index AM. 
@@ -524,6 +527,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) indexstate->ioss_RuntimeKeysReady = false; indexstate->ioss_RuntimeKeys = NULL; indexstate->ioss_NumRuntimeKeys = 0; + indexstate->ioss_TuplesNeeded = -1; /* * build the index scan keys from the index qualification @@ -700,6 +704,9 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, piscan); Assert(node->ioss_ScanDesc->xs_want_itup); + /* Pass down any tuple bound */ + node->ioss_ScanDesc->tuples_needed = node->ioss_TuplesNeeded; + /* * If no run-time keys to calculate or they are ready, go ahead and pass * the scankeys to the index AM. @@ -765,6 +772,9 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, piscan); Assert(node->ioss_ScanDesc->xs_want_itup); + /* Pass down any tuple bound */ + node->ioss_ScanDesc->tuples_needed = node->ioss_TuplesNeeded; + /* * If no run-time keys to calculate or they are ready, go ahead and pass * the scankeys to the index AM. diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index a25e40e1f..7641adc14 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -115,6 +115,9 @@ IndexNext(IndexScanState *node) node->iss_ScanDesc = scandesc; + /* Pass down any tuple bound */ + scandesc->tuples_needed = node->iss_TuplesNeeded; + /* * If no run-time keys to calculate or they are ready, go ahead and * pass the scankeys to the index AM. @@ -211,6 +214,9 @@ IndexNextWithReorder(IndexScanState *node) node->iss_ScanDesc = scandesc; + /* Pass down any tuple bound */ + scandesc->tuples_needed = node->iss_TuplesNeeded; + /* * If no run-time keys to calculate or they are ready, go ahead and * pass the scankeys to the index AM. 
@@ -982,6 +988,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->iss_RuntimeKeysReady = false; indexstate->iss_RuntimeKeys = NULL; indexstate->iss_NumRuntimeKeys = 0; + indexstate->iss_TuplesNeeded = -1; /* * build the index scan keys from the index qualification @@ -1726,6 +1733,9 @@ ExecIndexScanInitializeDSM(IndexScanState *node, node->iss_NumOrderByKeys, piscan); + /* Pass down any tuple bound */ + node->iss_ScanDesc->tuples_needed = node->iss_TuplesNeeded; + /* * If no run-time keys to calculate or they are ready, go ahead and pass * the scankeys to the index AM. @@ -1790,6 +1800,9 @@ ExecIndexScanInitializeWorker(IndexScanState *node, node->iss_NumOrderByKeys, piscan); + /* Pass down any tuple bound */ + node->iss_ScanDesc->tuples_needed = node->iss_TuplesNeeded; + /* * If no run-time keys to calculate or they are ready, go ahead and pass * the scankeys to the index AM. -- 2.51.0