From 5facc503442201929c416bd37233ccd3d85ee45d Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Sun, 5 Apr 2026 17:31:18 -0400 Subject: [PATCH v13 5/5] Add EXPLAIN (IO) instrumentation for TidRangeScan Adds support for EXPLAIN (IO) instrumentation for TidRange scans. This requires adding shared instrumentation for parallel scans, using the separate DSM approach introduced by dd78e69cfc33. Reviewed-by: Melanie Plageman Reviewed-by: Lukas Fittl Reviewed-by: Andres Freund Discussion: https://postgr.es/m/flat/a177a6dd-240b-455a-8f25-aca0b1c08c6e%40vondra.me --- src/backend/commands/explain.c | 25 +++++ src/backend/executor/execParallel.c | 12 +++ src/backend/executor/nodeTidrangescan.c | 127 ++++++++++++++++++++++-- src/include/executor/instrument_node.h | 18 ++++ src/include/executor/nodeTidrangescan.h | 9 ++ src/include/nodes/execnodes.h | 1 + src/tools/pgindent/typedefs.list | 2 + 7 files changed, 186 insertions(+), 8 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 18d424da944..b8e4c4eb2da 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -2149,6 +2149,7 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + show_scan_io_usage((ScanState *) planstate, es); } break; case T_ForeignScan: @@ -4127,6 +4128,30 @@ show_scan_io_usage(ScanState *planstate, ExplainState *es) } } + break; + } + case T_TidRangeScan: + { + SharedTidRangeScanInstrumentation *sinstrument + = ((TidRangeScanState *) planstate)->trss_sinstrument; + + if (sinstrument) + { + for (int i = 0; i < sinstrument->num_workers; ++i) + { + TidRangeScanInstrumentation *winstrument = &sinstrument->sinstrument[i]; + + AccumulateIOStats(&stats, &winstrument->stats.io); + + if (!es->workers_state) + continue; + + ExplainOpenWorker(i, es); + print_io_usage(es, &winstrument->stats.io); + ExplainCloseWorker(i, es); + } + } + break; } default: diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 9690f0938ae..81b87d82fab 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -291,6 +291,9 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) if (planstate->plan->parallel_aware) ExecTidRangeScanEstimate((TidRangeScanState *) planstate, e->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecTidRangeScanInstrumentEstimate((TidRangeScanState *) planstate, + e->pcxt); break; case T_AppendState: if (planstate->plan->parallel_aware) @@ -536,6 +539,9 @@ ExecParallelInitializeDSM(PlanState *planstate, if (planstate->plan->parallel_aware) ExecTidRangeScanInitializeDSM((TidRangeScanState *) planstate, d->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecTidRangeScanInstrumentInitDSM((TidRangeScanState *) planstate, + d->pcxt); break; case T_AppendState: if (planstate->plan->parallel_aware) @@ -1157,6 +1163,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, case T_SeqScanState: ExecSeqScanRetrieveInstrumentation((SeqScanState *) planstate); break; + case T_TidRangeScanState: + ExecTidRangeScanRetrieveInstrumentation((TidRangeScanState *) planstate); + break; default: break; } @@ -1430,6 +1439,9 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt) if (planstate->plan->parallel_aware) ExecTidRangeScanInitializeWorker((TidRangeScanState *) planstate, pwcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecTidRangeScanInstrumentInitWorker((TidRangeScanState *) planstate, + pwcxt); break; case T_AppendState: if (planstate->plan->parallel_aware) diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c index 4a8fe91b2b3..550916928cb 100644 --- a/src/backend/executor/nodeTidrangescan.c +++ b/src/backend/executor/nodeTidrangescan.c @@ -18,7 +18,9 @@ #include "access/sysattr.h" #include "access/tableam.h" #include "catalog/pg_operator.h" +#include "executor/execParallel.h" #include "executor/executor.h" +#include "executor/instrument.h" #include "executor/nodeTidrangescan.h" #include "nodes/nodeFuncs.h" #include "utils/rel.h" @@ -242,12 +244,19 @@ TidRangeNext(TidRangeScanState *node) if (scandesc == NULL) { + uint32 flags = SO_NONE; + + if (ScanRelIsReadOnly(&node->ss)) + flags |= SO_HINT_REL_READ_ONLY; + + if (estate->es_instrument & INSTRUMENT_IO) + flags |= SO_SCAN_INSTRUMENT; + scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation, estate->es_snapshot, &node->trss_mintid, &node->trss_maxtid, - ScanRelIsReadOnly(&node->ss) ? - SO_HINT_REL_READ_ONLY : SO_NONE); + flags); node->ss.ss_currentScanDesc = scandesc; } else @@ -342,6 +351,20 @@ ExecEndTidRangeScan(TidRangeScanState *node) { TableScanDesc scan = node->ss.ss_currentScanDesc; + /* Collect IO stats for this process into shared instrumentation */ + if (node->trss_sinstrument != NULL && IsParallelWorker()) + { + TidRangeScanInstrumentation *si; + + Assert(ParallelWorkerNumber < node->trss_sinstrument->num_workers); + si = &node->trss_sinstrument->sinstrument[ParallelWorkerNumber]; + + if (scan && scan->rs_instrument) + { + AccumulateIOStats(&si->stats.io, &scan->rs_instrument->io); + } + } + if (scan != NULL) table_endscan(scan); } @@ -454,6 +477,13 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt) { EState *estate = node->ss.ps.state; ParallelTableScanDesc pscan; + uint32 flags = SO_NONE; + + if (ScanRelIsReadOnly(&node->ss)) + flags |= SO_HINT_REL_READ_ONLY; + + if (estate->es_instrument & INSTRUMENT_IO) + flags |= SO_SCAN_INSTRUMENT; pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen); table_parallelscan_initialize(node->ss.ss_currentRelation, @@ -462,9 +492,7 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt) shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, - pscan, - ScanRelIsReadOnly(&node->ss) ? - SO_HINT_REL_READ_ONLY : SO_NONE); + pscan, flags); } /* ---------------------------------------------------------------- @@ -494,11 +522,94 @@ ExecTidRangeScanInitializeWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt) { ParallelTableScanDesc pscan; + uint32 flags = SO_NONE; + + if (ScanRelIsReadOnly(&node->ss)) + flags |= SO_HINT_REL_READ_ONLY; + + if (node->ss.ps.state->es_instrument) + flags |= SO_SCAN_INSTRUMENT; pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, - pscan, - ScanRelIsReadOnly(&node->ss) ? - SO_HINT_REL_READ_ONLY : SO_NONE); + pscan, flags); +} + +/* + * Compute the amount of space we'll need for the shared instrumentation and + * inform pcxt->estimator. + */ +void +ExecTidRangeScanInstrumentEstimate(TidRangeScanState *node, + ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + + if (!estate->es_instrument || pcxt->nworkers == 0) + return; + + shm_toc_estimate_chunk(&pcxt->estimator, + offsetof(SharedTidRangeScanInstrumentation, sinstrument) + + sizeof(TidRangeScanInstrumentation) * pcxt->nworkers); + shm_toc_estimate_keys(&pcxt->estimator, 1); +} + +/* + * Set up parallel scan instrumentation. + */ +void +ExecTidRangeScanInstrumentInitDSM(TidRangeScanState *node, + ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + SharedTidRangeScanInstrumentation *sinstrument; + Size size; + + if (!estate->es_instrument || pcxt->nworkers == 0) + return; + + size = offsetof(SharedTidRangeScanInstrumentation, sinstrument) + + sizeof(TidRangeScanInstrumentation) * pcxt->nworkers; + sinstrument = shm_toc_allocate(pcxt->toc, size); + memset(sinstrument, 0, size); + sinstrument->num_workers = pcxt->nworkers; + shm_toc_insert(pcxt->toc, + node->ss.ps.plan->plan_node_id + PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET, + sinstrument); + node->trss_sinstrument = sinstrument; +} + +/* + * Look up and save the location of the shared instrumentation. + */ +void +ExecTidRangeScanInstrumentInitWorker(TidRangeScanState *node, + ParallelWorkerContext *pwcxt) +{ + if (!node->ss.ps.state->es_instrument) + return; + + node->trss_sinstrument = shm_toc_lookup(pwcxt->toc, + node->ss.ps.plan->plan_node_id + PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET, + true); +} + +/* + * Transfer scan instrumentation from DSM to private memory. + */ +void +ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node) +{ + SharedTidRangeScanInstrumentation *sinstrument = node->trss_sinstrument; + Size size; + + if (sinstrument == NULL) + return; + + size = offsetof(SharedTidRangeScanInstrumentation, sinstrument) + + sinstrument->num_workers * sizeof(TidRangeScanInstrumentation); + + node->trss_sinstrument = palloc(size); + memcpy(node->trss_sinstrument, sinstrument, size); } diff --git a/src/include/executor/instrument_node.h b/src/include/executor/instrument_node.h index 003dc262b5d..4076990408e 100644 --- a/src/include/executor/instrument_node.h +++ b/src/include/executor/instrument_node.h @@ -285,4 +285,22 @@ typedef struct SharedSeqScanInstrumentation SeqScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; } SharedSeqScanInstrumentation; + +/* + * Instrumentation information for TID range scans + */ +typedef struct TidRangeScanInstrumentation +{ + TableScanInstrumentation stats; +} TidRangeScanInstrumentation; + +/* + * Shared memory container for per-worker information + */ +typedef struct SharedTidRangeScanInstrumentation +{ + int num_workers; + TidRangeScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedTidRangeScanInstrumentation; + #endif /* INSTRUMENT_NODE_H */ diff --git a/src/include/executor/nodeTidrangescan.h b/src/include/executor/nodeTidrangescan.h index 8752d1ea8c4..9e7d0a357bb 100644 --- a/src/include/executor/nodeTidrangescan.h +++ b/src/include/executor/nodeTidrangescan.h @@ -28,4 +28,13 @@ extern void ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelConte extern void ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt); extern void ExecTidRangeScanInitializeWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt); +/* instrument support */ +extern void ExecTidRangeScanInstrumentEstimate(TidRangeScanState *node, + ParallelContext *pcxt); +extern void ExecTidRangeScanInstrumentInitDSM(TidRangeScanState *node, + ParallelContext *pcxt); +extern void ExecTidRangeScanInstrumentInitWorker(TidRangeScanState *node, + ParallelWorkerContext *pwcxt); +extern void ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node); + #endif /* NODETIDRANGESCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 56febb3204c..13359180d25 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1922,6 +1922,7 @@ typedef struct TidRangeScanState ItemPointerData trss_maxtid; bool trss_inScan; Size trss_pscanlen; + struct SharedTidRangeScanInstrumentation *trss_sinstrument; } TidRangeScanState; /* ---------------- diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index c0436a13ac3..0cd41106975 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2867,6 +2867,7 @@ SharedRecordTableKey SharedRecordTypmodRegistry SharedSeqScanInstrumentation SharedSortInfo +SharedTidRangeScanInstrumentation SharedTuplestore SharedTuplestoreAccessor SharedTuplestoreChunk @@ -3171,6 +3172,7 @@ TidOpExpr TidPath TidRangePath TidRangeScan +TidRangeScanInstrumentation TidRangeScanState TidScan TidScanState -- 2.53.0