From 03899df55da0e525668ad6ef0d37fcc18ecf5dab Mon Sep 17 00:00:00 2001 From: test Date: Tue, 7 Apr 2026 01:38:53 +0200 Subject: [PATCH v12 6/7] Add EXPLAIN (IO) instrumentation for SeqScan --- src/backend/commands/explain.c | 25 ++++++ src/backend/executor/execParallel.c | 11 +++ src/backend/executor/nodeSeqscan.c | 116 +++++++++++++++++++++++-- src/include/executor/instrument_node.h | 19 ++++ src/include/executor/nodeSeqscan.h | 9 ++ src/include/nodes/execnodes.h | 1 + src/test/regress/expected/explain.out | 18 +++- src/test/regress/sql/explain.sql | 4 +- src/tools/pgindent/typedefs.list | 2 + 9 files changed, 192 insertions(+), 13 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 863a9dd0f0d..ac1bbf19a80 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -2032,6 +2032,7 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); if (IsA(plan, CteScan)) show_ctescan_info(castNode(CteScanState, planstate), es); + show_scan_io_usage((ScanState *) planstate, es); break; case T_Gather: { @@ -4086,6 +4087,30 @@ show_scan_io_usage(ScanState *planstate, ExplainState *es) } } + break; + } + case T_SeqScan: + { + SharedSeqScanInstrumentation *sinstrument + = ((SeqScanState *) planstate)->sinstrument; + + if (sinstrument) + { + for (int i = 0; i < sinstrument->num_workers; ++i) + { + SeqScanInstrumentation *winstrument = &sinstrument->sinstrument[i]; + + AccumulateIOStats(&stats, &winstrument->stats.io); + + if (!es->workers_state) + continue; + + ExplainOpenWorker(i, es); + print_io_usage(es, &winstrument->stats.io); + ExplainCloseWorker(i, es); + } + } + break; } default: diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 1a5ec0c305f..9690f0938ae 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -257,6 +257,9 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) if (planstate->plan->parallel_aware) ExecSeqScanEstimate((SeqScanState *) planstate, e->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecSeqScanInstrumentEstimate((SeqScanState *) planstate, + e->pcxt); break; case T_IndexScanState: if (planstate->plan->parallel_aware) @@ -500,6 +503,9 @@ ExecParallelInitializeDSM(PlanState *planstate, if (planstate->plan->parallel_aware) ExecSeqScanInitializeDSM((SeqScanState *) planstate, d->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecSeqScanInstrumentInitDSM((SeqScanState *) planstate, + d->pcxt); break; case T_IndexScanState: if (planstate->plan->parallel_aware) @@ -1148,6 +1154,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, case T_BitmapHeapScanState: ExecBitmapHeapRetrieveInstrumentation((BitmapHeapScanState *) planstate); break; + case T_SeqScanState: + ExecSeqScanRetrieveInstrumentation((SeqScanState *) planstate); + break; default: break; } @@ -1388,6 +1397,8 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt) case T_SeqScanState: if (planstate->plan->parallel_aware) ExecSeqScanInitializeWorker((SeqScanState *) planstate, pwcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecSeqScanInstrumentInitWorker((SeqScanState *) planstate, pwcxt); break; case T_IndexScanState: if (planstate->plan->parallel_aware) diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 04803b0e37d..b95ac2a8696 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -29,6 +29,7 @@ #include "access/relscan.h" #include "access/tableam.h" +#include "executor/execParallel.h" #include "executor/execScan.h" #include "executor/executor.h" #include "executor/nodeSeqscan.h" @@ -65,15 +66,21 @@ SeqNext(SeqScanState *node) if (scandesc == NULL) { + uint32 flags = SO_NONE; + + if (ScanRelIsReadOnly(&node->ss)) + flags |= SO_HINT_REL_READ_ONLY; + + if (estate->es_instrument & INSTRUMENT_IO) + flags |= SO_SCAN_INSTRUMENT; + /* * We reach here if the scan is not parallel, or if we're serially * executing a scan that was planned to be parallel. */ scandesc = table_beginscan(node->ss.ss_currentRelation, estate->es_snapshot, - 0, NULL, - ScanRelIsReadOnly(&node->ss) ? - SO_HINT_REL_READ_ONLY : SO_NONE); + 0, NULL, flags); node->ss.ss_currentScanDesc = scandesc; } @@ -297,6 +304,24 @@ ExecEndSeqScan(SeqScanState *node) { TableScanDesc scanDesc; + /* + * Collect IO stats for this process into shared instrumentation. + */ + if (node->sinstrument != NULL && IsParallelWorker()) + { + SeqScanInstrumentation *si; + + Assert(ParallelWorkerNumber <= node->sinstrument->num_workers); + si = &node->sinstrument->sinstrument[ParallelWorkerNumber]; + + if (node->ss.ss_currentScanDesc && + node->ss.ss_currentScanDesc->rs_instrument) + { + AccumulateIOStats(&si->stats.io, + &node->ss.ss_currentScanDesc->rs_instrument->io); + } + } + /* * get information from node */ @@ -370,6 +395,13 @@ ExecSeqScanInitializeDSM(SeqScanState *node, { EState *estate = node->ss.ps.state; ParallelTableScanDesc pscan; + uint32 flags = SO_NONE; + + if (ScanRelIsReadOnly(&node->ss)) + flags |= SO_HINT_REL_READ_ONLY; + + if (estate->es_instrument) + flags |= SO_SCAN_INSTRUMENT; pscan = shm_toc_allocate(pcxt->toc, node->pscan_len); table_parallelscan_initialize(node->ss.ss_currentRelation, @@ -378,9 +410,7 @@ ExecSeqScanInitializeDSM(SeqScanState *node, shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan, - ScanRelIsReadOnly(&node->ss) ? - SO_HINT_REL_READ_ONLY : SO_NONE); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, flags); } /* ---------------------------------------------------------------- @@ -410,10 +440,78 @@ ExecSeqScanInitializeWorker(SeqScanState *node, ParallelWorkerContext *pwcxt) { ParallelTableScanDesc pscan; + uint32 flags = SO_NONE; + + if (ScanRelIsReadOnly(&node->ss)) + flags |= SO_HINT_REL_READ_ONLY; + + if (node->ss.ps.state->es_instrument) + flags |= SO_SCAN_INSTRUMENT; pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan, - ScanRelIsReadOnly(&node->ss) ? - SO_HINT_REL_READ_ONLY : SO_NONE); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, flags); +} + +void +ExecSeqScanInstrumentEstimate(SeqScanState *node, ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + + if (!estate->es_instrument || pcxt->nworkers == 0) + return; + + shm_toc_estimate_chunk(&pcxt->estimator, + offsetof(SharedSeqScanInstrumentation, sinstrument) + + sizeof(SeqScanInstrumentation) * pcxt->nworkers); + shm_toc_estimate_keys(&pcxt->estimator, 1); +} + +void +ExecSeqScanInstrumentInitDSM(SeqScanState *node, ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + SharedSeqScanInstrumentation *sinstrument; + Size size; + + if (!estate->es_instrument || pcxt->nworkers == 0) + return; + + size = offsetof(SharedSeqScanInstrumentation, sinstrument) + + sizeof(SeqScanInstrumentation) * pcxt->nworkers; + sinstrument = shm_toc_allocate(pcxt->toc, size); + memset(sinstrument, 0, size); + sinstrument->num_workers = pcxt->nworkers; + shm_toc_insert(pcxt->toc, + node->ss.ps.plan->plan_node_id + PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET, + sinstrument); + node->sinstrument = sinstrument; +} + +void +ExecSeqScanInstrumentInitWorker(SeqScanState *node, + ParallelWorkerContext *pwcxt) +{ + if (!node->ss.ps.state->es_instrument) + return; + + node->sinstrument = shm_toc_lookup(pwcxt->toc, + node->ss.ps.plan->plan_node_id + PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET, + true); +} + +void +ExecSeqScanRetrieveInstrumentation(SeqScanState *node) +{ + SharedSeqScanInstrumentation *sinstrument = node->sinstrument; + Size size; + + if (sinstrument == NULL) + return; + + size = offsetof(SharedSeqScanInstrumentation, sinstrument) + + sinstrument->num_workers * sizeof(SeqScanInstrumentation); + + node->sinstrument = palloc(size); + memcpy(node->sinstrument, sinstrument, size); } diff --git a/src/include/executor/instrument_node.h b/src/include/executor/instrument_node.h index 22a75ccd863..003dc262b5d 100644 --- a/src/include/executor/instrument_node.h +++ b/src/include/executor/instrument_node.h @@ -266,4 +266,23 @@ typedef struct SharedIncrementalSortInfo IncrementalSortInfo sinfo[FLEXIBLE_ARRAY_MEMBER]; } SharedIncrementalSortInfo; + +/* --------------------- + * Instrumentation information for sequential scans + * --------------------- + */ +typedef struct SeqScanInstrumentation +{ + TableScanInstrumentation stats; +} SeqScanInstrumentation; + +/* + * Shared memory container for per-worker information + */ +typedef struct SharedSeqScanInstrumentation +{ + int num_workers; + SeqScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedSeqScanInstrumentation; + #endif /* INSTRUMENT_NODE_H */ diff --git a/src/include/executor/nodeSeqscan.h b/src/include/executor/nodeSeqscan.h index 7a1490596fb..9c0ad4879d7 100644 --- a/src/include/executor/nodeSeqscan.h +++ b/src/include/executor/nodeSeqscan.h @@ -28,4 +28,13 @@ extern void ExecSeqScanReInitializeDSM(SeqScanState *node, ParallelContext *pcxt extern void ExecSeqScanInitializeWorker(SeqScanState *node, ParallelWorkerContext *pwcxt); +/* instrument support */ +extern void ExecSeqScanInstrumentEstimate(SeqScanState *node, + ParallelContext *pcxt); +extern void ExecSeqScanInstrumentInitDSM(SeqScanState *node, + ParallelContext *pcxt); +extern void ExecSeqScanInstrumentInitWorker(SeqScanState *node, + ParallelWorkerContext *pwcxt); +extern void ExecSeqScanRetrieveInstrumentation(SeqScanState *node); + #endif /* NODESEQSCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3ecae7552fc..56febb3204c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1670,6 +1670,7 @@ typedef struct SeqScanState { ScanState ss; /* its first field is NodeTag */ Size pscan_len; /* size of parallel heap scan descriptor */ + struct SharedSeqScanInstrumentation *sinstrument; } SeqScanState; /* ---------------- diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out index dc31c7ce9f9..74a4d87801e 100644 --- a/src/test/regress/expected/explain.out +++ b/src/test/regress/expected/explain.out @@ -100,7 +100,7 @@ select explain_filter('explain (buffers, format text) select * from int8_tbl i8' (1 row) \a -select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8'); +select explain_filter('explain (analyze, buffers, io, format xml) select * from int8_tbl i8'); explain_filter @@ -119,6 +119,13 @@ explain_filter N.N N false + N.N + N + N + N + N + N.N + N.N N N N @@ -149,7 +156,7 @@ explain_filter (1 row) -select explain_filter('explain (analyze, serialize, buffers, format yaml) select * from int8_tbl i8'); +select explain_filter('explain (analyze, serialize, buffers, io, format yaml) select * from int8_tbl i8'); explain_filter - Plan: Node Type: "Seq Scan" @@ -166,6 +173,13 @@ explain_filter Actual Rows: N.N Actual Loops: N Disabled: false + Average Prefetch Distance: N.N + Max Prefetch Distance: N + Prefetch Capacity: N + I/O Count: N + I/O Waits: N + Average I/O Size: N.N + Average I/Os In Progress: N.N Shared Hit Blocks: N Shared Read Blocks: N Shared Dirtied Blocks: N diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql index 8f10e1aff55..2f163c64bf6 100644 --- a/src/test/regress/sql/explain.sql +++ b/src/test/regress/sql/explain.sql @@ -69,8 +69,8 @@ select explain_filter('explain (analyze, buffers, format text) select * from int select explain_filter('explain (buffers, format text) select * from int8_tbl i8'); \a -select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8'); -select explain_filter('explain (analyze, serialize, buffers, format yaml) select * from int8_tbl i8'); +select explain_filter('explain (analyze, buffers, io, format xml) select * from int8_tbl i8'); +select explain_filter('explain (analyze, serialize, buffers, io, format yaml) select * from int8_tbl i8'); select explain_filter('explain (buffers, format json) select * from int8_tbl i8'); \a diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 98b8d78e693..c0436a13ac3 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2800,6 +2800,7 @@ SelfJoinCandidate SemTPadded SemiAntiJoinFactors SeqScan +SeqScanInstrumentation SeqScanState SeqTable SeqTableData @@ -2864,6 +2865,7 @@ SharedMemoizeInfo SharedRecordTableEntry SharedRecordTableKey SharedRecordTypmodRegistry +SharedSeqScanInstrumentation SharedSortInfo SharedTuplestore SharedTuplestoreAccessor -- 2.53.0