From fead2d30bf6116f48bbe630a73a1161c081c7ed6 Mon Sep 17 00:00:00 2001 From: rafaelthca Date: Tue, 25 Mar 2025 19:05:59 -0300 Subject: [PATCH v13] Proposal for progressive explains This proposal introduces a feature to print execution plans of active queries in an in-memory shared hash object so that other sessions can visualize via new view pg_stat_progress_explain. Plans are only printed if new GUC parameter progressive_explain is enabled. When progressive_explain is set to 'explain' the plan will be printed only once after progressive_explain_min_duration. If set to 'analyze' the QueryDesc will be adjusted adding instrumentation flags. In that case the plan will be printed on a fixed interval controlled by new GUC parameter progressive_explain_interval including all instrumentation stats computed so far (per node rows and execution time). New view: - pg_stat_progress_explain - datid: OID of the database - datname: Name of the database - pid: PID of the process running the query - last_update: timestamp when plan was last printed - query_plan: the actual plan (limited read privileges) New GUCs: - progressive_explain: if progressive plans are printed for local session. - type: enum - default: off - values: [off, explain, analyze] - context: user - progressive_explain_min_duration: min query duration until progressive explain starts. - type: int - default: 1s - min: 0 - context: user - progressive_explain_interval: interval between each explain print. - type: int - default: 1s - min: 10ms - context: user - progressive_explain_format: format used to print the plans. - type: enum - default: text - values: [TEXT, XML, JSON, or YAML] - context: user - progressive_explain_settings: controls whether information about modified configuration is added to the printed plan. - type: bool - default: off - context: user - progressive_explain_verbose: controls whether verbose details are added to the printed plan. - type: bool - default: off - context: user - progressive_explain_buffers: controls whether buffers details are added to the printed plan. - type: bool - default: off - context: user - progressive_explain_timing: controls whether per node timing details are added to the printed plan. - type: bool - default: true - context: user - progressive_explain_wal: controls whether WAL record generation details are added to the printed plan. - type: bool - default: off - context: user - progressive_explain_costs: controls whether estimated startup and total cost of each plan noded is added to the printed plan. - type: bool - default: true - context: user --- contrib/auto_explain/auto_explain.c | 10 +- doc/src/sgml/config.sgml | 156 ++++ doc/src/sgml/monitoring.sgml | 82 ++ doc/src/sgml/perform.sgml | 97 +++ src/backend/access/transam/xact.c | 3 + src/backend/catalog/system_views.sql | 10 + src/backend/commands/Makefile | 1 + src/backend/commands/explain.c | 159 ++-- src/backend/commands/explain_format.c | 12 + src/backend/commands/explain_progressive.c | 762 ++++++++++++++++++ src/backend/commands/meson.build | 1 + src/backend/executor/execMain.c | 24 + src/backend/executor/execProcnode.c | 14 +- src/backend/executor/instrument.c | 20 +- src/backend/storage/ipc/ipci.c | 7 + src/backend/storage/lmgr/lwlock.c | 1 + src/backend/tcop/pquery.c | 3 + .../utils/activity/wait_event_names.txt | 1 + src/backend/utils/init/postinit.c | 19 + src/backend/utils/misc/guc_tables.c | 130 +++ src/backend/utils/misc/postgresql.conf.sample | 14 + src/include/catalog/pg_proc.dat | 10 + src/include/commands/explain_progressive.h | 57 ++ src/include/commands/explain_state.h | 9 + src/include/executor/execdesc.h | 1 + src/include/executor/instrument.h | 1 + src/include/nodes/execnodes.h | 6 + src/include/storage/lwlock.h | 1 + src/include/storage/lwlocklist.h | 1 + src/include/utils/guc.h | 11 + src/include/utils/timeout.h | 2 + .../test_misc/t/008_progressive_explain.pl | 130 +++ src/test/regress/expected/rules.out | 7 + src/tools/pgindent/typedefs.list | 3 + 34 files changed, 1706 insertions(+), 59 deletions(-) create mode 100644 src/backend/commands/explain_progressive.c create mode 100644 src/include/commands/explain_progressive.h create mode 100644 src/test/modules/test_misc/t/008_progressive_explain.pl diff --git a/contrib/auto_explain/auto_explain.c b/contrib/auto_explain/auto_explain.c index cd6625020a7..0d28ae2ffe1 100644 --- a/contrib/auto_explain/auto_explain.c +++ b/contrib/auto_explain/auto_explain.c @@ -42,14 +42,6 @@ static int auto_explain_log_level = LOG; static bool auto_explain_log_nested_statements = false; static double auto_explain_sample_rate = 1; -static const struct config_enum_entry format_options[] = { - {"text", EXPLAIN_FORMAT_TEXT, false}, - {"xml", EXPLAIN_FORMAT_XML, false}, - {"json", EXPLAIN_FORMAT_JSON, false}, - {"yaml", EXPLAIN_FORMAT_YAML, false}, - {NULL, 0, false} -}; - static const struct config_enum_entry loglevel_options[] = { {"debug5", DEBUG5, false}, {"debug4", DEBUG4, false}, @@ -191,7 +183,7 @@ _PG_init(void) NULL, &auto_explain_log_format, EXPLAIN_FORMAT_TEXT, - format_options, + explain_format_options, PGC_SUSET, 0, NULL, diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 69fc93dffc4..ff0b147877e 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8674,6 +8674,162 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + progressive_explain (enum) + + progressive_explain configuration parameter + + + + + Determines whether progressive explains are enabled and how + they are executed; see . + Possible values are off, explain + and analyze. The default is off. + When set to explain the plan will be printed only + once after . If + set to analyze, instrumentation flags are enabled, + causing the plan to be printed on a fixed interval controlled by + including all + instrumentation stats computed so far. + + + + + + progressive_explain_min_duration (integer) + + progressive_explain_min_duration configuration parameter + + + + + Sets the threshold (in milliseconds) until progressive explain is + printed for the first time. The default is 1s. + + + + + + progressive_explain_interval (integer) + + progressive_explain_interval configuration parameter + + + + + Sets the interval (in milliseconds) between each instrumented + progressive explain. The default is 1s. + + + + + + progressive_explain_buffers (boolean) + + progressive_explain_buffers configuration parameter + + + + + Controls whether information on buffer usage is added to + progressive explains. Equivalent to the BUFFERS option of + EXPLAIN. The default is off. + + + + + + progressive_explain_timing (boolean) + + progressive_explain_timing configuration parameter + + + + + Controls whether information on per node timing is added + to progressive explains. Equivalent to the TIMING option of + EXPLAIN. The default is off. + + + + + + progressive_explain_wal (boolean) + + progressive_explain_wal configuration parameter + + + + + Controls whether information on WAL record generation is + added to progressive explains. Equivalent to the WAL option of + EXPLAIN. The default is off. + + + + + + progressive_explain_verbose (boolean) + + progressive_explain_verbose configuration parameter + + + + + Controls whether verbose details are added to progressive explains. + Equivalent to the VERBOSE option of EXPLAIN. The default is + off. + + + + + + progressive_explain_settings (boolean) + + progressive_explain_settings configuration parameter + + + + + Controls whether information on modified configuration is added to + progressive explains. Equivalent to the SETTINGS option of EXPLAIN. + The default is off. + + + + + + progressive_explain_costs (boolean) + + progressive_explain_costs configuration parameter + + + + + Controls whether information on the estimated startup and total cost of + each plan node is added to progressive explains. Equivalent to the COSTS + option of EXPLAIN. + The default is off. + + + + + + progressive_explain_format (enum) + + progressive_explain_format configuration parameter + + + + + Selects the EXPLAIN output format to be used with progressive + explains. Equivalent to the FORMAT option of EXPLAIN. The default + is text. + + + + diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 0960f5ba94a..14780c54565 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -6840,6 +6840,88 @@ FROM pg_stat_get_backend_idset() AS backendid; + + EXPLAIN Progress Reporting + + + pg_stat_progress_explain + + + + Whenever a client backend or parallel worker is running a query with + enabled, the + pg_stat_progress_explain view will contain a + corresponding row with query plan details; see + . The table below describe the + information that will be reported. + + + + <structname>pg_stat_progress_explain</structname> View + + + + + Column Type + + + Description + + + + + + + + datid oid + + + OID of the database this backend is connected to + + + + + + datname name + + + Name of the database this backend is connected to + + + + + + pid integer + + + Process ID of a client backend or parallel worker. + + + + + + last_update timestamp with time zone + + + Timestamp when plan was last printed. + + + + + + query_plan text + + + The progressive explain text. + + + + + +
+ +
+ diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index 387baac7e8c..04a78f29df9 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1169,6 +1169,103 @@ EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE unique1 < 100 AND unique2 > 9000 + + Progressive <command>EXPLAIN</command> + + + The query plan created by the planner for any given active query can + be visualized by any session via + view when is enabled in the + client backend or parallel worker executing query and after min duration + specified by has + passed. Settings , + and + control which additional + instrumentaton details are collected and included in the output while + , + , + and + + define how the plan is printed and which details are added there. + + + + When is set to explain + the plan will be printed once at the beginning of the query. + + + + +SET progressive_explain = 'explain'; +SET + +SELECT * FROM test t1 INNER JOIN test t2 ON t1.c1=t2.c1; + + + + +SELECT * FROM pg_stat_progress_explain; +datid | datname | pid | last_update | query_plan +-------+----------+-------+-------------------------------+------------------------------------------------------------------------------ + 5 | postgres | 73972 | 2025-03-13 23:41:01.606324-03 | Hash Join (cost=327879.85..878413.95 rows=9999860 width=18) + + | | | | Hash Cond: (t1.c1 = t2.c1) + + | | | | -> Seq Scan on test t1 (cost=0.00..154053.60 rows=9999860 width=9) + + | | | | -> Hash (cost=154053.60..154053.60 rows=9999860 width=9) + + | | | | -> Seq Scan on test t2 (cost=0.00..154053.60 rows=9999860 width=9)+ + | | | | +(1 row) + + + + + Setting to analyze + will enable instrumentation and the detailed plan is printed on a fixed interval + controlled by , including + per node accumulated row count and other statistics. + + + + Progressive explains include additional information per node to help analyzing + execution progress: + + + + + current: the plan node currently being processed. + + + + + never executed: a plan node not processed yet. + + + + + + +SET progressive_explain = 'analyze'; +SET + +SELECT * FROM test t1 INNER JOIN test t2 ON t1.c1=t2.c1; + + + + +SELECT * FROM pg_stat_progress_explain; +datid | datname | pid | last_update | query_plan +-------+----------+-------+-------------------------------+--------------------------------------------------------------------------------------------------------------------------------------- + 5 | postgres | 73972 | 2025-03-13 23:41:53.951884-03 | Hash Join (cost=327879.85..878413.95 rows=9999860 width=18) (actual time=1581.469..2963.158 rows=64862.00 loops=1) + + | | | | Hash Cond: (t1.c1 = t2.c1) + + | | | | -> Seq Scan on test t1 (cost=0.00..154053.60 rows=9999860 width=9) (actual time=0.079..486.702 rows=8258962.00 loops=1) (current)+ + | | | | -> Hash (cost=154053.60..154053.60 rows=9999860 width=9) (actual time=1580.933..1580.933 rows=10000000.00 loops=1) + + | | | | -> Seq Scan on test t2 (cost=0.00..154053.60 rows=9999860 width=9) (actual time=0.004..566.961 rows=10000000.00 loops=1) + + | | | | +(1 row) + + + + + diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index b885513f765..fcdc4a8fefe 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -36,6 +36,7 @@ #include "catalog/pg_enum.h" #include "catalog/storage.h" #include "commands/async.h" +#include "commands/explain_progressive.h" #include "commands/tablecmds.h" #include "commands/trigger.h" #include "common/pg_prng.h" @@ -2993,6 +2994,7 @@ AbortTransaction(void) AtEOXact_PgStat(false, is_parallel_worker); AtEOXact_ApplyLauncher(false); AtEOXact_LogicalRepWorkers(false); + AtAbort_ProgressiveExplain(); pgstat_report_xact_timestamp(0); } @@ -5361,6 +5363,7 @@ AbortSubTransaction(void) AtEOSubXact_HashTables(false, s->nestingLevel); AtEOSubXact_PgStat(false, s->nestingLevel); AtSubAbort_Snapshot(s->nestingLevel); + AtSubAbort_ProgressiveExplain(); } /* diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 31d269b7ee0..767735c1a2c 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1334,6 +1334,16 @@ CREATE VIEW pg_stat_progress_copy AS FROM pg_stat_get_progress_info('COPY') AS S LEFT JOIN pg_database D ON S.datid = D.oid; +CREATE VIEW pg_stat_progress_explain AS + SELECT + S.datid AS datid, + D.datname AS datname, + S.pid, + S.last_update, + S.query_plan + FROM pg_stat_progress_explain() AS S + LEFT JOIN pg_database AS D ON (S.datid = D.oid); + CREATE VIEW pg_user_mappings AS SELECT U.oid AS umid, diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile index cb2fbdc7c60..e10224b2cd2 100644 --- a/src/backend/commands/Makefile +++ b/src/backend/commands/Makefile @@ -36,6 +36,7 @@ OBJS = \ explain.o \ explain_dr.o \ explain_format.o \ + explain_progressive.o \ explain_state.o \ extension.o \ foreigncmds.o \ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 391b34a2af2..359ae31e7d0 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -20,6 +20,7 @@ #include "commands/explain.h" #include "commands/explain_dr.h" #include "commands/explain_format.h" +#include "commands/explain_progressive.h" #include "commands/explain_state.h" #include "commands/prepare.h" #include "foreign/fdwapi.h" @@ -139,7 +140,7 @@ static void show_indexsearches_info(PlanState *planstate, ExplainState *es); static void show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es); static void show_instrumentation_count(const char *qlabel, int which, - PlanState *planstate, ExplainState *es); + Instrumentation *instr, ExplainState *es); static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); static const char *explain_get_index_name(Oid indexId); static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage); @@ -596,6 +597,15 @@ ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan, /* We can't run ExecutorEnd 'till we're done printing the stats... */ totaltime += elapsed_time(&starttime); } + else + { + /* + * Handle progressive explain cleanup manually if enabled as it is not + * called as part of ExecutorFinish. + */ + if (progressive_explain != PROGRESSIVE_EXPLAIN_NONE) + ProgressiveExplainFinish(queryDesc); + } /* grab serialization metrics before we destroy the DestReceiver */ if (es->serialize != EXPLAIN_SERIALIZE_NONE) @@ -1371,6 +1381,7 @@ ExplainNode(PlanState *planstate, List *ancestors, const char *partialmode = NULL; const char *operation = NULL; const char *custom_name = NULL; + Instrumentation *local_instr = NULL; ExplainWorkersState *save_workers_state = es->workers_state; int save_indent = es->indent; bool haschildren; @@ -1834,17 +1845,38 @@ ExplainNode(PlanState *planstate, List *ancestors, * instrumentation results the user didn't ask for. But we do the * InstrEndLoop call anyway, if possible, to reduce the number of cases * auto_explain has to contend with. + * + * For regular explains instrumentation clean up is called directly in the + * main instrumentation objects. Progressive explains need to clone + * instrumentation object and forcibly end the loop in nodes that may be + * running. */ if (planstate->instrument) - InstrEndLoop(planstate->instrument); + { + /* Progressive explain. Use auxiliary instrumentation object */ + if (es->progressive) + { + local_instr = es->pe_local_instr; + *local_instr = *planstate->instrument; + + /* Force end loop even if node is in progress */ + InstrEndLoopForce(local_instr); + } + /* Use main instrumentation */ + else + { + local_instr = planstate->instrument; + InstrEndLoop(local_instr); + } + } if (es->analyze && - planstate->instrument && planstate->instrument->nloops > 0) + local_instr && local_instr->nloops > 0) { - double nloops = planstate->instrument->nloops; - double startup_ms = 1000.0 * planstate->instrument->startup / nloops; - double total_ms = 1000.0 * planstate->instrument->total / nloops; - double rows = planstate->instrument->ntuples / nloops; + double nloops = local_instr->nloops; + double startup_ms = 1000.0 * local_instr->startup / nloops; + double total_ms = 1000.0 * local_instr->total / nloops; + double rows = local_instr->ntuples / nloops; if (es->format == EXPLAIN_FORMAT_TEXT) { @@ -1854,6 +1886,9 @@ ExplainNode(PlanState *planstate, List *ancestors, appendStringInfo(es->str, "time=%.3f..%.3f ", startup_ms, total_ms); appendStringInfo(es->str, "rows=%.2f loops=%.0f)", rows, nloops); + + if (es->progressive && planstate == es->pe_curr_node) + appendStringInfo(es->str, " (current)"); } else { @@ -1866,6 +1901,10 @@ ExplainNode(PlanState *planstate, List *ancestors, } ExplainPropertyFloat("Actual Rows", NULL, rows, 2, es); ExplainPropertyFloat("Actual Loops", NULL, nloops, 0, es); + + /* Progressive explain. Add current node flag is applicable */ + if (es->progressive && planstate == es->pe_curr_node) + ExplainPropertyBool("Current", true, es); } } else if (es->analyze) @@ -1970,13 +2009,13 @@ ExplainNode(PlanState *planstate, List *ancestors, "Index Cond", planstate, ancestors, es); if (((IndexScan *) plan)->indexqualorig) show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); + local_instr, es); show_scan_qual(((IndexScan *) plan)->indexorderbyorig, "Order By", planstate, ancestors, es); show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); show_indexsearches_info(planstate, es); break; case T_IndexOnlyScan: @@ -1984,16 +2023,16 @@ ExplainNode(PlanState *planstate, List *ancestors, "Index Cond", planstate, ancestors, es); if (((IndexOnlyScan *) plan)->recheckqual) show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); + local_instr, es); show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, "Order By", planstate, ancestors, es); show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); if (es->analyze) ExplainPropertyFloat("Heap Fetches", NULL, - planstate->instrument->ntuples2, 0, es); + local_instr->ntuples2, 0, es); show_indexsearches_info(planstate, es); break; case T_BitmapIndexScan: @@ -2006,11 +2045,11 @@ ExplainNode(PlanState *planstate, List *ancestors, "Recheck Cond", planstate, ancestors, es); if (((BitmapHeapScan *) plan)->bitmapqualorig) show_instrumentation_count("Rows Removed by Index Recheck", 2, - planstate, es); + local_instr, es); show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); show_tidbitmap_info((BitmapHeapScanState *) planstate, es); break; case T_SampleScan: @@ -2027,7 +2066,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); if (IsA(plan, CteScan)) show_ctescan_info(castNode(CteScanState, planstate), es); break; @@ -2038,7 +2077,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); ExplainPropertyInteger("Workers Planned", NULL, gather->num_workers, es); @@ -2062,7 +2101,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); ExplainPropertyInteger("Workers Planned", NULL, gm->num_workers, es); @@ -2096,7 +2135,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); break; case T_TableFuncScan: if (es->verbose) @@ -2110,7 +2149,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); show_table_func_scan_info(castNode(TableFuncScanState, planstate), es); break; @@ -2128,7 +2167,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); } break; case T_TidRangeScan: @@ -2145,14 +2184,14 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); } break; case T_ForeignScan: show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); show_foreignscan_info((ForeignScanState *) planstate, es); break; case T_CustomScan: @@ -2162,7 +2201,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); if (css->methods->ExplainCustomScan) css->methods->ExplainCustomScan(css, ancestors, es); } @@ -2172,11 +2211,11 @@ ExplainNode(PlanState *planstate, List *ancestors, "Join Filter", planstate, ancestors, es); if (((NestLoop *) plan)->join.joinqual) show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); + local_instr, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); + local_instr, es); break; case T_MergeJoin: show_upper_qual(((MergeJoin *) plan)->mergeclauses, @@ -2185,11 +2224,11 @@ ExplainNode(PlanState *planstate, List *ancestors, "Join Filter", planstate, ancestors, es); if (((MergeJoin *) plan)->join.joinqual) show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); + local_instr, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); + local_instr, es); break; case T_HashJoin: show_upper_qual(((HashJoin *) plan)->hashclauses, @@ -2198,11 +2237,11 @@ ExplainNode(PlanState *planstate, List *ancestors, "Join Filter", planstate, ancestors, es); if (((HashJoin *) plan)->join.joinqual) show_instrumentation_count("Rows Removed by Join Filter", 1, - planstate, es); + local_instr, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 2, - planstate, es); + local_instr, es); break; case T_Agg: show_agg_keys(castNode(AggState, planstate), ancestors, es); @@ -2210,7 +2249,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_hashagg_info((AggState *) planstate, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); break; case T_WindowAgg: show_window_def(castNode(WindowAggState, planstate), ancestors, es); @@ -2219,7 +2258,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); show_windowagg_info(castNode(WindowAggState, planstate), es); break; case T_Group: @@ -2227,7 +2266,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); break; case T_Sort: show_sort_keys(castNode(SortState, planstate), ancestors, es); @@ -2249,7 +2288,7 @@ ExplainNode(PlanState *planstate, List *ancestors, show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, - planstate, es); + local_instr, es); break; case T_ModifyTable: show_modifytable_info(castNode(ModifyTableState, planstate), ancestors, @@ -2294,10 +2333,10 @@ ExplainNode(PlanState *planstate, List *ancestors, } /* Show buffer/WAL usage */ - if (es->buffers && planstate->instrument) - show_buffer_usage(es, &planstate->instrument->bufusage); - if (es->wal && planstate->instrument) - show_wal_usage(es, &planstate->instrument->walusage); + if (es->buffers && local_instr) + show_buffer_usage(es, &local_instr->bufusage); + if (es->wal && local_instr) + show_wal_usage(es, &local_instr->walusage); /* Prepare per-worker buffer/WAL usage */ if (es->workers_state && (es->buffers || es->wal) && es->verbose) @@ -3975,19 +4014,19 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es) */ static void show_instrumentation_count(const char *qlabel, int which, - PlanState *planstate, ExplainState *es) + Instrumentation *instr, ExplainState *es) { double nfiltered; double nloops; - if (!es->analyze || !planstate->instrument) + if (!es->analyze || !instr) return; if (which == 2) - nfiltered = planstate->instrument->nfiltered2; + nfiltered = instr->nfiltered2; else - nfiltered = planstate->instrument->nfiltered1; - nloops = planstate->instrument->nloops; + nfiltered = instr->nfiltered1; + nloops = instr->nloops; /* In text mode, suppress zero counts; they're not interesting enough */ if (nfiltered > 0 || es->format != EXPLAIN_FORMAT_TEXT) @@ -4668,7 +4707,7 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, { show_upper_qual((List *) node->onConflictWhere, "Conflict Filter", &mtstate->ps, ancestors, es); - show_instrumentation_count("Rows Removed by Conflict Filter", 1, &mtstate->ps, es); + show_instrumentation_count("Rows Removed by Conflict Filter", 1, (&mtstate->ps)->instrument, es); } /* EXPLAIN ANALYZE display of actual outcome for each tuple proposed */ @@ -4677,11 +4716,24 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, double total; double insert_path; double other_path; + Instrumentation *local_instr; - InstrEndLoop(outerPlanState(mtstate)->instrument); + /* Progressive explain. Use auxiliary instrumentation object */ + if (es->progressive) + { + local_instr = es->pe_local_instr; + *local_instr = *outerPlanState(mtstate)->instrument; + /* Force end loop even if node is in progress */ + InstrEndLoopForce(local_instr); + } + else + { + local_instr = outerPlanState(mtstate)->instrument; + InstrEndLoop(local_instr); + } /* count the number of source rows */ - total = outerPlanState(mtstate)->instrument->ntuples; + total = local_instr->ntuples; other_path = mtstate->ps.instrument->ntuples2; insert_path = total - other_path; @@ -4701,11 +4753,24 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, double update_path; double delete_path; double skipped_path; + Instrumentation *local_instr; - InstrEndLoop(outerPlanState(mtstate)->instrument); + /* Progressive explain. Use auxiliary instrumentation object */ + if (es->progressive) + { + local_instr = es->pe_local_instr; + *local_instr = *outerPlanState(mtstate)->instrument; + /* Force end loop even if node is in progress */ + InstrEndLoopForce(local_instr); + } + else + { + local_instr = outerPlanState(mtstate)->instrument; + InstrEndLoop(local_instr); + } /* count the number of source rows */ - total = outerPlanState(mtstate)->instrument->ntuples; + total = local_instr->ntuples; insert_path = mtstate->mt_merge_inserted; update_path = mtstate->mt_merge_updated; delete_path = mtstate->mt_merge_deleted; diff --git a/src/backend/commands/explain_format.c b/src/backend/commands/explain_format.c index 752691d56db..c0d6973d1e5 100644 --- a/src/backend/commands/explain_format.c +++ b/src/backend/commands/explain_format.c @@ -16,6 +16,7 @@ #include "commands/explain.h" #include "commands/explain_format.h" #include "commands/explain_state.h" +#include "utils/guc_tables.h" #include "utils/json.h" #include "utils/xml.h" @@ -25,6 +26,17 @@ #define X_CLOSE_IMMEDIATE 2 #define X_NOWHITESPACE 4 +/* + * GUC support + */ +const struct config_enum_entry explain_format_options[] = { + {"text", EXPLAIN_FORMAT_TEXT, false}, + {"xml", EXPLAIN_FORMAT_XML, false}, + {"json", EXPLAIN_FORMAT_JSON, false}, + {"yaml", EXPLAIN_FORMAT_YAML, false}, + {NULL, 0, false} +}; + static void ExplainJSONLineEnding(ExplainState *es); static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es); static void ExplainYAMLLineStarting(ExplainState *es); diff --git a/src/backend/commands/explain_progressive.c b/src/backend/commands/explain_progressive.c new file mode 100644 index 00000000000..17516aee0d7 --- /dev/null +++ b/src/backend/commands/explain_progressive.c @@ -0,0 +1,762 @@ +/*------------------------------------------------------------------------- + * + * explain_progressive.c + * Code for the progressive explain feature + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994-5, Regents of the University of California + * + * IDENTIFICATION + * src/backend/commands/explain_progressive.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/xact.h" +#include "catalog/pg_authid.h" +#include "commands/explain.h" +#include "commands/explain_format.h" +#include "commands/explain_progressive.h" +#include "commands/explain_state.h" +#include "foreign/fdwapi.h" +#include "funcapi.h" +#include "utils/acl.h" +#include "utils/backend_status.h" +#include "utils/builtins.h" +#include "utils/guc_tables.h" +#include "utils/timeout.h" + + +#define PROGRESSIVE_EXPLAIN_FREE_SIZE 4096 + +/* Shared hash to store progressive explains */ +static HTAB *progressiveExplainHash = NULL; + +/* Pointer to running query */ +static QueryDesc *activeQueryDesc = NULL; + +/* Transaction nest level of the tracked query */ +static int activeQueryXactNestLevel = 0; + +/* Flag set by timeouts to control when to update progressive explains */ +bool ProgressiveExplainPending = false; + +static void ProgressiveExplainInit(QueryDesc *queryDesc); +static void ProgressiveExplainPrint(QueryDesc *queryDesc); +static void ProgressiveExplainCleanup(QueryDesc *queryDesc); +static TupleTableSlot *ExecProcNodeExplain(PlanState *node); +static void WrapExecProcNodeWithExplain(PlanState *ps); +static void UnwrapExecProcNodeWithExplain(PlanState *ps); + + + +/* + * ProgressiveExplainSetup - + * Track query descriptor and adjust instrumentation. + * + * If progressive explain is enabled and configured to collect + * instrumentation details, we adjust QueryDesc accordingly even if + * the query was not initiated with EXPLAIN ANALYZE. This will + * directly affect query execution and add computation overhead. + */ +void +ProgressiveExplainSetup(QueryDesc *queryDesc) +{ + /* Setup only if this is the outer most query */ + if (activeQueryDesc == NULL) + { + activeQueryDesc = queryDesc; + activeQueryXactNestLevel = GetCurrentTransactionNestLevel(); + + /* Setup instrumentation if enabled */ + if (progressive_explain == PROGRESSIVE_EXPLAIN_ANALYZE) + { + if (progressive_explain_timing) + queryDesc->instrument_options |= INSTRUMENT_TIMER; + else + queryDesc->instrument_options |= INSTRUMENT_ROWS; + if (progressive_explain_buffers) + queryDesc->instrument_options |= INSTRUMENT_BUFFERS; + if (progressive_explain_wal) + queryDesc->instrument_options |= INSTRUMENT_WAL; + } + } +} + +/* + * ProgressiveExplainStart + * Progressive explain start point. + */ +void +ProgressiveExplainStart(QueryDesc *queryDesc) +{ + /* + * Progressive explain is only done for the outer most query descriptor. + */ + if (queryDesc == activeQueryDesc) + { + /* Timeout is only needed if duration > 0 */ + if (progressive_explain_min_duration == 0) + ProgressiveExplainInit(queryDesc); + else + enable_timeout_after(PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT, + progressive_explain_min_duration); + } +} + +/* + * ProgressiveExplainInit - + * Responsible for initialization of all structures related to progressive + * explains. + * + * We define a ExplainState that will be reused in every iteration of + * plan prints. + * + * Progressive explain plans are printed in shared memory via DSAs. Each + * A dynamic shared memory area is created to hold the progressive plans. + * Each backend printing plans has its own DSA, which is shared with other + * backends via the global progressive explain hash through dsa_handle and + * dsa_pointer pointers. + * + * A memory context release callback is defined for manual resource release + * in case of query cancellations. + * + * A periodic timeout is configured to print the plan in fixed intervals if + * progressive explain is configured with instrumentation enabled. Otherwise + * the plain plan is printed once. + */ +void +ProgressiveExplainInit(QueryDesc *queryDesc) +{ + ExplainState *es; + ProgressiveExplainHashEntry *entry; + bool found; + + /* Initialize ExplainState to be used for all prints */ + es = NewExplainState(); + queryDesc->pestate = es; + + /* Local instrumentation object to be reused for every node */ + es->pe_local_instr = palloc0(sizeof(Instrumentation)); + + /* + * Mark ExplainState as progressive so that the plan printing function + * adjusts logic accordingly. + */ + es->progressive = true; + + es->analyze = (queryDesc->instrument_options && + (progressive_explain == PROGRESSIVE_EXPLAIN_ANALYZE)); + es->buffers = (es->analyze && progressive_explain_buffers); + es->wal = (es->analyze && progressive_explain_wal); + es->timing = (es->analyze && progressive_explain_timing); + es->summary = (es->analyze); + es->format = progressive_explain_format; + es->verbose = progressive_explain_verbose; + es->settings = progressive_explain_settings; + es->costs = progressive_explain_costs; + + /* Define the DSA and share through the hash */ + es->pe_a = dsa_create(LWTRANCHE_PROGRESSIVE_EXPLAIN_DSA); + + /* Exclusive access is needed to update the hash */ + LWLockAcquire(ProgressiveExplainHashLock, LW_EXCLUSIVE); + + /* Find or create an entry with desired hash code */ + entry = (ProgressiveExplainHashEntry *) hash_search(progressiveExplainHash, + &MyProcPid, + HASH_ENTER, + &found); + + entry->pe_h = dsa_get_handle(es->pe_a); + entry->pe_p = (dsa_pointer) NULL; + + LWLockRelease(ProgressiveExplainHashLock); + + /* Enable timeout only if instrumentation is enabled */ + if (es->analyze) + enable_timeout_every(PROGRESSIVE_EXPLAIN_TIMEOUT, + TimestampTzPlusMilliseconds(GetCurrentTimestamp(), + progressive_explain_interval), + progressive_explain_interval); + + /* Printing progressive plan for the first time */ + ProgressiveExplainPrint(queryDesc); +} + +/* + * ProgressiveExplainTrigger - + * Called by the timeout handler to start printing progressive + * explain plans. + */ +void +ProgressiveExplainTrigger(void) +{ + /* + * Check that the pointer is still active to avoid corner case where the + * timeout function is called after cleanup is done. + */ + if (activeQueryDesc) + WrapExecProcNodeWithExplain(activeQueryDesc->planstate); +} + +/* + * ProgressiveExplainUpdate + * Updates progressive explain for instrumented runs. + */ +void +ProgressiveExplainUpdate(PlanState *node) +{ + /* Track the current PlanState */ + node->state->query_desc->pestate->pe_curr_node = node; + ProgressiveExplainPrint(node->state->query_desc); + node->state->query_desc->pestate->pe_curr_node = NULL; + + /* Reset timeout flag */ + ProgressiveExplainPending = false; +} + +/* + * ProgressiveExplainPrint - + * Prints progressive explain in memory. + * + * This function resets the reusable ExplainState, prints the + * plan and updates the DSA with new data. + * + * DSA memory allocation is also done here. Amount of shared + * memory allocated depends on size of currently printed plan. + * There may be reallocations in subsequent calls if new plans + * don't fit in the existing area. + */ +void +ProgressiveExplainPrint(QueryDesc *queryDesc) +{ + bool alloc_needed = false; + QueryDesc *currentQueryDesc = queryDesc; + ProgressiveExplainHashEntry *entry; + ProgressiveExplainHashData *pe_data; + ExplainState *es = queryDesc->pestate; + + /* Reset the string to be reused */ + resetStringInfo(es->str); + + /* Print the plan */ + ExplainBeginOutput(es); + ExplainPrintPlan(es, currentQueryDesc); + ExplainEndOutput(es); + + /* Exclusive access is needed to update the hash */ + LWLockAcquire(ProgressiveExplainHashLock, LW_EXCLUSIVE); + entry = (ProgressiveExplainHashEntry *) hash_search(progressiveExplainHash, + &MyProcPid, + HASH_FIND, + NULL); + + /* Entry must already exist in shared memory at this point */ + if (!entry) + elog(ERROR, "no entry in progressive explain hash for pid %d", + MyProcPid); + else + { + /* Plan was never printed */ + if (!entry->pe_p) + alloc_needed = true; + else + { + pe_data = dsa_get_address(es->pe_a, + entry->pe_p); + + /* + * Plan does not fit in existing shared memory area. Reallocation + * is needed. + */ + if (strlen(es->str->data) > entry->plan_alloc_size) + { + dsa_free(es->pe_a, entry->pe_p); + alloc_needed = true; + } + } + + if (alloc_needed) + { + /* + * The allocated size combines the length of the currently printed + * query plan with an additional delta defined by + * PROGRESSIVE_EXPLAIN_FREE_SIZE. This strategy prevents having to + * reallocate the segment very often, which would be needed in + * case the length of the next printed exceeds the previously + * allocated size. + */ + entry->plan_alloc_size = add_size(strlen(es->str->data), + PROGRESSIVE_EXPLAIN_FREE_SIZE); + entry->pe_p = dsa_allocate(es->pe_a, + add_size(sizeof(ProgressiveExplainHashData), + entry->plan_alloc_size)); + pe_data = dsa_get_address(es->pe_a, entry->pe_p); + pe_data->pid = MyProcPid; + } + + /* Update shared memory with new data */ + strcpy(pe_data->plan, es->str->data); + pe_data->last_update = GetCurrentTimestamp(); + } + + LWLockRelease(ProgressiveExplainHashLock); +} + +/* + * ProgressiveExplainFinish - + * Finalizes query execution with progressive explain enabled. + */ +void +ProgressiveExplainFinish(QueryDesc *queryDesc) +{ + /* + * Progressive explain is only done for the outer most query descriptor. + */ + if (queryDesc == activeQueryDesc) + { + /* Startup timeout hasn't triggered yet, just disable it */ + if (get_timeout_active(PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT)) + disable_timeout(PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT, false); + /* Initial progressive explain was done, clean everything */ + else if (queryDesc && queryDesc->pestate) + ProgressiveExplainCleanup(queryDesc); + } +} + +/* + * ProgressiveExplainFinish - + * Finalizes query execution with progressive explain enabled. + */ +bool +ProgressiveExplainIsActive(QueryDesc *queryDesc) +{ + if (queryDesc == activeQueryDesc) + return true; + else + return false; +} + +/* + * End-of-transaction cleanup for progressive explains. + */ +void +AtAbort_ProgressiveExplain(void) +{ + /* Only perform cleanup if query descriptor is being tracked */ + if (activeQueryDesc) + ProgressiveExplainCleanup(NULL); +} + +/* + * End-of-subtransaction cleanup for progressive explains. + */ +void +AtSubAbort_ProgressiveExplain(void) +{ + /* + * Only perform cleanup if query descriptor is being tracked and the + * current transaction nested level is the same as the level of the + * tracked query. This is to avoid doing cleanup in subtransaction aborts + * triggered by exception blocks in functions and procedures. + */ + if (activeQueryDesc && + activeQueryXactNestLevel == GetCurrentTransactionNestLevel()) + ProgressiveExplainCleanup(NULL); +} + +/* + * ProgressiveExplainCleanup - + * Cleanup routine when progressive explain is enabled. + * + * We need to deal with structures not automatically released by the memory + * context removal. Current tasks are: + * - remove local backend from progressive explain hash + * - detach from DSA used to store shared data + */ +void +ProgressiveExplainCleanup(QueryDesc *queryDesc) +{ + /* Stop timeout */ + disable_timeout(PROGRESSIVE_EXPLAIN_TIMEOUT, false); + + /* Reset timeout flag */ + ProgressiveExplainPending = false; + + /* Reset querydesc tracker */ + activeQueryDesc = NULL; + + LWLockAcquire(ProgressiveExplainHashLock, LW_EXCLUSIVE); + + /* + * Only detach from DSA if query ended gracefully, ie, if + * ProgressiveExplainCleanup was called by function + * ProgressiveExplainFinish + */ + if (queryDesc) + dsa_detach(queryDesc->pestate->pe_a); + hash_search(progressiveExplainHash, &MyProcPid, HASH_REMOVE, NULL); + LWLockRelease(ProgressiveExplainHashLock); +} + +/* + * ExecProcNodeExplain - + * ExecProcNode wrapper that initializes progressive explain + * and uwraps ExecProdNode to the original function. + */ +static TupleTableSlot * +ExecProcNodeExplain(PlanState *node) +{ + /* Initialize progressive explain */ + ProgressiveExplainInit(node->state->query_desc); + + /* Unwrap exec proc node for all nodes */ + UnwrapExecProcNodeWithExplain(node->state->query_desc->planstate); + + /* + * Since unwrapping has already done, call ExecProcNode() not + * ExecProcNodeOriginal(). + */ + return node->ExecProcNode(node); +} + +/* + * ExecProcNodeInstrExplain - + * ExecProcNode wrapper that performs instrumentation calls and prints + * progressive explains. By keeping this a separate function, we add + * overhead only when progressive explain is enabled + */ +TupleTableSlot * +ExecProcNodeInstrExplain(PlanState *node) +{ + TupleTableSlot *result; + + InstrStartNode(node->instrument); + + /* + * Update progressive after timeout is reached. + */ + if (ProgressiveExplainPending) + ProgressiveExplainUpdate(node); + + result = node->ExecProcNodeReal(node); + + InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0); + + return result; +} + +/* + * WrapMemberNodesExecProcNodesWithExplain - + * Wrap array of PlanStates ExecProcNodes with ExecProcNodeWithExplain + */ +static void +WrapMemberNodesExecProcNodesWithExplain(PlanState **planstates, int nplans) +{ + int i; + + for (i = 0; i < nplans; i++) + WrapExecProcNodeWithExplain(planstates[i]); +} + +/* + * WrapCustomPlanChildExecProcNodesWithExplain - + * Wrap CustomScanstate children's ExecProcNodes with ExecProcNodeWithExplain + */ +static void +WrapCustomPlanChildExecProcNodesWithExplain(CustomScanState *css) +{ + ListCell *cell; + + foreach(cell, css->custom_ps) + WrapExecProcNodeWithExplain((PlanState *) lfirst(cell)); +} + +/* + * WrapSubPlansExecProcNodesWithExplain - + * Wrap SubPlans ExecProcNodes with ExecProcNodeWithExplain + */ +static void +WrapSubPlansExecProcNodesWithExplain(List *plans) +{ + ListCell *lst; + + foreach(lst, plans) + { + SubPlanState *sps = (SubPlanState *) lfirst(lst); + + WrapExecProcNodeWithExplain(sps->planstate); + } +} + +/* + * WrapExecProcNodeWithExplain - + * Wrap ExecProcNode with ExecProcNodeWithExplain recursively + */ +static void +WrapExecProcNodeWithExplain(PlanState *ps) +{ + /* wrapping can be done only once */ + if (ps->ExecProcNodeOriginal != NULL) + return; + + check_stack_depth(); + + ps->ExecProcNodeOriginal = ps->ExecProcNode; + ps->ExecProcNode = ExecProcNodeExplain; + + /* initPlan-s */ + if (ps->initPlan != NULL) + WrapSubPlansExecProcNodesWithExplain(ps->initPlan); + + /* lefttree */ + if (ps->lefttree != NULL) + WrapExecProcNodeWithExplain(ps->lefttree); + /* righttree */ + if (ps->righttree != NULL) + WrapExecProcNodeWithExplain(ps->righttree); + + /* special child plans */ + switch (nodeTag(ps->plan)) + { + case T_Append: + WrapMemberNodesExecProcNodesWithExplain(((AppendState *) ps)->appendplans, + ((AppendState *) ps)->as_nplans); + break; + case T_MergeAppend: + WrapMemberNodesExecProcNodesWithExplain(((MergeAppendState *) ps)->mergeplans, + ((MergeAppendState *) ps)->ms_nplans); + break; + case T_BitmapAnd: + WrapMemberNodesExecProcNodesWithExplain(((BitmapAndState *) ps)->bitmapplans, + ((BitmapAndState *) ps)->nplans); + break; + case T_BitmapOr: + WrapMemberNodesExecProcNodesWithExplain(((BitmapOrState *) ps)->bitmapplans, + ((BitmapOrState *) ps)->nplans); + break; + case T_SubqueryScan: + WrapExecProcNodeWithExplain(((SubqueryScanState *) ps)->subplan); + break; + case T_CustomScan: + WrapCustomPlanChildExecProcNodesWithExplain((CustomScanState *) ps); + break; + default: + break; + } +} + +/* + * UnWrapMemberNodesExecProcNodesWithExplain - + * Unwrap array of PlanStates ExecProcNodes with ExecProcNodeWithExplain + */ +static void +UnWrapMemberNodesExecProcNodesWithExplain(PlanState **planstates, int nplans) +{ + int i; + + for (i = 0; i < nplans; i++) + UnwrapExecProcNodeWithExplain(planstates[i]); +} + +/* + * UnwrapCustomPlanChildExecProcNodesWithExplain - + * Unwrap CustomScanstate children's ExecProcNodes with ExecProcNodeWithExplain + */ +static void +UnwrapCustomPlanChildExecProcNodesWithExplain(CustomScanState *css) +{ + ListCell *cell; + + foreach(cell, css->custom_ps) + UnwrapExecProcNodeWithExplain((PlanState *) lfirst(cell)); +} + +/* + * UnwrapSubPlansExecProcNodesWithExplain - + * Unwrap SubPlans ExecProcNodes with ExecProcNodeWithExplain + */ +static void +UnwrapSubPlansExecProcNodesWithExplain(List *plans) +{ + ListCell *lst; + + foreach(lst, plans) + { + SubPlanState *sps = (SubPlanState *) lfirst(lst); + + UnwrapExecProcNodeWithExplain(sps->planstate); + } +} + +/* + * UnwrapExecProcNodeWithExplain - + * Unwrap ExecProcNode with ExecProcNodeWithExplain recursively + */ +static void +UnwrapExecProcNodeWithExplain(PlanState *ps) +{ + Assert(ps->ExecProcNodeOriginal != NULL); + + check_stack_depth(); + + ps->ExecProcNode = ps->ExecProcNodeOriginal; + ps->ExecProcNodeOriginal = NULL; + + /* initPlan-s */ + if (ps->initPlan != NULL) + UnwrapSubPlansExecProcNodesWithExplain(ps->initPlan); + + /* lefttree */ + if (ps->lefttree != NULL) + UnwrapExecProcNodeWithExplain(ps->lefttree); + /* righttree */ + if (ps->righttree != NULL) + UnwrapExecProcNodeWithExplain(ps->righttree); + + /* special child plans */ + switch (nodeTag(ps->plan)) + { + case T_Append: + UnWrapMemberNodesExecProcNodesWithExplain(((AppendState *) ps)->appendplans, + ((AppendState *) ps)->as_nplans); + break; + case T_MergeAppend: + UnWrapMemberNodesExecProcNodesWithExplain(((MergeAppendState *) ps)->mergeplans, + ((MergeAppendState *) ps)->ms_nplans); + break; + case T_BitmapAnd: + UnWrapMemberNodesExecProcNodesWithExplain(((BitmapAndState *) ps)->bitmapplans, + ((BitmapAndState *) ps)->nplans); + break; + case T_BitmapOr: + UnWrapMemberNodesExecProcNodesWithExplain(((BitmapOrState *) ps)->bitmapplans, + ((BitmapOrState *) ps)->nplans); + break; + case T_SubqueryScan: + UnwrapExecProcNodeWithExplain(((SubqueryScanState *) ps)->subplan); + break; + case T_CustomScan: + UnwrapCustomPlanChildExecProcNodesWithExplain((CustomScanState *) ps); + break; + default: + break; + } +} + +/* + * ProgressiveExplainHashShmemSize + * Compute shared memory space needed for explain hash. + */ +Size +ProgressiveExplainHashShmemSize(void) +{ + Size size = 0; + long max_table_size; + + max_table_size = add_size(MaxBackends, + max_parallel_workers); + size = add_size(size, + hash_estimate_size(max_table_size, + sizeof(ProgressiveExplainHashEntry))); + + return size; +} + +/* + * InitProgressiveExplainHash - + * Initialize hash used to store data of progressive explains. + */ +void +InitProgressiveExplainHash(void) +{ + HASHCTL info; + + info.keysize = sizeof(int); + info.entrysize = sizeof(ProgressiveExplainHashEntry); + + progressiveExplainHash = ShmemInitHash("progressive explain hash", + 50, 50, + &info, + HASH_ELEM | HASH_BLOBS); +} + +/* + * pg_stat_progress_explain - + * Return the progress of progressive explains. + */ +Datum +pg_stat_progress_explain(PG_FUNCTION_ARGS) +{ +#define EXPLAIN_ACTIVITY_COLS 4 + int num_backends = pgstat_fetch_stat_numbackends(); + int curr_backend; + HASH_SEQ_STATUS hash_seq; + ProgressiveExplainHashEntry *entry; + dsa_area *a; + ProgressiveExplainHashData *ped; + + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + InitMaterializedSRF(fcinfo, 0); + + LWLockAcquire(ProgressiveExplainHashLock, LW_SHARED); + + hash_seq_init(&hash_seq, progressiveExplainHash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + Datum values[EXPLAIN_ACTIVITY_COLS] = {0}; + bool nulls[EXPLAIN_ACTIVITY_COLS] = {0}; + + /* + * We don't look at a DSA that doesn't contain data yet, or at our own + * row. + */ + if (!DsaPointerIsValid(entry->pe_p) || + MyProcPid == entry->pid) + continue; + + a = dsa_attach(entry->pe_h); + ped = dsa_get_address(a, entry->pe_p); + + /* 1-based index */ + for (curr_backend = 1; curr_backend <= num_backends; curr_backend++) + { + LocalPgBackendStatus *local_beentry; + PgBackendStatus *beentry; + + /* Get the next one in the list */ + local_beentry = pgstat_get_local_beentry_by_index(curr_backend); + beentry = &local_beentry->backendStatus; + + if (beentry->st_procpid == ped->pid) + { + /* Values available to all callers */ + if (beentry->st_databaseid != InvalidOid) + values[0] = ObjectIdGetDatum(beentry->st_databaseid); + else + nulls[0] = true; + + values[1] = ped->pid; + values[2] = TimestampTzGetDatum(ped->last_update); + + if (has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS) || + has_privs_of_role(GetUserId(), beentry->st_procpid)) + values[3] = CStringGetTextDatum(ped->plan); + else + values[3] = CStringGetTextDatum(""); + break; + } + } + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + + dsa_detach(a); + + } + LWLockRelease(ProgressiveExplainHashLock); + + return (Datum) 0; +} diff --git a/src/backend/commands/meson.build b/src/backend/commands/meson.build index dd4cde41d32..2bb0ac7d286 100644 --- a/src/backend/commands/meson.build +++ b/src/backend/commands/meson.build @@ -24,6 +24,7 @@ backend_sources += files( 'explain.c', 'explain_dr.c', 'explain_format.c', + 'explain_progressive.c', 'explain_state.c', 'extension.c', 'foreigncmds.c', diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index e9bd98c7738..cbb074eaa93 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -43,6 +43,7 @@ #include "access/xact.h" #include "catalog/namespace.h" #include "catalog/partition.h" +#include "commands/explain_progressive.h" #include "commands/matview.h" #include "commands/trigger.h" #include "executor/executor.h" @@ -157,6 +158,12 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) /* caller must ensure the query's snapshot is active */ Assert(GetActiveSnapshot() == queryDesc->snapshot); + /* + * Setup progressive explain if enabled. + */ + if (progressive_explain != PROGRESSIVE_EXPLAIN_NONE) + ProgressiveExplainSetup(queryDesc); + /* * If the transaction is read-only, we need to check if any writes are * planned to non-temporary tables. EXPLAIN is considered read-only. @@ -182,6 +189,11 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) estate = CreateExecutorState(); queryDesc->estate = estate; + /* + * Adding back reference to QueryDesc + */ + estate->query_desc = queryDesc; + oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); /* @@ -267,6 +279,12 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) */ InitPlan(queryDesc, eflags); + /* + * Start progressive explain if enabled. + */ + if (progressive_explain != PROGRESSIVE_EXPLAIN_NONE) + ProgressiveExplainStart(queryDesc); + MemoryContextSwitchTo(oldcontext); return ExecPlanStillValid(queryDesc->estate); @@ -516,6 +534,12 @@ standard_ExecutorFinish(QueryDesc *queryDesc) MemoryContextSwitchTo(oldcontext); + /* + * Finish progressive explain if enabled. + */ + if (progressive_explain != PROGRESSIVE_EXPLAIN_NONE) + ProgressiveExplainFinish(queryDesc); + estate->es_finished = true; } diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index f5f9cfbeead..82fe90bf231 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -72,6 +72,7 @@ */ #include "postgres.h" +#include "commands/explain_progressive.h" #include "executor/executor.h" #include "executor/nodeAgg.h" #include "executor/nodeAppend.h" @@ -118,6 +119,7 @@ #include "executor/nodeWorktablescan.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" +#include "utils/guc.h" static TupleTableSlot *ExecProcNodeFirst(PlanState *node); static TupleTableSlot *ExecProcNodeInstr(PlanState *node); @@ -462,7 +464,17 @@ ExecProcNodeFirst(PlanState *node) * have ExecProcNode() directly call the relevant function from now on. */ if (node->instrument) - node->ExecProcNode = ExecProcNodeInstr; + { + /* + * Use wrapper for progressive explains if enabled and the node + * belongs to the currently tracked query descriptor. + */ + if (progressive_explain == PROGRESSIVE_EXPLAIN_ANALYZE && + ProgressiveExplainIsActive(node->state->query_desc)) + node->ExecProcNode = ExecProcNodeInstrExplain; + else + node->ExecProcNode = ExecProcNodeInstr; + } else node->ExecProcNode = node->ExecProcNodeReal; diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 56e635f4700..6a160ee254f 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -25,6 +25,8 @@ static WalUsage save_pgWalUsage; static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add); static void WalUsageAdd(WalUsage *dst, WalUsage *add); +static void InstrEndLoopInternal(Instrumentation *instr, bool force); + /* Allocate new instrumentation structure(s) */ Instrumentation * @@ -137,7 +139,7 @@ InstrUpdateTupleCount(Instrumentation *instr, double nTuples) /* Finish a run cycle for a plan node */ void -InstrEndLoop(Instrumentation *instr) +InstrEndLoopInternal(Instrumentation *instr, bool force) { double totaltime; @@ -145,7 +147,7 @@ InstrEndLoop(Instrumentation *instr) if (!instr->running) return; - if (!INSTR_TIME_IS_ZERO(instr->starttime)) + if (!INSTR_TIME_IS_ZERO(instr->starttime) && !force) elog(ERROR, "InstrEndLoop called on running node"); /* Accumulate per-cycle statistics into totals */ @@ -164,6 +166,20 @@ InstrEndLoop(Instrumentation *instr) instr->tuplecount = 0; } +/* Safely finish a run cycle for a plan node */ +void +InstrEndLoop(Instrumentation *instr) +{ + InstrEndLoopInternal(instr, false); +} + +/* Forcibly finish a run cycle for a plan node */ +void +InstrEndLoopForce(Instrumentation *instr) +{ + InstrEndLoopInternal(instr, true); +} + /* aggregate instrumentation information */ void InstrAggNode(Instrumentation *dst, Instrumentation *add) diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 2fa045e6b0f..389f5b55831 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -25,6 +25,7 @@ #include "access/xlogprefetcher.h" #include "access/xlogrecovery.h" #include "commands/async.h" +#include "commands/explain_progressive.h" #include "miscadmin.h" #include "pgstat.h" #include "postmaster/autovacuum.h" @@ -150,6 +151,7 @@ CalculateShmemSize(int *num_semaphores) size = add_size(size, InjectionPointShmemSize()); size = add_size(size, SlotSyncShmemSize()); size = add_size(size, AioShmemSize()); + size = add_size(size, ProgressiveExplainHashShmemSize()); /* include additional requested shmem from preload libraries */ size = add_size(size, total_addin_request); @@ -302,6 +304,11 @@ CreateOrAttachShmemStructs(void) */ PredicateLockShmemInit(); + /* + * Set up instrumented explain hash table + */ + InitProgressiveExplainHash(); + /* * Set up process table */ diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 5702c35bb91..7097312b1a8 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -177,6 +177,7 @@ static const char *const BuiltinTrancheNames[] = { [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU", [LWTRANCHE_XACT_SLRU] = "XactSLRU", [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA", + [LWTRANCHE_PROGRESSIVE_EXPLAIN_DSA] = "ProgressiveExplainDSA", }; StaticAssertDecl(lengthof(BuiltinTrancheNames) == diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 8164d0fbb4f..081966ca267 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -102,6 +102,9 @@ CreateQueryDesc(PlannedStmt *plannedstmt, /* not yet executed */ qd->already_executed = false; + /* null until set by progressive explains */ + qd->pestate = NULL; + return qd; } diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 9fa12a555e8..b53bc61d0d8 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -349,6 +349,7 @@ DSMRegistry "Waiting to read or update the dynamic shared memory registry." InjectionPoint "Waiting to read or update information related to injection points." SerialControl "Waiting to read or update shared pg_serial state." AioWorkerSubmissionQueue "Waiting to access AIO worker submission queue." +ProgressiveExplainHash "Waiting to access backend progressive explain shared hash table." # # END OF PREDEFINED LWLOCKS (DO NOT CHANGE THIS LINE) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 7958ea11b73..3b55a061f3e 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -33,6 +33,7 @@ #include "catalog/pg_database.h" #include "catalog/pg_db_role_setting.h" #include "catalog/pg_tablespace.h" +#include "commands/explain_progressive.h" #include "libpq/auth.h" #include "libpq/libpq-be.h" #include "mb/pg_wchar.h" @@ -82,6 +83,8 @@ static void TransactionTimeoutHandler(void); static void IdleSessionTimeoutHandler(void); static void IdleStatsUpdateTimeoutHandler(void); static void ClientCheckTimeoutHandler(void); +static void ProgressiveExplainStartupTimeoutHandler(void); +static void ProgressiveExplainTimeoutHandler(void); static bool ThereIsAtLeastOneRole(void); static void process_startup_options(Port *port, bool am_superuser); static void process_settings(Oid databaseid, Oid roleid); @@ -771,6 +774,10 @@ InitPostgres(const char *in_dbname, Oid dboid, RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler); RegisterTimeout(IDLE_STATS_UPDATE_TIMEOUT, IdleStatsUpdateTimeoutHandler); + RegisterTimeout(PROGRESSIVE_EXPLAIN_TIMEOUT, + ProgressiveExplainTimeoutHandler); + RegisterTimeout(PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT, + ProgressiveExplainStartupTimeoutHandler); } /* @@ -1432,6 +1439,18 @@ ClientCheckTimeoutHandler(void) SetLatch(MyLatch); } +static void +ProgressiveExplainStartupTimeoutHandler(void) +{ + ProgressiveExplainTrigger(); +} + +static void +ProgressiveExplainTimeoutHandler(void) +{ + ProgressiveExplainPending = true; +} + /* * Returns true if at least one role is defined in this database cluster. */ diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 989825d3a9c..2a5f5058bda 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -41,6 +41,8 @@ #include "commands/async.h" #include "commands/extension.h" #include "commands/event_trigger.h" +#include "commands/explain_progressive.h" +#include "commands/explain_state.h" #include "commands/tablespace.h" #include "commands/trigger.h" #include "commands/user.h" @@ -479,6 +481,14 @@ static const struct config_enum_entry wal_compression_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry progressive_explain_options[] = { + {"off", PROGRESSIVE_EXPLAIN_NONE, false}, + {"explain", PROGRESSIVE_EXPLAIN_EXPLAIN, false}, + {"analyze", PROGRESSIVE_EXPLAIN_ANALYZE, false}, + {"false", PROGRESSIVE_EXPLAIN_NONE, true}, + {NULL, 0, false} +}; + /* * Options for enum values stored in other modules */ @@ -533,6 +543,16 @@ int log_parameter_max_length_on_error = 0; int log_temp_files = -1; double log_statement_sample_rate = 1.0; double log_xact_sample_rate = 0; +int progressive_explain = PROGRESSIVE_EXPLAIN_NONE; +bool progressive_explain_verbose = false; +bool progressive_explain_settings = false; +bool progressive_explain_timing = true; +bool progressive_explain_buffers = false; +bool progressive_explain_wal = false; +bool progressive_explain_costs = true; +int progressive_explain_min_duration = 1000; +int progressive_explain_interval = 1000; +int progressive_explain_format = EXPLAIN_FORMAT_TEXT; char *backtrace_functions; int temp_file_limit = -1; @@ -2141,6 +2161,72 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"progressive_explain_verbose", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether verbose details are added to progressive explains."), + gettext_noop("Equivalent to the VERBOSE option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_verbose, + false, + NULL, NULL, NULL + }, + + { + {"progressive_explain_settings", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether information on modified configuration is added to progressive explains."), + gettext_noop("Equivalent to the SETTINGS option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_settings, + false, + NULL, NULL, NULL + }, + + { + {"progressive_explain_timing", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether information on per node timing is added to progressive explains."), + gettext_noop("Equivalent to the TIMING option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_timing, + true, + NULL, NULL, NULL + }, + + { + {"progressive_explain_buffers", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether information on buffer usage is added to progressive explains."), + gettext_noop("Equivalent to the BUFFERS option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_buffers, + false, + NULL, NULL, NULL + }, + + { + {"progressive_explain_wal", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether information on WAL record generation is added to progressive explains."), + gettext_noop("Equivalent to the WAL option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_wal, + false, + NULL, NULL, NULL + }, + + { + {"progressive_explain_costs", PGC_USERSET, STATS_MONITORING, + gettext_noop("Controls whether information on the estimated startup and total cost of each plan node is added to progressive explains."), + gettext_noop("Equivalent to the COSTS option of EXPLAIN."), + GUC_EXPLAIN + }, + &progressive_explain_costs, + true, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL @@ -3858,6 +3944,30 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"progressive_explain_interval", PGC_USERSET, STATS_MONITORING, + gettext_noop("Sets the interval between instrumented progressive " + "explains."), + NULL, + GUC_UNIT_MS + }, + &progressive_explain_interval, + 1000, 10, INT_MAX, + NULL, NULL, NULL + }, + + { + {"progressive_explain_min_duration", PGC_USERSET, STATS_MONITORING, + gettext_noop("Min query duration to start printing instrumented " + "progressive explains."), + NULL, + GUC_UNIT_MS + }, + &progressive_explain_min_duration, + 1000, 0, INT_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL @@ -5406,6 +5516,26 @@ struct config_enum ConfigureNamesEnum[] = NULL, assign_io_method, NULL }, + { + {"progressive_explain_format", PGC_USERSET, STATS_MONITORING, + gettext_noop("Selects the EXPLAIN output format to be used with progressive explains."), + gettext_noop("Equivalent to the FORMAT option of EXPLAIN.") + }, + &progressive_explain_format, + EXPLAIN_FORMAT_TEXT, explain_format_options, + NULL, NULL, NULL + }, + + { + {"progressive_explain", PGC_USERSET, STATS_MONITORING, + gettext_noop("Enables progressive explains."), + gettext_noop("Explain output is visible via pg_stat_progress_explain.") + }, + &progressive_explain, + PROGRESSIVE_EXPLAIN_NONE, progressive_explain_options, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 0b9e3066bde..89aa80afb5c 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -670,6 +670,20 @@ #log_executor_stats = off +# - Progressive Explain - + +#progressive_explain = off +#progressive_explain_min_duration = 1s +#progressive_explain_interval = 1s +#progressive_explain_format = text +#progressive_explain_settings = off +#progressive_explain_verbose = off +#progressive_explain_buffers = off +#progressive_explain_wal = off +#progressive_explain_timing = on +#progressive_explain_costs = on + + #------------------------------------------------------------------------------ # VACUUMING #------------------------------------------------------------------------------ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8b68b16d79d..69092d9ccc8 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12493,4 +12493,14 @@ proargtypes => 'int4', prosrc => 'gist_stratnum_common' }, +{ oid => '8770', + descr => 'statistics: information about progress of backends running statements', + proname => 'pg_stat_progress_explain', prorows => '100', proisstrict => 'f', + proretset => 't', provolatile => 's', proparallel => 'r', + prorettype => 'record', proargtypes => '', + proallargtypes => '{oid,int4,timestamptz,text}', + proargmodes => '{o,o,o,o}', + proargnames => '{datid,pid,last_update,query_plan}', + prosrc => 'pg_stat_progress_explain' }, + ] diff --git a/src/include/commands/explain_progressive.h b/src/include/commands/explain_progressive.h new file mode 100644 index 00000000000..ae0046aaf90 --- /dev/null +++ b/src/include/commands/explain_progressive.h @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------- + * + * explain_progressive.h + * prototypes for explain_progressive.c + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994-5, Regents of the University of California + * + * src/include/commands/explain_progressive.h + * + *------------------------------------------------------------------------- + */ +#ifndef EXPLAIN_PROGRESSIVE_H +#define EXPLAIN_PROGRESSIVE_H + +#include "datatype/timestamp.h" +#include "executor/executor.h" + +typedef enum ProgressiveExplain +{ + PROGRESSIVE_EXPLAIN_NONE, + PROGRESSIVE_EXPLAIN_EXPLAIN, + PROGRESSIVE_EXPLAIN_ANALYZE, +} ProgressiveExplain; + +typedef struct ProgressiveExplainHashEntry +{ + int pid; /* hash key of entry - MUST BE FIRST */ + int plan_alloc_size; + dsa_handle pe_h; + dsa_pointer pe_p; +} ProgressiveExplainHashEntry; + +typedef struct ProgressiveExplainHashData +{ + int pid; + TimestampTz last_update; + char plan[FLEXIBLE_ARRAY_MEMBER]; +} ProgressiveExplainHashData; + +extern bool ProgressiveExplainIsActive(QueryDesc *queryDesc); +extern void ProgressiveExplainSetup(QueryDesc *queryDesc); +extern void ProgressiveExplainStart(QueryDesc *queryDesc); +extern void ProgressiveExplainTrigger(void); +extern void ProgressiveExplainUpdate(PlanState *node); +extern void ProgressiveExplainFinish(QueryDesc *queryDesc); +extern Size ProgressiveExplainHashShmemSize(void); +extern void InitProgressiveExplainHash(void); +extern TupleTableSlot *ExecProcNodeInstrExplain(PlanState *node); + +/* transaction cleanup code */ +extern void AtAbort_ProgressiveExplain(void); +extern void AtSubAbort_ProgressiveExplain(void); + +extern bool ProgressiveExplainPending; + +#endif /* EXPLAIN_PROGRESSIVE_H */ diff --git a/src/include/commands/explain_state.h b/src/include/commands/explain_state.h index 32728f5d1a1..64370a5d6ea 100644 --- a/src/include/commands/explain_state.h +++ b/src/include/commands/explain_state.h @@ -16,6 +16,7 @@ #include "nodes/parsenodes.h" #include "nodes/plannodes.h" #include "parser/parse_node.h" +#include "utils/dsa.h" typedef enum ExplainSerializeOption { @@ -74,6 +75,14 @@ typedef struct ExplainState /* extensions */ void **extension_state; int extension_state_allocated; + /* set if tracking a progressive explain */ + bool progressive; + /* current plan node in progressive explains */ + struct PlanState *pe_curr_node; + /* reusable instr object used in progressive explains */ + struct Instrumentation *pe_local_instr; + /* dsa area used to store progressive explain data */ + dsa_area *pe_a; } ExplainState; typedef void (*ExplainOptionHandler) (ExplainState *, DefElem *, ParseState *); diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index ba53305ad42..27692aee542 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -48,6 +48,7 @@ typedef struct QueryDesc TupleDesc tupDesc; /* descriptor for result tuples */ EState *estate; /* executor's query-wide state */ PlanState *planstate; /* tree of per-plan-node state */ + struct ExplainState *pestate; /* progressive explain state if enabled */ /* This field is set by ExecutePlan */ bool already_executed; /* true if previously executed */ diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 03653ab6c6c..21de2a5632d 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -109,6 +109,7 @@ extern void InstrStartNode(Instrumentation *instr); extern void InstrStopNode(Instrumentation *instr, double nTuples); extern void InstrUpdateTupleCount(Instrumentation *instr, double nTuples); extern void InstrEndLoop(Instrumentation *instr); +extern void InstrEndLoopForce(Instrumentation *instr); extern void InstrAggNode(Instrumentation *dst, Instrumentation *add); extern void InstrStartParallelQuery(void); extern void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index e42f9f9f957..c863cb8f032 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -57,6 +57,7 @@ struct ExprState; struct ExprContext; struct RangeTblEntry; /* avoid including parsenodes.h here */ struct ExprEvalStep; /* avoid including execExpr.h everywhere */ +struct QueryDesc; /* avoid including execdesc.h here */ struct CopyMultiInsertBuffer; struct LogicalTapeSet; @@ -763,6 +764,9 @@ typedef struct EState */ List *es_insert_pending_result_relations; List *es_insert_pending_modifytables; + + /* Reference to query descriptor */ + struct QueryDesc *query_desc; } EState; @@ -1159,6 +1163,8 @@ typedef struct PlanState ExecProcNodeMtd ExecProcNode; /* function to return next tuple */ ExecProcNodeMtd ExecProcNodeReal; /* actual function, if above is a * wrapper */ + ExecProcNodeMtd ExecProcNodeOriginal; /* pointer to original function + * another wrapper was added */ Instrumentation *instrument; /* Optional runtime stats for this node */ WorkerInstrumentation *worker_instrument; /* per-worker instrumentation */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index ffa03189e2d..f3499d307f4 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -218,6 +218,7 @@ typedef enum BuiltinTrancheIds LWTRANCHE_SUBTRANS_SLRU, LWTRANCHE_XACT_SLRU, LWTRANCHE_PARALLEL_VACUUM_DSA, + LWTRANCHE_PROGRESSIVE_EXPLAIN_DSA, LWTRANCHE_FIRST_USER_DEFINED, } BuiltinTrancheIds; diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h index 932024b1b0b..7d88e7e9b58 100644 --- a/src/include/storage/lwlocklist.h +++ b/src/include/storage/lwlocklist.h @@ -84,3 +84,4 @@ PG_LWLOCK(50, DSMRegistry) PG_LWLOCK(51, InjectionPoint) PG_LWLOCK(52, SerialControl) PG_LWLOCK(53, AioWorkerSubmissionQueue) +PG_LWLOCK(54, ProgressiveExplainHash) diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index f619100467d..cff5c1f4cdb 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -278,6 +278,16 @@ extern PGDLLIMPORT int log_min_duration_statement; extern PGDLLIMPORT int log_temp_files; extern PGDLLIMPORT double log_statement_sample_rate; extern PGDLLIMPORT double log_xact_sample_rate; +extern PGDLLIMPORT int progressive_explain; +extern PGDLLIMPORT int progressive_explain_min_duration; +extern PGDLLIMPORT int progressive_explain_interval; +extern PGDLLIMPORT int progressive_explain_format; +extern PGDLLIMPORT bool progressive_explain_verbose; +extern PGDLLIMPORT bool progressive_explain_settings; +extern PGDLLIMPORT bool progressive_explain_timing; +extern PGDLLIMPORT bool progressive_explain_buffers; +extern PGDLLIMPORT bool progressive_explain_wal; +extern PGDLLIMPORT bool progressive_explain_costs; extern PGDLLIMPORT char *backtrace_functions; extern PGDLLIMPORT int temp_file_limit; @@ -322,6 +332,7 @@ extern PGDLLIMPORT const struct config_enum_entry io_method_options[]; extern PGDLLIMPORT const struct config_enum_entry recovery_target_action_options[]; extern PGDLLIMPORT const struct config_enum_entry wal_level_options[]; extern PGDLLIMPORT const struct config_enum_entry wal_sync_method_options[]; +extern PGDLLIMPORT const struct config_enum_entry explain_format_options[]; /* * Functions exported by guc.c diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index 7b19beafdc9..ea66a0505d9 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -36,6 +36,8 @@ typedef enum TimeoutId IDLE_STATS_UPDATE_TIMEOUT, CLIENT_CONNECTION_CHECK_TIMEOUT, STARTUP_PROGRESS_TIMEOUT, + PROGRESSIVE_EXPLAIN_STARTUP_TIMEOUT, + PROGRESSIVE_EXPLAIN_TIMEOUT, /* First user-definable timeout reason */ USER_TIMEOUT, /* Maximum number of timeout reasons */ diff --git a/src/test/modules/test_misc/t/008_progressive_explain.pl b/src/test/modules/test_misc/t/008_progressive_explain.pl new file mode 100644 index 00000000000..05e555a5e26 --- /dev/null +++ b/src/test/modules/test_misc/t/008_progressive_explain.pl @@ -0,0 +1,130 @@ +# Copyright (c) 2023-2025, PostgreSQL Global Development Group +# +# Test progressive explain +# +# We need to make sure pg_stat_progress_explain does not show rows for the local +# session, even if progressive explain is enabled. For other sessions pg_stat_progress_explain +# should contain data for a PID only if progressive_explain is enabled and a query +# is running. Data needs to be removed when query finishes (or gets cancelled). + +use strict; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize node +my $node = PostgreSQL::Test::Cluster->new('progressive_explain'); + +$node->init; +# Configure progressive explain to be logged immediately +$node->append_conf('postgresql.conf', 'progressive_explain_min_duration = 0'); +$node->start; + +# Test for local session +sub test_local_session +{ + my $setting = $_[0]; + # Make sure local session does not appear in pg_stat_progress_explain + my $count = $node->safe_psql( + 'postgres', qq[ + SET progressive_explain='$setting'; + SELECT count(*) from pg_stat_progress_explain WHERE pid = pg_backend_pid() + ]); + + ok($count == "0", + "Session cannot see its own explain with progressive_explain set to ${setting}"); +} + +# Tests for peer session +sub test_peer_session +{ + my $setting = $_[0]; + my $ret; + + # Start a background session and get its PID + my $background_psql = $node->background_psql( + 'postgres', + on_error_stop => 0); + + my $pid = $background_psql->query_safe( + qq[ + SELECT pg_backend_pid(); + ]); + + # Configure progressive explain in background session and run a simple query + # letting it finish + $background_psql->query_safe( + qq[ + SET progressive_explain='$setting'; + SELECT 1; + ]); + + # Check that pg_stat_progress_explain contains no row for the PID that finished + # its query gracefully + $ret = $node->safe_psql( + 'postgres', qq[ + SELECT count(*) FROM pg_stat_progress_explain where pid = $pid + ]); + + ok($ret == "0", + "pg_stat_progress_explain empty for completed query with progressive_explain set to ${setting}"); + + # Start query in background session and leave it running + $background_psql->query_until( + qr/start/, q( + \echo start + SELECT pg_sleep(600); + )); + + $ret = $node->safe_psql( + 'postgres', qq[ + SELECT count(*) FROM pg_stat_progress_explain where pid = $pid + ]); + + # If progressive_explain is disabled pg_stat_progress_explain should not contain + # row for PID + if ($setting eq 'off') { + ok($ret == "0", + "pg_stat_progress_explain empty for running query with progressive_explain set to ${setting}"); + } + # 1 row for pid is expected for running query + else { + ok($ret == "1", + "pg_stat_progress_explain with 1 row for running query with progressive_explain set to ${setting}"); + } + + # Terminate running query and make sure it is gone + $node->safe_psql( + 'postgres', qq[ + SELECT pg_cancel_backend($pid); + ]); + + $node->poll_query_until( + 'postgres', qq[ + SELECT count(*) = 0 FROM pg_stat_activity + WHERE pid = $pid and state = 'active' + ]); + + # Check again pg_stat_progress_explain and confirm that the existing row is + # now gone + $ret = $node->safe_psql( + 'postgres', qq[ + SELECT count(*) FROM pg_stat_progress_explain where pid = $pid + ]); + + ok($ret == "0", + "pg_stat_progress_explain empty for canceled query with progressive_explain set to ${setting}"); +} + +# Run tests for the local session +test_local_session('off'); +test_local_session('explain'); +test_local_session('analyze'); + +# Run tests for peer session +test_peer_session('off'); +test_peer_session('explain'); +test_peer_session('analyze'); + +$node->stop; +done_testing(); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 47478969135..62b70cf4618 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2041,6 +2041,13 @@ pg_stat_progress_create_index| SELECT s.pid, s.param15 AS partitions_done FROM (pg_stat_get_progress_info('CREATE INDEX'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); +pg_stat_progress_explain| SELECT s.datid, + d.datname, + s.pid, + s.last_update, + s.query_plan + FROM (pg_stat_progress_explain() s(datid, pid, last_update, query_plan) + LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_progress_vacuum| SELECT s.pid, s.datid, d.datname, diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 78e22a14f62..d9bdd7ce536 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2297,6 +2297,9 @@ ProcessUtilityContext ProcessUtility_hook_type ProcessingMode ProgressCommandType +ProgressiveExplain +ProgressiveExplainHashData +ProgressiveExplainHashEntry ProjectSet ProjectSetPath ProjectSetState -- 2.43.0