Thread: Add memory/disk usage for WindowAgg nodes in EXPLAIN
1eff8279d4 added memory/disk usage for materialize nodes in EXPLAIN ANALYZE. In the commit message: > There are a few other executor node types that use tuplestores, so we > could also consider adding these details to the EXPLAIN ANALYZE for > those nodes too. So I wanted to Add memory/disk usage for WindowAgg. Patch attached. Since WindowAgg node could create multiple tuplestore for each Window partition, we need to track each tuplestore storage usage so that the maximum storage usage is determined. For this purpose I added new fields to the WindowAggState. -- Tatsuo Ishii SRA OSS LLC English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp From 00c7546659e305be45dbeb13a14bcde5066be76f Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Sat, 6 Jul 2024 19:48:30 +0900 Subject: [PATCH v1] Add memory/disk usage for WindowAgg nodes in EXPLAIN. --- src/backend/commands/explain.c | 37 ++++++++++++++++++++++++++++ src/backend/executor/nodeWindowAgg.c | 19 ++++++++++++++ src/include/nodes/execnodes.h | 2 ++ 3 files changed, 58 insertions(+) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 1e80fd8b68..177687ea80 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -126,6 +126,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); +static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2215,6 +2216,7 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); show_upper_qual(((WindowAgg *) plan)->runConditionOrig, "Run Condition", planstate, ancestors, es); + show_windowagg_info(castNode(WindowAggState, planstate), es); break; case T_Group: show_group_keys(castNode(GroupState, planstate), ancestors, es); @@ -3362,6 +3364,41 @@ show_material_info(MaterialState *mstate, ExplainState *es) } } +/* + * Show information on WindowAgg node, storage method and maximum memory/disk + * space used. + */ +static void +show_windowagg_info(WindowAggState *winstate, ExplainState *es) +{ + const char *storageType; + int64 spaceUsedKB; + + /* + * Nothing to show if ANALYZE option wasn't used or if execution didn't + * get as far as creating the tuplestore. + */ + if (!es->analyze || winstate->storageType == NULL) + return; + + storageType = winstate->storageType; + spaceUsedKB = BYTES_TO_KILOBYTES(winstate->storageSize); + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyText("Storage", storageType, es); + ExplainPropertyInteger("Maximum Storage", "kB", spaceUsedKB, es); + } + else + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", + storageType, + spaceUsedKB); + } +} + /* * Show information on memoize hits/misses/evictions and memory usage. */ diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 3221fa1522..bcfe144511 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -1360,7 +1360,23 @@ release_partition(WindowAggState *winstate) } if (winstate->buffer) + { + int64 spaceUsed = tuplestore_space_used(winstate->buffer); + + /* + * We want to track the max mem/disk usage so that we can use the info + * in EXPLAIN (ANALYZE). + */ + if (spaceUsed > winstate->storageSize) + { + if (winstate->storageType != NULL) + pfree(winstate->storageType); + winstate->storageType = + pstrdup(tuplestore_storage_type_name(winstate->buffer)); + winstate->storageSize = spaceUsed; + } tuplestore_end(winstate->buffer); + } winstate->buffer = NULL; winstate->partition_spooled = false; } @@ -2671,6 +2687,9 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) winstate->partition_spooled = false; winstate->more_partitions = false; + winstate->storageType = NULL; + winstate->storageSize = 0; + return winstate; } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index b62c96f206..7a3dfa2bc3 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2567,6 +2567,8 @@ typedef struct WindowAggState ExprState *partEqfunction; /* equality funcs for partition columns */ ExprState *ordEqfunction; /* equality funcs for ordering columns */ Tuplestorestate *buffer; /* stores rows of current partition */ + int64 storageSize; /* max storage size in buffer */ + char *storageType; /* the storage type above */ int current_ptr; /* read pointer # for current row */ int framehead_ptr; /* read pointer # for frame head, if used */ int frametail_ptr; /* read pointer # for frame tail, if used */ -- 2.25.1
On Sat, 6 Jul 2024 at 23:23, Tatsuo Ishii <ishii@postgresql.org> wrote: > So I wanted to Add memory/disk usage for WindowAgg. Patch attached. Thanks for working on that. > Since WindowAgg node could create multiple tuplestore for each Window > partition, we need to track each tuplestore storage usage so that the > maximum storage usage is determined. For this purpose I added new > fields to the WindowAggState. I'd recently been looking at the code that recreates the tuplestore for each partition and thought we could do a bit better. In [1], I proposed a patch to make this better. If you based your patch on [1], maybe a better way of doing this is having tuplestore.c track the maximum space used on disk in an extra field which is updated with tuplestore_clear(). It's probably ok to update a new field called maxDiskSpace in tuplestore_clear() if state->status != TSS_INMEM. If the tuplestore went to disk then an extra call to BufFileSize() isn't going to be noticeable, even in cases where we only just went over work_mem. You could then adjust tuplestore_space_used() to look at maxDiskSpace and return that value if it's larger than BufFileSize(state->myfile) and state->maxSpace. You could check if maxDiskSpace == 0 to determine if the tuplestore has ever gone to disk. tuplestore_storage_type_name() would also need to check maxDiskSpace and return "Disk" if that's non-zero. David [1] https://www.postgresql.org/message-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv+gbJN66-y6prM3f4WkEHw@mail.gmail.com
> On Sat, 6 Jul 2024 at 23:23, Tatsuo Ishii <ishii@postgresql.org> wrote: >> So I wanted to Add memory/disk usage for WindowAgg. Patch attached. > > Thanks for working on that. Thank you for the infrastructure you created in tuplestore.c and explain.c. BTW, it seems these executor nodes (other than Materialize and Window Aggregate node) use tuplestore for their own purpose. CTE Scan Recursive Union Table Function Scan I have already implemented that for CTE Scan. Do you think other two nodes are worth to add the information? I think for consistency sake, it will better to add the info Recursive Union and Table Function Scan. >> Since WindowAgg node could create multiple tuplestore for each Window >> partition, we need to track each tuplestore storage usage so that the >> maximum storage usage is determined. For this purpose I added new >> fields to the WindowAggState. > > I'd recently been looking at the code that recreates the tuplestore > for each partition and thought we could do a bit better. In [1], I > proposed a patch to make this better. > > If you based your patch on [1], maybe a better way of doing this is > having tuplestore.c track the maximum space used on disk in an extra > field which is updated with tuplestore_clear(). It's probably ok to > update a new field called maxDiskSpace in tuplestore_clear() if > state->status != TSS_INMEM. If the tuplestore went to disk then an > extra call to BufFileSize() isn't going to be noticeable, even in > cases where we only just went over work_mem. You could then adjust > tuplestore_space_used() to look at maxDiskSpace and return that value > if it's larger than BufFileSize(state->myfile) and state->maxSpace. > You could check if maxDiskSpace == 0 to determine if the tuplestore > has ever gone to disk. tuplestore_storage_type_name() would also need > to check maxDiskSpace and return "Disk" if that's non-zero. Thank you for the suggestion. Yes, I noticed [1] and once it is committed, I will start to study tuplestore.c in this direction. Best reagards, -- Tatsuo Ishii SRA OSS LLC English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
On Tue, 9 Jul 2024 at 14:44, Tatsuo Ishii <ishii@postgresql.org> wrote: > BTW, it seems these executor nodes (other than Materialize and Window > Aggregate node) use tuplestore for their own purpose. > > CTE Scan > Recursive Union > Table Function Scan > > I have already implemented that for CTE Scan. Do you think other two > nodes are worth to add the information? Yes, I think so. I'd keep each as a separate patch so they can be considered independently. Doing all of them should hopefully ensure we strike the right balance of what code to put in explain.c and what code to put in tuplestore.c. I think the WindowAgg's tuplestore usage pattern might show that the API I picked isn't well suited when a tuplestore is cleared and refilled over and over. David
On Tue, Jul 9, 2024 at 8:20 AM David Rowley <dgrowleyml@gmail.com> wrote: > > On Tue, 9 Jul 2024 at 14:44, Tatsuo Ishii <ishii@postgresql.org> wrote: > > BTW, it seems these executor nodes (other than Materialize and Window > > Aggregate node) use tuplestore for their own purpose. > > > > CTE Scan > > Recursive Union > > Table Function Scan > > > > I have already implemented that for CTE Scan. Do you think other two > > nodes are worth to add the information? > > Yes, I think so. I'd keep each as a separate patch so they can be > considered independently. Doing all of them should hopefully ensure we > strike the right balance of what code to put in explain.c and what > code to put in tuplestore.c. +1 + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyText("Storage", storageType, es); + ExplainPropertyInteger("Maximum Storage", "kB", spaceUsedKB, es); + } + else + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", + storageType, + spaceUsedKB); + } It will be good to move this code to a function which will be called by show_*_info functions(). We might even convert it into a tuplestore specific implementation hook after David's work. -- Best Wishes, Ashutosh Bapat
>> Yes, I think so. I'd keep each as a separate patch so they can be >> considered independently. Doing all of them should hopefully ensure we >> strike the right balance of what code to put in explain.c and what >> code to put in tuplestore.c. > +1 > > + if (es->format != EXPLAIN_FORMAT_TEXT) > + { > + ExplainPropertyText("Storage", storageType, es); > + ExplainPropertyInteger("Maximum Storage", "kB", spaceUsedKB, es); > + } > + else > + { > + ExplainIndentText(es); > + appendStringInfo(es->str, > + "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", > + storageType, > + spaceUsedKB); > + } > > It will be good to move this code to a function which will be called > by show_*_info functions(). I have already implemented that in this direction in my working in progress patch: /* * Show information regarding storage method and maximum memory/disk space * used. */ static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es) Which can be shared by Material and CTE scan node. I am going to post it after I take care Recursive Union and Table Function Scan node. > We might even convert it into a tuplestore > specific implementation hook after David's work. Best reagards, -- Tatsuo Ishii SRA OSS LLC English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
>>> Yes, I think so. I'd keep each as a separate patch so they can be >>> considered independently. Doing all of them should hopefully ensure we >>> strike the right balance of what code to put in explain.c and what >>> code to put in tuplestore.c. >> +1 >> >> + if (es->format != EXPLAIN_FORMAT_TEXT) >> + { >> + ExplainPropertyText("Storage", storageType, es); >> + ExplainPropertyInteger("Maximum Storage", "kB", spaceUsedKB, es); >> + } >> + else >> + { >> + ExplainIndentText(es); >> + appendStringInfo(es->str, >> + "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", >> + storageType, >> + spaceUsedKB); >> + } >> >> It will be good to move this code to a function which will be called >> by show_*_info functions(). > > I have already implemented that in this direction in my working in > progress patch: Attached are the v2 patches. As suggested by David, I split them into multiple patches so that each patch implements the feature for each node. You need to apply the patches in the order of patch number (if you want to apply all of them, "git apply v2-*.patch" should work). v2-0001-Refactor-show_material_info.patch: This refactors show_material_info(). The guts are moved to new show_storage_info() so that it can be shared by not only Materialized node. v2-0002-Add-memory-disk-usage-for-CTE-Scan-nodes-in-EXPLA.patch: This adds memory/disk usage for CTE Scan nodes in EXPLAIN (ANALYZE) command. v2-0003-Add-memory-disk-usage-for-Table-Function-Scan-nod.patch: This adds memory/disk usage for Table Function Scan nodes in EXPLAIN (ANALYZE) command. v2-0004-Add-memory-disk-usage-for-Recursive-Union-nodes-i.patch: This adds memory/disk usage for Recursive Union nodes in EXPLAIN (ANALYZE) command. Also show_storage_info() is changed so that it accepts int64 storage_used, char *storage_type arguments. They are used if the target node uses multiple tuplestores, in case a simple call to tuplestore_space_used() does not work. Such executor nodes need to collect storage_used while running the node. This type of node includes Recursive Union and Window Aggregate. v2-0005-Add-memory-disk-usage-for-Window-Aggregate-nodes-.patch: This adds memory/disk usage for Window Aggregate nodes in EXPLAIN (ANALYZE) command. Note that if David's proposal https://www.postgresql.org/message-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv%2BgbJN66-y6prM3f4WkEHw%40mail.gmail.com is committed, this will need to be adjusted. For a demonstration, how storage/memory usage is shown in EXPLAIN (notice "Storage: Memory Maximum Storage: 120kB" etc.). The script used is attached (test.sql.txt). The SQLs are shamelessly copied from David's example and the regression test (some of them were modified by me). EXPLAIN (ANALYZE, COSTS OFF) SELECT count(t1.b) FROM (VALUES(1),(2)) t2(x) LEFT JOIN (SELECT * FROM t1 WHERE a <= 100) t1 ON TRUE; QUERY PLAN --------------------------------------------------------------------------------- Aggregate (actual time=0.345..0.346 rows=1 loops=1) -> Nested Loop Left Join (actual time=0.015..0.330 rows=200 loops=1) -> Values Scan on "*VALUES*" (actual time=0.001..0.003 rows=2 loops=1) -> Materialize (actual time=0.006..0.152 rows=100 loops=2) Storage: Memory Maximum Storage: 120kB -> Seq Scan on t1 (actual time=0.007..0.213 rows=100 loops=1) Filter: (a <= 100) Rows Removed by Filter: 900 Planning Time: 0.202 ms Execution Time: 0.377 ms (10 rows) -- CTE Scan node EXPLAIN (ANALYZE, COSTS OFF) WITH RECURSIVE t(n) AS ( VALUES (1) UNION ALL SELECT n+1 FROM t WHERE n < 100 ) SELECT sum(n) OVER() FROM t; QUERY PLAN ----------------------------------------------------------------------------------- WindowAgg (actual time=0.151..0.169 rows=100 loops=1) Storage: Memory Maximum Storage: 20kB CTE t -> Recursive Union (actual time=0.001..0.105 rows=100 loops=1) Storage: Memory Maximum Storage: 17kB -> Result (actual time=0.001..0.001 rows=1 loops=1) -> WorkTable Scan on t t_1 (actual time=0.000..0.000 rows=1 loops=100) Filter: (n < 100) Rows Removed by Filter: 0 -> CTE Scan on t (actual time=0.002..0.127 rows=100 loops=1) Storage: Memory Maximum Storage: 20kB Planning Time: 0.053 ms Execution Time: 0.192 ms (13 rows) -- Table Function Scan node CREATE OR REPLACE VIEW public.jsonb_table_view6 AS SELECT js2, jsb2w, jsb2q, ia, ta, jba FROM JSON_TABLE( 'null'::jsonb, '$[*]' AS json_table_path_0 PASSING 1 + 2 AS a, '"foo"'::json AS "b c" COLUMNS ( js2 json PATH '$' WITHOUT WRAPPER KEEP QUOTES, jsb2w jsonb PATH '$' WITH UNCONDITIONAL WRAPPER KEEP QUOTES, jsb2q jsonb PATH '$' WITHOUT WRAPPER OMIT QUOTES, ia integer[] PATH '$' WITHOUT WRAPPER KEEP QUOTES, ta text[] PATH '$' WITHOUT WRAPPER KEEP QUOTES, jba jsonb[] PATH '$' WITHOUT WRAPPER KEEP QUOTES ) ); CREATE VIEW EXPLAIN (ANALYZE, COSTS OFF) SELECT * FROM jsonb_table_view6; QUERY PLAN ------------------------------------------------------------------------------- Table Function Scan on "json_table" (actual time=0.024..0.025 rows=1 loops=1) Storage: Memory Maximum Storage: 17kB Planning Time: 0.100 ms Execution Time: 0.054 ms (4 rows) From 1d2f67dff48601f92b5378838c0e908d200d4493 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Wed, 10 Jul 2024 14:02:13 +0900 Subject: [PATCH v2 1/5] Refactor show_material_info(). --- src/backend/commands/explain.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 118db12903..43ef33295e 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -125,6 +125,7 @@ static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_incremental_sort_info(IncrementalSortState *incrsortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); +static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); @@ -3326,13 +3327,11 @@ show_hash_info(HashState *hashstate, ExplainState *es) } /* - * Show information on material node, storage method and maximum memory/disk - * space used. + * Show information on storage method and maximum memory/disk space used. */ static void -show_material_info(MaterialState *mstate, ExplainState *es) +show_storage_info(Tuplestorestate *tupstore, ExplainState *es) { - Tuplestorestate *tupstore = mstate->tuplestorestate; const char *storageType; int64 spaceUsedKB; @@ -3361,6 +3360,18 @@ show_material_info(MaterialState *mstate, ExplainState *es) } } +/* + * Show information on material node, storage method and maximum memory/disk + * space used. + */ +static void +show_material_info(MaterialState *mstate, ExplainState *es) +{ + Tuplestorestate *tupstore = mstate->tuplestorestate; + + show_storage_info(tupstore, es); +} + /* * Show information on memoize hits/misses/evictions and memory usage. */ -- 2.25.1 From e4eb2288c90e92a42ba85c672d477b1a93ebf6f8 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Wed, 10 Jul 2024 14:09:46 +0900 Subject: [PATCH v2 2/5] Add memory/disk usage for CTE Scan nodes in EXPLAIN --- src/backend/commands/explain.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 43ef33295e..661e9101cb 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -127,6 +127,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); +static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2028,6 +2029,8 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + if (IsA(plan, CteScan)) + show_ctescan_info(castNode(CteScanState, planstate), es); break; case T_Gather: { @@ -3372,6 +3375,18 @@ show_material_info(MaterialState *mstate, ExplainState *es) show_storage_info(tupstore, es); } +/* + * Show information on CTE Scan node, storage method and maximum memory/disk + * space used. + */ +static void +show_ctescan_info(CteScanState *ctescanstate, ExplainState *es) +{ + Tuplestorestate *tupstore = ctescanstate->leader->cte_table; + + show_storage_info(tupstore, es); +} + /* * Show information on memoize hits/misses/evictions and memory usage. */ -- 2.25.1 From a7bfaf711b5a1ecb4fe58d3b46db1911383dbfb8 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Wed, 10 Jul 2024 14:32:59 +0900 Subject: [PATCH v2 3/5] Add memory/disk usage for Table Function Scan nodes in EXPLAIN --- src/backend/commands/explain.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 661e9101cb..1a4e83ad38 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -128,6 +128,7 @@ static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); +static void show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2112,6 +2113,7 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + show_table_func_can_info(castNode(TableFuncScanState, planstate), es); break; case T_TidScan: { @@ -3387,6 +3389,18 @@ show_ctescan_info(CteScanState *ctescanstate, ExplainState *es) show_storage_info(tupstore, es); } +/* + * Show information on Table Function Scan node, storage method and maximum + * memory/disk space used. + */ +static void +show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es) +{ + Tuplestorestate *tupstore = tscanstate->tupstore; + + show_storage_info(tupstore, es); +} + /* * Show information on memoize hits/misses/evictions and memory usage. */ -- 2.25.1 From b03264d55d0c23885fe9ce4f9d93a6ca65d45261 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Wed, 10 Jul 2024 16:06:59 +0900 Subject: [PATCH v2 4/5] Add memory/disk usage for Recursive Union nodes in EXPLAIN --- src/backend/commands/explain.c | 47 +++++++++++++++++------ src/backend/executor/nodeRecursiveunion.c | 30 +++++++++++++++ src/include/nodes/execnodes.h | 2 + 3 files changed, 68 insertions(+), 11 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 1a4e83ad38..054d909093 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -125,10 +125,12 @@ static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_incremental_sort_info(IncrementalSortState *incrsortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); -static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es); +static void show_storage_info(Tuplestorestate *tupstore, int64 storage_used, char *storage_type, + ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); static void show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es); +static void show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2264,6 +2266,9 @@ ExplainNode(PlanState *planstate, List *ancestors, show_memoize_info(castNode(MemoizeState, planstate), ancestors, es); break; + case T_RecursiveUnion: + show_recursive_union_info(castNode(RecursiveUnionState, planstate), es); + break; default: break; } @@ -3332,23 +3337,32 @@ show_hash_info(HashState *hashstate, ExplainState *es) } /* - * Show information on storage method and maximum memory/disk space used. + * Show information on storage method and maximum memory/disk space used. If + * tupstore is NULL, then storage_used and storage_type are used instead. */ static void -show_storage_info(Tuplestorestate *tupstore, ExplainState *es) +show_storage_info(Tuplestorestate *tupstore, int64 storage_used, char *storage_type, + ExplainState *es) { const char *storageType; int64 spaceUsedKB; /* - * Nothing to show if ANALYZE option wasn't used or if execution didn't - * get as far as creating the tuplestore. + * Nothing to show if ANALYZE option wasn't used. */ - if (!es->analyze || tupstore == NULL) + if (!es->analyze) return; - storageType = tuplestore_storage_type_name(tupstore); - spaceUsedKB = BYTES_TO_KILOBYTES(tuplestore_space_used(tupstore)); + if (tupstore != NULL) + { + storageType = tuplestore_storage_type_name(tupstore); + spaceUsedKB = BYTES_TO_KILOBYTES(tuplestore_space_used(tupstore)); + } + else + { + storageType = storage_type; + spaceUsedKB = BYTES_TO_KILOBYTES(storage_used); + } if (es->format != EXPLAIN_FORMAT_TEXT) { @@ -3374,7 +3388,7 @@ show_material_info(MaterialState *mstate, ExplainState *es) { Tuplestorestate *tupstore = mstate->tuplestorestate; - show_storage_info(tupstore, es); + show_storage_info(tupstore, 0, NULL, es); } /* @@ -3386,7 +3400,7 @@ show_ctescan_info(CteScanState *ctescanstate, ExplainState *es) { Tuplestorestate *tupstore = ctescanstate->leader->cte_table; - show_storage_info(tupstore, es); + show_storage_info(tupstore, 0, NULL, es); } /* @@ -3398,7 +3412,18 @@ show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es) { Tuplestorestate *tupstore = tscanstate->tupstore; - show_storage_info(tupstore, es); + show_storage_info(tupstore, 0, NULL, es); +} + +/* + * Show information on Recursive Union node, storage method and maximum + * memory/disk space used. + */ + +static void +show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es) +{ + show_storage_info(NULL, rstate->storageSize, rstate->storageType, es); } /* diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c index c7f8a19fa4..8667b7ca93 100644 --- a/src/backend/executor/nodeRecursiveunion.c +++ b/src/backend/executor/nodeRecursiveunion.c @@ -52,6 +52,33 @@ build_hash_table(RecursiveUnionState *rustate) false); } +/* + * Track the maximum memory/disk usage in working_table and + * intermediate_table. Supposed to be called just before tuplestore_end. + */ +static void +track_storage_usage(RecursiveUnionState *state, Tuplestorestate *tup) +{ + int64 spaceUsed; + + if (tup == NULL) + return; + + spaceUsed = tuplestore_space_used(tup); + + /* + * We want to track the maximum mem/disk usage so that we can use the info + * in EXPLAIN (ANALYZE). + */ + if (spaceUsed > state->storageSize) + { + if (state->storageType != NULL) + pfree(state->storageType); + state->storageType = + pstrdup(tuplestore_storage_type_name(tup)); + state->storageSize = spaceUsed; + } +} /* ---------------------------------------------------------------- * ExecRecursiveUnion(node) @@ -120,6 +147,7 @@ ExecRecursiveUnion(PlanState *pstate) break; /* done with old working table ... */ + track_storage_usage(node, node->working_table); tuplestore_end(node->working_table); /* intermediate table becomes working table */ @@ -191,6 +219,8 @@ ExecInitRecursiveUnion(RecursiveUnion *node, EState *estate, int eflags) rustate->intermediate_empty = true; rustate->working_table = tuplestore_begin_heap(false, false, work_mem); rustate->intermediate_table = tuplestore_begin_heap(false, false, work_mem); + rustate->storageSize = 0; + rustate->storageType = NULL; /* * If hashing, we need a per-tuple memory context for comparisons, and a diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index cac684d9b3..bc2c0baed6 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1509,6 +1509,8 @@ typedef struct RecursiveUnionState bool intermediate_empty; Tuplestorestate *working_table; Tuplestorestate *intermediate_table; + int64 storageSize; /* max storage size Tuplestore */ + char *storageType; /* the storage type above */ /* Remaining fields are unused in UNION ALL case */ Oid *eqfuncoids; /* per-grouping-field equality fns */ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ -- 2.25.1 From b61edc12210eb71c8a7a987171e8bbf39299989c Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Wed, 10 Jul 2024 16:22:41 +0900 Subject: [PATCH v2 5/5] Add memory/disk usage for Window Aggregate nodes in EXPLAIN --- src/backend/commands/explain.c | 12 ++++++++++++ src/backend/executor/nodeWindowAgg.c | 19 +++++++++++++++++++ src/include/nodes/execnodes.h | 2 ++ 3 files changed, 33 insertions(+) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 054d909093..5f32c967e1 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -131,6 +131,7 @@ static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); static void show_table_func_can_info(TableFuncScanState *tscanstate, ExplainState *es); static void show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es); +static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2222,6 +2223,7 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); show_upper_qual(((WindowAgg *) plan)->runConditionOrig, "Run Condition", planstate, ancestors, es); + show_windowagg_info(castNode(WindowAggState, planstate), es); break; case T_Group: show_group_keys(castNode(GroupState, planstate), ancestors, es); @@ -3426,6 +3428,16 @@ show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es) show_storage_info(NULL, rstate->storageSize, rstate->storageType, es); } +/* + * Show information on WindowAgg node, storage method and maximum memory/disk + * space used. + */ +static void +show_windowagg_info(WindowAggState *winstate, ExplainState *es) +{ + show_storage_info(NULL, winstate->storageSize, winstate->storageType, es); +} + /* * Show information on memoize hits/misses/evictions and memory usage. */ diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 3221fa1522..bcfe144511 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -1360,7 +1360,23 @@ release_partition(WindowAggState *winstate) } if (winstate->buffer) + { + int64 spaceUsed = tuplestore_space_used(winstate->buffer); + + /* + * We want to track the max mem/disk usage so that we can use the info + * in EXPLAIN (ANALYZE). + */ + if (spaceUsed > winstate->storageSize) + { + if (winstate->storageType != NULL) + pfree(winstate->storageType); + winstate->storageType = + pstrdup(tuplestore_storage_type_name(winstate->buffer)); + winstate->storageSize = spaceUsed; + } tuplestore_end(winstate->buffer); + } winstate->buffer = NULL; winstate->partition_spooled = false; } @@ -2671,6 +2687,9 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) winstate->partition_spooled = false; winstate->more_partitions = false; + winstate->storageType = NULL; + winstate->storageSize = 0; + return winstate; } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index bc2c0baed6..82a597aae9 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2596,6 +2596,8 @@ typedef struct WindowAggState ExprState *partEqfunction; /* equality funcs for partition columns */ ExprState *ordEqfunction; /* equality funcs for ordering columns */ Tuplestorestate *buffer; /* stores rows of current partition */ + int64 storageSize; /* max storage size in buffer */ + char *storageType; /* the storage type above */ int current_ptr; /* read pointer # for current row */ int framehead_ptr; /* read pointer # for frame head, if used */ int frametail_ptr; /* read pointer # for frame tail, if used */ -- 2.25.1 -- Mateialize node DROP TABLE t1; CREATE TABLE t1 (a INT, b TEXT); INSERT INTO t1 SELECT x,repeat('a',1024) from generate_series(1,1000)x; CREATE INDEX ON t1(a); EXPLAIN (ANALYZE, COSTS OFF) SELECT count(t1.b) FROM (VALUES(1),(2)) t2(x) LEFT JOIN (SELECT * FROM t1 WHERE a <= 100) t1 ON TRUE; -- CTE Scan node EXPLAIN (ANALYZE, COSTS OFF) WITH RECURSIVE t(n) AS ( VALUES (1) UNION ALL SELECT n+1 FROM t WHERE n < 100 ) SELECT sum(n) OVER() FROM t; -- Table Function Scan node CREATE OR REPLACE VIEW public.jsonb_table_view6 AS SELECT js2, jsb2w, jsb2q, ia, ta, jba FROM JSON_TABLE( 'null'::jsonb, '$[*]' AS json_table_path_0 PASSING 1 + 2 AS a, '"foo"'::json AS "b c" COLUMNS ( js2 json PATH '$' WITHOUT WRAPPER KEEP QUOTES, jsb2w jsonb PATH '$' WITH UNCONDITIONAL WRAPPER KEEP QUOTES, jsb2q jsonb PATH '$' WITHOUT WRAPPER OMIT QUOTES, ia integer[] PATH '$' WITHOUT WRAPPER KEEP QUOTES, ta text[] PATH '$' WITHOUT WRAPPER KEEP QUOTES, jba jsonb[] PATH '$' WITHOUT WRAPPER KEEP QUOTES ) ); EXPLAIN (ANALYZE, COSTS OFF) SELECT * FROM jsonb_table_view6;
Hi!
+1 for the idea of the patch. Consider it useful.
I looked at the patch set and don't see any obvious defects. It applies without any problems and looks pretty good for me.
Only one thing is left to do. Add basic tests for the added functionality to make it committable. For example, as in the
mentioned 1eff8279d494b9.
--
Best regards,
Maxim Orlov.
> Hi! > > +1 for the idea of the patch. Consider it useful. > > I looked at the patch set and don't see any obvious defects. It applies > without any problems and looks pretty good for me. Thank you for reviewing my patch. > Only one thing is left to do. Add basic tests for the added functionality > to make it committable. For example, as in the > mentioned 1eff8279d494b9. Agreed. Probably add to explain.sql? Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
On Wed, 4 Sept 2024 at 03:07, Tatsuo Ishii <ishii@postgresql.org> wrote:
Agreed. Probably add to explain.sql?
Yeah, I think this is an appropriate place.
--
Best regards,
Maxim Orlov.
On Wed, Jul 10, 2024 at 5:36 PM Tatsuo Ishii <ishii@postgresql.org> wrote: > > > Attached are the v2 patches. As suggested by David, I split them > into multiple patches so that each patch implements the feature for > each node. You need to apply the patches in the order of patch number > (if you want to apply all of them, "git apply v2-*.patch" should > work). > > v2-0001-Refactor-show_material_info.patch: > This refactors show_material_info(). The guts are moved to new > show_storage_info() so that it can be shared by not only Materialized > node. > > v2-0002-Add-memory-disk-usage-for-CTE-Scan-nodes-in-EXPLA.patch: > This adds memory/disk usage for CTE Scan nodes in EXPLAIN (ANALYZE) command. > > v2-0003-Add-memory-disk-usage-for-Table-Function-Scan-nod.patch: > This adds memory/disk usage for Table Function Scan nodes in EXPLAIN (ANALYZE) command. > > v2-0004-Add-memory-disk-usage-for-Recursive-Union-nodes-i.patch: > This adds memory/disk usage for Recursive Union nodes in EXPLAIN > (ANALYZE) command. Also show_storage_info() is changed so that it > accepts int64 storage_used, char *storage_type arguments. They are > used if the target node uses multiple tuplestores, in case a simple > call to tuplestore_space_used() does not work. Such executor nodes > need to collect storage_used while running the node. This type of node > includes Recursive Union and Window Aggregate. > > v2-0005-Add-memory-disk-usage-for-Window-Aggregate-nodes-.patch: This > adds memory/disk usage for Window Aggregate nodes in EXPLAIN (ANALYZE) > command. Note that if David's proposal > https://www.postgresql.org/message-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv%2BgbJN66-y6prM3f4WkEHw%40mail.gmail.com > is committed, this will need to be adjusted. > > For a demonstration, how storage/memory usage is shown in EXPLAIN > (notice "Storage: Memory Maximum Storage: 120kB" etc.). The script > used is attached (test.sql.txt). The SQLs are shamelessly copied from > David's example and the regression test (some of them were modified by > me). > hi. I can roughly understand it. I have one minor issue with the comment. typedef struct RecursiveUnionState { PlanState ps; /* its first field is NodeTag */ bool recursing; bool intermediate_empty; Tuplestorestate *working_table; Tuplestorestate *intermediate_table; int64 storageSize; /* max storage size Tuplestore */ char *storageType; /* the storage type above */ .... } "/* the storage type above */" is kind of ambiguous, since there is more than one Tuplestorestate. i think it roughly means: the storage type of working_table while the max storage of working_table. typedef struct WindowAggState { ScanState ss; /* its first field is NodeTag */ /* these fields are filled in by ExecInitExpr: */ List *funcs; /* all WindowFunc nodes in targetlist */ int numfuncs; /* total number of window functions */ int numaggs; /* number that are plain aggregates */ WindowStatePerFunc perfunc; /* per-window-function information */ WindowStatePerAgg peragg; /* per-plain-aggregate information */ ExprState *partEqfunction; /* equality funcs for partition columns */ ExprState *ordEqfunction; /* equality funcs for ordering columns */ Tuplestorestate *buffer; /* stores rows of current partition */ int64 storageSize; /* max storage size in buffer */ char *storageType; /* the storage type above */ } " /* the storage type above */" I think it roughly means: " the storage type of WindowAggState->buffer while the max storage of WindowAggState->buffer".
On Wed, 10 Jul 2024 at 21:36, Tatsuo Ishii <ishii@postgresql.org> wrote: > v2-0005-Add-memory-disk-usage-for-Window-Aggregate-nodes-.patch: This > adds memory/disk usage for Window Aggregate nodes in EXPLAIN (ANALYZE) > command. Note that if David's proposal > https://www.postgresql.org/message-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv%2BgbJN66-y6prM3f4WkEHw%40mail.gmail.com > is committed, this will need to be adjusted. Hi, I pushed the changes to WindowAgg so as not to call tuplestore_end() on every partition. Can you rebase this patch over that change? It would be good to do this in a way that does not add any new state to WindowAggState, you can see that I had to shuffle fields around in that struct because the next_parition field would have caused the struct to become larger. I've not looked closely, but I expect this can be done by adding more code to tuplestore_updatemax() to also track the disk space used if the current storage has gone to disk. I expect the maxSpace field can be used for both, but we'd need another bool field to track if the max used was by disk or memory. I think the performance of this would also need to be tested as it means doing an lseek() on every tuplestore_clear() when we've gone to disk. Probably that will be dominated by all the other overheads of a tuplestore going to disk (i.e. dumptuples() etc), but it would be good to check this. I suggest setting work_mem = 64 and making a test case that only just spills to disk. Maybe do a few thousand partitions worth of that and see if you can measure any slowdown. David
Hi, > On Wed, 10 Jul 2024 at 21:36, Tatsuo Ishii <ishii@postgresql.org> wrote: >> v2-0005-Add-memory-disk-usage-for-Window-Aggregate-nodes-.patch: This >> adds memory/disk usage for Window Aggregate nodes in EXPLAIN (ANALYZE) >> command. Note that if David's proposal >> https://www.postgresql.org/message-id/CAHoyFK9n-QCXKTUWT_xxtXninSMEv%2BgbJN66-y6prM3f4WkEHw%40mail.gmail.com >> is committed, this will need to be adjusted. > > Hi, > > I pushed the changes to WindowAgg so as not to call tuplestore_end() > on every partition. Can you rebase this patch over that change? > > It would be good to do this in a way that does not add any new state > to WindowAggState, you can see that I had to shuffle fields around in > that struct because the next_parition field would have caused the > struct to become larger. I've not looked closely, but I expect this > can be done by adding more code to tuplestore_updatemax() to also > track the disk space used if the current storage has gone to disk. I > expect the maxSpace field can be used for both, but we'd need another > bool field to track if the max used was by disk or memory. > > I think the performance of this would also need to be tested as it > means doing an lseek() on every tuplestore_clear() when we've gone to > disk. Probably that will be dominated by all the other overheads of a > tuplestore going to disk (i.e. dumptuples() etc), but it would be good > to check this. I suggest setting work_mem = 64 and making a test case > that only just spills to disk. Maybe do a few thousand partitions > worth of that and see if you can measure any slowdown. Thank you for the suggestion. I will look into this. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
Hi, > hi. I can roughly understand it. > > I have one minor issue with the comment. > > typedef struct RecursiveUnionState > { > PlanState ps; /* its first field is NodeTag */ > bool recursing; > bool intermediate_empty; > Tuplestorestate *working_table; > Tuplestorestate *intermediate_table; > int64 storageSize; /* max storage size Tuplestore */ > char *storageType; /* the storage type above */ > .... > } > > "/* the storage type above */" > is kind of ambiguous, since there is more than one Tuplestorestate. > > i think it roughly means: the storage type of working_table > while the max storage of working_table. > > > > typedef struct WindowAggState > { > ScanState ss; /* its first field is NodeTag */ > > /* these fields are filled in by ExecInitExpr: */ > List *funcs; /* all WindowFunc nodes in targetlist */ > int numfuncs; /* total number of window functions */ > int numaggs; /* number that are plain aggregates */ > > WindowStatePerFunc perfunc; /* per-window-function information */ > WindowStatePerAgg peragg; /* per-plain-aggregate information */ > ExprState *partEqfunction; /* equality funcs for partition columns */ > ExprState *ordEqfunction; /* equality funcs for ordering columns */ > Tuplestorestate *buffer; /* stores rows of current partition */ > int64 storageSize; /* max storage size in buffer */ > char *storageType; /* the storage type above */ > } > > " /* the storage type above */" > I think it roughly means: > " the storage type of WindowAggState->buffer while the max storage of > WindowAggState->buffer". Thank you for looking into my patch. Unfortunately I need to work on other issue before adjusting the comments because the fields might go away if I change the tuplestore infrastructure per David's suggestion: https://www.postgresql.org/message-id/CAApHDvoY8cibGcicLV0fNh%3D9JVx9PANcWvhkdjBnDCc9Quqytg%40mail.gmail.com After this I will rebase the patches. This commit requires changes. https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=908a968612f9ed61911d8ca0a185b262b82f1269 Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
> Thanks for making the adjustments to this. > > I don't think there is any need to call tuplestore_updatemax() from > within writetup_heap(). That means having to update the maximum space > used every time a tuple is written to disk. That's a fairly massive > overhead. > > Instead, it should be fine to modify tuplestore_updatemax() to set a > flag to true if state->status != TSS_INMEM and then record the disk > space used. That flag won't ever be set to false again. > tuplestore_storage_type_name() should just return "Disk" if the new > disk flag is set, even if state->status == TSS_INMEM. Since the > work_mem size won't change between tuplestore_clear() calls, if we've > once spilt to disk, then we shouldn't care about the memory used for > runs that didn't. Those will always have used less memory. > > I did this quickly, but playing around with the attached, I didn't see > any slowdown. Your patch looks good to me and I confirmed that with your patch I didn't see any slowdown either. Thanks! Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
On Fri, Sep 6, 2024 at 11:32 AM Tatsuo Ishii <ishii@postgresql.org> wrote: > > > Thanks for making the adjustments to this. > > > > I don't think there is any need to call tuplestore_updatemax() from > > within writetup_heap(). That means having to update the maximum space > > used every time a tuple is written to disk. That's a fairly massive > > overhead. > > > > Instead, it should be fine to modify tuplestore_updatemax() to set a > > flag to true if state->status != TSS_INMEM and then record the disk > > space used. That flag won't ever be set to false again. > > tuplestore_storage_type_name() should just return "Disk" if the new > > disk flag is set, even if state->status == TSS_INMEM. Since the > > work_mem size won't change between tuplestore_clear() calls, if we've > > once spilt to disk, then we shouldn't care about the memory used for > > runs that didn't. Those will always have used less memory. > > > > I did this quickly, but playing around with the attached, I didn't see > > any slowdown. > > Your patch looks good to me and I confirmed that with your patch I > didn't see any slowdown either. Thanks! The changes look better. A nitpick though. With their definitions changed, I think it's better to change the names of the functions since their purpose has changed. Right now they report the storage type and size used, respectively, at the time of calling the function. With this patch, they report maximum space ever used and the storage corresponding to the maximum space. tuplestore_space_used() may be changed to tuplestore_maxspace_used(). I am having difficulty with tuplestore_storage_type_name(); tuplestore_largest_storage_type_name() seems mouthful and yet not doing justice to the functionality. It might be better to just have one funciton tuplestore_maxspace_used() which returns both the maximum space used as well as the storage type when maximum space was used. The comments need a bit of grammar fixes, but that can be done when finalizing the patches. -- Best Wishes, Ashutosh Bapat
> The changes look better. A nitpick though. With their definitions > changed, I think it's better to change the names of the functions > since their purpose has changed. Right now they report the storage > type and size used, respectively, at the time of calling the function. > With this patch, they report maximum space ever used and the storage > corresponding to the maximum space. tuplestore_space_used() may be > changed to tuplestore_maxspace_used(). I am having difficulty with > tuplestore_storage_type_name(); tuplestore_largest_storage_type_name() > seems mouthful and yet not doing justice to the functionality. It > might be better to just have one funciton tuplestore_maxspace_used() > which returns both the maximum space used as well as the storage type > when maximum space was used. +1. Returning the storage type by the same function, not by a separate function looks more natural. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
On Fri, 6 Sept 2024 at 19:08, Ashutosh Bapat <ashutosh.bapat.oss@gmail.com> wrote: > The changes look better. A nitpick though. With their definitions > changed, I think it's better to change the names of the functions > since their purpose has changed. Right now they report the storage > type and size used, respectively, at the time of calling the function. > With this patch, they report maximum space ever used and the storage > corresponding to the maximum space. tuplestore_space_used() may be > changed to tuplestore_maxspace_used(). I am having difficulty with > tuplestore_storage_type_name(); tuplestore_largest_storage_type_name() > seems mouthful and yet not doing justice to the functionality. It > might be better to just have one funciton tuplestore_maxspace_used() > which returns both the maximum space used as well as the storage type > when maximum space was used. How about just removing tuplestore_storage_type_name() and tuplestore_space_used() and adding tuplestore_get_stats(). I did take some inspiration from tuplesort.c for this, so maybe we can defer back there for further guidance. I'm not so sure it's worth having a stats struct type like tuplesort.c has. All we need is a char ** and an int64 * output parameter to pass to the stats function. I don't think we need to copy the tuplesort_method_name(). It seems fine just to point the output parameter of the stats function at the statically allocated constant. David
On Fri, Sep 6, 2024 at 7:21 PM David Rowley <dgrowleyml@gmail.com> wrote: > > On Fri, 6 Sept 2024 at 19:08, Ashutosh Bapat > <ashutosh.bapat.oss@gmail.com> wrote: > > The changes look better. A nitpick though. With their definitions > > changed, I think it's better to change the names of the functions > > since their purpose has changed. Right now they report the storage > > type and size used, respectively, at the time of calling the function. > > With this patch, they report maximum space ever used and the storage > > corresponding to the maximum space. tuplestore_space_used() may be > > changed to tuplestore_maxspace_used(). I am having difficulty with > > tuplestore_storage_type_name(); tuplestore_largest_storage_type_name() > > seems mouthful and yet not doing justice to the functionality. It > > might be better to just have one funciton tuplestore_maxspace_used() > > which returns both the maximum space used as well as the storage type > > when maximum space was used. > > How about just removing tuplestore_storage_type_name() and > tuplestore_space_used() and adding tuplestore_get_stats(). I did take > some inspiration from tuplesort.c for this, so maybe we can defer back > there for further guidance. I'm not so sure it's worth having a stats > struct type like tuplesort.c has. All we need is a char ** and an > int64 * output parameter to pass to the stats function. I don't think > we need to copy the tuplesort_method_name(). It seems fine just to > point the output parameter of the stats function at the statically > allocated constant. tuplestore_get_stats() similar to tuplesort_get_stats() looks fine. In future the stats reported by this function might expand e.g. maximum number of readers may be included in the stats. If it expands beyond two values, we could think of a separate structure, but for now it looks fine given its limited use. A comment explaining why we aren't using a stats structure and some guidance on when that would be appropriate will be better. -- Best Wishes, Ashutosh Bapat
> On Fri, 6 Sept 2024 at 19:08, Ashutosh Bapat > <ashutosh.bapat.oss@gmail.com> wrote: >> The changes look better. A nitpick though. With their definitions >> changed, I think it's better to change the names of the functions >> since their purpose has changed. Right now they report the storage >> type and size used, respectively, at the time of calling the function. >> With this patch, they report maximum space ever used and the storage >> corresponding to the maximum space. tuplestore_space_used() may be >> changed to tuplestore_maxspace_used(). I am having difficulty with >> tuplestore_storage_type_name(); tuplestore_largest_storage_type_name() >> seems mouthful and yet not doing justice to the functionality. It >> might be better to just have one funciton tuplestore_maxspace_used() >> which returns both the maximum space used as well as the storage type >> when maximum space was used. > > How about just removing tuplestore_storage_type_name() and > tuplestore_space_used() and adding tuplestore_get_stats(). I did take > some inspiration from tuplesort.c for this, so maybe we can defer back > there for further guidance. I'm not so sure it's worth having a stats > struct type like tuplesort.c has. All we need is a char ** and an > int64 * output parameter to pass to the stats function. I don't think > we need to copy the tuplesort_method_name(). It seems fine just to > point the output parameter of the stats function at the statically > allocated constant. Are you going to push the changes to tuplestore.c anytime soon? I would like to rebase my patch[1] but the patch could be affected by the tuplestore API change. Best reagards, [1] https://www.postgresql.org/message-id/CAApHDvoY8cibGcicLV0fNh%3D9JVx9PANcWvhkdjBnDCc9Quqytg%40mail.gmail.com -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
On Thu, 12 Sept 2024 at 14:04, Tatsuo Ishii <ishii@postgresql.org> wrote: > Are you going to push the changes to tuplestore.c anytime soon? I > would like to rebase my patch[1] but the patch could be affected by > the tuplestore API change. Ok, I'll look at that. I had thought you were taking care of writing the patch. David
> On Thu, 12 Sept 2024 at 14:04, Tatsuo Ishii <ishii@postgresql.org> wrote: >> Are you going to push the changes to tuplestore.c anytime soon? I >> would like to rebase my patch[1] but the patch could be affected by >> the tuplestore API change. > > Ok, I'll look at that. Thanks. I had thought you were taking care of writing the patch. Sorry, I should have asked you first if you are going to write the API change patch. -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
On Thu, 12 Sept 2024 at 14:42, Tatsuo Ishii <ishii@postgresql.org> wrote: > Sorry, I should have asked you first if you are going to write the API > change patch. I pushed a patch to change the API. David
> I pushed a patch to change the API. Thank you! -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
Here is the v3 patch. This time I only include patch for the Window Aggregate node. Patches for other node types will come after this patch getting committed or come close to commitable state. David, In this patch I refactored show_material_info. I divided it into show_material_info and show_storage_info so that the latter can be used by other node types including window aggregate node. What do you think? I also added a test case in explain.sql per discussion with Maxim Orlov. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp From 74ab7724bfdb49dc111eca5d96abed9cd29abdc7 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Thu, 12 Sep 2024 16:15:43 +0900 Subject: [PATCH v3] Add memory/disk usage for Window aggregate nodes in EXPLAIN. This commit is similar to 1eff8279d and expands the idea to Window aggregate nodes so that users can know how much memory or disk the tuplestore used. This commit uses newly introduced tuplestore_get_stats() to query this information and add some additional output in EXPLAIN ANALYZE to display the information for the Window aggregate node. --- src/backend/commands/explain.c | 31 +++++++++++++++++++++++---- src/test/regress/expected/explain.out | 11 ++++++++++ src/test/regress/sql/explain.sql | 3 +++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 2819e479f8..ce6792be8a 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -126,7 +126,9 @@ static void show_sort_info(SortState *sortstate, ExplainState *es); static void show_incremental_sort_info(IncrementalSortState *incrsortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); +static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); +static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2231,6 +2233,7 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); show_upper_qual(((WindowAgg *) plan)->runConditionOrig, "Run Condition", planstate, ancestors, es); + show_windowagg_info(castNode(WindowAggState, planstate), es); break; case T_Group: show_group_keys(castNode(GroupState, planstate), ancestors, es); @@ -3343,13 +3346,11 @@ show_hash_info(HashState *hashstate, ExplainState *es) } /* - * Show information on material node, storage method and maximum memory/disk - * space used. + * Show information on storage method and maximum memory/disk space used. */ static void -show_material_info(MaterialState *mstate, ExplainState *es) +show_storage_info(Tuplestorestate *tupstore, ExplainState *es) { - Tuplestorestate *tupstore = mstate->tuplestorestate; char *maxStorageType; int64 maxSpaceUsed, maxSpaceUsedKB; @@ -3379,6 +3380,28 @@ show_material_info(MaterialState *mstate, ExplainState *es) } } +/* + * Show information on material node, storage method and maximum memory/disk + * space used. + */ +static void +show_material_info(MaterialState *mstate, ExplainState *es) +{ + Tuplestorestate *tupstore = mstate->tuplestorestate; + + show_storage_info(tupstore, es); +} + +/* + * Show information on WindowAgg node, storage method and maximum memory/disk + * space used. + */ +static void +show_windowagg_info(WindowAggState *winstate, ExplainState *es) +{ + show_storage_info(winstate->buffer, es); +} + /* * Show information on memoize hits/misses/evictions and memory usage. */ diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out index 6585c6a69e..d27fbdfebb 100644 --- a/src/test/regress/expected/explain.out +++ b/src/test/regress/expected/explain.out @@ -691,3 +691,14 @@ select explain_filter('explain (analyze,serialize) create temp table explain_tem Execution Time: N.N ms (4 rows) +-- Test tuplestore storage usage in Window aggregate +select explain_filter('explain (analyze) select sum(n) over() from generate_series(1,100) a(n)'); + explain_filter +---------------------------------------------------------------------------------------------------------------- + WindowAgg (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + -> Function Scan on generate_series a (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N) + Planning Time: N.N ms + Execution Time: N.N ms +(5 rows) + diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql index c7055f850c..50eaf554eb 100644 --- a/src/test/regress/sql/explain.sql +++ b/src/test/regress/sql/explain.sql @@ -169,3 +169,6 @@ select explain_filter('explain (analyze,serialize text,buffers,timing off) selec select explain_filter('explain (analyze,serialize binary,buffers,timing) select * from int8_tbl i8'); -- this tests an edge case where we have no data to return select explain_filter('explain (analyze,serialize) create temp table explain_temp as select * from int8_tbl i8'); + +-- Test tuplestore storage usage in Window aggregate +select explain_filter('explain (analyze) select sum(n) over() from generate_series(1,100) a(n)'); -- 2.25.1
On Fri, 13 Sept 2024 at 00:12, Tatsuo Ishii <ishii@postgresql.org> wrote: > In this patch I refactored show_material_info. I divided it into > show_material_info and show_storage_info so that the latter can be > used by other node types including window aggregate node. What do you > think? Yes, I think it's a good idea to move that into a helper function. If you do the other node types, without that helper the could would have to be repeated quite a few times. Maybe show_storage_info() can be moved up with the other helper functions, say below show_sortorder_options() ? It might be a good idea to keep the "if (!es->analyze || tupstore == NULL)" checks in the calling function rather than the helper too. I thought about the location of the test for a while and read the "This file is concerned with testing EXPLAIN in its own right." comment at the top of that explain.out. I was trying to decide if testing output of a specific node type met this or not. I can't pick out any other tests there which are specific to a node type, so I'm unsure if this is the location for it or not. However, to put it anywhere else means having to add a plpgsql function to mask out the unstable parts of EXPLAIN, so maybe the location is good as it saves from having to do that. I'm 50/50 on this, so I'm happy to let you decide. You could also shrink that 100 rows into a smaller number for the generate_series without losing any coverage. Aside from that, I think the patch is good. Thanks for working on it. David
David, Thank you for your review. > Yes, I think it's a good idea to move that into a helper function. If > you do the other node types, without that helper the could would have > to be repeated quite a few times. Maybe show_storage_info() can be > moved up with the other helper functions, say below > show_sortorder_options() ? Yeah, that makes sense. Looks less random. > It might be a good idea to keep the "if > (!es->analyze || tupstore == NULL)" checks in the calling function > rather than the helper too. I agree with this. This kind of check should be done in the calling function. > I thought about the location of the test for a while and read the > "This file is concerned with testing EXPLAIN in its own right." > comment at the top of that explain.out. I was trying to decide if > testing output of a specific node type met this or not. I can't pick > out any other tests there which are specific to a node type, so I'm > unsure if this is the location for it or not. However, to put it > anywhere else means having to add a plpgsql function to mask out the > unstable parts of EXPLAIN, so maybe the location is good as it saves > from having to do that. I'm 50/50 on this, so I'm happy to let you > decide. Yeah. Maybe we should move the function to elsewhere so that it can be shared by other tests. However in this case it's purpose is testing an additional output in an explain command. I think this is not far from "This file is concerned with testing EXPLAIN in its own right.". So I would like to keep the test in explain.sql. > You could also shrink that 100 rows into a smaller number for > the generate_series without losing any coverage. Right. I will make the change. > Aside from that, I think the patch is good. Thanks for working on it. Thanks. Attached is the v4 patch. I am going push it if there's no objection. After this, I will work on remaining node types. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp From e71e697f4ec399bc19af6f1aeac614185acc38c7 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Fri, 13 Sep 2024 14:51:45 +0900 Subject: [PATCH v4] Add memory/disk usage for Window aggregate nodes in EXPLAIN. This commit is similar to 1eff8279d and expands the idea to Window aggregate nodes so that users can know how much memory or disk the tuplestore used. This commit uses newly introduced tuplestore_get_stats() to query this information and add some additional output in EXPLAIN ANALYZE to display the information for the Window aggregate node. --- src/backend/commands/explain.c | 68 ++++++++++++++++++++------- src/test/regress/expected/explain.out | 11 +++++ src/test/regress/sql/explain.sql | 3 ++ 3 files changed, 64 insertions(+), 18 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 2819e479f8..aaec439892 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -120,6 +120,7 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel, List *ancestors, ExplainState *es); static void show_sortorder_options(StringInfo buf, Node *sortexpr, Oid sortOperator, Oid collation, bool nullsFirst); +static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es); static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, List *ancestors, ExplainState *es); static void show_sort_info(SortState *sortstate, ExplainState *es); @@ -127,6 +128,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); +static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2231,6 +2233,7 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); show_upper_qual(((WindowAgg *) plan)->runConditionOrig, "Run Condition", planstate, ancestors, es); + show_windowagg_info(castNode(WindowAggState, planstate), es); break; case T_Group: show_group_keys(castNode(GroupState, planstate), ancestors, es); @@ -2894,6 +2897,34 @@ show_sortorder_options(StringInfo buf, Node *sortexpr, } } +/* + * Show information on storage method and maximum memory/disk space used. + */ +static void +show_storage_info(Tuplestorestate *tupstore, ExplainState *es) +{ + char *maxStorageType; + int64 maxSpaceUsed, + maxSpaceUsedKB; + + tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); + maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed); + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyText("Storage", maxStorageType, es); + ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es); + } + else + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", + maxStorageType, + maxSpaceUsedKB); + } +} + /* * Show TABLESAMPLE properties */ @@ -3350,9 +3381,6 @@ static void show_material_info(MaterialState *mstate, ExplainState *es) { Tuplestorestate *tupstore = mstate->tuplestorestate; - char *maxStorageType; - int64 maxSpaceUsed, - maxSpaceUsedKB; /* * Nothing to show if ANALYZE option wasn't used or if execution didn't @@ -3361,22 +3389,26 @@ show_material_info(MaterialState *mstate, ExplainState *es) if (!es->analyze || tupstore == NULL) return; - tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); - maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed); + show_storage_info(tupstore, es); +} - if (es->format != EXPLAIN_FORMAT_TEXT) - { - ExplainPropertyText("Storage", maxStorageType, es); - ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es); - } - else - { - ExplainIndentText(es); - appendStringInfo(es->str, - "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", - maxStorageType, - maxSpaceUsedKB); - } +/* + * Show information on WindowAgg node, storage method and maximum memory/disk + * space used. + */ +static void +show_windowagg_info(WindowAggState *winstate, ExplainState *es) +{ + Tuplestorestate *tupstore = winstate->buffer; + + /* + * Nothing to show if ANALYZE option wasn't used or if execution didn't + * get as far as creating the tuplestore. + */ + if (!es->analyze || tupstore == NULL) + return; + + show_storage_info(tupstore, es); } /* diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out index 6585c6a69e..88355b6a99 100644 --- a/src/test/regress/expected/explain.out +++ b/src/test/regress/expected/explain.out @@ -691,3 +691,14 @@ select explain_filter('explain (analyze,serialize) create temp table explain_tem Execution Time: N.N ms (4 rows) +-- Test tuplestore storage usage in Window aggregate +select explain_filter('explain (analyze) select sum(n) over() from generate_series(1,10) a(n)'); + explain_filter +---------------------------------------------------------------------------------------------------------------- + WindowAgg (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + -> Function Scan on generate_series a (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N) + Planning Time: N.N ms + Execution Time: N.N ms +(5 rows) + diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql index c7055f850c..3f11b559aa 100644 --- a/src/test/regress/sql/explain.sql +++ b/src/test/regress/sql/explain.sql @@ -169,3 +169,6 @@ select explain_filter('explain (analyze,serialize text,buffers,timing off) selec select explain_filter('explain (analyze,serialize binary,buffers,timing) select * from int8_tbl i8'); -- this tests an edge case where we have no data to return select explain_filter('explain (analyze,serialize) create temp table explain_temp as select * from int8_tbl i8'); + +-- Test tuplestore storage usage in Window aggregate +select explain_filter('explain (analyze) select sum(n) over() from generate_series(1,10) a(n)'); -- 2.25.1
On Fri, 13 Sept 2024 at 09:11, Tatsuo Ishii <ishii@postgresql.org> wrote:
Thanks. Attached is the v4 patch. I am going push it if there's no
objection.
Looks good to me. Thank you for your work.
Best regards,
Maxim Orlov.
The patch looks fine but it doesn't add a test case where Storage is Disk or the case when the last usage fit in memory but an earlier usage spilled to disk. Do we want to cover those. This test would be the only one where those code paths could be tested. On Fri, Sep 13, 2024 at 11:41 AM Tatsuo Ishii <ishii@postgresql.org> wrote: > > David, > > Thank you for your review. > > > Yes, I think it's a good idea to move that into a helper function. If > > you do the other node types, without that helper the could would have > > to be repeated quite a few times. Maybe show_storage_info() can be > > moved up with the other helper functions, say below > > show_sortorder_options() ? > > Yeah, that makes sense. Looks less random. > > > It might be a good idea to keep the "if > > (!es->analyze || tupstore == NULL)" checks in the calling function > > rather than the helper too. > > I agree with this. This kind of check should be done in the calling > function. > > > I thought about the location of the test for a while and read the > > "This file is concerned with testing EXPLAIN in its own right." > > comment at the top of that explain.out. I was trying to decide if > > testing output of a specific node type met this or not. I can't pick > > out any other tests there which are specific to a node type, so I'm > > unsure if this is the location for it or not. However, to put it > > anywhere else means having to add a plpgsql function to mask out the > > unstable parts of EXPLAIN, so maybe the location is good as it saves > > from having to do that. I'm 50/50 on this, so I'm happy to let you > > decide. > > Yeah. Maybe we should move the function to elsewhere so that it can be > shared by other tests. However in this case it's purpose is testing an > additional output in an explain command. I think this is not far from > "This file is concerned with testing EXPLAIN in its own right.". So I > would like to keep the test in explain.sql. > > > You could also shrink that 100 rows into a smaller number for > > the generate_series without losing any coverage. > > Right. I will make the change. > > > Aside from that, I think the patch is good. Thanks for working on it. > > Thanks. Attached is the v4 patch. I am going push it if there's no > objection. > > After this, I will work on remaining node types. > > Best reagards, > -- > Tatsuo Ishii > SRA OSS K.K. > English: http://www.sraoss.co.jp/index_en/ > Japanese:http://www.sraoss.co.jp -- Best Wishes, Ashutosh Bapat
> The patch looks fine but it doesn't add a test case where Storage is > Disk We can test the case by setting work_mem to the minimum size (64kB) and giving slightly larger "stop" parameter to generate_series. > or the case when the last usage fit in memory but an earlier > usage spilled to disk. In my understanding once tuplestore changes the storage type to disk, it never returns to the memory storage type in terms of tuplestore_get_stats. i.e. once state->usedDisk is set to true, it never goes back to false. So the test case is not necessary. David, am I correct? > Do we want to cover those. This test would be > the only one where those code paths could be tested. I am fine to add the first test case. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
On Fri, Sep 13, 2024 at 3:02 PM Tatsuo Ishii <ishii@postgresql.org> wrote: > > > The patch looks fine but it doesn't add a test case where Storage is > > Disk > > We can test the case by setting work_mem to the minimum size (64kB) > and giving slightly larger "stop" parameter to generate_series. > WFM > > or the case when the last usage fit in memory but an earlier > > usage spilled to disk. > > In my understanding once tuplestore changes the storage type to disk, > it never returns to the memory storage type in terms of > tuplestore_get_stats. i.e. once state->usedDisk is set to true, it > never goes back to false. So the test case is not necessary. > David, am I correct? I understand that. I am requesting a testcase to test that same logic. -- Best Wishes, Ashutosh Bapat
>> > or the case when the last usage fit in memory but an earlier >> > usage spilled to disk. >> >> In my understanding once tuplestore changes the storage type to disk, >> it never returns to the memory storage type in terms of >> tuplestore_get_stats. i.e. once state->usedDisk is set to true, it >> never goes back to false. So the test case is not necessary. >> David, am I correct? > > I understand that. I am requesting a testcase to test that same logic. Maybe something like this? In the example below there are 2 partitions. the first one has 1998 rows and the second one has 2 rows. Assuming that work_mem is 64kB, the first one does not fit the memory and spills to disk. The second partition fits memory. However as state->usedDisk remains true, explain shows "Storage: Disk". test=# explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000)a(n)); n QUERY PLAN -------------------------------------------------------------------------------- ------------- WindowAgg (actual time=1.958..473328.589 rows=2000 loops=1) Storage: Disk Maximum Storage: 65kB -> Sort (actual time=1.008..1.277 rows=2000 loops=1) Sort Key: ((a.n < 3)) Sort Method: external merge Disk: 48kB -> Function Scan on generate_series a (actual time=0.300..0.633 rows=2 000 loops=1) Planning Time: 0.069 ms Execution Time: 474515.476 ms (8 rows) Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
On Sat, Sep 14, 2024 at 1:42 PM Tatsuo Ishii <ishii@postgresql.org> wrote: > > >> > or the case when the last usage fit in memory but an earlier > >> > usage spilled to disk. > >> > >> In my understanding once tuplestore changes the storage type to disk, > >> it never returns to the memory storage type in terms of > >> tuplestore_get_stats. i.e. once state->usedDisk is set to true, it > >> never goes back to false. So the test case is not necessary. > >> David, am I correct? > > > > I understand that. I am requesting a testcase to test that same logic. > > Maybe something like this? In the example below there are 2 > partitions. the first one has 1998 rows and the second one has 2 > rows. Assuming that work_mem is 64kB, the first one does not fit the > memory and spills to disk. The second partition fits memory. However as > state->usedDisk remains true, explain shows "Storage: Disk". > > test=# explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000)a(n)); > n QUERY PLAN > > -------------------------------------------------------------------------------- > ------------- > WindowAgg (actual time=1.958..473328.589 rows=2000 loops=1) > Storage: Disk Maximum Storage: 65kB > -> Sort (actual time=1.008..1.277 rows=2000 loops=1) > Sort Key: ((a.n < 3)) > Sort Method: external merge Disk: 48kB > -> Function Scan on generate_series a (actual time=0.300..0.633 rows=2 > 000 loops=1) > Planning Time: 0.069 ms > Execution Time: 474515.476 ms > (8 rows) Thanks. This will do. Is there a way to force the larger partition to be computed first? That way we definitely know that the last computation was done when all the tuples in the tuplestore were in memory. -- Best Wishes, Ashutosh Bapat
Hi Ashutosh, Thank you for the review. > Thanks. This will do. Is there a way to force the larger partition to > be computed first? That way we definitely know that the last > computation was done when all the tuples in the tuplestore were in > memory. Not sure if there's any way to force it in the SQL standard. However in term of implementation, PostgreSQL sorts the function (generate_series) scan result using a sort key "a.n < 3", which results in rows being >= 2 first (as false == 0), then rows being < 3 (as true == 1). So unless PostgreSQL changes the way to sort boolean data type, I think the result should be stable. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
> Not sure if there's any way to force it in the SQL standard. However > in term of implementation, PostgreSQL sorts the function > (generate_series) scan result using a sort key "a.n < 3", which > results in rows being >= 2 first (as false == 0), then rows being < 3 > (as true == 1). So unless PostgreSQL changes the way to sort boolean > data type, I think the result should be stable. Attached is the v5 patch. The difference from v4 is addtion of two more tests to explain.sql: 1) spils to disk case 2) splis to disk then switch back to memory case Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp From 23d5d62ef3c08117a0e452a57264477a3f9aba08 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Tue, 17 Sep 2024 10:59:07 +0900 Subject: [PATCH v5] Add memory/disk usage for Window aggregate nodes in EXPLAIN. This commit is similar to 1eff8279d and expands the idea to Window aggregate nodes so that users can know how much memory or disk the tuplestore used. This commit uses newly introduced tuplestore_get_stats() to query this information and add some additional output in EXPLAIN ANALYZE to display the information for the Window aggregate node. --- src/backend/commands/explain.c | 68 ++++++++++++++++++++------- src/test/regress/expected/explain.out | 37 +++++++++++++++ src/test/regress/sql/explain.sql | 8 ++++ 3 files changed, 95 insertions(+), 18 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 2819e479f8..aaec439892 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -120,6 +120,7 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel, List *ancestors, ExplainState *es); static void show_sortorder_options(StringInfo buf, Node *sortexpr, Oid sortOperator, Oid collation, bool nullsFirst); +static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es); static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, List *ancestors, ExplainState *es); static void show_sort_info(SortState *sortstate, ExplainState *es); @@ -127,6 +128,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, ExplainState *es); static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); +static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2231,6 +2233,7 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); show_upper_qual(((WindowAgg *) plan)->runConditionOrig, "Run Condition", planstate, ancestors, es); + show_windowagg_info(castNode(WindowAggState, planstate), es); break; case T_Group: show_group_keys(castNode(GroupState, planstate), ancestors, es); @@ -2894,6 +2897,34 @@ show_sortorder_options(StringInfo buf, Node *sortexpr, } } +/* + * Show information on storage method and maximum memory/disk space used. + */ +static void +show_storage_info(Tuplestorestate *tupstore, ExplainState *es) +{ + char *maxStorageType; + int64 maxSpaceUsed, + maxSpaceUsedKB; + + tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); + maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed); + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyText("Storage", maxStorageType, es); + ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es); + } + else + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", + maxStorageType, + maxSpaceUsedKB); + } +} + /* * Show TABLESAMPLE properties */ @@ -3350,9 +3381,6 @@ static void show_material_info(MaterialState *mstate, ExplainState *es) { Tuplestorestate *tupstore = mstate->tuplestorestate; - char *maxStorageType; - int64 maxSpaceUsed, - maxSpaceUsedKB; /* * Nothing to show if ANALYZE option wasn't used or if execution didn't @@ -3361,22 +3389,26 @@ show_material_info(MaterialState *mstate, ExplainState *es) if (!es->analyze || tupstore == NULL) return; - tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); - maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed); + show_storage_info(tupstore, es); +} - if (es->format != EXPLAIN_FORMAT_TEXT) - { - ExplainPropertyText("Storage", maxStorageType, es); - ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es); - } - else - { - ExplainIndentText(es); - appendStringInfo(es->str, - "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", - maxStorageType, - maxSpaceUsedKB); - } +/* + * Show information on WindowAgg node, storage method and maximum memory/disk + * space used. + */ +static void +show_windowagg_info(WindowAggState *winstate, ExplainState *es) +{ + Tuplestorestate *tupstore = winstate->buffer; + + /* + * Nothing to show if ANALYZE option wasn't used or if execution didn't + * get as far as creating the tuplestore. + */ + if (!es->analyze || tupstore == NULL) + return; + + show_storage_info(tupstore, es); } /* diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out index 6585c6a69e..a9ead5b36f 100644 --- a/src/test/regress/expected/explain.out +++ b/src/test/regress/expected/explain.out @@ -691,3 +691,40 @@ select explain_filter('explain (analyze,serialize) create temp table explain_tem Execution Time: N.N ms (4 rows) +-- Test tuplestore storage usage in Window aggregate (memory case) +select explain_filter('explain (analyze,costs off) select sum(n) over() from generate_series(1,10) a(n)'); + explain_filter +-------------------------------------------------------------------------------- + WindowAgg (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + -> Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N) + Planning Time: N.N ms + Execution Time: N.N ms +(5 rows) + +-- Test tuplestore storage usage in Window aggregate (disk case) +set work_mem to 64; +select explain_filter('explain (analyze,costs off) select sum(n) over() from generate_series(1,2000) a(n)'); + explain_filter +-------------------------------------------------------------------------------- + WindowAgg (actual time=N.N..N.N rows=N loops=N) + Storage: Disk Maximum Storage: NkB + -> Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N) + Planning Time: N.N ms + Execution Time: N.N ms +(5 rows) + +-- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk) +select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000)a(n))'); + explain_filter +-------------------------------------------------------------------------------------- + WindowAgg (actual time=N.N..N.N rows=N loops=N) + Storage: Disk Maximum Storage: NkB + -> Sort (actual time=N.N..N.N rows=N loops=N) + Sort Key: ((a.n < N)) + Sort Method: external merge Disk: NkB + -> Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N) + Planning Time: N.N ms + Execution Time: N.N ms +(8 rows) + diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql index c7055f850c..54ecfcba63 100644 --- a/src/test/regress/sql/explain.sql +++ b/src/test/regress/sql/explain.sql @@ -169,3 +169,11 @@ select explain_filter('explain (analyze,serialize text,buffers,timing off) selec select explain_filter('explain (analyze,serialize binary,buffers,timing) select * from int8_tbl i8'); -- this tests an edge case where we have no data to return select explain_filter('explain (analyze,serialize) create temp table explain_temp as select * from int8_tbl i8'); + +-- Test tuplestore storage usage in Window aggregate (memory case) +select explain_filter('explain (analyze,costs off) select sum(n) over() from generate_series(1,10) a(n)'); +-- Test tuplestore storage usage in Window aggregate (disk case) +set work_mem to 64; +select explain_filter('explain (analyze,costs off) select sum(n) over() from generate_series(1,2000) a(n)'); +-- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk) +select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000)a(n))'); -- 2.25.1
On Tue, 17 Sept 2024 at 14:40, Tatsuo Ishii <ishii@postgresql.org> wrote: > Attached is the v5 patch. The difference from v4 is addtion of two > more tests to explain.sql: > > 1) spils to disk case > 2) splis to disk then switch back to memory case Looks ok to me, aside from the missing "reset work_mem;" after you're done with testing the disk spilling code. David
> On Tue, 17 Sept 2024 at 14:40, Tatsuo Ishii <ishii@postgresql.org> wrote: >> Attached is the v5 patch. The difference from v4 is addtion of two >> more tests to explain.sql: >> >> 1) spils to disk case >> 2) splis to disk then switch back to memory case > > Looks ok to me, aside from the missing "reset work_mem;" after you're > done with testing the disk spilling code. Thanks. I have added it and pushed the patch. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
> Thanks. I have added it and pushed the patch. So I have created patches to do the same for CTE scan and table function scan node. Patch attached. Actually there's one more executor node type that uses tuplestore: recursive union (used in "with recursive"). The particular node type uses two tuplestore and we cannot simply apply tuplestore_get_stats() to the node type. We need to modify RecursiveUnionState to track the maximum tuplestore usage. I am not sure this would be worth the effort. Opinion? Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp From e2c8d0257d4b0e11a9ef9cfe632d8ea1806a08b7 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Wed, 18 Sep 2024 20:54:58 +0900 Subject: [PATCH v1] Add memory/disk usage for CTE scan and table function scan nodes in EXPLAIN. This commit is similar to 95d6e9af07, expanding the idea to CTE scan and table function scan nodes so that the maximum tuplestore memory or disk usage is shown with EXPLAIN ANALYZE command. --- src/backend/commands/explain.c | 35 +++++++++++++++++++++++++++ src/test/regress/expected/explain.out | 20 +++++++++++++++ src/test/regress/sql/explain.sql | 4 +++ 3 files changed, 59 insertions(+) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index aaec439892..34a23428c3 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -129,6 +129,8 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); +static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); +static void show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2046,6 +2048,8 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + if (IsA(plan, CteScan)) + show_ctescan_info(castNode(CteScanState, planstate), es); break; case T_Gather: { @@ -2127,6 +2131,7 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + show_table_func_scan_info(castNode(TableFuncScanState, planstate), es); break; case T_TidScan: { @@ -3411,6 +3416,36 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es) show_storage_info(tupstore, es); } +/* + * Show information on CTE Scan node, storage method and maximum memory/disk + * space used. + */ +static void +show_ctescan_info(CteScanState *ctescanstate, ExplainState *es) +{ + Tuplestorestate *tupstore = ctescanstate->leader->cte_table; + + if (!es->analyze || tupstore == NULL) + return; + + show_storage_info(tupstore, es); +} + +/* + * Show information on Table Function Scan node, storage method and maximum + * memory/disk space used. + */ +static void +show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es) +{ + Tuplestorestate *tupstore = tscanstate->tupstore; + + if (!es->analyze || tupstore == NULL) + return; + + show_storage_info(tupstore, es); +} + /* * Show information on memoize hits/misses/evictions and memory usage. */ diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out index d01c304c24..b64e64bb43 100644 --- a/src/test/regress/expected/explain.out +++ b/src/test/regress/expected/explain.out @@ -729,3 +729,23 @@ select explain_filter('explain (analyze,costs off) select sum(n) over(partition (8 rows) reset work_mem; +-- Test tuplestore storage usage in CTE scan +select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n)from w'); + explain_filter +-------------------------------------------------------------------------------- + Aggregate (actual time=N.N..N.N rows=N loops=N) + -> Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N) + Planning Time: N.N ms + Execution Time: N.N ms +(4 rows) + +-- Test tuplestore storage usage in table function scan +select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))'); + explain_filter +--------------------------------------------------------------------------- + Table Function Scan on "json_table" (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + Planning Time: N.N ms + Execution Time: N.N ms +(4 rows) + diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql index b861e2b53d..0623440124 100644 --- a/src/test/regress/sql/explain.sql +++ b/src/test/regress/sql/explain.sql @@ -178,3 +178,7 @@ select explain_filter('explain (analyze,costs off) select sum(n) over() from gen -- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk) select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000)a(n))'); reset work_mem; +-- Test tuplestore storage usage in CTE scan +select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n)from w'); +-- Test tuplestore storage usage in table function scan +select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))'); -- 2.25.1
On Thu, 19 Sept 2024 at 00:13, Tatsuo Ishii <ishii@postgresql.org> wrote: > Actually there's one more executor node type that uses tuplestore: > recursive union (used in "with recursive"). The particular node type > uses two tuplestore and we cannot simply apply tuplestore_get_stats() > to the node type. We need to modify RecursiveUnionState to track the > maximum tuplestore usage. I am not sure this would be worth the > effort. Opinion? Could you add the two sizes together and take the storage type from the tuplestore with the highest storage size? David
> On Thu, 19 Sept 2024 at 00:13, Tatsuo Ishii <ishii@postgresql.org> wrote: >> Actually there's one more executor node type that uses tuplestore: >> recursive union (used in "with recursive"). The particular node type >> uses two tuplestore and we cannot simply apply tuplestore_get_stats() >> to the node type. We need to modify RecursiveUnionState to track the >> maximum tuplestore usage. I am not sure this would be worth the >> effort. Opinion? > > Could you add the two sizes together and take the storage type from > the tuplestore with the highest storage size? I don't think this works because tuplestore_begin/tuplestore_end are called while executing the node (ExecRecursiveUnion). I think the way you are proposing only shows the stats last time when those tuplestore are created. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
> That code could be modified to swap the tuplestores and do a > tuplestore_clear() instead of tuplestore_end() followed by > tuplestore_begin_heap(). > > It's likely worthwhile from a performance point of view. Here's a > small test as an example: > > master: > postgres=# with recursive cte (a) as (select 1 union all select > cte.a+1 from cte where cte.a+1 <= 1000000) select count(*) from cte; > Time: 219.023 ms > Time: 218.828 ms > Time: 219.093 ms > > with attached patched: > postgres=# with recursive cte (a) as (select 1 union all select > cte.a+1 from cte where cte.a+1 <= 1000000) select count(*) from cte; > Time: 169.734 ms > Time: 164.841 ms > Time: 169.168 ms Impressive result. I also ran your query with count 1000. without the patch: Time: 3.655 ms Time: 4.123 ms Time: 2.163 ms wit the patch: Time: 3.641 ms Time: 2.356 ms Time: 2.347 ms It seems with the patch the performance is slightly better or almost same. I think the patch improves the performance without sacrificing the smaller iteration case. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp
On Thu, 19 Sept 2024 at 13:49, Tatsuo Ishii <ishii@postgresql.org> wrote: > I also ran your query with count 1000. > > without the patch: > Time: 3.655 ms > Time: 4.123 ms > Time: 2.163 ms > > wit the patch: > Time: 3.641 ms > Time: 2.356 ms > Time: 2.347 ms > > It seems with the patch the performance is slightly better or almost > same. I think the patch improves the performance without sacrificing > the smaller iteration case. You might need to use pgbench to get more stable results with such a small test. If your CPU clocks down when idle, it's not going to clock up as fast as you might like it to when you give it something to do. Here's what I got when running 1000 iterations: $ cat bench.sql with recursive cte (a) as (select 1 union all select cte.a+1 from cte where cte.a+1 <= 1000) select count(*) from cte; master $ for i in {1..5}; do pgbench -n -f bench.sql -T 10 -M prepared postgres | grep latency; done latency average = 0.251 ms latency average = 0.254 ms latency average = 0.251 ms latency average = 0.252 ms latency average = 0.253 ms patched $ for i in {1..5}; do pgbench -n -f bench.sql -T 10 -M prepared postgres | grep latency; done latency average = 0.202 ms latency average = 0.202 ms latency average = 0.207 ms latency average = 0.202 ms latency average = 0.202 ms (~24.2% faster) David
On Thu, 19 Sept 2024 at 13:49, Tatsuo Ishii <ishii@postgresql.org> wrote: > > > That code could be modified to swap the tuplestores and do a > > tuplestore_clear() instead of tuplestore_end() followed by > > tuplestore_begin_heap(). > > > Impressive result. I also ran your query with count 1000. I've pushed that patch. That should now unblock you on the nodeRecursiveunion.c telemetry. David
>> > That code could be modified to swap the tuplestores and do a >> > tuplestore_clear() instead of tuplestore_end() followed by >> > tuplestore_begin_heap(). >> > >> Impressive result. I also ran your query with count 1000. > > I've pushed that patch. That should now unblock you on the > nodeRecursiveunion.c telemetry. Thanks. Attached is a patch for CTE scan, table function scan and recursive union scan nodes. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp From e9e933bf959e54018fa20feca2034567024e551e Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Thu, 19 Sep 2024 13:13:50 +0900 Subject: [PATCH v2] Add memory/disk usage for more executor nodes. This commit is similar to 95d6e9af07, expanding the idea to CTE scan, table function scan and recursive union scan nodes so that the maximum tuplestore memory or disk usage is shown with EXPLAIN ANALYZE command. --- src/backend/commands/explain.c | 97 +++++++++++++++++++++++++++ src/test/regress/expected/explain.out | 38 +++++++++++ src/test/regress/sql/explain.sql | 6 ++ 3 files changed, 141 insertions(+) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index aaec439892..b5f8f34df0 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -129,6 +129,9 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); +static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); +static void show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es); +static void show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2046,6 +2049,8 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + if (IsA(plan, CteScan)) + show_ctescan_info(castNode(CteScanState, planstate), es); break; case T_Gather: { @@ -2127,6 +2132,7 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + show_table_func_scan_info(castNode(TableFuncScanState, planstate), es); break; case T_TidScan: { @@ -2278,6 +2284,10 @@ ExplainNode(PlanState *planstate, List *ancestors, show_memoize_info(castNode(MemoizeState, planstate), ancestors, es); break; + case T_RecursiveUnion: + show_recursive_union_info(castNode(RecursiveUnionState, planstate), + es); + break; default: break; } @@ -3411,6 +3421,93 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es) show_storage_info(tupstore, es); } +/* + * Show information on CTE Scan node, storage method and maximum memory/disk + * space used. + */ +static void +show_ctescan_info(CteScanState *ctescanstate, ExplainState *es) +{ + Tuplestorestate *tupstore = ctescanstate->leader->cte_table; + + if (!es->analyze || tupstore == NULL) + return; + + show_storage_info(tupstore, es); +} + +/* + * Show information on Table Function Scan node, storage method and maximum + * memory/disk space used. + */ +static void +show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es) +{ + Tuplestorestate *tupstore = tscanstate->tupstore; + + if (!es->analyze || tupstore == NULL) + return; + + show_storage_info(tupstore, es); +} + +/* + * Show information on Recursive Union node, storage method and maximum + * memory/disk space used. + */ +static void +show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es) +{ + char *maxStorageType; + char *tempStorageType; + int64 maxSpaceUsed, + maxSpaceUsedKB, + tempSpaceUsed; + Tuplestorestate *working_table, + *intermediate_table; + + /* + * Recursive union node uses two tuplestore. We employ one of them which + * consumed larger memory/disk usage. + */ + working_table = rstate->working_table; + intermediate_table = rstate->intermediate_table; + + if (!es->analyze || + (working_table == NULL && intermediate_table == NULL)) + return; + + tempSpaceUsed = maxSpaceUsed = 0; + + if (working_table != NULL) + tuplestore_get_stats(working_table, &tempStorageType, &tempSpaceUsed); + + if (intermediate_table != NULL) + tuplestore_get_stats(intermediate_table, &maxStorageType, &maxSpaceUsed); + + if (tempSpaceUsed > maxSpaceUsed) + { + maxStorageType = tempStorageType; + maxSpaceUsed = tempSpaceUsed; + } + + maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed); + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyText("Storage", maxStorageType, es); + ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es); + } + else + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", + maxStorageType, + maxSpaceUsedKB); + } +} + /* * Show information on memoize hits/misses/evictions and memory usage. */ diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out index d01c304c24..87bfaa6d79 100644 --- a/src/test/regress/expected/explain.out +++ b/src/test/regress/expected/explain.out @@ -729,3 +729,41 @@ select explain_filter('explain (analyze,costs off) select sum(n) over(partition (8 rows) reset work_mem; +-- Test tuplestore storage usage in CTE scan +select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n)from w'); + explain_filter +-------------------------------------------------------------------------------- + Aggregate (actual time=N.N..N.N rows=N loops=N) + -> Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N) + Planning Time: N.N ms + Execution Time: N.N ms +(4 rows) + +-- Test tuplestore storage usage in table function scan +select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))'); + explain_filter +--------------------------------------------------------------------------- + Table Function Scan on "json_table" (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + Planning Time: N.N ms + Execution Time: N.N ms +(4 rows) + +-- Test tuplestore storage usage in recurive union scan +select explain_filter('explain (analyze,costs off) with recursive t(n) as (values(1) union all select n+1 from t where n< 10) select sum(n) from t'); + explain_filter +----------------------------------------------------------------------------- + Aggregate (actual time=N.N..N.N rows=N loops=N) + CTE t + -> Recursive Union (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + -> Result (actual time=N.N..N.N rows=N loops=N) + -> WorkTable Scan on t t_1 (actual time=N.N..N.N rows=N loops=N) + Filter: (n < N) + Rows Removed by Filter: N + -> CTE Scan on t (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + Planning Time: N.N ms + Execution Time: N.N ms +(12 rows) + diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql index b861e2b53d..5d5c68443e 100644 --- a/src/test/regress/sql/explain.sql +++ b/src/test/regress/sql/explain.sql @@ -178,3 +178,9 @@ select explain_filter('explain (analyze,costs off) select sum(n) over() from gen -- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk) select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000)a(n))'); reset work_mem; +-- Test tuplestore storage usage in CTE scan +select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n)from w'); +-- Test tuplestore storage usage in table function scan +select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))'); +-- Test tuplestore storage usage in recurive union scan +select explain_filter('explain (analyze,costs off) with recursive t(n) as (values(1) union all select n+1 from t where n< 10) select sum(n) from t'); -- 2.25.1
On Thu, 19 Sept 2024 at 16:21, Tatsuo Ishii <ishii@postgresql.org> wrote: > Thanks. Attached is a patch for CTE scan, table function scan and > recursive union scan nodes. 1. It's probably a minor detail, but in show_recursive_union_info(), I don't think the tuplestores can ever be NULL. + if (working_table != NULL) + tuplestore_get_stats(working_table, &tempStorageType, &tempSpaceUsed); + + if (intermediate_table != NULL) + tuplestore_get_stats(intermediate_table, &maxStorageType, &maxSpaceUsed); I added the NULL tests for the Materialize case as the tuplestore is created in ExecMaterial() rather than ExecInitMaterial(). For the two tuplestorestates above, they're both created in ExecInitRecursiveUnion(). 2. I imagined you'd always do maxSpaceUsed += tempSpaceUsed; or maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed + tempSpaceUsed); + if (tempSpaceUsed > maxSpaceUsed) + { + maxStorageType = tempStorageType; + maxSpaceUsed = tempSpaceUsed; + } Why do you think the the space used by the smaller tuplestore should be ignored in the storage size output? David
> 1. It's probably a minor detail, but in show_recursive_union_info(), I > don't think the tuplestores can ever be NULL. > > + if (working_table != NULL) > + tuplestore_get_stats(working_table, &tempStorageType, &tempSpaceUsed); > + > + if (intermediate_table != NULL) > + tuplestore_get_stats(intermediate_table, &maxStorageType, &maxSpaceUsed); > > I added the NULL tests for the Materialize case as the tuplestore is > created in ExecMaterial() rather than ExecInitMaterial(). For the two > tuplestorestates above, they're both created in > ExecInitRecursiveUnion(). You are right. Also I checked other Exec* in nodeRecursiveunion.c and did not find any place where working_table and intermediate_table are set to NULL. > 2. I imagined you'd always do maxSpaceUsed += tempSpaceUsed; or > maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed + tempSpaceUsed); > > + if (tempSpaceUsed > maxSpaceUsed) > + { > + maxStorageType = tempStorageType; > + maxSpaceUsed = tempSpaceUsed; > + } > > Why do you think the the space used by the smaller tuplestore should > be ignored in the storage size output? I thought about the case when the two tuplestores have different storage types. But I remember that we already use disk storage type even if the storage type was changed from disk to memory[1]. So probably we don't need to much worry about the storage kind difference in the two tuplestores. Attached patch fixes 1 & 2. [1] https://www.postgresql.org/message-id/CAExHW5vRPRLvsZYLmNGcDLkPDWDHXGSWYjox-to-OsCVFETd3w%40mail.gmail.com Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp From 814fa7bdafba2c004b923ce577c0c6f53cfc5387 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Thu, 19 Sep 2024 19:14:07 +0900 Subject: [PATCH v3] Add memory/disk usage for more executor nodes. This commit is similar to 95d6e9af07, expanding the idea to CTE scan, table function scan and recursive union scan nodes so that the maximum tuplestore memory or disk usage is shown with EXPLAIN ANALYZE command. --- src/backend/commands/explain.c | 85 +++++++++++++++++++++++++++ src/test/regress/expected/explain.out | 38 ++++++++++++ src/test/regress/sql/explain.sql | 6 ++ 3 files changed, 129 insertions(+) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index aaec439892..725c3e88db 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -129,6 +129,9 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); +static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); +static void show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es); +static void show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2046,6 +2049,8 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + if (IsA(plan, CteScan)) + show_ctescan_info(castNode(CteScanState, planstate), es); break; case T_Gather: { @@ -2127,6 +2132,7 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + show_table_func_scan_info(castNode(TableFuncScanState, planstate), es); break; case T_TidScan: { @@ -2278,6 +2284,10 @@ ExplainNode(PlanState *planstate, List *ancestors, show_memoize_info(castNode(MemoizeState, planstate), ancestors, es); break; + case T_RecursiveUnion: + show_recursive_union_info(castNode(RecursiveUnionState, planstate), + es); + break; default: break; } @@ -3411,6 +3421,81 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es) show_storage_info(tupstore, es); } +/* + * Show information on CTE Scan node, storage method and maximum memory/disk + * space used. + */ +static void +show_ctescan_info(CteScanState *ctescanstate, ExplainState *es) +{ + Tuplestorestate *tupstore = ctescanstate->leader->cte_table; + + if (!es->analyze || tupstore == NULL) + return; + + show_storage_info(tupstore, es); +} + +/* + * Show information on Table Function Scan node, storage method and maximum + * memory/disk space used. + */ +static void +show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es) +{ + Tuplestorestate *tupstore = tscanstate->tupstore; + + if (!es->analyze || tupstore == NULL) + return; + + show_storage_info(tupstore, es); +} + +/* + * Show information on Recursive Union node, storage method and maximum + * memory/disk space used. + */ +static void +show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es) +{ + char *maxStorageType, + *tempStorageType; + int64 maxSpaceUsed, + maxSpaceUsedKB, + tempSpaceUsed; + + if (!es->analyze) + return; + + /* + * Recursive union node uses two tuplestores. We employ the storage type + * from one of them which consumed more memory/disk than the other. The + * storage size is sum of the two. + */ + tuplestore_get_stats(rstate->working_table, &tempStorageType, &tempSpaceUsed); + tuplestore_get_stats(rstate->intermediate_table, &maxStorageType, &maxSpaceUsed); + + if (tempSpaceUsed > maxSpaceUsed) + maxStorageType = tempStorageType; + + maxSpaceUsed += tempSpaceUsed; + maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed); + + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyText("Storage", maxStorageType, es); + ExplainPropertyInteger("Maximum Storage", "kB", maxSpaceUsedKB, es); + } + else + { + ExplainIndentText(es); + appendStringInfo(es->str, + "Storage: %s Maximum Storage: " INT64_FORMAT "kB\n", + maxStorageType, + maxSpaceUsedKB); + } +} + /* * Show information on memoize hits/misses/evictions and memory usage. */ diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out index d01c304c24..87bfaa6d79 100644 --- a/src/test/regress/expected/explain.out +++ b/src/test/regress/expected/explain.out @@ -729,3 +729,41 @@ select explain_filter('explain (analyze,costs off) select sum(n) over(partition (8 rows) reset work_mem; +-- Test tuplestore storage usage in CTE scan +select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n)from w'); + explain_filter +-------------------------------------------------------------------------------- + Aggregate (actual time=N.N..N.N rows=N loops=N) + -> Function Scan on generate_series a (actual time=N.N..N.N rows=N loops=N) + Planning Time: N.N ms + Execution Time: N.N ms +(4 rows) + +-- Test tuplestore storage usage in table function scan +select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))'); + explain_filter +--------------------------------------------------------------------------- + Table Function Scan on "json_table" (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + Planning Time: N.N ms + Execution Time: N.N ms +(4 rows) + +-- Test tuplestore storage usage in recurive union scan +select explain_filter('explain (analyze,costs off) with recursive t(n) as (values(1) union all select n+1 from t where n< 10) select sum(n) from t'); + explain_filter +----------------------------------------------------------------------------- + Aggregate (actual time=N.N..N.N rows=N loops=N) + CTE t + -> Recursive Union (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + -> Result (actual time=N.N..N.N rows=N loops=N) + -> WorkTable Scan on t t_1 (actual time=N.N..N.N rows=N loops=N) + Filter: (n < N) + Rows Removed by Filter: N + -> CTE Scan on t (actual time=N.N..N.N rows=N loops=N) + Storage: Memory Maximum Storage: NkB + Planning Time: N.N ms + Execution Time: N.N ms +(12 rows) + diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql index b861e2b53d..5d5c68443e 100644 --- a/src/test/regress/sql/explain.sql +++ b/src/test/regress/sql/explain.sql @@ -178,3 +178,9 @@ select explain_filter('explain (analyze,costs off) select sum(n) over() from gen -- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk) select explain_filter('explain (analyze,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2000)a(n))'); reset work_mem; +-- Test tuplestore storage usage in CTE scan +select explain_filter('explain (analyze,costs off) with w(n) as (select n from generate_series(1,10) a(n)) select sum(n)from w'); +-- Test tuplestore storage usage in table function scan +select explain_filter('explain (analyze,costs off) select * from json_table(''[]'', ''strict $.a'' columns (i int path ''$''))'); +-- Test tuplestore storage usage in recurive union scan +select explain_filter('explain (analyze,costs off) with recursive t(n) as (values(1) union all select n+1 from t where n< 10) select sum(n) from t'); -- 2.25.1
On Thu, 19 Sept 2024 at 22:17, Tatsuo Ishii <ishii@postgresql.org> wrote: > Attached patch fixes 1 & 2. I looked at this and thought that one thing you might want to consider is adjusting show_storage_info() to accept the size and type parameters so you don't have to duplicate the formatting code in show_recursive_union_info(). The first of the new tests also isn't testing what you want it to test. Maybe you could add a "materialized" in there to stop the CTE being inlined: explain (analyze,costs off) with w(n) as materialized (select n from generate_series(1,10) a(n)) select sum(n) from w Also, I'm on the fence about if the new tests are worthwhile. I won't object to them, however. I just wanted to note that most of the complexity is in tuplestore.c of which there's already coverage for. The test's value is reduced by the fact that most of the interesting details have to be masked out due to possible platform variations in the number of bytes. Really the new tests are only testing that we display the storage details and maybe that the storage type came out as expected. It seems unlikely these would get broken. I'd say it's committers preference, however. I just wanted to add my thoughts. You have to offset the value against the fact that the expected output is likely to change over the years which adds to the burden of making changes to the EXPLAIN output. David
> I looked at this and thought that one thing you might want to consider > is adjusting show_storage_info() to accept the size and type > parameters so you don't have to duplicate the formatting code in > show_recursive_union_info(). I agree and made necessary changes. See attached v4 patches. > The first of the new tests also isn't testing what you want it to > test. Maybe you could add a "materialized" in there to stop the CTE > being inlined: > > explain (analyze,costs off) with w(n) as materialized (select n from > generate_series(1,10) a(n)) select sum(n) from w > > Also, I'm on the fence about if the new tests are worthwhile. I won't > object to them, however. I just wanted to note that most of the > complexity is in tuplestore.c of which there's already coverage for. > The test's value is reduced by the fact that most of the interesting > details have to be masked out due to possible platform variations in > the number of bytes. Really the new tests are only testing that we > display the storage details and maybe that the storage type came out > as expected. It seems unlikely these would get broken. I'd say it's > committers preference, however. I just wanted to add my thoughts. You > have to offset the value against the fact that the expected output is > likely to change over the years which adds to the burden of making > changes to the EXPLAIN output. After thinking more, I lean toward to your opinion. The new tests do not give big value, but on the other hand they could become a burden over the years. I do not include the new tests in the v4 patches. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp From b94c524623a27f526f03de5e90564b185e37ffdd Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii <ishii@postgresql.org> Date: Mon, 23 Sep 2024 14:57:39 +0900 Subject: [PATCH v4] Add memory/disk usage for more executor nodes. This commit is similar to 95d6e9af07, expanding the idea to CTE scan, table function scan and recursive union scan nodes so that the maximum tuplestore memory or disk usage is shown with EXPLAIN ANALYZE command. Also adjust show_storage_info() so that it accepts storage type and storage size arguments instead of Tuplestorestate. This makes it possible for the node types to share the formatting code in show_storage_info(). Due to this show_material_info() and show_windowagg_info() are also modified. --- src/backend/commands/explain.c | 107 ++++++++++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 10 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index aaec439892..ee1bcb84e2 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -120,7 +120,8 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel, List *ancestors, ExplainState *es); static void show_sortorder_options(StringInfo buf, Node *sortexpr, Oid sortOperator, Oid collation, bool nullsFirst); -static void show_storage_info(Tuplestorestate *tupstore, ExplainState *es); +static void show_storage_info(char *maxStorageType, int64 maxSpaceUsed, + ExplainState *es); static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, List *ancestors, ExplainState *es); static void show_sort_info(SortState *sortstate, ExplainState *es); @@ -129,6 +130,11 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); +static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); +static void show_table_func_scan_info(TableFuncScanState *tscanstate, + ExplainState *es); +static void show_recursive_union_info(RecursiveUnionState *rstate, + ExplainState *es); static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es); static void show_hashagg_info(AggState *aggstate, ExplainState *es); @@ -2046,6 +2052,8 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + if (IsA(plan, CteScan)) + show_ctescan_info(castNode(CteScanState, planstate), es); break; case T_Gather: { @@ -2127,6 +2135,8 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + show_table_func_scan_info(castNode(TableFuncScanState, + planstate), es); break; case T_TidScan: { @@ -2278,6 +2288,10 @@ ExplainNode(PlanState *planstate, List *ancestors, show_memoize_info(castNode(MemoizeState, planstate), ancestors, es); break; + case T_RecursiveUnion: + show_recursive_union_info(castNode(RecursiveUnionState, + planstate), es); + break; default: break; } @@ -2901,14 +2915,9 @@ show_sortorder_options(StringInfo buf, Node *sortexpr, * Show information on storage method and maximum memory/disk space used. */ static void -show_storage_info(Tuplestorestate *tupstore, ExplainState *es) +show_storage_info(char *maxStorageType, int64 maxSpaceUsed, ExplainState *es) { - char *maxStorageType; - int64 maxSpaceUsed, - maxSpaceUsedKB; - - tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); - maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed); + int64 maxSpaceUsedKB = BYTES_TO_KILOBYTES(maxSpaceUsed); if (es->format != EXPLAIN_FORMAT_TEXT) { @@ -3380,6 +3389,9 @@ show_hash_info(HashState *hashstate, ExplainState *es) static void show_material_info(MaterialState *mstate, ExplainState *es) { + char *maxStorageType; + int64 maxSpaceUsed; + Tuplestorestate *tupstore = mstate->tuplestorestate; /* @@ -3389,7 +3401,8 @@ show_material_info(MaterialState *mstate, ExplainState *es) if (!es->analyze || tupstore == NULL) return; - show_storage_info(tupstore, es); + tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); + show_storage_info(maxStorageType, maxSpaceUsed, es); } /* @@ -3399,6 +3412,9 @@ show_material_info(MaterialState *mstate, ExplainState *es) static void show_windowagg_info(WindowAggState *winstate, ExplainState *es) { + char *maxStorageType; + int64 maxSpaceUsed; + Tuplestorestate *tupstore = winstate->buffer; /* @@ -3408,7 +3424,78 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es) if (!es->analyze || tupstore == NULL) return; - show_storage_info(tupstore, es); + tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); + show_storage_info(maxStorageType, maxSpaceUsed, es); +} + +/* + * Show information on CTE Scan node, storage method and maximum memory/disk + * space used. + */ +static void +show_ctescan_info(CteScanState *ctescanstate, ExplainState *es) +{ + char *maxStorageType; + int64 maxSpaceUsed; + + Tuplestorestate *tupstore = ctescanstate->leader->cte_table; + + if (!es->analyze || tupstore == NULL) + return; + + tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); + show_storage_info(maxStorageType, maxSpaceUsed, es); +} + +/* + * Show information on Table Function Scan node, storage method and maximum + * memory/disk space used. + */ +static void +show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es) +{ + char *maxStorageType; + int64 maxSpaceUsed; + + Tuplestorestate *tupstore = tscanstate->tupstore; + + if (!es->analyze || tupstore == NULL) + return; + + tuplestore_get_stats(tupstore, &maxStorageType, &maxSpaceUsed); + show_storage_info(maxStorageType, maxSpaceUsed, es); +} + +/* + * Show information on Recursive Union node, storage method and maximum + * memory/disk space used. + */ +static void +show_recursive_union_info(RecursiveUnionState *rstate, ExplainState *es) +{ + char *maxStorageType, + *tempStorageType; + int64 maxSpaceUsed, + tempSpaceUsed; + + if (!es->analyze) + return; + + /* + * Recursive union node uses two tuplestores. We employ the storage type + * from one of them which consumed more memory/disk than the other. The + * storage size is sum of the two. + */ + tuplestore_get_stats(rstate->working_table, &tempStorageType, + &tempSpaceUsed); + tuplestore_get_stats(rstate->intermediate_table, &maxStorageType, + &maxSpaceUsed); + + if (tempSpaceUsed > maxSpaceUsed) + maxStorageType = tempStorageType; + + maxSpaceUsed += tempSpaceUsed; + show_storage_info(maxStorageType, maxSpaceUsed, es); } /* -- 2.25.1
On Mon, 23 Sept 2024 at 18:28, Tatsuo Ishii <ishii@postgresql.org> wrote: > I agree and made necessary changes. See attached v4 patches. Looks good to me. David
> On Mon, 23 Sept 2024 at 18:28, Tatsuo Ishii <ishii@postgresql.org> wrote: >> I agree and made necessary changes. See attached v4 patches. > > Looks good to me. Thank you for the review! I have pushed the patch. Best reagards, -- Tatsuo Ishii SRA OSS K.K. English: http://www.sraoss.co.jp/index_en/ Japanese:http://www.sraoss.co.jp