From f2494d3b33405e2af8838b876cf97d2bb06666fb Mon Sep 17 00:00:00 2001 From: James Hunter Date: Wed, 26 Feb 2025 01:02:19 +0000 Subject: [PATCH 3/4] Add EXPLAIN (work_mem on) command option So that users can see how much working memory a query is likely to use, as well as how much memory it will be limited to, this commit adds an EXPLAIN (work_mem on) command option that displays the workmem estimate and limit, added in the previous two commits. --- src/backend/commands/explain.c | 233 +++++++++ src/backend/executor/nodeHash.c | 7 +- src/backend/optimizer/path/costsize.c | 4 +- src/include/commands/explain.h | 4 + src/include/executor/nodeHash.h | 2 +- src/test/regress/expected/workmem.out | 653 ++++++++++++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/workmem.sql | 307 ++++++++++++ 8 files changed, 1206 insertions(+), 6 deletions(-) create mode 100644 src/test/regress/expected/workmem.out create mode 100644 src/test/regress/sql/workmem.sql diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index d8a7232cedb..bc8e68e7be1 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -20,6 +20,8 @@ #include "commands/explain_dr.h" #include "commands/explain_format.h" #include "commands/prepare.h" +#include "executor/hashjoin.h" +#include "executor/nodeHash.h" #include "foreign/fdwapi.h" #include "jit/jit.h" #include "libpq/pqformat.h" @@ -27,6 +29,7 @@ #include "nodes/extensible.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" @@ -154,6 +157,14 @@ static ExplainWorkersState *ExplainCreateWorkersState(int num_workers); static void ExplainOpenWorker(int n, ExplainState *es); static void ExplainCloseWorker(int n, ExplainState *es); static void ExplainFlushWorkersState(ExplainState *es); +static void compute_subplan_workmem(List *plans, double *sp_estimate, + double *sp_limit); +static void compute_agg_workmem(PlanState *planstate, Agg *agg, + double *agg_estimate, double *agg_limit); +static void compute_hash_workmem(PlanState *planstate, double *hash_estimate, + double *hash_limit); +static void increment_workmem(PlanState *planstate, int workmem_id, + double *estimate, double *limit); @@ -209,6 +220,8 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt, } else if (strcmp(opt->defname, "memory") == 0) es->memory = defGetBoolean(opt); + else if (strcmp(opt->defname, "work_mem") == 0) + es->work_mem = defGetBoolean(opt); else if (strcmp(opt->defname, "serialize") == 0) { if (opt->arg) @@ -809,6 +822,14 @@ ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); + if (es->work_mem) + { + ExplainPropertyFloat("Total Working Memory Estimate", "kB", + es->total_workmem_estimate, 0, es); + ExplainPropertyFloat("Total Working Memory Limit", "kB", + es->total_workmem_limit, 0, es); + } + ExplainCloseGroup("Query", NULL, true, es); } @@ -1944,6 +1965,72 @@ ExplainNode(PlanState *planstate, List *ancestors, } } + if (es->work_mem) + { + double plan_estimate = 0.0; + double plan_limit = 0.0; + + /* + * Include working memory used by this Plan's SubPlan objects, whether + * they are included on the Plan's initPlan or subPlan lists. 
+	 */
+	compute_subplan_workmem(planstate->initPlan, &plan_estimate,
+							&plan_limit);
+	compute_subplan_workmem(planstate->subPlan, &plan_estimate,
+							&plan_limit);
+
+	/* Include working memory used by this Plan itself. */
+	switch (nodeTag(plan))
+	{
+		case T_Agg:
+			compute_agg_workmem(planstate, (Agg *) plan,
+								&plan_estimate, &plan_limit);
+			break;
+		case T_Hash:
+			compute_hash_workmem(planstate, &plan_estimate, &plan_limit);
+			break;
+		case T_RecursiveUnion:
+			{
+				RecursiveUnion *runion = (RecursiveUnion *) plan;
+
+				if (runion->hashWorkMemId > 0)
+					increment_workmem(planstate, runion->hashWorkMemId,
+									  &plan_estimate, &plan_limit);
+			}
+			/* FALLTHROUGH */
+		default:
+			if (plan->workmem_id > 0)
+				increment_workmem(planstate, plan->workmem_id,
+								  &plan_estimate, &plan_limit);
+			break;
+	}
+
+	/*
+	 * Every parallel worker (plus the leader) gets its own copy of
+	 * working memory.
+	 */
+	plan_estimate *= (1 + es->num_workers);
+	plan_limit *= (1 + es->num_workers);
+
+	es->total_workmem_estimate += plan_estimate;
+	es->total_workmem_limit += plan_limit;
+
+	if (plan_estimate > 0.0 || plan_limit > 0.0)
+	{
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+			appendStringInfo(es->str,
+							 " (work_mem=%.0f kB) (limit=%.0f kB)",
+							 plan_estimate, plan_limit);
+		else
+		{
+			ExplainPropertyFloat("Working Memory Estimate", "kB",
+								 plan_estimate, 0, es);
+			ExplainPropertyFloat("Working Memory Limit", "kB",
+								 plan_limit, 0, es);
+		}
+	}
+	}
+
 	/*
 	 * We have to forcibly clean up the instrumentation state because we
 	 * haven't done ExecutorEnd yet.  This is pretty grotty ...
@@ -2488,6 +2575,24 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	if (planstate->initPlan)
 		ExplainSubPlans(planstate->initPlan, ancestors, "InitPlan", es);
 
+	if (nodeTag(plan) == T_Gather || nodeTag(plan) == T_GatherMerge)
+	{
+		/*
+		 * Other than initPlans, every node below us gets the number of
+		 * planned workers we specified.
+		 */
+		Assert(es->num_workers == 0);
+
+		if (nodeTag(plan) == T_Gather)
+			es->num_workers = es->analyze ?
+				((GatherState *) planstate)->nworkers_launched :
+				((Gather *) plan)->num_workers;
+		else
+			es->num_workers = es->analyze ?
+				((GatherMergeState *) planstate)->nworkers_launched :
+				((GatherMerge *) plan)->num_workers;
+	}
+
 	/* lefttree */
 	if (outerPlanState(planstate))
 		ExplainNode(outerPlanState(planstate), ancestors,
@@ -2544,6 +2649,12 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		ExplainCloseGroup("Plans", "Plans", false, es);
 	}
 
+	if (nodeTag(plan) == T_Gather || nodeTag(plan) == T_GatherMerge)
+	{
+		/* End of parallel sub-tree. */
+		es->num_workers = 0;
+	}
+
 	/* in text format, undo whatever indentation we added */
 	if (es->format == EXPLAIN_FORMAT_TEXT)
 		es->indent = save_indent;
@@ -4931,3 +5042,125 @@ ExplainFlushWorkersState(ExplainState *es)
 	pfree(wstate->worker_state_save);
 	pfree(wstate);
 }
+
+/*
+ * compute_subplan_workmem - compute total workmem for a SubPlan object
+ *
+ * If a SubPlan object uses a hash table, then that hash table needs working
+ * memory.  We display that working memory on the owning Plan.  This function
+ * increments the work_mem counters to include the SubPlan's working memory.
+ */
+static void
+compute_subplan_workmem(List *plans, double *sp_estimate, double *sp_limit)
+{
+	foreach_node(SubPlanState, sps, plans)
+	{
+		SubPlan    *sp = sps->subplan;
+
+		if (sp->hashtab_workmem_id > 0)
+			increment_workmem(sps->planstate, sp->hashtab_workmem_id,
+							  sp_estimate, sp_limit);
+
+		if (sp->hashnul_workmem_id > 0)
+			increment_workmem(sps->planstate, sp->hashnul_workmem_id,
+							  sp_estimate, sp_limit);
+	}
+}
+
+static void
+compute_agg_workmem_node(PlanState *planstate, Agg *agg, double *agg_estimate,
+						 double *agg_limit)
+{
+	/* Record memory used for output data structures. */
+	if (agg->plan.workmem_id > 0)
+		increment_workmem(planstate, agg->plan.workmem_id, agg_estimate,
+						  agg_limit);
+
+	/* Record memory used for input sort buffers. */
+	if (agg->sortWorkMemId > 0)
+		increment_workmem(planstate, agg->sortWorkMemId, agg_estimate,
+						  agg_limit);
+}
+
+/*
+ * compute_agg_workmem - compute Agg node's total workmem estimate and limit
+ *
+ * An Agg node might point to a chain of additional Agg nodes.  When we explain
+ * the plan, we display only the first, "main" Agg node.
+ */
+static void
+compute_agg_workmem(PlanState *planstate, Agg *agg, double *agg_estimate,
+					double *agg_limit)
+{
+	compute_agg_workmem_node(planstate, agg, agg_estimate, agg_limit);
+
+	/* Also include the chain of GROUPING SETS aggs. */
+	foreach_node(Agg, aggnode, agg->chain)
+		compute_agg_workmem_node(planstate, aggnode, agg_estimate, agg_limit);
+}
+
+/*
+ * compute_hash_workmem - compute total workmem for a Hash node
+ *
+ * This function is complicated because we can currently adjust workmem limits
+ * for Hash (Joins) at runtime, and because the per-batch memory a Hash (Join)
+ * needs is not currently counted against the workmem limit.
+ *
+ * Here, we try to give a more accurate accounting than we'd get from just
+ * displaying limit * count.
+ */
+static void
+compute_hash_workmem(PlanState *planstate, double *hash_estimate,
+					 double *hash_limit)
+{
+	double		count = workMemCount(planstate);
+	double		estimate = workMemEstimate(planstate);
+	size_t		limit = workMemLimit(planstate);
+	HashState  *hstate = (HashState *) planstate;
+	Plan	   *plan = planstate->plan;
+	Hash	   *hash = (Hash *) plan;
+	Plan	   *outerNode = outerPlan(plan);
+	double		rows;
+	size_t		nbytes;
+	size_t		total_space_allowed;	/* ignored */
+	int			nbuckets;		/* ignored */
+	int			nbatch;
+	int			num_skew_mcvs;	/* ignored */
+	int			workmem_estimate;	/* ignored */
+
+	/*
+	 * For Hash Joins, we currently don't count per-batch memory against the
+	 * "workmem_limit", but we can at least estimate it for display with the
+	 * Plan.
+	 */
+	rows = plan->parallel_aware ? hash->rows_total : outerNode->plan_rows;
+	nbytes = limit * 1024;
+
+	ExecChooseHashTableSize(rows, outerNode->plan_width,
+							OidIsValid(hash->skewTable),
+							hstate->parallel_state != NULL,
+							hstate->parallel_state != NULL ?
+							hstate->parallel_state->nparticipants - 1 : 0,
+							&nbytes, &total_space_allowed,
+							&nbuckets, &nbatch, &num_skew_mcvs,
+							&workmem_estimate);
+
+	/* Include space for per-batch memory, if any: 2 blocks per batch. 
*/ + if (nbatch > 1) + nbytes += nbatch * 2 * BLCKSZ; + + Assert(nbytes >= limit * 1024); + + *hash_estimate += estimate * count; + *hash_limit += (double) normalize_work_bytes(nbytes) * count; +} + +static void +increment_workmem(PlanState *planstate, int workmem_id, double *estimate, + double *limit) +{ + double count = workMemCountFromId(planstate, workmem_id); + + *estimate += workMemEstimateFromId(planstate, workmem_id) * count; + *limit += workMemLimitFromId(planstate, workmem_id) * count; +} diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 7d09ac8b5a3..6ae3d649be6 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -482,7 +482,7 @@ ExecHashTableCreate(HashState *state) state->parallel_state != NULL, state->parallel_state != NULL ? state->parallel_state->nparticipants - 1 : 0, - worker_space_allowed, + &worker_space_allowed, &space_allowed, &nbuckets, &nbatch, &num_skew_mcvs, &workmem); @@ -666,7 +666,7 @@ void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, bool try_combined_hash_mem, int parallel_workers, - size_t worker_space_allowed, + size_t *worker_space_allowed, size_t *total_space_allowed, int *numbuckets, int *numbatches, @@ -699,7 +699,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, /* * Caller tells us our (per-worker) in-memory hashtable size limit. */ - hash_table_bytes = worker_space_allowed; + hash_table_bytes = *worker_space_allowed; /* * Parallel Hash tries to use the combined hash_mem of all workers to @@ -963,6 +963,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, nbatch /= 2; nbuckets *= 2; + *worker_space_allowed = (*worker_space_allowed) * 2; *total_space_allowed = (*total_space_allowed) * 2; } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 12b1f1d82a9..c1db6f53d10 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -4277,6 +4277,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, int numbuckets; int numbatches; int num_skew_mcvs; + size_t worker_space_allowed; size_t space_allowed; /* unused */ /* Count up disabled nodes. */ @@ -4322,12 +4323,13 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * XXX at some point it might be interesting to try to account for skew * optimization in the cost estimate, but for now, we don't. */ + worker_space_allowed = get_hash_memory_limit(); ExecChooseHashTableSize(inner_path_rows_total, inner_path->pathtarget->width, true, /* useskew */ parallel_hash, /* try_combined_hash_mem */ outer_path->parallel_workers, - get_hash_memory_limit(), + &worker_space_allowed, &space_allowed, &numbuckets, &numbatches, diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index 64547bd9b9c..cd8be1c5bdb 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -53,6 +53,7 @@ typedef struct ExplainState bool timing; /* print detailed node timing */ bool summary; /* print total planning and execution timing */ bool memory; /* print planner's memory usage information */ + bool work_mem; /* print work_mem estimates per node */ bool settings; /* print modified settings */ bool generic; /* generate a generic plan */ ExplainSerializeOption serialize; /* serialize the query's output? 
*/ @@ -69,6 +70,9 @@ typedef struct ExplainState bool hide_workers; /* set if we find an invisible Gather */ int rtable_size; /* length of rtable excluding the RTE_GROUP * entry */ + int num_workers; /* # of worker processes *planned* to use */ + double total_workmem_estimate; /* total working memory estimate */ + double total_workmem_limit; /* total working memory limit */ /* state related to the current plan node */ ExplainWorkersState *workers_state; /* needed if parallel plan */ } ExplainState; diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 6cd9bffbee5..b346a270b67 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -59,7 +59,7 @@ extern void ExecHashTableResetMatchFlags(HashJoinTable hashtable); extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, bool try_combined_hash_mem, int parallel_workers, - size_t worker_space_allowed, + size_t *worker_space_allowed, size_t *total_space_allowed, int *numbuckets, int *numbatches, diff --git a/src/test/regress/expected/workmem.out b/src/test/regress/expected/workmem.out new file mode 100644 index 00000000000..ca8edde6d5f --- /dev/null +++ b/src/test/regress/expected/workmem.out @@ -0,0 +1,653 @@ +---- +-- Tests that show "work_mem" output to EXPLAIN plans. +---- +-- Note: Function derived from file explain.sql. We can't use that other +-- function, since we're run in parallel with explain.sql. +create or replace function workmem_filter(text) returns setof text +language plpgsql as +$$ +declare + ln text; +begin + for ln in execute $1 + loop + -- Mask out work_mem estimate, since it might be brittle + ln := regexp_replace(ln, '\mwork_mem=\d+\M', 'work_mem=N', 'g'); + ln := regexp_replace(ln, '\mMemory Estimate: \d+\M', 'Memory Estimate: N', 'g'); + return next ln; + end loop; +end; +$$; +-- Unique -> hash agg +set enable_hashagg = on; +select workmem_filter(' +explain (costs off, work_mem on) +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; +'); + workmem_filter +----------------------------------------------------------------- + Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: onek.unique1 + -> Nested Loop + -> HashAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: "*VALUES*".column1, "*VALUES*".column2 + -> Values Scan on "*VALUES*" + -> Index Scan using onek_unique1 on onek + Index Cond: (unique1 = "*VALUES*".column1) + Filter: ("*VALUES*".column2 = ten) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 12288 kB +(11 rows) + +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; + unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 +---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- + 1 | 214 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 3 | BAAAAA | GIAAAA | OOOOxx + 20 | 306 | 0 | 0 | 0 | 0 | 0 | 20 | 20 | 20 | 20 | 0 | 1 | UAAAAA | ULAAAA | OOOOxx + 99 | 101 | 1 | 3 | 9 | 19 | 9 | 99 | 99 | 99 | 99 | 18 | 19 | VDAAAA | XDAAAA | HHHHxx +(3 rows) + +reset enable_hashagg; +-- Unique -> sort +set enable_hashagg = off; +select workmem_filter(' +explain (costs off, work_mem on) +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; +'); + workmem_filter 
+---------------------------------------------------------------------- + Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: onek.unique1 + -> Nested Loop + -> Unique + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: "*VALUES*".column1, "*VALUES*".column2 + -> Values Scan on "*VALUES*" + -> Index Scan using onek_unique1 on onek + Index Cond: (unique1 = "*VALUES*".column1) + Filter: ("*VALUES*".column2 = ten) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 8192 kB +(12 rows) + +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; + unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 +---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- + 1 | 214 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 3 | BAAAAA | GIAAAA | OOOOxx + 20 | 306 | 0 | 0 | 0 | 0 | 0 | 20 | 20 | 20 | 20 | 0 | 1 | UAAAAA | ULAAAA | OOOOxx + 99 | 101 | 1 | 3 | 9 | 19 | 9 | 99 | 99 | 99 | 99 | 18 | 19 | VDAAAA | XDAAAA | HHHHxx +(3 rows) + +reset enable_hashagg; +-- Incremental Sort +select workmem_filter(' +explain (costs off, work_mem on) +select * from (select * from tenk1 order by four) t order by four, ten +limit 1; +'); + workmem_filter +--------------------------------------------------------- + Limit + -> Incremental Sort (work_mem=N kB) (limit=8192 kB) + Sort Key: tenk1.four, tenk1.ten + Presorted Key: tenk1.four + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: tenk1.four + -> Seq Scan on tenk1 + Total Working Memory Estimate: N kB + Total Working Memory Limit: 12288 kB +(9 rows) + +select * from (select * from tenk1 order by four) t order by four, ten +limit 1; + unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 +---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- + 4220 | 5017 | 0 | 0 | 0 | 0 | 20 | 220 | 220 | 4220 | 4220 | 40 | 41 | IGAAAA | ZKHAAA | HHHHxx +(1 row) + +-- Hash Join +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from tenk1 t3 + where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred) + and t1.unique1 < 1 +) t; +'); + workmem_filter +-------------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Hash Join + Hash Cond: (t3.thousand = t1.unique1) + -> HashAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: t3.thousand, t3.tenthous + -> Index Only Scan using tenk1_thous_tenthous on tenk1 t3 + -> Hash (work_mem=N kB) (limit=8192 kB) + -> Index Only Scan using onek_unique1 on onek t1 + Index Cond: (unique1 < 1) + -> Index Only Scan using tenk1_hundred on tenk1 t2 + Index Cond: (hundred = t3.tenthous) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 16384 kB +(14 rows) + +select count(*) from ( +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from tenk1 t3 + where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred) + and t1.unique1 < 1 +) t; + count +------- + 100 +(1 row) + +-- Materialize +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join 
int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null +) t; +'); + workmem_filter +---------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Nested Loop Left Join + Filter: (t4.f1 IS NULL) + -> Seq Scan on int4_tbl t2 + -> Materialize (work_mem=N kB) (limit=4096 kB) + -> Nested Loop Left Join + Join Filter: (t3.f1 > 1) + -> Seq Scan on int4_tbl t3 + Filter: (f1 > 0) + -> Materialize (work_mem=N kB) (limit=4096 kB) + -> Seq Scan on int4_tbl t4 + -> Seq Scan on int4_tbl t1 + Total Working Memory Estimate: N kB + Total Working Memory Limit: 8192 kB +(15 rows) + +select count(*) from ( +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null +) t; + count +------- + 0 +(1 row) + +-- Grouping Sets (Hash) +select workmem_filter(' +explain (costs off, work_mem on) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); +'); + workmem_filter +---------------------------------------------------------------------- + WindowAgg (work_mem=N kB) (limit=4096 kB) + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: "*VALUES*".column1, "*VALUES*".column2 NULLS FIRST + -> HashAggregate (work_mem=N kB) (limit=8192 kB) + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = column2) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 16384 kB +(10 rows) + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); + a | b | row_number +---+---+------------ + 1 | | 1 + 1 | 1 | 2 + 2 | | 3 + 2 | 2 | 4 +(4 rows) + +-- Grouping Sets (Sort) +set enable_hashagg = off; +select workmem_filter(' +explain (costs off, work_mem on) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1, 1, 1), (2, 2, 2, 2)) as t (a, b, c, d) where a = b +group by grouping sets((a, b), (a), (b), (c), (d)); +'); + workmem_filter +---------------------------------------------------------------------- + WindowAgg (work_mem=N kB) (limit=4096 kB) + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: "*VALUES*".column1, "*VALUES*".column2 NULLS FIRST + -> GroupAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: "*VALUES*".column1, "*VALUES*".column2 + Group Key: "*VALUES*".column1 + Sort Key: "*VALUES*".column2 + Group Key: "*VALUES*".column2 + Sort Key: "*VALUES*".column3 + Group Key: "*VALUES*".column3 + Sort Key: "*VALUES*".column4 + Group Key: "*VALUES*".column4 + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = column2) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 20480 kB +(18 rows) + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1, 1, 1), (2, 2, 2, 2)) as t (a, b, c, d) where a = b +group by grouping sets((a, b), (a), (b), (c), (d)); + a | b | row_number +---+---+------------ + 1 | | 1 + 1 | 1 | 2 + 2 | | 3 + 2 | 2 | 4 + | | 5 + | | 6 + | | 7 + | | 8 + | 1 | 9 + | 2 | 10 +(10 rows) + +reset enable_hashagg; +-- Agg (hash, parallel) +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; +select workmem_filter(' +explain (costs off, work_mem on) +select length(stringu1) 
from tenk1 group by length(stringu1); +'); + workmem_filter +--------------------------------------------------------------------- + Finalize HashAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: (length((stringu1)::text)) + -> Gather + Workers Planned: 4 + -> Partial HashAggregate (work_mem=N kB) (limit=40960 kB) + Group Key: length((stringu1)::text) + -> Parallel Seq Scan on tenk1 + Total Working Memory Estimate: N kB + Total Working Memory Limit: 49152 kB +(9 rows) + +select length(stringu1) from tenk1 group by length(stringu1); + length +-------- + 6 +(1 row) + +reset parallel_setup_cost; +reset parallel_tuple_cost; +reset min_parallel_table_scan_size; +reset max_parallel_workers_per_gather; +-- Agg (simple) [no work_mem] +explain (costs off, work_mem on) +select MAX(length(stringu1)) from tenk1; + QUERY PLAN +------------------------------------- + Aggregate + -> Seq Scan on tenk1 + Total Working Memory Estimate: 0 kB + Total Working Memory Limit: 0 kB +(4 rows) + +select MAX(length(stringu1)) from tenk1; + max +----- + 6 +(1 row) + +-- Function Scan +select workmem_filter(' +explain (work_mem on, costs off) +select count(*) from ( +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +) t; +'); + workmem_filter +--------------------------------------------------------------------------- + Aggregate + -> Function Scan on generate_series a (work_mem=N kB) (limit=4096 kB) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 4096 kB +(4 rows) + +select count(*) from ( +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +) t; + count +------- + 2000 +(1 row) + +-- Three Function Scans +select workmem_filter(' +explain (work_mem on, costs off) +select count(*) +from rows from(generate_series(1, 5), + generate_series(2, 10), + generate_series(4, 15)); +'); + workmem_filter +-------------------------------------------------------------------------- + Aggregate + -> Function Scan on generate_series (work_mem=N kB) (limit=12288 kB) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 12288 kB +(4 rows) + +select count(*) +from rows from(generate_series(1, 5), + generate_series(2, 10), + generate_series(4, 15)); + count +------- + 12 +(1 row) + +-- Table Function Scan +CREATE TABLE workmem_xmldata(data xml); +select workmem_filter(' +EXPLAIN (COSTS OFF, work_mem on) +SELECT xmltable.* + FROM (SELECT data FROM workmem_xmldata) x, + LATERAL XMLTABLE(''/ROWS/ROW'' + PASSING data + COLUMNS id int PATH ''@id'', + _id FOR ORDINALITY, + country_name text PATH ''COUNTRY_NAME'' NOT NULL, + country_id text PATH ''COUNTRY_ID'', + region_id int PATH ''REGION_ID'', + size float PATH ''SIZE'', + unit text PATH ''SIZE/@unit'', + premier_name text PATH ''PREMIER_NAME'' DEFAULT ''not specified''); +'); + workmem_filter +-------------------------------------------------------------------------- + Nested Loop + -> Seq Scan on workmem_xmldata + -> Table Function Scan on "xmltable" (work_mem=N kB) (limit=4096 kB) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 4096 kB +(5 rows) + +SELECT xmltable.* + FROM (SELECT data FROM workmem_xmldata) x, + LATERAL XMLTABLE('/ROWS/ROW' + PASSING data + COLUMNS id int PATH '@id', + _id FOR ORDINALITY, + country_name text PATH 'COUNTRY_NAME' NOT NULL, + country_id text PATH 'COUNTRY_ID', + region_id int PATH 'REGION_ID', + size float PATH 'SIZE', + unit text PATH 'SIZE/@unit', + premier_name text PATH 'PREMIER_NAME' DEFAULT 'not 
specified'); + id | _id | country_name | country_id | region_id | size | unit | premier_name +----+-----+--------------+------------+-----------+------+------+-------------- +(0 rows) + +drop table workmem_xmldata; +-- SetOp [no work_mem] +explain (costs off, work_mem on) +select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10; + QUERY PLAN +------------------------------------------------------------ + SetOp Except + -> Index Only Scan using tenk1_unique1 on tenk1 + -> Index Only Scan using tenk1_unique2 on tenk1 tenk1_1 + Filter: (unique2 <> 10) + Total Working Memory Estimate: 0 kB + Total Working Memory Limit: 0 kB +(6 rows) + +select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10; + unique1 +--------- + 10 +(1 row) + +-- HashSetOp +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from + ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; +'); + workmem_filter +------------------------------------------------------------------ + Aggregate + -> HashSetOp Intersect (work_mem=N kB) (limit=8192 kB) + -> Seq Scan on tenk1 + -> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1 + Total Working Memory Estimate: N kB + Total Working Memory Limit: 8192 kB +(6 rows) + +select count(*) from + ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; + count +------- + 5000 +(1 row) + +-- RecursiveUnion and Memoize (also WorkTable Scan [no work_mem]) +select workmem_filter(' +explain (costs off, work_mem on) +select sum(o.four), sum(ss.a) from onek o +cross join lateral (with recursive x(a) as ( + select o.four as a union select a + 1 from x where a < 10) + select * from x) ss where o.ten = 1; +'); + workmem_filter +----------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Seq Scan on onek o + Filter: (ten = 1) + -> Memoize (work_mem=N kB) (limit=8192 kB) + Cache Key: o.four + Cache Mode: binary + -> CTE Scan on x (work_mem=N kB) (limit=4096 kB) + CTE x + -> Recursive Union (work_mem=N kB) (limit=16384 kB) + -> Result + -> WorkTable Scan on x x_1 + Filter: (a < 10) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 28672 kB +(15 rows) + +select sum(o.four), sum(ss.a) from onek o +cross join lateral (with recursive x(a) as ( + select o.four as a union select a + 1 from x where a < 10) + select * from x) ss where o.ten = 1; + sum | sum +------+------ + 1700 | 5350 +(1 row) + +-- CTE Scan +select workmem_filter(' +explain (costs off, work_mem on) +WITH q1(x,y) AS ( + SELECT hundred, sum(ten) FROM tenk1 GROUP BY hundred + ) +SELECT count(*) FROM q1 WHERE y > (SELECT sum(y)/100 FROM q1 qsub); +'); + workmem_filter +-------------------------------------------------------------------- + Aggregate + CTE q1 + -> HashAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: tenk1.hundred + -> Seq Scan on tenk1 + InitPlan 2 + -> Aggregate + -> CTE Scan on q1 qsub (work_mem=N kB) (limit=4096 kB) + -> CTE Scan on q1 (work_mem=N kB) (limit=4096 kB) + Filter: ((y)::numeric > (InitPlan 2).col1) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 16384 kB +(12 rows) + +WITH q1(x,y) AS ( + SELECT hundred, sum(ten) FROM tenk1 GROUP BY hundred + ) +SELECT count(*) FROM q1 WHERE y > (SELECT sum(y)/100 FROM q1 qsub); + count +------- + 50 +(1 row) + +-- WindowAgg +select workmem_filter(' +explain (costs off, work_mem on) +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +limit 5; 
+'); + workmem_filter +--------------------------------------------------------------------------------------- + Limit + -> WindowAgg (work_mem=N kB) (limit=4096 kB) + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: ((a.n < 3)) + -> Function Scan on generate_series a (work_mem=N kB) (limit=4096 kB) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 12288 kB +(7 rows) + +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +limit 5; + sum +--------- + 2000997 + 2000997 + 2000997 + 2000997 + 2000997 +(5 rows) + +-- Bitmap Heap Scan +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4) +); +'); + workmem_filter +------------------------------------------------------------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4))) + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: ((hundred = 4) OR (unique1 = 2)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_hundred (work_mem=N kB) (limit=4096 kB) + Index Cond: (hundred = 4) + -> Bitmap Index Scan on tenk1_unique1 (work_mem=N kB) (limit=4096 kB) + Index Cond: (unique1 = 2) + -> Materialize (work_mem=N kB) (limit=4096 kB) + -> Bitmap Heap Scan on tenk1 a + Recheck Cond: ((unique2 = 3) OR (unique1 = 1)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique2 (work_mem=N kB) (limit=4096 kB) + Index Cond: (unique2 = 3) + -> Bitmap Index Scan on tenk1_unique1 (work_mem=N kB) (limit=4096 kB) + Index Cond: (unique1 = 1) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 20480 kB +(20 rows) + +select count(*) from ( +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4) +); + count +------- + 101 +(1 row) + +-- InitPlan with hash table ("IN SELECT") +select workmem_filter(' +explain (costs off, work_mem on) +select ''foo''::text in (select ''bar''::name union all select ''bar''::name); +'); + workmem_filter +------------------------------------------ + Result (work_mem=N kB) (limit=16384 kB) + SubPlan 1 + -> Append + -> Result + -> Result + Total Working Memory Estimate: N kB + Total Working Memory Limit: 16384 kB +(7 rows) + +select 'foo'::text in (select 'bar'::name union all select 'bar'::name); + ?column? +---------- + f +(1 row) + +-- SubPlan with hash table +select workmem_filter(' +explain (costs off, work_mem on) +select 1 = any (select (select 1) where 1 = any (select 1)); +'); + workmem_filter +---------------------------------------------------------------- + Result (work_mem=N kB) (limit=16384 kB) + SubPlan 3 + -> Result (work_mem=N kB) (limit=8192 kB) + One-Time Filter: (ANY (1 = (hashed SubPlan 2).col1)) + InitPlan 1 + -> Result + SubPlan 2 + -> Result + Total Working Memory Estimate: N kB + Total Working Memory Limit: 24576 kB +(10 rows) + +select 1 = any (select (select 1) where 1 = any (select 1)); + ?column? +---------- + t +(1 row) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 37b6d21e1f9..1089e3bdf96 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -119,7 +119,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. 
# ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate workmem # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/workmem.sql b/src/test/regress/sql/workmem.sql new file mode 100644 index 00000000000..2de22be0427 --- /dev/null +++ b/src/test/regress/sql/workmem.sql @@ -0,0 +1,307 @@ +---- +-- Tests that show "work_mem" output to EXPLAIN plans. +---- + +-- Note: Function derived from file explain.sql. We can't use that other +-- function, since we're run in parallel with explain.sql. +create or replace function workmem_filter(text) returns setof text +language plpgsql as +$$ +declare + ln text; +begin + for ln in execute $1 + loop + -- Mask out work_mem estimate, since it might be brittle + ln := regexp_replace(ln, '\mwork_mem=\d+\M', 'work_mem=N', 'g'); + ln := regexp_replace(ln, '\mMemory Estimate: \d+\M', 'Memory Estimate: N', 'g'); + return next ln; + end loop; +end; +$$; + +-- Unique -> hash agg +set enable_hashagg = on; + +select workmem_filter(' +explain (costs off, work_mem on) +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; +'); + +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; + +reset enable_hashagg; + +-- Unique -> sort +set enable_hashagg = off; + +select workmem_filter(' +explain (costs off, work_mem on) +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; +'); + +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; + +reset enable_hashagg; + +-- Incremental Sort +select workmem_filter(' +explain (costs off, work_mem on) +select * from (select * from tenk1 order by four) t order by four, ten +limit 1; +'); + +select * from (select * from tenk1 order by four) t order by four, ten +limit 1; + +-- Hash Join +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from tenk1 t3 + where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred) + and t1.unique1 < 1 +) t; +'); + +select count(*) from ( +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from tenk1 t3 + where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred) + and t1.unique1 < 1 +) t; + +-- Materialize +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null +) t; +'); + +select count(*) from ( +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null +) t; + +-- Grouping Sets (Hash) +select workmem_filter(' +explain (costs off, work_mem on) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); +'); + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); + +-- Grouping Sets (Sort) +set 
enable_hashagg = off; + +select workmem_filter(' +explain (costs off, work_mem on) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1, 1, 1), (2, 2, 2, 2)) as t (a, b, c, d) where a = b +group by grouping sets((a, b), (a), (b), (c), (d)); +'); + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1, 1, 1), (2, 2, 2, 2)) as t (a, b, c, d) where a = b +group by grouping sets((a, b), (a), (b), (c), (d)); + +reset enable_hashagg; + +-- Agg (hash, parallel) +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; + +select workmem_filter(' +explain (costs off, work_mem on) +select length(stringu1) from tenk1 group by length(stringu1); +'); + +select length(stringu1) from tenk1 group by length(stringu1); + +reset parallel_setup_cost; +reset parallel_tuple_cost; +reset min_parallel_table_scan_size; +reset max_parallel_workers_per_gather; + +-- Agg (simple) [no work_mem] +explain (costs off, work_mem on) +select MAX(length(stringu1)) from tenk1; + +select MAX(length(stringu1)) from tenk1; + +-- Function Scan +select workmem_filter(' +explain (work_mem on, costs off) +select count(*) from ( +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +) t; +'); + +select count(*) from ( +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +) t; + +-- Three Function Scans +select workmem_filter(' +explain (work_mem on, costs off) +select count(*) +from rows from(generate_series(1, 5), + generate_series(2, 10), + generate_series(4, 15)); +'); + +select count(*) +from rows from(generate_series(1, 5), + generate_series(2, 10), + generate_series(4, 15)); + +-- Table Function Scan +CREATE TABLE workmem_xmldata(data xml); + +select workmem_filter(' +EXPLAIN (COSTS OFF, work_mem on) +SELECT xmltable.* + FROM (SELECT data FROM workmem_xmldata) x, + LATERAL XMLTABLE(''/ROWS/ROW'' + PASSING data + COLUMNS id int PATH ''@id'', + _id FOR ORDINALITY, + country_name text PATH ''COUNTRY_NAME'' NOT NULL, + country_id text PATH ''COUNTRY_ID'', + region_id int PATH ''REGION_ID'', + size float PATH ''SIZE'', + unit text PATH ''SIZE/@unit'', + premier_name text PATH ''PREMIER_NAME'' DEFAULT ''not specified''); +'); + +SELECT xmltable.* + FROM (SELECT data FROM workmem_xmldata) x, + LATERAL XMLTABLE('/ROWS/ROW' + PASSING data + COLUMNS id int PATH '@id', + _id FOR ORDINALITY, + country_name text PATH 'COUNTRY_NAME' NOT NULL, + country_id text PATH 'COUNTRY_ID', + region_id int PATH 'REGION_ID', + size float PATH 'SIZE', + unit text PATH 'SIZE/@unit', + premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'); + +drop table workmem_xmldata; + +-- SetOp [no work_mem] +explain (costs off, work_mem on) +select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10; + +select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10; + +-- HashSetOp +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from + ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; +'); + +select count(*) from + ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; + +-- RecursiveUnion and Memoize (also WorkTable Scan [no work_mem]) +select workmem_filter(' +explain (costs off, work_mem on) +select sum(o.four), sum(ss.a) from onek o +cross join lateral (with recursive x(a) as ( + select o.four as a union select a + 1 from x where a < 10) + 
select * from x) ss where o.ten = 1; +'); + +select sum(o.four), sum(ss.a) from onek o +cross join lateral (with recursive x(a) as ( + select o.four as a union select a + 1 from x where a < 10) + select * from x) ss where o.ten = 1; + +-- CTE Scan +select workmem_filter(' +explain (costs off, work_mem on) +WITH q1(x,y) AS ( + SELECT hundred, sum(ten) FROM tenk1 GROUP BY hundred + ) +SELECT count(*) FROM q1 WHERE y > (SELECT sum(y)/100 FROM q1 qsub); +'); + +WITH q1(x,y) AS ( + SELECT hundred, sum(ten) FROM tenk1 GROUP BY hundred + ) +SELECT count(*) FROM q1 WHERE y > (SELECT sum(y)/100 FROM q1 qsub); + +-- WindowAgg +select workmem_filter(' +explain (costs off, work_mem on) +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +limit 5; +'); + +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +limit 5; + +-- Bitmap Heap Scan +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4) +); +'); + +select count(*) from ( +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4) +); + +-- InitPlan with hash table ("IN SELECT") +select workmem_filter(' +explain (costs off, work_mem on) +select ''foo''::text in (select ''bar''::name union all select ''bar''::name); +'); + +select 'foo'::text in (select 'bar'::name union all select 'bar'::name); + +-- SubPlan with hash table +select workmem_filter(' +explain (costs off, work_mem on) +select 1 = any (select (select 1) where 1 = any (select 1)); +'); + +select 1 = any (select (select 1) where 1 = any (select 1)); -- 2.47.1
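
For reviewers who want to exercise the new option by hand, here is a minimal
sketch (illustrative only, not part of the patch; it assumes the full series
is applied and the standard regression database is loaded, so that the tenk1
table exists):

    -- In text format, per-node output appears as
    -- "(work_mem=... kB) (limit=... kB)", with query-wide totals printed
    -- after the plan:
    explain (costs off, work_mem on)
    select two, sum(ten) from tenk1 group by two;

    -- In the structured formats, the same numbers appear as the
    -- "Working Memory Estimate" and "Working Memory Limit" properties:
    explain (costs off, work_mem on, format json)
    select two, sum(ten) from tenk1 group by two;

As in the regression tests above, the estimates vary with statistics and
memory settings, which is why workmem_filter() masks them and the tests
compare only the limits.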