From f2494d3b33405e2af8838b876cf97d2bb06666fb Mon Sep 17 00:00:00 2001 From: James Hunter Date: Wed, 26 Feb 2025 01:02:19 +0000 Subject: [PATCH 3/4] Add EXPLAIN (work_mem on) command option So that users can see how much working memory a query is likely to use, as well as how much memory it will be limited to, this commit adds an EXPLAIN (work_mem on) command option that displays the workmem estimate and limit, added in the previous two commits. --- src/backend/commands/explain.c | 233 +++++++++ src/backend/executor/nodeHash.c | 7 +- src/backend/optimizer/path/costsize.c | 4 +- src/include/commands/explain.h | 4 + src/include/executor/nodeHash.h | 2 +- src/test/regress/expected/workmem.out | 653 ++++++++++++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/workmem.sql | 307 ++++++++++++ 8 files changed, 1206 insertions(+), 6 deletions(-) create mode 100644 src/test/regress/expected/workmem.out create mode 100644 src/test/regress/sql/workmem.sql diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index d8a7232cedb..bc8e68e7be1 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -20,6 +20,8 @@ #include "commands/explain_dr.h" #include "commands/explain_format.h" #include "commands/prepare.h" +#include "executor/hashjoin.h" +#include "executor/nodeHash.h" #include "foreign/fdwapi.h" #include "jit/jit.h" #include "libpq/pqformat.h" @@ -27,6 +29,7 @@ #include "nodes/extensible.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/cost.h" #include "parser/analyze.h" #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" @@ -154,6 +157,14 @@ static ExplainWorkersState *ExplainCreateWorkersState(int num_workers); static void ExplainOpenWorker(int n, ExplainState *es); static void ExplainCloseWorker(int n, ExplainState *es); static void ExplainFlushWorkersState(ExplainState *es); +static void compute_subplan_workmem(List *plans, double *sp_estimate, + double *sp_limit); +static void compute_agg_workmem(PlanState *planstate, Agg *agg, + double *agg_estimate, double *agg_limit); +static void compute_hash_workmem(PlanState *planstate, double *hash_estimate, + double *hash_limit); +static void increment_workmem(PlanState *planstate, int workmem_id, + double *estimate, double *limit); @@ -209,6 +220,8 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt, } else if (strcmp(opt->defname, "memory") == 0) es->memory = defGetBoolean(opt); + else if (strcmp(opt->defname, "work_mem") == 0) + es->work_mem = defGetBoolean(opt); else if (strcmp(opt->defname, "serialize") == 0) { if (opt->arg) @@ -809,6 +822,14 @@ ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan, ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3, es); + if (es->work_mem) + { + ExplainPropertyFloat("Total Working Memory Estimate", "kB", + es->total_workmem_estimate, 0, es); + ExplainPropertyFloat("Total Working Memory Limit", "kB", + es->total_workmem_limit, 0, es); + } + ExplainCloseGroup("Query", NULL, true, es); } @@ -1944,6 +1965,72 @@ ExplainNode(PlanState *planstate, List *ancestors, } } + if (es->work_mem) + { + double plan_estimate = 0.0; + double plan_limit = 0.0; + + /* + * Include working memory used by this Plan's SubPlan objects, whether + * they are included on the Plan's initPlan or subPlan lists. 
+	 */
+	compute_subplan_workmem(planstate->initPlan, &plan_estimate,
+							&plan_limit);
+	compute_subplan_workmem(planstate->subPlan, &plan_estimate,
+							&plan_limit);
+
+	/* Include working memory used by this Plan itself. */
+	switch (nodeTag(plan))
+	{
+		case T_Agg:
+			compute_agg_workmem(planstate, (Agg *) plan,
+								&plan_estimate, &plan_limit);
+			break;
+		case T_Hash:
+			compute_hash_workmem(planstate, &plan_estimate, &plan_limit);
+			break;
+		case T_RecursiveUnion:
+			{
+				RecursiveUnion *runion = (RecursiveUnion *) plan;
+
+				if (runion->hashWorkMemId > 0)
+					increment_workmem(planstate, runion->hashWorkMemId,
+									  &plan_estimate, &plan_limit);
+			}
+			/* FALLTHROUGH */
+		default:
+			if (plan->workmem_id > 0)
+				increment_workmem(planstate, plan->workmem_id,
+								  &plan_estimate, &plan_limit);
+			break;
+	}
+
+	/*
+	 * Every parallel worker (plus the leader) gets its own copy of
+	 * working memory.
+	 */
+	plan_estimate *= (1 + es->num_workers);
+	plan_limit *= (1 + es->num_workers);
+
+	es->total_workmem_estimate += plan_estimate;
+	es->total_workmem_limit += plan_limit;
+
+	if (plan_estimate > 0.0 || plan_limit > 0.0)
+	{
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+			appendStringInfo(es->str,
+							 " (work_mem=%.0f kB) (limit=%.0f kB)",
+							 plan_estimate, plan_limit);
+		else
+		{
+			ExplainPropertyFloat("Working Memory Estimate", "kB",
+								 plan_estimate, 0, es);
+			ExplainPropertyFloat("Working Memory Limit", "kB",
+								 plan_limit, 0, es);
+		}
+	}
+	}
+
 	/*
 	 * We have to forcibly clean up the instrumentation state because we
 	 * haven't done ExecutorEnd yet.  This is pretty grotty ...
@@ -2488,6 +2575,24 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	if (planstate->initPlan)
 		ExplainSubPlans(planstate->initPlan, ancestors, "InitPlan", es);
 
+	if (nodeTag(plan) == T_Gather || nodeTag(plan) == T_GatherMerge)
+	{
+		/*
+		 * Other than initPlans, every node below us gets the number of
+		 * planned workers we specified.
+		 */
+		Assert(es->num_workers == 0);
+
+		if (nodeTag(plan) == T_Gather)
+			es->num_workers = es->analyze ?
+				((GatherState *) planstate)->nworkers_launched :
+				((Gather *) plan)->num_workers;
+		else
+			es->num_workers = es->analyze ?
+				((GatherMergeState *) planstate)->nworkers_launched :
+				((GatherMerge *) plan)->num_workers;
+	}
+
 	/* lefttree */
 	if (outerPlanState(planstate))
 		ExplainNode(outerPlanState(planstate), ancestors,
@@ -2544,6 +2649,12 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		ExplainCloseGroup("Plans", "Plans", false, es);
 	}
 
+	if (nodeTag(plan) == T_Gather || nodeTag(plan) == T_GatherMerge)
+	{
+		/* End of parallel sub-tree. */
+		es->num_workers = 0;
+	}
+
 	/* in text format, undo whatever indentation we added */
 	if (es->format == EXPLAIN_FORMAT_TEXT)
 		es->indent = save_indent;
@@ -4931,3 +5042,125 @@ ExplainFlushWorkersState(ExplainState *es)
 	pfree(wstate->worker_state_save);
 	pfree(wstate);
 }
+
+/*
+ * compute_subplan_workmem - compute total workmem for a SubPlan object
+ *
+ * If a SubPlan object uses a hash table, then that hash table needs working
+ * memory.  We display that working memory on the owning Plan.  This function
+ * increments the work_mem counters to include the SubPlan's working memory.
+ */
+static void
+compute_subplan_workmem(List *plans, double *sp_estimate, double *sp_limit)
+{
+	foreach_node(SubPlanState, sps, plans)
+	{
+		SubPlan    *sp = sps->subplan;
+
+		if (sp->hashtab_workmem_id > 0)
+			increment_workmem(sps->planstate, sp->hashtab_workmem_id,
+							  sp_estimate, sp_limit);
+
+		if (sp->hashnul_workmem_id > 0)
+			increment_workmem(sps->planstate, sp->hashnul_workmem_id,
+							  sp_estimate, sp_limit);
+	}
+}
+
+static void
+compute_agg_workmem_node(PlanState *planstate, Agg *agg, double *agg_estimate,
+						 double *agg_limit)
+{
+	/* Record memory used for output data structures. */
+	if (agg->plan.workmem_id > 0)
+		increment_workmem(planstate, agg->plan.workmem_id, agg_estimate,
+						  agg_limit);
+
+	/* Record memory used for input sort buffers. */
+	if (agg->sortWorkMemId > 0)
+		increment_workmem(planstate, agg->sortWorkMemId, agg_estimate,
+						  agg_limit);
+}
+
+/*
+ * compute_agg_workmem - compute Agg node's total workmem estimate and limit
+ *
+ * An Agg node might point to a chain of additional Agg nodes.  When we explain
+ * the plan, we display only the first, "main" Agg node.
+ */
+static void
+compute_agg_workmem(PlanState *planstate, Agg *agg, double *agg_estimate,
+					double *agg_limit)
+{
+	compute_agg_workmem_node(planstate, agg, agg_estimate, agg_limit);
+
+	/* Also include the chain of GROUPING SETS aggs. */
+	foreach_node(Agg, aggnode, agg->chain)
+		compute_agg_workmem_node(planstate, aggnode, agg_estimate, agg_limit);
+}
+
+/*
+ * compute_hash_workmem - compute total workmem for a Hash node
+ *
+ * This function is complicated because we can currently adjust workmem limits
+ * for Hash (Joins) at runtime, and because the per-batch memory a Hash (Join)
+ * needs is not currently counted against the workmem limit.
+ *
+ * Here, we try to give a more accurate accounting than we'd get from just
+ * displaying limit * count.
+ */
+static void
+compute_hash_workmem(PlanState *planstate, double *hash_estimate,
+					 double *hash_limit)
+{
+	double		count = workMemCount(planstate);
+	double		estimate = workMemEstimate(planstate);
+	size_t		limit = workMemLimit(planstate);
+	HashState  *hstate = (HashState *) planstate;
+	Plan	   *plan = planstate->plan;
+	Hash	   *hash = (Hash *) plan;
+	Plan	   *outerNode = outerPlan(plan);
+	double		rows;
+	size_t		nbytes;
+	size_t		total_space_allowed;	/* ignored */
+	int			nbuckets;		/* ignored */
+	int			nbatch;
+	int			num_skew_mcvs;	/* ignored */
+	int			workmem_estimate;	/* ignored */
+
+	/*
+	 * For Hash Joins, we currently don't count per-batch memory against the
+	 * "workmem_limit", but we can at least estimate it for display with the
+	 * Plan.
+	 */
+	rows = plan->parallel_aware ? hash->rows_total : outerNode->plan_rows;
+	nbytes = limit * 1024;
+
+	ExecChooseHashTableSize(rows, outerNode->plan_width,
+							OidIsValid(hash->skewTable),
+							hstate->parallel_state != NULL,
+							hstate->parallel_state != NULL ?
+							hstate->parallel_state->nparticipants - 1 : 0,
+							&nbytes, &total_space_allowed,
+							&nbuckets, &nbatch, &num_skew_mcvs,
+							&workmem_estimate);
+
+	/* Include space for per-batch memory, if any: 2 blocks per batch. 
*/ + if (nbatch > 1) + nbytes += nbatch * 2 * BLCKSZ; + + Assert(nbytes >= limit * 1024); + + *hash_estimate += estimate * count; + *hash_limit += (double) normalize_work_bytes(nbytes) * count; +} + +static void +increment_workmem(PlanState *planstate, int workmem_id, double *estimate, + double *limit) +{ + double count = workMemCountFromId(planstate, workmem_id); + + *estimate += workMemEstimateFromId(planstate, workmem_id) * count; + *limit += workMemLimitFromId(planstate, workmem_id) * count; +} diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 7d09ac8b5a3..6ae3d649be6 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -482,7 +482,7 @@ ExecHashTableCreate(HashState *state) state->parallel_state != NULL, state->parallel_state != NULL ? state->parallel_state->nparticipants - 1 : 0, - worker_space_allowed, + &worker_space_allowed, &space_allowed, &nbuckets, &nbatch, &num_skew_mcvs, &workmem); @@ -666,7 +666,7 @@ void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, bool try_combined_hash_mem, int parallel_workers, - size_t worker_space_allowed, + size_t *worker_space_allowed, size_t *total_space_allowed, int *numbuckets, int *numbatches, @@ -699,7 +699,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, /* * Caller tells us our (per-worker) in-memory hashtable size limit. */ - hash_table_bytes = worker_space_allowed; + hash_table_bytes = *worker_space_allowed; /* * Parallel Hash tries to use the combined hash_mem of all workers to @@ -963,6 +963,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, nbatch /= 2; nbuckets *= 2; + *worker_space_allowed = (*worker_space_allowed) * 2; *total_space_allowed = (*total_space_allowed) * 2; } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 12b1f1d82a9..c1db6f53d10 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -4277,6 +4277,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, int numbuckets; int numbatches; int num_skew_mcvs; + size_t worker_space_allowed; size_t space_allowed; /* unused */ /* Count up disabled nodes. */ @@ -4322,12 +4323,13 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * XXX at some point it might be interesting to try to account for skew * optimization in the cost estimate, but for now, we don't. */ + worker_space_allowed = get_hash_memory_limit(); ExecChooseHashTableSize(inner_path_rows_total, inner_path->pathtarget->width, true, /* useskew */ parallel_hash, /* try_combined_hash_mem */ outer_path->parallel_workers, - get_hash_memory_limit(), + &worker_space_allowed, &space_allowed, &numbuckets, &numbatches, diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index 64547bd9b9c..cd8be1c5bdb 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -53,6 +53,7 @@ typedef struct ExplainState bool timing; /* print detailed node timing */ bool summary; /* print total planning and execution timing */ bool memory; /* print planner's memory usage information */ + bool work_mem; /* print work_mem estimates per node */ bool settings; /* print modified settings */ bool generic; /* generate a generic plan */ ExplainSerializeOption serialize; /* serialize the query's output? 
*/ @@ -69,6 +70,9 @@ typedef struct ExplainState bool hide_workers; /* set if we find an invisible Gather */ int rtable_size; /* length of rtable excluding the RTE_GROUP * entry */ + int num_workers; /* # of worker processes *planned* to use */ + double total_workmem_estimate; /* total working memory estimate */ + double total_workmem_limit; /* total working memory limit */ /* state related to the current plan node */ ExplainWorkersState *workers_state; /* needed if parallel plan */ } ExplainState; diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 6cd9bffbee5..b346a270b67 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -59,7 +59,7 @@ extern void ExecHashTableResetMatchFlags(HashJoinTable hashtable); extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, bool try_combined_hash_mem, int parallel_workers, - size_t worker_space_allowed, + size_t *worker_space_allowed, size_t *total_space_allowed, int *numbuckets, int *numbatches, diff --git a/src/test/regress/expected/workmem.out b/src/test/regress/expected/workmem.out new file mode 100644 index 00000000000..ca8edde6d5f --- /dev/null +++ b/src/test/regress/expected/workmem.out @@ -0,0 +1,653 @@ +---- +-- Tests that show "work_mem" output to EXPLAIN plans. +---- +-- Note: Function derived from file explain.sql. We can't use that other +-- function, since we're run in parallel with explain.sql. +create or replace function workmem_filter(text) returns setof text +language plpgsql as +$$ +declare + ln text; +begin + for ln in execute $1 + loop + -- Mask out work_mem estimate, since it might be brittle + ln := regexp_replace(ln, '\mwork_mem=\d+\M', 'work_mem=N', 'g'); + ln := regexp_replace(ln, '\mMemory Estimate: \d+\M', 'Memory Estimate: N', 'g'); + return next ln; + end loop; +end; +$$; +-- Unique -> hash agg +set enable_hashagg = on; +select workmem_filter(' +explain (costs off, work_mem on) +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; +'); + workmem_filter +----------------------------------------------------------------- + Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: onek.unique1 + -> Nested Loop + -> HashAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: "*VALUES*".column1, "*VALUES*".column2 + -> Values Scan on "*VALUES*" + -> Index Scan using onek_unique1 on onek + Index Cond: (unique1 = "*VALUES*".column1) + Filter: ("*VALUES*".column2 = ten) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 12288 kB +(11 rows) + +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; + unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 +---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- + 1 | 214 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 3 | BAAAAA | GIAAAA | OOOOxx + 20 | 306 | 0 | 0 | 0 | 0 | 0 | 20 | 20 | 20 | 20 | 0 | 1 | UAAAAA | ULAAAA | OOOOxx + 99 | 101 | 1 | 3 | 9 | 19 | 9 | 99 | 99 | 99 | 99 | 18 | 19 | VDAAAA | XDAAAA | HHHHxx +(3 rows) + +reset enable_hashagg; +-- Unique -> sort +set enable_hashagg = off; +select workmem_filter(' +explain (costs off, work_mem on) +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; +'); + workmem_filter 
+---------------------------------------------------------------------- + Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: onek.unique1 + -> Nested Loop + -> Unique + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: "*VALUES*".column1, "*VALUES*".column2 + -> Values Scan on "*VALUES*" + -> Index Scan using onek_unique1 on onek + Index Cond: (unique1 = "*VALUES*".column1) + Filter: ("*VALUES*".column2 = ten) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 8192 kB +(12 rows) + +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; + unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 +---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- + 1 | 214 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 3 | BAAAAA | GIAAAA | OOOOxx + 20 | 306 | 0 | 0 | 0 | 0 | 0 | 20 | 20 | 20 | 20 | 0 | 1 | UAAAAA | ULAAAA | OOOOxx + 99 | 101 | 1 | 3 | 9 | 19 | 9 | 99 | 99 | 99 | 99 | 18 | 19 | VDAAAA | XDAAAA | HHHHxx +(3 rows) + +reset enable_hashagg; +-- Incremental Sort +select workmem_filter(' +explain (costs off, work_mem on) +select * from (select * from tenk1 order by four) t order by four, ten +limit 1; +'); + workmem_filter +--------------------------------------------------------- + Limit + -> Incremental Sort (work_mem=N kB) (limit=8192 kB) + Sort Key: tenk1.four, tenk1.ten + Presorted Key: tenk1.four + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: tenk1.four + -> Seq Scan on tenk1 + Total Working Memory Estimate: N kB + Total Working Memory Limit: 12288 kB +(9 rows) + +select * from (select * from tenk1 order by four) t order by four, ten +limit 1; + unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 +---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- + 4220 | 5017 | 0 | 0 | 0 | 0 | 20 | 220 | 220 | 4220 | 4220 | 40 | 41 | IGAAAA | ZKHAAA | HHHHxx +(1 row) + +-- Hash Join +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from tenk1 t3 + where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred) + and t1.unique1 < 1 +) t; +'); + workmem_filter +-------------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Hash Join + Hash Cond: (t3.thousand = t1.unique1) + -> HashAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: t3.thousand, t3.tenthous + -> Index Only Scan using tenk1_thous_tenthous on tenk1 t3 + -> Hash (work_mem=N kB) (limit=8192 kB) + -> Index Only Scan using onek_unique1 on onek t1 + Index Cond: (unique1 < 1) + -> Index Only Scan using tenk1_hundred on tenk1 t2 + Index Cond: (hundred = t3.tenthous) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 16384 kB +(14 rows) + +select count(*) from ( +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from tenk1 t3 + where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred) + and t1.unique1 < 1 +) t; + count +------- + 100 +(1 row) + +-- Materialize +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join 
int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null +) t; +'); + workmem_filter +---------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Nested Loop Left Join + Filter: (t4.f1 IS NULL) + -> Seq Scan on int4_tbl t2 + -> Materialize (work_mem=N kB) (limit=4096 kB) + -> Nested Loop Left Join + Join Filter: (t3.f1 > 1) + -> Seq Scan on int4_tbl t3 + Filter: (f1 > 0) + -> Materialize (work_mem=N kB) (limit=4096 kB) + -> Seq Scan on int4_tbl t4 + -> Seq Scan on int4_tbl t1 + Total Working Memory Estimate: N kB + Total Working Memory Limit: 8192 kB +(15 rows) + +select count(*) from ( +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null +) t; + count +------- + 0 +(1 row) + +-- Grouping Sets (Hash) +select workmem_filter(' +explain (costs off, work_mem on) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); +'); + workmem_filter +---------------------------------------------------------------------- + WindowAgg (work_mem=N kB) (limit=4096 kB) + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: "*VALUES*".column1, "*VALUES*".column2 NULLS FIRST + -> HashAggregate (work_mem=N kB) (limit=8192 kB) + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = column2) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 16384 kB +(10 rows) + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); + a | b | row_number +---+---+------------ + 1 | | 1 + 1 | 1 | 2 + 2 | | 3 + 2 | 2 | 4 +(4 rows) + +-- Grouping Sets (Sort) +set enable_hashagg = off; +select workmem_filter(' +explain (costs off, work_mem on) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1, 1, 1), (2, 2, 2, 2)) as t (a, b, c, d) where a = b +group by grouping sets((a, b), (a), (b), (c), (d)); +'); + workmem_filter +---------------------------------------------------------------------- + WindowAgg (work_mem=N kB) (limit=4096 kB) + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: "*VALUES*".column1, "*VALUES*".column2 NULLS FIRST + -> GroupAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: "*VALUES*".column1, "*VALUES*".column2 + Group Key: "*VALUES*".column1 + Sort Key: "*VALUES*".column2 + Group Key: "*VALUES*".column2 + Sort Key: "*VALUES*".column3 + Group Key: "*VALUES*".column3 + Sort Key: "*VALUES*".column4 + Group Key: "*VALUES*".column4 + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = column2) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 20480 kB +(18 rows) + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1, 1, 1), (2, 2, 2, 2)) as t (a, b, c, d) where a = b +group by grouping sets((a, b), (a), (b), (c), (d)); + a | b | row_number +---+---+------------ + 1 | | 1 + 1 | 1 | 2 + 2 | | 3 + 2 | 2 | 4 + | | 5 + | | 6 + | | 7 + | | 8 + | 1 | 9 + | 2 | 10 +(10 rows) + +reset enable_hashagg; +-- Agg (hash, parallel) +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; +select workmem_filter(' +explain (costs off, work_mem on) +select length(stringu1) 
from tenk1 group by length(stringu1); +'); + workmem_filter +--------------------------------------------------------------------- + Finalize HashAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: (length((stringu1)::text)) + -> Gather + Workers Planned: 4 + -> Partial HashAggregate (work_mem=N kB) (limit=40960 kB) + Group Key: length((stringu1)::text) + -> Parallel Seq Scan on tenk1 + Total Working Memory Estimate: N kB + Total Working Memory Limit: 49152 kB +(9 rows) + +select length(stringu1) from tenk1 group by length(stringu1); + length +-------- + 6 +(1 row) + +reset parallel_setup_cost; +reset parallel_tuple_cost; +reset min_parallel_table_scan_size; +reset max_parallel_workers_per_gather; +-- Agg (simple) [no work_mem] +explain (costs off, work_mem on) +select MAX(length(stringu1)) from tenk1; + QUERY PLAN +------------------------------------- + Aggregate + -> Seq Scan on tenk1 + Total Working Memory Estimate: 0 kB + Total Working Memory Limit: 0 kB +(4 rows) + +select MAX(length(stringu1)) from tenk1; + max +----- + 6 +(1 row) + +-- Function Scan +select workmem_filter(' +explain (work_mem on, costs off) +select count(*) from ( +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +) t; +'); + workmem_filter +--------------------------------------------------------------------------- + Aggregate + -> Function Scan on generate_series a (work_mem=N kB) (limit=4096 kB) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 4096 kB +(4 rows) + +select count(*) from ( +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +) t; + count +------- + 2000 +(1 row) + +-- Three Function Scans +select workmem_filter(' +explain (work_mem on, costs off) +select count(*) +from rows from(generate_series(1, 5), + generate_series(2, 10), + generate_series(4, 15)); +'); + workmem_filter +-------------------------------------------------------------------------- + Aggregate + -> Function Scan on generate_series (work_mem=N kB) (limit=12288 kB) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 12288 kB +(4 rows) + +select count(*) +from rows from(generate_series(1, 5), + generate_series(2, 10), + generate_series(4, 15)); + count +------- + 12 +(1 row) + +-- Table Function Scan +CREATE TABLE workmem_xmldata(data xml); +select workmem_filter(' +EXPLAIN (COSTS OFF, work_mem on) +SELECT xmltable.* + FROM (SELECT data FROM workmem_xmldata) x, + LATERAL XMLTABLE(''/ROWS/ROW'' + PASSING data + COLUMNS id int PATH ''@id'', + _id FOR ORDINALITY, + country_name text PATH ''COUNTRY_NAME'' NOT NULL, + country_id text PATH ''COUNTRY_ID'', + region_id int PATH ''REGION_ID'', + size float PATH ''SIZE'', + unit text PATH ''SIZE/@unit'', + premier_name text PATH ''PREMIER_NAME'' DEFAULT ''not specified''); +'); + workmem_filter +-------------------------------------------------------------------------- + Nested Loop + -> Seq Scan on workmem_xmldata + -> Table Function Scan on "xmltable" (work_mem=N kB) (limit=4096 kB) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 4096 kB +(5 rows) + +SELECT xmltable.* + FROM (SELECT data FROM workmem_xmldata) x, + LATERAL XMLTABLE('/ROWS/ROW' + PASSING data + COLUMNS id int PATH '@id', + _id FOR ORDINALITY, + country_name text PATH 'COUNTRY_NAME' NOT NULL, + country_id text PATH 'COUNTRY_ID', + region_id int PATH 'REGION_ID', + size float PATH 'SIZE', + unit text PATH 'SIZE/@unit', + premier_name text PATH 'PREMIER_NAME' DEFAULT 'not 
specified'); + id | _id | country_name | country_id | region_id | size | unit | premier_name +----+-----+--------------+------------+-----------+------+------+-------------- +(0 rows) + +drop table workmem_xmldata; +-- SetOp [no work_mem] +explain (costs off, work_mem on) +select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10; + QUERY PLAN +------------------------------------------------------------ + SetOp Except + -> Index Only Scan using tenk1_unique1 on tenk1 + -> Index Only Scan using tenk1_unique2 on tenk1 tenk1_1 + Filter: (unique2 <> 10) + Total Working Memory Estimate: 0 kB + Total Working Memory Limit: 0 kB +(6 rows) + +select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10; + unique1 +--------- + 10 +(1 row) + +-- HashSetOp +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from + ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; +'); + workmem_filter +------------------------------------------------------------------ + Aggregate + -> HashSetOp Intersect (work_mem=N kB) (limit=8192 kB) + -> Seq Scan on tenk1 + -> Index Only Scan using tenk1_unique1 on tenk1 tenk1_1 + Total Working Memory Estimate: N kB + Total Working Memory Limit: 8192 kB +(6 rows) + +select count(*) from + ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; + count +------- + 5000 +(1 row) + +-- RecursiveUnion and Memoize (also WorkTable Scan [no work_mem]) +select workmem_filter(' +explain (costs off, work_mem on) +select sum(o.four), sum(ss.a) from onek o +cross join lateral (with recursive x(a) as ( + select o.four as a union select a + 1 from x where a < 10) + select * from x) ss where o.ten = 1; +'); + workmem_filter +----------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Seq Scan on onek o + Filter: (ten = 1) + -> Memoize (work_mem=N kB) (limit=8192 kB) + Cache Key: o.four + Cache Mode: binary + -> CTE Scan on x (work_mem=N kB) (limit=4096 kB) + CTE x + -> Recursive Union (work_mem=N kB) (limit=16384 kB) + -> Result + -> WorkTable Scan on x x_1 + Filter: (a < 10) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 28672 kB +(15 rows) + +select sum(o.four), sum(ss.a) from onek o +cross join lateral (with recursive x(a) as ( + select o.four as a union select a + 1 from x where a < 10) + select * from x) ss where o.ten = 1; + sum | sum +------+------ + 1700 | 5350 +(1 row) + +-- CTE Scan +select workmem_filter(' +explain (costs off, work_mem on) +WITH q1(x,y) AS ( + SELECT hundred, sum(ten) FROM tenk1 GROUP BY hundred + ) +SELECT count(*) FROM q1 WHERE y > (SELECT sum(y)/100 FROM q1 qsub); +'); + workmem_filter +-------------------------------------------------------------------- + Aggregate + CTE q1 + -> HashAggregate (work_mem=N kB) (limit=8192 kB) + Group Key: tenk1.hundred + -> Seq Scan on tenk1 + InitPlan 2 + -> Aggregate + -> CTE Scan on q1 qsub (work_mem=N kB) (limit=4096 kB) + -> CTE Scan on q1 (work_mem=N kB) (limit=4096 kB) + Filter: ((y)::numeric > (InitPlan 2).col1) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 16384 kB +(12 rows) + +WITH q1(x,y) AS ( + SELECT hundred, sum(ten) FROM tenk1 GROUP BY hundred + ) +SELECT count(*) FROM q1 WHERE y > (SELECT sum(y)/100 FROM q1 qsub); + count +------- + 50 +(1 row) + +-- WindowAgg +select workmem_filter(' +explain (costs off, work_mem on) +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +limit 5; 
+'); + workmem_filter +--------------------------------------------------------------------------------------- + Limit + -> WindowAgg (work_mem=N kB) (limit=4096 kB) + -> Sort (work_mem=N kB) (limit=4096 kB) + Sort Key: ((a.n < 3)) + -> Function Scan on generate_series a (work_mem=N kB) (limit=4096 kB) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 12288 kB +(7 rows) + +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +limit 5; + sum +--------- + 2000997 + 2000997 + 2000997 + 2000997 + 2000997 +(5 rows) + +-- Bitmap Heap Scan +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4) +); +'); + workmem_filter +------------------------------------------------------------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (((a.unique1 = 1) AND (b.unique1 = 2)) OR ((a.unique2 = 3) AND (b.hundred = 4))) + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: ((hundred = 4) OR (unique1 = 2)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_hundred (work_mem=N kB) (limit=4096 kB) + Index Cond: (hundred = 4) + -> Bitmap Index Scan on tenk1_unique1 (work_mem=N kB) (limit=4096 kB) + Index Cond: (unique1 = 2) + -> Materialize (work_mem=N kB) (limit=4096 kB) + -> Bitmap Heap Scan on tenk1 a + Recheck Cond: ((unique2 = 3) OR (unique1 = 1)) + -> BitmapOr + -> Bitmap Index Scan on tenk1_unique2 (work_mem=N kB) (limit=4096 kB) + Index Cond: (unique2 = 3) + -> Bitmap Index Scan on tenk1_unique1 (work_mem=N kB) (limit=4096 kB) + Index Cond: (unique1 = 1) + Total Working Memory Estimate: N kB + Total Working Memory Limit: 20480 kB +(20 rows) + +select count(*) from ( +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4) +); + count +------- + 101 +(1 row) + +-- InitPlan with hash table ("IN SELECT") +select workmem_filter(' +explain (costs off, work_mem on) +select ''foo''::text in (select ''bar''::name union all select ''bar''::name); +'); + workmem_filter +------------------------------------------ + Result (work_mem=N kB) (limit=16384 kB) + SubPlan 1 + -> Append + -> Result + -> Result + Total Working Memory Estimate: N kB + Total Working Memory Limit: 16384 kB +(7 rows) + +select 'foo'::text in (select 'bar'::name union all select 'bar'::name); + ?column? +---------- + f +(1 row) + +-- SubPlan with hash table +select workmem_filter(' +explain (costs off, work_mem on) +select 1 = any (select (select 1) where 1 = any (select 1)); +'); + workmem_filter +---------------------------------------------------------------- + Result (work_mem=N kB) (limit=16384 kB) + SubPlan 3 + -> Result (work_mem=N kB) (limit=8192 kB) + One-Time Filter: (ANY (1 = (hashed SubPlan 2).col1)) + InitPlan 1 + -> Result + SubPlan 2 + -> Result + Total Working Memory Estimate: N kB + Total Working Memory Limit: 24576 kB +(10 rows) + +select 1 = any (select (select 1) where 1 = any (select 1)); + ?column? +---------- + t +(1 row) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 37b6d21e1f9..1089e3bdf96 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -119,7 +119,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. 
# ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate workmem # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/workmem.sql b/src/test/regress/sql/workmem.sql new file mode 100644 index 00000000000..2de22be0427 --- /dev/null +++ b/src/test/regress/sql/workmem.sql @@ -0,0 +1,307 @@ +---- +-- Tests that show "work_mem" output to EXPLAIN plans. +---- + +-- Note: Function derived from file explain.sql. We can't use that other +-- function, since we're run in parallel with explain.sql. +create or replace function workmem_filter(text) returns setof text +language plpgsql as +$$ +declare + ln text; +begin + for ln in execute $1 + loop + -- Mask out work_mem estimate, since it might be brittle + ln := regexp_replace(ln, '\mwork_mem=\d+\M', 'work_mem=N', 'g'); + ln := regexp_replace(ln, '\mMemory Estimate: \d+\M', 'Memory Estimate: N', 'g'); + return next ln; + end loop; +end; +$$; + +-- Unique -> hash agg +set enable_hashagg = on; + +select workmem_filter(' +explain (costs off, work_mem on) +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; +'); + +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; + +reset enable_hashagg; + +-- Unique -> sort +set enable_hashagg = off; + +select workmem_filter(' +explain (costs off, work_mem on) +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; +'); + +select * +from onek +where (unique1,ten) in (values (1,1), (20,0), (99,9), (17,99)) +order by unique1; + +reset enable_hashagg; + +-- Incremental Sort +select workmem_filter(' +explain (costs off, work_mem on) +select * from (select * from tenk1 order by four) t order by four, ten +limit 1; +'); + +select * from (select * from tenk1 order by four) t order by four, ten +limit 1; + +-- Hash Join +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from tenk1 t3 + where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred) + and t1.unique1 < 1 +) t; +'); + +select count(*) from ( +select t1.unique1, t2.hundred +from onek t1, tenk1 t2 +where exists (select 1 from tenk1 t3 + where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred) + and t1.unique1 < 1 +) t; + +-- Materialize +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null +) t; +'); + +select count(*) from ( +select t1.f1 +from int4_tbl t1, int4_tbl t2 + left join int4_tbl t3 on t3.f1 > 0 + left join int4_tbl t4 on t3.f1 > 1 +where t4.f1 is null +) t; + +-- Grouping Sets (Hash) +select workmem_filter(' +explain (costs off, work_mem on) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); +'); + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)); + +-- Grouping Sets (Sort) +set 
enable_hashagg = off; + +select workmem_filter(' +explain (costs off, work_mem on) +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1, 1, 1), (2, 2, 2, 2)) as t (a, b, c, d) where a = b +group by grouping sets((a, b), (a), (b), (c), (d)); +'); + +select a, b, row_number() over (order by a, b nulls first) +from (values (1, 1, 1, 1), (2, 2, 2, 2)) as t (a, b, c, d) where a = b +group by grouping sets((a, b), (a), (b), (c), (d)); + +reset enable_hashagg; + +-- Agg (hash, parallel) +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; + +select workmem_filter(' +explain (costs off, work_mem on) +select length(stringu1) from tenk1 group by length(stringu1); +'); + +select length(stringu1) from tenk1 group by length(stringu1); + +reset parallel_setup_cost; +reset parallel_tuple_cost; +reset min_parallel_table_scan_size; +reset max_parallel_workers_per_gather; + +-- Agg (simple) [no work_mem] +explain (costs off, work_mem on) +select MAX(length(stringu1)) from tenk1; + +select MAX(length(stringu1)) from tenk1; + +-- Function Scan +select workmem_filter(' +explain (work_mem on, costs off) +select count(*) from ( +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +) t; +'); + +select count(*) from ( +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +) t; + +-- Three Function Scans +select workmem_filter(' +explain (work_mem on, costs off) +select count(*) +from rows from(generate_series(1, 5), + generate_series(2, 10), + generate_series(4, 15)); +'); + +select count(*) +from rows from(generate_series(1, 5), + generate_series(2, 10), + generate_series(4, 15)); + +-- Table Function Scan +CREATE TABLE workmem_xmldata(data xml); + +select workmem_filter(' +EXPLAIN (COSTS OFF, work_mem on) +SELECT xmltable.* + FROM (SELECT data FROM workmem_xmldata) x, + LATERAL XMLTABLE(''/ROWS/ROW'' + PASSING data + COLUMNS id int PATH ''@id'', + _id FOR ORDINALITY, + country_name text PATH ''COUNTRY_NAME'' NOT NULL, + country_id text PATH ''COUNTRY_ID'', + region_id int PATH ''REGION_ID'', + size float PATH ''SIZE'', + unit text PATH ''SIZE/@unit'', + premier_name text PATH ''PREMIER_NAME'' DEFAULT ''not specified''); +'); + +SELECT xmltable.* + FROM (SELECT data FROM workmem_xmldata) x, + LATERAL XMLTABLE('/ROWS/ROW' + PASSING data + COLUMNS id int PATH '@id', + _id FOR ORDINALITY, + country_name text PATH 'COUNTRY_NAME' NOT NULL, + country_id text PATH 'COUNTRY_ID', + region_id int PATH 'REGION_ID', + size float PATH 'SIZE', + unit text PATH 'SIZE/@unit', + premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'); + +drop table workmem_xmldata; + +-- SetOp [no work_mem] +explain (costs off, work_mem on) +select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10; + +select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10; + +-- HashSetOp +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from + ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; +'); + +select count(*) from + ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss; + +-- RecursiveUnion and Memoize (also WorkTable Scan [no work_mem]) +select workmem_filter(' +explain (costs off, work_mem on) +select sum(o.four), sum(ss.a) from onek o +cross join lateral (with recursive x(a) as ( + select o.four as a union select a + 1 from x where a < 10) + 
select * from x) ss where o.ten = 1; +'); + +select sum(o.four), sum(ss.a) from onek o +cross join lateral (with recursive x(a) as ( + select o.four as a union select a + 1 from x where a < 10) + select * from x) ss where o.ten = 1; + +-- CTE Scan +select workmem_filter(' +explain (costs off, work_mem on) +WITH q1(x,y) AS ( + SELECT hundred, sum(ten) FROM tenk1 GROUP BY hundred + ) +SELECT count(*) FROM q1 WHERE y > (SELECT sum(y)/100 FROM q1 qsub); +'); + +WITH q1(x,y) AS ( + SELECT hundred, sum(ten) FROM tenk1 GROUP BY hundred + ) +SELECT count(*) FROM q1 WHERE y > (SELECT sum(y)/100 FROM q1 qsub); + +-- WindowAgg +select workmem_filter(' +explain (costs off, work_mem on) +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +limit 5; +'); + +select sum(n) over(partition by m) +from (SELECT n < 3 as m, n from generate_series(1,2000) a(n)) +limit 5; + +-- Bitmap Heap Scan +select workmem_filter(' +explain (costs off, work_mem on) +select count(*) from ( +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4) +); +'); + +select count(*) from ( +select * from tenk1 a join tenk1 b on + (a.unique1 = 1 and b.unique1 = 2) or (a.unique2 = 3 and b.hundred = 4) +); + +-- InitPlan with hash table ("IN SELECT") +select workmem_filter(' +explain (costs off, work_mem on) +select ''foo''::text in (select ''bar''::name union all select ''bar''::name); +'); + +select 'foo'::text in (select 'bar'::name union all select 'bar'::name); + +-- SubPlan with hash table +select workmem_filter(' +explain (costs off, work_mem on) +select 1 = any (select (select 1) where 1 = any (select 1)); +'); + +select 1 = any (select (select 1) where 1 = any (select 1)); -- 2.47.1
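
For reviewers who want to exercise the new option by hand, here is a minimal
sketch (illustrative only, not part of the patch; it assumes the full series
is applied and the standard regression database is loaded, so that the tenk1
table exists):

    -- In text format, per-node output appears as
    -- "(work_mem=... kB) (limit=... kB)", with query-wide totals printed
    -- after the plan:
    explain (costs off, work_mem on)
    select two, sum(ten) from tenk1 group by two;

    -- In the structured formats, the same numbers appear as the
    -- "Working Memory Estimate" and "Working Memory Limit" properties:
    explain (costs off, work_mem on, format json)
    select two, sum(ten) from tenk1 group by two;

As in the regression tests above, the estimates vary with statistics and
memory settings, which is why workmem_filter() masks them and the tests
compare only the limits.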