From 14264cc7e79482bb581dfc0a988c2e256c1fc912 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Fri, 24 May 2024 00:34:18 +0000 Subject: [PATCH v6 2/2] Mark expressions nullable by grouping sets --- src/backend/optimizer/path/pathkeys.c | 11 ++ src/backend/optimizer/plan/planner.c | 38 ++++- src/backend/optimizer/plan/setrefs.c | 21 +++ src/backend/optimizer/util/var.c | 92 ++++++++++-- src/backend/parser/parse_agg.c | 13 +- src/include/optimizer/paths.h | 1 + src/test/regress/expected/groupingsets.out | 164 +++++++++++++++++---- src/test/regress/sql/groupingsets.sql | 38 +++++ 8 files changed, 337 insertions(+), 41 deletions(-) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 8b258cbef9..bf9772ded5 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -25,6 +25,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" +#include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" /* Consider reordering of GROUP BY keys? */ @@ -1355,6 +1356,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, &sortclauses, tlist, false, + false, &sortable); /* It's caller error if not all clauses were sortable */ Assert(sortable); @@ -1372,6 +1374,9 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, * give rise to redundant pathkeys are removed from the sortclauses list * (which therefore must be pass-by-reference in this version). * + * If remove_group_rtindex is true, then we need to remove the RT index of the + * grouping step from the sort expressions before we make PathKeys for them. + * * *sortable is set to true if all the sort clauses are in fact sortable. * If any are not, they are ignored except for setting *sortable false. * (In that case, the output pathkey list isn't really useful. However, @@ -1385,6 +1390,7 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root, List **sortclauses, List *tlist, bool remove_redundant, + bool remove_group_rtindex, bool *sortable) { List *pathkeys = NIL; @@ -1403,6 +1409,11 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root, *sortable = false; continue; } + if (remove_group_rtindex) + sortkey = (Expr *) + remove_nulling_relids((Node *) sortkey, + bms_make_singleton(root->group_rtindex), + NULL); pathkey = make_pathkey_from_sortop(root, sortkey, sortcl->sortop, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 4a4a4d4114..f0ff6c1163 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -58,6 +58,7 @@ #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "partitioning/partdesc.h" +#include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/selfuncs.h" @@ -3396,9 +3397,21 @@ standard_qp_callback(PlannerInfo *root, void *extra) if (grouping_is_sortable(groupClause)) { - root->group_pathkeys = make_pathkeys_for_sortclauses(root, - groupClause, - tlist); + bool sortable; + + /* + * The groupClause is logically below the grouping step. So we + * need to remove the RT index of the grouping step from the sort + * expressions before we make PathKeys for them. + */ + root->group_pathkeys = + make_pathkeys_for_sortclauses_extended(root, + &groupClause, + tlist, + false, + true, + &sortable); + Assert(sortable); root->num_groupby_pathkeys = list_length(root->group_pathkeys); } else @@ -3424,6 +3437,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) &root->processed_groupClause, tlist, true, + false, &sortable); if (!sortable) { @@ -3474,6 +3488,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) &root->processed_distinctClause, tlist, true, + false, &sortable); if (!sortable) root->distinct_pathkeys = NIL; @@ -3500,6 +3515,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) &groupClauses, tlist, false, + false, &sortable); if (!sortable) root->setop_pathkeys = NIL; @@ -5404,7 +5420,15 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target) { /* * It's a grouping column, so add it to the input target as-is. + * + * Note that the target is logically below the grouping step. So + * we need to remove the RT index of the grouping step from the + * target expression. */ + expr = (Expr *) + remove_nulling_relids((Node *) expr, + bms_make_singleton(root->group_rtindex), + NULL); add_column_to_pathtarget(input_target, expr, sgref); } else @@ -5432,11 +5456,18 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target) * includes Vars used in resjunk items, so we are covering the needs of * ORDER BY and window specifications. Vars used within Aggrefs and * WindowFuncs will be pulled out here, too. + * + * Note that the target is logically below the grouping step. So we need + * to remove the RT index of the grouping step from the non-group Vars. */ non_group_vars = pull_var_clause((Node *) non_group_cols, PVC_RECURSE_AGGREGATES | PVC_RECURSE_WINDOWFUNCS | PVC_INCLUDE_PLACEHOLDERS); + non_group_vars = (List *) + remove_nulling_relids((Node *) non_group_vars, + bms_make_singleton(root->group_rtindex), + NULL); add_new_columns_to_pathtarget(input_target, non_group_vars); /* clean up cruft */ @@ -6085,6 +6116,7 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, &wc->partitionClause, tlist, true, + false, &sortable); Assert(sortable); diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 631d4d2c70..1f383476ac 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -26,6 +26,7 @@ #include "optimizer/subselect.h" #include "optimizer/tlist.h" #include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" #include "tcop/utility.h" #include "utils/syscache.h" @@ -2426,6 +2427,26 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) subplan_itlist = build_tlist_index(subplan->targetlist); + /* + * If it's an Agg node, any Vars and PHVs appearing in the targetlist and + * quals should have nullingrels that include the effects of the grouping + * step, ie they will have nullingrels equal to the input Vars/PHVs' + * nullingrels plus the RT index of the grouping step. In order to perform + * exact nullingrels matches, we remove the RT index of the grouping step + * first. + */ + if (IsA(plan, Agg)) + { + plan->targetlist = (List *) + remove_nulling_relids((Node *) plan->targetlist, + bms_make_singleton(root->group_rtindex), + NULL); + plan->qual = (List *) + remove_nulling_relids((Node *) plan->qual, + bms_make_singleton(root->group_rtindex), + NULL); + } + output_targetlist = NIL; foreach(l, plan->targetlist) { diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 9e93370e6c..56aa9b6dc6 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -22,6 +22,7 @@ #include "access/sysattr.h" #include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" #include "optimizer/optimizer.h" #include "optimizer/placeholder.h" #include "optimizer/prep.h" @@ -83,6 +84,8 @@ static Node *flatten_join_alias_vars_mutator(Node *node, flatten_join_alias_vars_context *context); static Node *flatten_group_exprs_mutator(Node *node, flatten_join_alias_vars_context *context); +static Node *mark_nullable_by_grouping(PlannerInfo *root, Node *newnode, + Var *oldvar); static Node *add_nullingrels_if_needed(PlannerInfo *root, Node *newnode, Var *oldvar); static bool is_standard_join_alias_expression(Node *newnode, Var *oldvar); @@ -909,8 +912,17 @@ flatten_join_alias_vars_mutator(Node *node, * Replace Vars that reference GROUP outputs with the underlying grouping * expressions. * - * TODO we need to preserve any varnullingrels info attached to the group Vars - * we're replacing. + * We have to preserve any varnullingrels info attached to the group Vars we're + * replacing. If the replacement expression is a Var or PlaceHolderVar or + * constructed from those, we can just add the varnullingrels bits to the + * existing nullingrels field(s); otherwise we have to add a PlaceHolderVar + * wrapper. + * + * NOTE: this is also used by ruleutils.c, to deparse one query parsetree back + * to source text. For that use-case, root will be NULL, which is why we have + * to pass the Query separately. We need the root itself only for preserving + * varnullingrels. We can avoid preserving varnullingrels in the ruleutils.c's + * usage because it does not make any difference to the deparsed source text. */ Node * flatten_group_exprs(PlannerInfo *root, Query *query, Node *node) @@ -975,14 +987,8 @@ flatten_group_exprs_mutator(Node *node, if (context->possible_sublink && !context->inserted_sublink) context->inserted_sublink = checkExprHasSubLink(newvar); - /* - * TODO var->varnullingrels might have the nullingrel bit that - * references RTE_GROUP. We're supposed to add it to the replacement - * expression. - * - * Maybe we can do something like add_nullingrels_if_needed(). - */ - return newvar; + /* Lastly, add any varnullingrels to the replacement expression */ + return mark_nullable_by_grouping(context->root, newvar, var); } if (IsA(node, Aggref)) @@ -1049,6 +1055,72 @@ flatten_group_exprs_mutator(Node *node, (void *) context); } +/* + * Add oldvar's varnullingrels, if any, to a flattened grouping expression. + * The newnode has been copied, so we can modify it freely. + */ +static Node * +mark_nullable_by_grouping(PlannerInfo *root, Node *newnode, Var *oldvar) +{ + Relids relids; + + if (root == NULL) + return newnode; + if (oldvar->varnullingrels == NULL) + return newnode; /* nothing to do */ + + Assert(bms_equal(oldvar->varnullingrels, + bms_make_singleton(root->group_rtindex))); + + relids = pull_varnos_of_level(root, newnode, oldvar->varlevelsup); + + if (!bms_is_empty(relids)) + { + /* + * If the newnode is not variable-free, we set the nullingrels of Vars + * or PHVs that are contained in the expression. This is not really + * 'correct' in theory, because it is the whole expression that can be + * nullable by grouping sets, not its individual vars. But it works in + * practice, because what we need is that the expression can be somehow + * distinguished from the same expression in ECs, and marking its vars + * is sufficient for this purpose. + */ + newnode = add_nulling_relids(newnode, + relids, + oldvar->varnullingrels); + } + else /* variable-free? */ + { + /* + * If the newnode is variable-free and does not contain volatile + * functions, set-returning functions, aggregates, or window functions, + * it is possible that it is treated as a member of EC that is + * redundant. So we wrap it in a new PlaceHolderVar to carry the + * nullingrels. Otherwise we do not bother to make any changes. + */ + if (!contain_volatile_functions(newnode) && + !expression_returns_set(newnode) && + !contain_agg_clause(newnode) && + !contain_window_function(newnode)) + { + PlaceHolderVar *newphv; + Relids phrels; + + phrels = get_relids_in_jointree((Node *) root->parse->jointree, + true, false); + Assert(!bms_is_empty(phrels)); + + newphv = make_placeholder_expr(root, (Expr *) newnode, phrels); + /* newphv has zero phlevelsup and NULL phnullingrels; fix it */ + newphv->phlevelsup = oldvar->varlevelsup; + newphv->phnullingrels = bms_copy(oldvar->varnullingrels); + newnode = (Node *) newphv; + } + } + + return newnode; +} + /* * Add oldvar's varnullingrels, if any, to a flattened join alias expression. * The newnode has been copied, so we can modify it freely. diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 68858e6d7b..3adcfd2f25 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -1326,9 +1326,6 @@ substitute_grouped_columns_mutator(Node *node, if (node == NULL) return NULL; - if (IsA(node, Const) || - IsA(node, Param)) - return node; /* constants are always acceptable */ if (IsA(node, Aggref)) { @@ -1401,6 +1398,16 @@ substitute_grouped_columns_mutator(Node *node, } } + /* + * Constants are always acceptable. We have to do this after we checked + * the subexpression as a whole for a match, because it is possible that we + * have GROUP BY items that are constants, and the constants would become + * not so constant after the grouping step. + */ + if (IsA(node, Const) || + IsA(node, Param)) + return node; + /* * If we have an ungrouped Var of the original query level, we have a * failure. Vars below the original query level are not a problem, and diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 914d9bdef5..32e290d9c4 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -239,6 +239,7 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root, List **sortclauses, List *tlist, bool remove_redundant, + bool remove_group_rtindex, bool *sortable); extern void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo); diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 9c7590e7ba..bf4e19dbe0 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -442,19 +442,22 @@ select * from ( group by grouping sets(1, 2) ) ss where x = 1 and q1 = 123; - QUERY PLAN --------------------------------------------- + QUERY PLAN +-------------------------------------------------- Subquery Scan on ss Output: ss.x, ss.q1, ss.sum Filter: ((ss.x = 1) AND (ss.q1 = 123)) -> GroupAggregate Output: (1), i1.q1, sum(i1.q2) - Group Key: 1 + Group Key: (1) Sort Key: i1.q1 Group Key: i1.q1 - -> Seq Scan on public.int8_tbl i1 - Output: 1, i1.q1, i1.q2 -(10 rows) + -> Sort + Output: (1), i1.q1, i1.q2 + Sort Key: (1) + -> Seq Scan on public.int8_tbl i1 + Output: 1, i1.q1, i1.q2 +(13 rows) select * from ( select 1 as x, q1, sum(q2) @@ -736,15 +739,18 @@ select a, b, sum(v.x) -- Test reordering of grouping sets explain (costs off) select * from gstest1 group by grouping sets((a,b,v),(v)) order by v,b,a; - QUERY PLAN ------------------------------------------------------------------------------- - GroupAggregate - Group Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 - Group Key: "*VALUES*".column3 - -> Sort - Sort Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 - -> Values Scan on "*VALUES*" -(6 rows) + QUERY PLAN +------------------------------------------------------------------------------------ + Incremental Sort + Sort Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 + Presorted Key: "*VALUES*".column3 + -> GroupAggregate + Group Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 + Group Key: "*VALUES*".column3 + -> Sort + Sort Key: "*VALUES*".column3, "*VALUES*".column2, "*VALUES*".column1 + -> Values Scan on "*VALUES*" +(9 rows) -- Agg level check. This query should error out. select (select grouping(a,b) from gstest2) from gstest2 group by a,b; @@ -816,16 +822,18 @@ select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 or explain (costs off) select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; - QUERY PLAN ----------------------------------- - GroupAggregate - Group Key: a - Group Key: () - Filter: (a IS DISTINCT FROM 1) - -> Sort - Sort Key: a - -> Seq Scan on gstest2 -(7 rows) + QUERY PLAN +---------------------------------------- + Sort + Sort Key: a + -> GroupAggregate + Group Key: a + Group Key: () + Filter: (a IS DISTINCT FROM 1) + -> Sort + Sort Key: a + -> Seq Scan on gstest2 +(9 rows) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; @@ -2199,4 +2207,110 @@ order by case when grouping((select t1.v from gstest5 t2 where id = t1.id)) = 0 0 | 5 (10 rows) +-- test handling of expressions nullable by grouping sets +explain (costs off) +select distinct on (a, b) a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b; + QUERY PLAN +---------------------------------------------------------------- + Unique + -> Sort + Sort Key: "*VALUES*".column1, "*VALUES*".column2 + -> HashAggregate + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = column2) +(8 rows) + +select distinct on (a, b) a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b; + a | b +---+--- + 1 | 1 + 1 | + 2 | 2 + 2 | +(4 rows) + +explain (costs off) +select distinct on (a, b+1) a, b+1 +from (values (1, 0), (2, 1)) as t (a, b) where a = b+1 +group by grouping sets((a, b+1), (a)) +order by a, b+1; + QUERY PLAN +---------------------------------------------------------------------- + Unique + -> Sort + Sort Key: "*VALUES*".column1, (("*VALUES*".column2 + 1)) + -> HashAggregate + Hash Key: "*VALUES*".column1, ("*VALUES*".column2 + 1) + Hash Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = (column2 + 1)) +(8 rows) + +select distinct on (a, b+1) a, b+1 +from (values (1, 0), (2, 1)) as t (a, b) where a = b+1 +group by grouping sets((a, b+1), (a)) +order by a, b+1; + a | ?column? +---+---------- + 1 | 1 + 1 | + 2 | 2 + 2 | +(4 rows) + +explain (costs off) +select a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b nulls first; + QUERY PLAN +---------------------------------------------------------------- + Sort + Sort Key: "*VALUES*".column1, "*VALUES*".column2 NULLS FIRST + -> HashAggregate + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: "*VALUES*".column1 + -> Values Scan on "*VALUES*" + Filter: (column1 = column2) +(7 rows) + +select a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b nulls first; + a | b +---+--- + 1 | + 1 | 1 + 2 | + 2 | 2 +(4 rows) + +explain (costs off) +select 1 as one group by rollup(one) order by one nulls first; + QUERY PLAN +----------------------------- + Sort + Sort Key: (1) NULLS FIRST + -> MixedAggregate + Hash Key: 1 + Group Key: () + -> Result +(6 rows) + +select 1 as one group by rollup(one) order by one nulls first; + one +----- + + 1 +(2 rows) + -- end diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index 0520e44aeb..0c8636cd31 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -612,4 +612,42 @@ order by case when grouping((select t1.v from gstest5 t2 where id = t1.id)) = 0 else null end nulls first; +-- test handling of expressions nullable by grouping sets +explain (costs off) +select distinct on (a, b) a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b; + +select distinct on (a, b) a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b; + +explain (costs off) +select distinct on (a, b+1) a, b+1 +from (values (1, 0), (2, 1)) as t (a, b) where a = b+1 +group by grouping sets((a, b+1), (a)) +order by a, b+1; + +select distinct on (a, b+1) a, b+1 +from (values (1, 0), (2, 1)) as t (a, b) where a = b+1 +group by grouping sets((a, b+1), (a)) +order by a, b+1; + +explain (costs off) +select a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b nulls first; + +select a, b +from (values (1, 1), (2, 2)) as t (a, b) where a = b +group by grouping sets((a, b), (a)) +order by a, b nulls first; + +explain (costs off) +select 1 as one group by rollup(one) order by one nulls first; +select 1 as one group by rollup(one) order by one nulls first; + -- end -- 2.34.1