Re: Removing redundant grouping columns - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Removing redundant grouping columns |
Date | |
Msg-id | 895775.1672434126@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Removing redundant grouping columns (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: Removing redundant grouping columns
|
List | pgsql-hackers |
I wrote: > Richard Guo <guofenglinux@gmail.com> writes: >> While we are here, I wonder if we can do the same trick for >> distinctClause, to cope with cases like >> select distinct a.x, b.y from a, b where a.x = b.y; > We do that already, no? Oh, wait, I see what you mean: we are smart in code paths that rely on distinct_pathkeys, but not in the hash-based code paths. Right, that can be fixed the same way. 0001 attached is the same as before, 0002 adds similar logic for the distinctClause. The plan change in expected/pg_trgm.out is surprising at first glance, but I believe it's correct: the item that is being dropped is a parameterless STABLE function, so its value is not supposed to change for the duration of the scan. regards, tom lane diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 9524765650..450f724c49 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -3667,6 +3667,13 @@ appendGroupByClause(List *tlist, deparse_expr_cxt *context) */ Assert(!query->groupingSets); + /* + * We intentionally print query->groupClause not processed_groupClause, + * leaving it to the remote planner to get rid of any redundant GROUP BY + * items again. This is necessary in case processed_groupClause reduced + * to empty, and in any case the redundancy situation on the remote might + * be different than what we think here. + */ foreach(lc, query->groupClause) { SortGroupClause *grp = (SortGroupClause *) lfirst(lc); diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 1a2c2a665c..befb2fd4d3 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2864,16 +2864,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i -- GROUP BY clause in various forms, cardinal, alias and constant expression explain (verbose, costs off) select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2; - QUERY PLAN ---------------------------------------------------------------------------------------- - Sort + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Foreign Scan Output: (count(c2)), c2, 5, 7.0, 9 - Sort Key: ft1.c2 - -> Foreign Scan - Output: (count(c2)), c2, 5, 7.0, 9 - Relations: Aggregate on (public.ft1) - Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 -(7 rows) + Relations: Aggregate on (public.ft1) + Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST +(4 rows) select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2; w | x | y | z @@ -2990,14 +2987,13 @@ select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1; QUERY PLAN --------------------------------------------------- GroupAggregate - Output: ($0), sum(ft1.c1) - Group Key: $0 + Output: $0, sum(ft1.c1) InitPlan 1 (returns $0) -> Seq Scan on pg_catalog.pg_enum -> Foreign Scan on public.ft1 - Output: $0, ft1.c1 + Output: ft1.c1 Remote SQL: SELECT "C 1" FROM "S 1"."T 1" -(8 rows) +(7 rows) select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1; exists | sum @@ -3382,14 +3378,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 -------------------------------------------------------------------------------------------------- GroupAggregate Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2 - Group Key: ft2.c2 -> Sort - Output: c2, c1 + Output: c1, c2 Sort Key: ft2.c1 USING <^ -> Foreign Scan on public.ft2 - Output: c2, c1 + Output: c1, c2 Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) -(9 rows) +(8 rows) -- This should not be pushed either. explain (verbose, costs off) @@ -3458,14 +3453,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 -------------------------------------------------------------------------------------------------- GroupAggregate Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2 - Group Key: ft2.c2 -> Sort - Output: c2, c1 + Output: c1, c2 Sort Key: ft2.c1 USING <^ -> Foreign Scan on public.ft2 - Output: c2, c1 + Output: c1, c2 Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) -(9 rows) +(8 rows) -- Cleanup drop operator class my_op_class using btree; diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index b9268e32dd..722ebc932b 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -3345,9 +3345,9 @@ estimate_path_cost_size(PlannerInfo *root, } /* Get number of grouping columns and possible number of groups */ - numGroupCols = list_length(root->parse->groupClause); + numGroupCols = list_length(root->processed_groupClause); numGroups = estimate_num_groups(root, - get_sortgrouplist_exprs(root->parse->groupClause, + get_sortgrouplist_exprs(root->processed_groupClause, fpinfo->grouped_tlist), input_rows, NULL, NULL); @@ -3636,7 +3636,7 @@ adjust_foreign_grouping_path_cost(PlannerInfo *root, * pathkeys, adjust the costs with that function. Otherwise, adjust the * costs by applying the same heuristic as for the scan or join case. */ - if (!grouping_is_sortable(root->parse->groupClause) || + if (!grouping_is_sortable(root->processed_groupClause) || !pathkeys_contained_in(pathkeys, root->group_pathkeys)) { Path sort_path; /* dummy for result of cost_sort */ @@ -6202,7 +6202,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel, Index sgref = get_pathtarget_sortgroupref(grouping_target, i); ListCell *l; - /* Check whether this expression is part of GROUP BY clause */ + /* + * Check whether this expression is part of GROUP BY clause. Note we + * check the whole GROUP BY clause not just processed_groupClause, + * because we will ship all of it, cf. appendGroupByClause. + */ if (sgref && get_sortgroupref_clause_noerr(sgref, query->groupClause)) { TargetEntry *tle; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index f15bb83a1a..04521bc3d1 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -2476,7 +2476,7 @@ agg_retrieve_direct(AggState *aggstate) * If we are grouping, check whether we've crossed a group * boundary. */ - if (node->aggstrategy != AGG_PLAIN) + if (node->aggstrategy != AGG_PLAIN && node->numCols > 0) { tmpcontext->ecxt_innertuple = firstSlot; if (!ExecQual(aggstate->phase->eqfunctions[node->numCols - 1], @@ -3480,8 +3480,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) */ if (aggnode->aggstrategy == AGG_SORTED) { - Assert(aggnode->numCols > 0); - /* * Build a separate function for each subset of columns that * need to be compared. @@ -3507,7 +3505,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) } /* and for all grouped columns, unless already computed */ - if (phasedata->eqfunctions[aggnode->numCols - 1] == NULL) + if (aggnode->numCols > 0 && + phasedata->eqfunctions[aggnode->numCols - 1] == NULL) { phasedata->eqfunctions[aggnode->numCols - 1] = execTuplesMatchPrepare(scanDesc, diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index a9943cd6e0..bb11dbec7b 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -1152,18 +1152,62 @@ List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist) +{ + List *result; + bool sortable; + + result = make_pathkeys_for_sortclauses_extended(root, + &sortclauses, + tlist, + false, + &sortable); + /* It's caller error if not all clauses were sortable */ + Assert(sortable); + return result; +} + +/* + * make_pathkeys_for_sortclauses_extended + * Generate a pathkeys list that represents the sort order specified + * by a list of SortGroupClauses + * + * The comments for make_pathkeys_for_sortclauses apply here too. In addition: + * + * If remove_redundant is true, then any sort clauses that are found to + * give rise to redundant pathkeys are removed from the sortclauses list + * (which therefore must be pass-by-reference in this version). + * + * *sortable is set to true if all the sort clauses are in fact sortable. + * If any are not, they are ignored except for setting *sortable false. + * (In that case, the output pathkey list isn't really useful. However, + * we process the whole sortclauses list anyway, because it's still valid + * to remove any clauses that can be proven redundant via the eclass logic. + * Even though we'll have to hash in that case, we might as well not hash + * redundant columns.) + */ +List * +make_pathkeys_for_sortclauses_extended(PlannerInfo *root, + List **sortclauses, + List *tlist, + bool remove_redundant, + bool *sortable) { List *pathkeys = NIL; ListCell *l; - foreach(l, sortclauses) + *sortable = true; + foreach(l, *sortclauses) { SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); Expr *sortkey; PathKey *pathkey; sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); - Assert(OidIsValid(sortcl->sortop)); + if (!OidIsValid(sortcl->sortop)) + { + *sortable = false; + continue; + } pathkey = make_pathkey_from_sortop(root, sortkey, root->nullable_baserels, @@ -1175,6 +1219,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, /* Canonical form eliminates redundant ordering keys */ if (!pathkey_is_redundant(pathkey, pathkeys)) pathkeys = lappend(pathkeys, pathkey); + else if (remove_redundant) + *sortclauses = foreach_delete_current(*sortclauses, l); } return pathkeys; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 66139928e8..a4c6062b41 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -2404,7 +2404,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) * sizing. */ maxref = 0; - foreach(lc, root->parse->groupClause) + foreach(lc, root->processed_groupClause) { SortGroupClause *gc = (SortGroupClause *) lfirst(lc); @@ -2415,7 +2415,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) grouping_map = (AttrNumber *) palloc0((maxref + 1) * sizeof(AttrNumber)); /* Now look up the column numbers in the child's tlist */ - foreach(lc, root->parse->groupClause) + foreach(lc, root->processed_groupClause) { SortGroupClause *gc = (SortGroupClause *) lfirst(lc); TargetEntry *tle = get_sortgroupclause_tle(gc, subplan->targetlist); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 8a41e1e6d3..bcc10942cc 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -95,17 +95,9 @@ create_upper_paths_hook_type create_upper_paths_hook = NULL; #define EXPRKIND_TABLEFUNC 11 #define EXPRKIND_TABLEFUNC_LATERAL 12 -/* Passthrough data for standard_qp_callback */ -typedef struct -{ - List *activeWindows; /* active windows, if any */ - List *groupClause; /* overrides parse->groupClause */ -} standard_qp_extra; - /* * Data specific to grouping sets */ - typedef struct { List *rollups; @@ -129,6 +121,13 @@ typedef struct * clauses per Window */ } WindowClauseSortData; +/* Passthrough data for standard_qp_callback */ +typedef struct +{ + List *activeWindows; /* active windows, if any */ + grouping_sets_data *gset_data; /* grouping sets data, if any */ +} standard_qp_extra; + /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); @@ -636,6 +635,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->rowMarks = NIL; memset(root->upper_rels, 0, sizeof(root->upper_rels)); memset(root->upper_targets, 0, sizeof(root->upper_targets)); + root->processed_groupClause = NIL; root->processed_tlist = NIL; root->update_colnos = NIL; root->grouping_map = NULL; @@ -1032,9 +1032,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse, } parse->havingQual = (Node *) newHaving; - /* Remove any redundant GROUP BY columns */ - remove_useless_groupby_columns(root); - /* * If we have any outer joins, try to reduce them to plain inner joins. * This step is most easily done after we've done expression @@ -1393,11 +1390,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) { gset_data = preprocess_grouping_sets(root); } - else + else if (parse->groupClause) { /* Preprocess regular GROUP BY clause, if any */ - if (parse->groupClause) - parse->groupClause = preprocess_groupclause(root, NIL); + root->processed_groupClause = preprocess_groupclause(root, NIL); + /* Remove any redundant GROUP BY columns */ + remove_useless_groupby_columns(root); } /* @@ -1475,9 +1473,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Set up data needed by standard_qp_callback */ qp_extra.activeWindows = activeWindows; - qp_extra.groupClause = (gset_data - ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL) - : parse->groupClause); + qp_extra.gset_data = gset_data; /* * Generate the best unsorted and presorted paths for the scan/join @@ -2011,6 +2007,12 @@ preprocess_grouping_sets(PlannerInfo *root) gd->unsortable_refs = NULL; gd->unsortable_sets = NIL; + /* + * We don't currently make any attempt to optimize the groupClause when + * there are grouping sets, so just duplicate it in processed_groupClause. + */ + root->processed_groupClause = parse->groupClause; + if (parse->groupClause) { ListCell *lc; @@ -2638,7 +2640,7 @@ remove_useless_groupby_columns(PlannerInfo *root) int relid; /* No chance to do anything if there are less than two GROUP BY items */ - if (list_length(parse->groupClause) < 2) + if (list_length(root->processed_groupClause) < 2) return; /* Don't fiddle with the GROUP BY clause if the query has grouping sets */ @@ -2652,7 +2654,7 @@ remove_useless_groupby_columns(PlannerInfo *root) */ groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) * (list_length(parse->rtable) + 1)); - foreach(lc, parse->groupClause) + foreach(lc, root->processed_groupClause) { SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList); @@ -2747,7 +2749,7 @@ remove_useless_groupby_columns(PlannerInfo *root) { List *new_groupby = NIL; - foreach(lc, parse->groupClause) + foreach(lc, root->processed_groupClause) { SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList); @@ -2764,7 +2766,7 @@ remove_useless_groupby_columns(PlannerInfo *root) new_groupby = lappend(new_groupby, sgc); } - parse->groupClause = new_groupby; + root->processed_groupClause = new_groupby; } } @@ -2784,6 +2786,10 @@ remove_useless_groupby_columns(PlannerInfo *root) * Note: we need no comparable processing of the distinctClause because * the parser already enforced that that matches ORDER BY. * + * Note: we return a fresh List, but its elements are the same + * SortGroupClauses appearing in parse->groupClause. This is important + * because later processing may modify the processed_groupClause list. + * * For grouping sets, the order of items is instead forced to agree with that * of the grouping set (and items not in the grouping set are skipped). The * work of sorting the order of grouping set elements to match the ORDER BY if @@ -2814,7 +2820,7 @@ preprocess_groupclause(PlannerInfo *root, List *force) /* If no ORDER BY, nothing useful to do here */ if (parse->sortClause == NIL) - return parse->groupClause; + return list_copy(parse->groupClause); /* * Scan the ORDER BY clause and construct a list of matching GROUP BY @@ -2845,7 +2851,7 @@ preprocess_groupclause(PlannerInfo *root, List *force) /* If no match at all, no point in reordering GROUP BY */ if (new_groupclause == NIL) - return parse->groupClause; + return list_copy(parse->groupClause); /* * Add any remaining GROUP BY items to the new list, but only if we were @@ -2861,10 +2867,10 @@ preprocess_groupclause(PlannerInfo *root, List *force) if (list_member_ptr(new_groupclause, gc)) continue; /* it matched an ORDER BY item */ - if (partial_match) - return parse->groupClause; /* give up, no common sort possible */ - if (!OidIsValid(gc->sortop)) - return parse->groupClause; /* give up, GROUP BY can't be sorted */ + if (partial_match) /* give up, no common sort possible */ + return list_copy(parse->groupClause); + if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */ + return list_copy(parse->groupClause); new_groupclause = lappend(new_groupclause, gc); } @@ -3148,64 +3154,39 @@ reorder_grouping_sets(List *groupingSets, List *sortclause) } /* - * make_pathkeys_for_groupagg - * Determine the pathkeys for the GROUP BY clause and/or any ordered - * aggregates. We expect at least one of these here. + * adjust_group_pathkeys_for_groupagg + * Add pathkeys to root->group_pathkeys to reflect the best set of + * pre-ordered input for ordered aggregates. * - * Building the pathkeys for the GROUP BY is simple. Most of the complexity - * involved here comes from calculating the best pathkeys for ordered - * aggregates. We define "best" as the pathkeys that suit the most number of + * We define "best" as the pathkeys that suit the largest number of * aggregate functions. We find these by looking at the first ORDER BY / * DISTINCT aggregate and take the pathkeys for that before searching for * other aggregates that require the same or a more strict variation of the * same pathkeys. We then repeat that process for any remaining aggregates * with different pathkeys and if we find another set of pathkeys that suits a - * larger number of aggregates then we return those pathkeys instead. - * - * *number_groupby_pathkeys gets set to the number of elements in the returned - * list that belong to the GROUP BY clause. Any elements above this number - * must belong to ORDER BY / DISTINCT aggregates. + * larger number of aggregates then we select those pathkeys instead. * * When the best pathkeys are found we also mark each Aggref that can use * those pathkeys as aggpresorted = true. */ -static List * -make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist, - int *number_groupby_pathkeys) +static void +adjust_group_pathkeys_for_groupagg(PlannerInfo *root) { - List *grouppathkeys = NIL; + List *grouppathkeys = root->group_pathkeys; List *bestpathkeys; Bitmapset *bestaggs; Bitmapset *unprocessed_aggs; ListCell *lc; int i; - Assert(groupClause != NIL || root->numOrderedAggs > 0); - - if (groupClause != NIL) - { - /* no pathkeys possible if there's an unsortable GROUP BY */ - if (!grouping_is_sortable(groupClause)) - { - *number_groupby_pathkeys = 0; - return NIL; - } - - grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause, - tlist); - *number_groupby_pathkeys = list_length(grouppathkeys); - } - else - *number_groupby_pathkeys = 0; + /* Shouldn't be here if there are grouping sets */ + Assert(root->parse->groupingSets == NIL); + /* Shouldn't be here unless there are some ordered aggregates */ + Assert(root->numOrderedAggs > 0); - /* - * We can't add pathkeys for ordered aggregates if there are any grouping - * sets. All handling specific to ordered aggregates must be done by the - * executor in that case. - */ - if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL || - !enable_presorted_aggregate) - return grouppathkeys; + /* Do nothing if disabled */ + if (!enable_presorted_aggregate) + return; /* * Make a first pass over all AggInfos to collect a Bitmapset containing @@ -3327,6 +3308,14 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist, } } + /* + * If we found any ordered aggregates, update root->group_pathkeys to add + * the best set of aggregate pathkeys. Note that bestpathkeys includes + * the original GROUP BY pathkeys already. + */ + if (bestpathkeys != NIL) + root->group_pathkeys = bestpathkeys; + /* * Now that we've found the best set of aggregates we can set the * presorted flag to indicate to the executor that it needn't bother @@ -3347,16 +3336,6 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist, aggref->aggpresorted = true; } } - - /* - * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered - * aggregates, then return bestpathkeys, otherwise return the - * grouppathkeys. - */ - if (bestpathkeys != NIL) - return bestpathkeys; - - return grouppathkeys; } /* @@ -3374,11 +3353,62 @@ standard_qp_callback(PlannerInfo *root, void *extra) * Calculate pathkeys that represent grouping/ordering and/or ordered * aggregate requirements. */ - if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0) - root->group_pathkeys = make_pathkeys_for_groupagg(root, - qp_extra->groupClause, - tlist, - &root->num_groupby_pathkeys); + if (qp_extra->gset_data) + { + /* + * With grouping sets, just use the first RollupData's groupClause. We + * don't make any effort to optimize grouping clauses when there are + * grouping sets, nor can we combine aggregate ordering keys with + * grouping. + */ + List *rollups = qp_extra->gset_data->rollups; + List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL); + + if (grouping_is_sortable(groupClause)) + { + root->group_pathkeys = make_pathkeys_for_sortclauses(root, + groupClause, + tlist); + root->num_groupby_pathkeys = list_length(root->group_pathkeys); + } + else + { + root->group_pathkeys = NIL; + root->num_groupby_pathkeys = 0; + } + } + else if (parse->groupClause || root->numOrderedAggs > 0) + { + /* + * With a plain GROUP BY list, we can remove any grouping items that + * are proven redundant by EquivalenceClass processing. For example, + * we can remove y given "WHERE x = y GROUP BY x, y". These aren't + * especially common cases, but they're nearly free to detect. Note + * that we remove redundant items from processed_groupClause but not + * the original parse->groupClause. + */ + bool sortable; + + root->group_pathkeys = + make_pathkeys_for_sortclauses_extended(root, + &root->processed_groupClause, + tlist, + true, + &sortable); + if (!sortable) + { + /* Can't sort; no point in considering aggregate ordering either */ + root->group_pathkeys = NIL; + root->num_groupby_pathkeys = 0; + } + else + { + root->num_groupby_pathkeys = list_length(root->group_pathkeys); + /* If we have ordered aggs, consider adding onto group_pathkeys */ + if (root->numOrderedAggs > 0) + adjust_group_pathkeys_for_groupagg(root); + } + } else { root->group_pathkeys = NIL; @@ -3531,8 +3561,8 @@ get_number_of_groups(PlannerInfo *root, } else { - /* Plain GROUP BY */ - groupExprs = get_sortgrouplist_exprs(parse->groupClause, + /* Plain GROUP BY -- estimate based on optimized groupClause */ + groupExprs = get_sortgrouplist_exprs(root->processed_groupClause, target_list); dNumGroups = estimate_num_groups(root, groupExprs, path_rows, @@ -3610,8 +3640,8 @@ create_grouping_paths(PlannerInfo *root, /* * Determine whether it's possible to perform sort-based - * implementations of grouping. (Note that if groupClause is empty, - * grouping_is_sortable() is trivially true, and all the + * implementations of grouping. (Note that if processed_groupClause + * is empty, grouping_is_sortable() is trivially true, and all the * pathkeys_contained_in() tests will succeed too, so that we'll * consider every surviving input path.) * @@ -3620,7 +3650,7 @@ create_grouping_paths(PlannerInfo *root, * must consider any sorted-input plan. */ if ((gd && gd->rollups != NIL) - || grouping_is_sortable(parse->groupClause)) + || grouping_is_sortable(root->processed_groupClause)) flags |= GROUPING_CAN_USE_SORT; /* @@ -3645,7 +3675,7 @@ create_grouping_paths(PlannerInfo *root, */ if ((parse->groupClause != NIL && root->numOrderedAggs == 0 && - (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)))) + (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause)))) flags |= GROUPING_CAN_USE_HASH; /* @@ -3856,6 +3886,9 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * partial partitionwise aggregation. But if partial aggregation is * not supported in general then we can't use it for partitionwise * aggregation either. + * + * Check parse->groupClause not processed_groupClause, because it's + * okay if some of the partitioning columns were proved redundant. */ if (extra->patype == PARTITIONWISE_AGGREGATE_FULL && group_by_has_partkey(input_rel, extra->targetList, @@ -5214,8 +5247,9 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target) Expr *expr = (Expr *) lfirst(lc); Index sgref = get_pathtarget_sortgroupref(final_target, i); - if (sgref && parse->groupClause && - get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL) + if (sgref && root->processed_groupClause && + get_sortgroupref_clause_noerr(sgref, + root->processed_groupClause) != NULL) { /* * It's a grouping column, so add it to the input target as-is. @@ -5283,7 +5317,6 @@ make_partial_grouping_target(PlannerInfo *root, PathTarget *grouping_target, Node *havingQual) { - Query *parse = root->parse; PathTarget *partial_target; List *non_group_cols; List *non_group_exprs; @@ -5299,8 +5332,9 @@ make_partial_grouping_target(PlannerInfo *root, Expr *expr = (Expr *) lfirst(lc); Index sgref = get_pathtarget_sortgroupref(grouping_target, i); - if (sgref && parse->groupClause && - get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL) + if (sgref && root->processed_groupClause && + get_sortgroupref_clause_noerr(sgref, + root->processed_groupClause) != NULL) { /* * It's a grouping column, so add it to the partial_target as-is. @@ -5747,7 +5781,6 @@ make_window_input_target(PlannerInfo *root, PathTarget *final_target, List *activeWindows) { - Query *parse = root->parse; PathTarget *input_target; Bitmapset *sgrefs; List *flattenable_cols; @@ -5755,7 +5788,7 @@ make_window_input_target(PlannerInfo *root, int i; ListCell *lc; - Assert(parse->hasWindowFuncs); + Assert(root->parse->hasWindowFuncs); /* * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses @@ -5782,7 +5815,7 @@ make_window_input_target(PlannerInfo *root, } /* Add in sortgroupref numbers of GROUP BY clauses, too */ - foreach(lc, parse->groupClause) + foreach(lc, root->processed_groupClause) { SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc); @@ -6701,7 +6734,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel->reltarget, parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_SIMPLE, - parse->groupClause, + root->processed_groupClause, havingQual, agg_costs, dNumGroups)); @@ -6716,7 +6749,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, create_group_path(root, grouped_rel, path, - parse->groupClause, + root->processed_groupClause, havingQual, dNumGroups)); } @@ -6785,7 +6818,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel->reltarget, parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_FINAL_DESERIAL, - parse->groupClause, + root->processed_groupClause, havingQual, agg_final_costs, dNumGroups)); @@ -6794,7 +6827,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, create_group_path(root, grouped_rel, path, - parse->groupClause, + root->processed_groupClause, havingQual, dNumGroups)); @@ -6825,7 +6858,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel->reltarget, AGG_HASHED, AGGSPLIT_SIMPLE, - parse->groupClause, + root->processed_groupClause, havingQual, agg_costs, dNumGroups)); @@ -6846,7 +6879,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel->reltarget, AGG_HASHED, AGGSPLIT_FINAL_DESERIAL, - parse->groupClause, + root->processed_groupClause, havingQual, agg_final_costs, dNumGroups)); @@ -7048,7 +7081,7 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel->reltarget, parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_INITIAL_SERIAL, - parse->groupClause, + root->processed_groupClause, NIL, agg_partial_costs, dNumPartialGroups)); @@ -7057,7 +7090,7 @@ create_partial_grouping_paths(PlannerInfo *root, create_group_path(root, partially_grouped_rel, path, - parse->groupClause, + root->processed_groupClause, NIL, dNumPartialGroups)); } @@ -7117,7 +7150,7 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel->reltarget, parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_INITIAL_SERIAL, - parse->groupClause, + root->processed_groupClause, NIL, agg_partial_costs, dNumPartialPartialGroups)); @@ -7126,7 +7159,7 @@ create_partial_grouping_paths(PlannerInfo *root, create_group_path(root, partially_grouped_rel, path, - parse->groupClause, + root->processed_groupClause, NIL, dNumPartialPartialGroups)); } @@ -7147,7 +7180,7 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel->reltarget, AGG_HASHED, AGGSPLIT_INITIAL_SERIAL, - parse->groupClause, + root->processed_groupClause, NIL, agg_partial_costs, dNumPartialGroups)); @@ -7165,7 +7198,7 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel->reltarget, AGG_HASHED, AGGSPLIT_INITIAL_SERIAL, - parse->groupClause, + root->processed_groupClause, NIL, agg_partial_costs, dNumPartialPartialGroups)); diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 4cec12ab19..51d545bbd6 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -1008,6 +1008,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, subroot->rowMarks = NIL; memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels)); memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets)); + subroot->processed_groupClause = NIL; subroot->processed_tlist = NIL; subroot->update_colnos = NIL; subroot->grouping_map = NULL; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 654dba61aa..11c3545637 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -406,6 +406,22 @@ struct PlannerInfo /* Result tlists chosen by grouping_planner for upper-stage processing */ struct PathTarget *upper_targets[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore); + /* + * The fully-processed groupClause is kept here. It differs from + * parse->groupClause in that we remove any items that we can prove + * redundant, so that only the columns named here actually need to be + * compared to determine grouping. Note that it's possible for *all* the + * items to be proven redundant, implying that there is only one group + * containing all the query's rows. Hence, if you want to check whether + * GROUP BY was specified, test for nonempty parse->groupClause, not for + * nonempty processed_groupClause. + * + * Currently, when grouping sets are specified we do not attempt to + * optimize the groupClause, so that processed_groupClause will be + * identical to parse->groupClause. + */ + List *processed_groupClause; + /* * The fully-processed targetlist is kept here. It differs from * parse->targetList in that (for INSERT) it's been reordered to match the diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 41f765d342..7719f89122 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -228,6 +228,11 @@ extern List *build_join_pathkeys(PlannerInfo *root, extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist); +extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root, + List **sortclauses, + List *tlist, + bool remove_redundant, + bool *sortable); extern void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo); extern void update_mergeclause_eclasses(PlannerInfo *root, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 309c2dc865..648da0f7c4 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1289,7 +1289,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; QUERY PLAN ------------------------------------------------------ HashAggregate - Group Key: t1.a, t1.b, t2.x, t2.y + Group Key: t1.a, t1.b -> Hash Join Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) -> Seq Scan on t2 @@ -1304,7 +1304,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; QUERY PLAN ------------------------------------------------------ HashAggregate - Group Key: t1.a, t1.b, t2.x, t2.z + Group Key: t1.a, t1.b, t2.z -> Hash Join Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) -> Seq Scan on t2 diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index fcad5c4093..292196b169 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -2135,7 +2135,6 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1; QUERY PLAN --------------------------- GroupAggregate - Group Key: $2 InitPlan 1 (returns $1) -> Result InitPlan 3 (returns $2) @@ -2143,7 +2142,7 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1; -> Result SubPlan 2 -> Result -(9 rows) +(8 rows) select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1; grouping diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index a82b8fb8fb..1b900fddf8 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -164,10 +164,9 @@ SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; QUERY PLAN -------------------------------- GroupAggregate - Group Key: c -> Result One-Time Filter: false -(4 rows) +(3 rows) SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; c | sum @@ -805,10 +804,10 @@ SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOI -- Empty join relation because of empty outer side, no partitionwise agg plan EXPLAIN (COSTS OFF) SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUPBY a.x, a.y ORDER BY 1, 2; - QUERY PLAN ---------------------------------------- + QUERY PLAN +-------------------------------------- GroupAggregate - Group Key: pagg_tab1.x, pagg_tab1.y + Group Key: pagg_tab1.y -> Sort Sort Key: pagg_tab1.y -> Result diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index c59caf1cb3..f2418413dd 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -1340,7 +1340,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl QUERY PLAN -------------------------------------------------------------------------------- GroupAggregate - Group Key: t1.c, t2.c, t3.c + Group Key: t1.c, t3.c -> Sort Sort Key: t1.c, t3.c -> Append @@ -1484,7 +1484,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph QUERY PLAN -------------------------------------------------------------------------------- GroupAggregate - Group Key: t1.c, t2.c, t3.c + Group Key: t1.c, t3.c -> Sort Sort Key: t1.c, t3.c -> Append @@ -1576,7 +1576,7 @@ SELECT avg(t1.a), avg(t2.b), t1.c, t2.c FROM plt1 t1 RIGHT JOIN plt2 t2 ON t1.c Sort Sort Key: t1.c -> HashAggregate - Group Key: t1.c, t2.c + Group Key: t1.c -> Append -> Hash Join Hash Cond: (t2_1.c = t1_1.c) diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out index 20141ce7f3..ce4bf1d4e5 100644 --- a/contrib/pg_trgm/expected/pg_trgm.out +++ b/contrib/pg_trgm/expected/pg_trgm.out @@ -5366,10 +5366,10 @@ SELECT similarity('Szczecin', 'Warsaw'); EXPLAIN (COSTS OFF) SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit() FROM restaurants WHERE city % 'Warsaw'; - QUERY PLAN -------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------- HashAggregate - Group Key: city, similarity(city, 'Warsaw'::text), show_limit() + Group Key: city, similarity(city, 'Warsaw'::text) -> Bitmap Heap Scan on restaurants Recheck Cond: (city % 'Warsaw'::text) -> Bitmap Index Scan on restaurants_city_idx diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index befb2fd4d3..f3d1bf6676 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2293,7 +2293,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM -> Subquery Scan on q -> HashAggregate Output: t2.c1, t3.c1 - Group Key: t2.c1, t3.c1 + Group Key: t2.c1 -> Foreign Scan Output: t2.c1, t3.c1 Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index bcc10942cc..c7a8ae56a1 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -636,6 +636,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, memset(root->upper_rels, 0, sizeof(root->upper_rels)); memset(root->upper_targets, 0, sizeof(root->upper_targets)); root->processed_groupClause = NIL; + root->processed_distinctClause = NIL; root->processed_tlist = NIL; root->update_colnos = NIL; root->grouping_map = NULL; @@ -3427,12 +3428,27 @@ standard_qp_callback(PlannerInfo *root, void *extra) else root->window_pathkeys = NIL; - if (parse->distinctClause && - grouping_is_sortable(parse->distinctClause)) + /* + * As with GROUP BY, we can discard any DISTINCT items that are proven + * redundant by EquivalenceClass processing. The non-redundant list is + * kept in root->processed_distinctClause, leaving the original + * parse->distinctClause alone. + */ + if (parse->distinctClause) + { + bool sortable; + + /* Make a copy since pathkey processing can modify the list */ + root->processed_distinctClause = list_copy(parse->distinctClause); root->distinct_pathkeys = - make_pathkeys_for_sortclauses(root, - parse->distinctClause, - tlist); + make_pathkeys_for_sortclauses_extended(root, + &root->processed_distinctClause, + tlist, + true, + &sortable); + if (!sortable) + root->distinct_pathkeys = NIL; + } else root->distinct_pathkeys = NIL; @@ -4679,7 +4695,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, cheapest_partial_path = linitial(input_rel->partial_pathlist); - distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause, parse->targetList); /* estimate how many distinct rows we'll get from each worker */ @@ -4688,7 +4704,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, NULL, NULL); /* first try adding unique paths atop of sorted paths */ - if (grouping_is_sortable(parse->distinctClause)) + if (grouping_is_sortable(root->processed_distinctClause)) { foreach(lc, input_rel->partial_pathlist) { @@ -4712,7 +4728,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, * path here, we treat enable_hashagg as a hard off-switch rather than the * slightly softer variant in create_final_distinct_paths. */ - if (enable_hashagg && grouping_is_hashable(parse->distinctClause)) + if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause)) { add_partial_path(partial_distinct_rel, (Path *) create_agg_path(root, @@ -4721,7 +4737,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, cheapest_partial_path->pathtarget, AGG_HASHED, AGGSPLIT_SIMPLE, - parse->distinctClause, + root->processed_distinctClause, NIL, NULL, numDistinctRows)); @@ -4793,7 +4809,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, */ List *distinctExprs; - distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause, parse->targetList); numDistinctRows = estimate_num_groups(root, distinctExprs, cheapest_input_path->rows, @@ -4803,7 +4819,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, /* * Consider sort-based implementations of DISTINCT, if possible. */ - if (grouping_is_sortable(parse->distinctClause)) + if (grouping_is_sortable(root->processed_distinctClause)) { /* * First, if we have any adequately-presorted paths, just stick a @@ -4942,7 +4958,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, else allow_hash = true; /* default */ - if (allow_hash && grouping_is_hashable(parse->distinctClause)) + if (allow_hash && grouping_is_hashable(root->processed_distinctClause)) { /* Generate hashed aggregate path --- no sort needed */ add_path(distinct_rel, (Path *) @@ -4952,7 +4968,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, cheapest_input_path->pathtarget, AGG_HASHED, AGGSPLIT_SIMPLE, - parse->distinctClause, + root->processed_distinctClause, NIL, NULL, numDistinctRows)); diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 51d545bbd6..91cb323c54 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -1009,6 +1009,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels)); memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets)); subroot->processed_groupClause = NIL; + subroot->processed_distinctClause = NIL; subroot->processed_tlist = NIL; subroot->update_colnos = NIL; subroot->grouping_map = NULL; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 11c3545637..923fecef27 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -422,6 +422,18 @@ struct PlannerInfo */ List *processed_groupClause; + /* + * The fully-processed distinctClause is kept here. It differs from + * parse->distinctClause in that we remove any items that we can prove + * redundant, so that only the columns named here actually need to be + * compared to determine uniqueness. Note that it's possible for *all* + * the items to be proven redundant, implying that there should be only + * one output row. Hence, if you want to check whether DISTINCT was + * specified, test for nonempty parse->distinctClause, not for nonempty + * processed_distinctClause. + */ + List *processed_distinctClause; + /* * The fully-processed targetlist is kept here. It differs from * parse->targetList in that (for INSERT) it's been reordered to match the diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 6ce889d87c..d0c34e1a96 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -136,7 +136,7 @@ SELECT count(*) FROM Output: count(*) -> HashAggregate Output: tenk1.two, tenk1.four, tenk1.two - Group Key: tenk1.two, tenk1.four, tenk1.two + Group Key: tenk1.two, tenk1.four -> Seq Scan on public.tenk1 Output: tenk1.two, tenk1.four, tenk1.two (7 rows)
pgsql-hackers by date: