Home > mailing lists
Re: Removing redundant grouping columns - Mailing list pgsql-hackers

From	Tom Lane
Subject	Re: Removing redundant grouping columns
Date	December 30, 2022 21:02:06
Msg-id	895775.1672434126@sss.pgh.pa.us Whole thread
In response to	Re: Removing redundant grouping columns (Tom Lane <tgl@sss.pgh.pa.us>)
Responses	Re: Removing redundant grouping columns
List	pgsql-hackers
Tree view
I wrote:
> Richard Guo <guofenglinux@gmail.com> writes:
>> While we are here, I wonder if we can do the same trick for
>> distinctClause, to cope with cases like
>> select distinct a.x, b.y from a, b where a.x = b.y;

> We do that already, no?

Oh, wait, I see what you mean: we are smart in code paths that rely
on distinct_pathkeys, but not in the hash-based code paths.  Right,
that can be fixed the same way.  0001 attached is the same as before,
0002 adds similar logic for the distinctClause.

The plan change in expected/pg_trgm.out is surprising at first
glance, but I believe it's correct: the item that is being
dropped is a parameterless STABLE function, so its value is not
supposed to change for the duration of the scan.

            regards, tom lane

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 9524765650..450f724c49 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -3667,6 +3667,13 @@ appendGroupByClause(List *tlist, deparse_expr_cxt *context)
      */
     Assert(!query->groupingSets);

+    /*
+     * We intentionally print query->groupClause not processed_groupClause,
+     * leaving it to the remote planner to get rid of any redundant GROUP BY
+     * items again.  This is necessary in case processed_groupClause reduced
+     * to empty, and in any case the redundancy situation on the remote might
+     * be different than what we think here.
+     */
     foreach(lc, query->groupClause)
     {
         SortGroupClause *grp = (SortGroupClause *) lfirst(lc);
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 1a2c2a665c..befb2fd4d3 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2864,16 +2864,13 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
-                                      QUERY PLAN
----------------------------------------------------------------------------------------
- Sort
+                                                 QUERY PLAN
+------------------------------------------------------------------------------------------------------------
+ Foreign Scan
    Output: (count(c2)), c2, 5, 7.0, 9
-   Sort Key: ft1.c2
-   ->  Foreign Scan
-         Output: (count(c2)), c2, 5, 7.0, 9
-         Relations: Aggregate on (public.ft1)
-         Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
-(7 rows)
+   Relations: Aggregate on (public.ft1)
+   Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
+(4 rows)

 select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
   w  | x | y |  z
@@ -2990,14 +2987,13 @@ select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
                     QUERY PLAN
 ---------------------------------------------------
  GroupAggregate
-   Output: ($0), sum(ft1.c1)
-   Group Key: $0
+   Output: $0, sum(ft1.c1)
    InitPlan 1 (returns $0)
      ->  Seq Scan on pg_catalog.pg_enum
    ->  Foreign Scan on public.ft1
-         Output: $0, ft1.c1
+         Output: ft1.c1
          Remote SQL: SELECT "C 1" FROM "S 1"."T 1"
-(8 rows)
+(7 rows)

 select exists(select 1 from pg_enum), sum(c1) from ft1 group by 1;
  exists |  sum
@@ -3382,14 +3378,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
 --------------------------------------------------------------------------------------------------
  GroupAggregate
    Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
-   Group Key: ft2.c2
    ->  Sort
-         Output: c2, c1
+         Output: c1, c2
          Sort Key: ft2.c1 USING <^
          ->  Foreign Scan on public.ft2
-               Output: c2, c1
+               Output: c1, c2
                Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(9 rows)
+(8 rows)

 -- This should not be pushed either.
 explain (verbose, costs off)
@@ -3458,14 +3453,13 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
 --------------------------------------------------------------------------------------------------
  GroupAggregate
    Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
-   Group Key: ft2.c2
    ->  Sort
-         Output: c2, c1
+         Output: c1, c2
          Sort Key: ft2.c1 USING <^
          ->  Foreign Scan on public.ft2
-               Output: c2, c1
+               Output: c1, c2
                Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(9 rows)
+(8 rows)

 -- Cleanup
 drop operator class my_op_class using btree;
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index b9268e32dd..722ebc932b 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -3345,9 +3345,9 @@ estimate_path_cost_size(PlannerInfo *root,
             }

             /* Get number of grouping columns and possible number of groups */
-            numGroupCols = list_length(root->parse->groupClause);
+            numGroupCols = list_length(root->processed_groupClause);
             numGroups = estimate_num_groups(root,
-                                            get_sortgrouplist_exprs(root->parse->groupClause,
+                                            get_sortgrouplist_exprs(root->processed_groupClause,
                                                                     fpinfo->grouped_tlist),
                                             input_rows, NULL, NULL);

@@ -3636,7 +3636,7 @@ adjust_foreign_grouping_path_cost(PlannerInfo *root,
      * pathkeys, adjust the costs with that function.  Otherwise, adjust the
      * costs by applying the same heuristic as for the scan or join case.
      */
-    if (!grouping_is_sortable(root->parse->groupClause) ||
+    if (!grouping_is_sortable(root->processed_groupClause) ||
         !pathkeys_contained_in(pathkeys, root->group_pathkeys))
     {
         Path        sort_path;    /* dummy for result of cost_sort */
@@ -6202,7 +6202,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
         Index        sgref = get_pathtarget_sortgroupref(grouping_target, i);
         ListCell   *l;

-        /* Check whether this expression is part of GROUP BY clause */
+        /*
+         * Check whether this expression is part of GROUP BY clause.  Note we
+         * check the whole GROUP BY clause not just processed_groupClause,
+         * because we will ship all of it, cf. appendGroupByClause.
+         */
         if (sgref && get_sortgroupref_clause_noerr(sgref, query->groupClause))
         {
             TargetEntry *tle;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index f15bb83a1a..04521bc3d1 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -2476,7 +2476,7 @@ agg_retrieve_direct(AggState *aggstate)
                      * If we are grouping, check whether we've crossed a group
                      * boundary.
                      */
-                    if (node->aggstrategy != AGG_PLAIN)
+                    if (node->aggstrategy != AGG_PLAIN && node->numCols > 0)
                     {
                         tmpcontext->ecxt_innertuple = firstSlot;
                         if (!ExecQual(aggstate->phase->eqfunctions[node->numCols - 1],
@@ -3480,8 +3480,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
              */
             if (aggnode->aggstrategy == AGG_SORTED)
             {
-                Assert(aggnode->numCols > 0);
-
                 /*
                  * Build a separate function for each subset of columns that
                  * need to be compared.
@@ -3507,7 +3505,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
                 }

                 /* and for all grouped columns, unless already computed */
-                if (phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
+                if (aggnode->numCols > 0 &&
+                    phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
                 {
                     phasedata->eqfunctions[aggnode->numCols - 1] =
                         execTuplesMatchPrepare(scanDesc,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index a9943cd6e0..bb11dbec7b 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1152,18 +1152,62 @@ List *
 make_pathkeys_for_sortclauses(PlannerInfo *root,
                               List *sortclauses,
                               List *tlist)
+{
+    List       *result;
+    bool        sortable;
+
+    result = make_pathkeys_for_sortclauses_extended(root,
+                                                    &sortclauses,
+                                                    tlist,
+                                                    false,
+                                                    &sortable);
+    /* It's caller error if not all clauses were sortable */
+    Assert(sortable);
+    return result;
+}
+
+/*
+ * make_pathkeys_for_sortclauses_extended
+ *        Generate a pathkeys list that represents the sort order specified
+ *        by a list of SortGroupClauses
+ *
+ * The comments for make_pathkeys_for_sortclauses apply here too. In addition:
+ *
+ * If remove_redundant is true, then any sort clauses that are found to
+ * give rise to redundant pathkeys are removed from the sortclauses list
+ * (which therefore must be pass-by-reference in this version).
+ *
+ * *sortable is set to true if all the sort clauses are in fact sortable.
+ * If any are not, they are ignored except for setting *sortable false.
+ * (In that case, the output pathkey list isn't really useful.  However,
+ * we process the whole sortclauses list anyway, because it's still valid
+ * to remove any clauses that can be proven redundant via the eclass logic.
+ * Even though we'll have to hash in that case, we might as well not hash
+ * redundant columns.)
+ */
+List *
+make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
+                                       List **sortclauses,
+                                       List *tlist,
+                                       bool remove_redundant,
+                                       bool *sortable)
 {
     List       *pathkeys = NIL;
     ListCell   *l;

-    foreach(l, sortclauses)
+    *sortable = true;
+    foreach(l, *sortclauses)
     {
         SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
         Expr       *sortkey;
         PathKey    *pathkey;

         sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
-        Assert(OidIsValid(sortcl->sortop));
+        if (!OidIsValid(sortcl->sortop))
+        {
+            *sortable = false;
+            continue;
+        }
         pathkey = make_pathkey_from_sortop(root,
                                            sortkey,
                                            root->nullable_baserels,
@@ -1175,6 +1219,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
         /* Canonical form eliminates redundant ordering keys */
         if (!pathkey_is_redundant(pathkey, pathkeys))
             pathkeys = lappend(pathkeys, pathkey);
+        else if (remove_redundant)
+            *sortclauses = foreach_delete_current(*sortclauses, l);
     }
     return pathkeys;
 }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 66139928e8..a4c6062b41 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -2404,7 +2404,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
      * sizing.
      */
     maxref = 0;
-    foreach(lc, root->parse->groupClause)
+    foreach(lc, root->processed_groupClause)
     {
         SortGroupClause *gc = (SortGroupClause *) lfirst(lc);

@@ -2415,7 +2415,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
     grouping_map = (AttrNumber *) palloc0((maxref + 1) * sizeof(AttrNumber));

     /* Now look up the column numbers in the child's tlist */
-    foreach(lc, root->parse->groupClause)
+    foreach(lc, root->processed_groupClause)
     {
         SortGroupClause *gc = (SortGroupClause *) lfirst(lc);
         TargetEntry *tle = get_sortgroupclause_tle(gc, subplan->targetlist);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 8a41e1e6d3..bcc10942cc 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -95,17 +95,9 @@ create_upper_paths_hook_type create_upper_paths_hook = NULL;
 #define EXPRKIND_TABLEFUNC            11
 #define EXPRKIND_TABLEFUNC_LATERAL    12

-/* Passthrough data for standard_qp_callback */
-typedef struct
-{
-    List       *activeWindows;    /* active windows, if any */
-    List       *groupClause;    /* overrides parse->groupClause */
-} standard_qp_extra;
-
 /*
  * Data specific to grouping sets
  */
-
 typedef struct
 {
     List       *rollups;
@@ -129,6 +121,13 @@ typedef struct
                                  * clauses per Window */
 } WindowClauseSortData;

+/* Passthrough data for standard_qp_callback */
+typedef struct
+{
+    List       *activeWindows;    /* active windows, if any */
+    grouping_sets_data *gset_data;    /* grouping sets data, if any */
+} standard_qp_extra;
+
 /* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
@@ -636,6 +635,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
     root->rowMarks = NIL;
     memset(root->upper_rels, 0, sizeof(root->upper_rels));
     memset(root->upper_targets, 0, sizeof(root->upper_targets));
+    root->processed_groupClause = NIL;
     root->processed_tlist = NIL;
     root->update_colnos = NIL;
     root->grouping_map = NULL;
@@ -1032,9 +1032,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
     }
     parse->havingQual = (Node *) newHaving;

-    /* Remove any redundant GROUP BY columns */
-    remove_useless_groupby_columns(root);
-
     /*
      * If we have any outer joins, try to reduce them to plain inner joins.
      * This step is most easily done after we've done expression
@@ -1393,11 +1390,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
         {
             gset_data = preprocess_grouping_sets(root);
         }
-        else
+        else if (parse->groupClause)
         {
             /* Preprocess regular GROUP BY clause, if any */
-            if (parse->groupClause)
-                parse->groupClause = preprocess_groupclause(root, NIL);
+            root->processed_groupClause = preprocess_groupclause(root, NIL);
+            /* Remove any redundant GROUP BY columns */
+            remove_useless_groupby_columns(root);
         }

         /*
@@ -1475,9 +1473,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)

         /* Set up data needed by standard_qp_callback */
         qp_extra.activeWindows = activeWindows;
-        qp_extra.groupClause = (gset_data
-                                ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause :
NIL)
-                                : parse->groupClause);
+        qp_extra.gset_data = gset_data;

         /*
          * Generate the best unsorted and presorted paths for the scan/join
@@ -2011,6 +2007,12 @@ preprocess_grouping_sets(PlannerInfo *root)
     gd->unsortable_refs = NULL;
     gd->unsortable_sets = NIL;

+    /*
+     * We don't currently make any attempt to optimize the groupClause when
+     * there are grouping sets, so just duplicate it in processed_groupClause.
+     */
+    root->processed_groupClause = parse->groupClause;
+
     if (parse->groupClause)
     {
         ListCell   *lc;
@@ -2638,7 +2640,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
     int            relid;

     /* No chance to do anything if there are less than two GROUP BY items */
-    if (list_length(parse->groupClause) < 2)
+    if (list_length(root->processed_groupClause) < 2)
         return;

     /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
@@ -2652,7 +2654,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
      */
     groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
                                            (list_length(parse->rtable) + 1));
-    foreach(lc, parse->groupClause)
+    foreach(lc, root->processed_groupClause)
     {
         SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
         TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -2747,7 +2749,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
     {
         List       *new_groupby = NIL;

-        foreach(lc, parse->groupClause)
+        foreach(lc, root->processed_groupClause)
         {
             SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
             TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -2764,7 +2766,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
                 new_groupby = lappend(new_groupby, sgc);
         }

-        parse->groupClause = new_groupby;
+        root->processed_groupClause = new_groupby;
     }
 }

@@ -2784,6 +2786,10 @@ remove_useless_groupby_columns(PlannerInfo *root)
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
  *
+ * Note: we return a fresh List, but its elements are the same
+ * SortGroupClauses appearing in parse->groupClause.  This is important
+ * because later processing may modify the processed_groupClause list.
+ *
  * For grouping sets, the order of items is instead forced to agree with that
  * of the grouping set (and items not in the grouping set are skipped). The
  * work of sorting the order of grouping set elements to match the ORDER BY if
@@ -2814,7 +2820,7 @@ preprocess_groupclause(PlannerInfo *root, List *force)

     /* If no ORDER BY, nothing useful to do here */
     if (parse->sortClause == NIL)
-        return parse->groupClause;
+        return list_copy(parse->groupClause);

     /*
      * Scan the ORDER BY clause and construct a list of matching GROUP BY
@@ -2845,7 +2851,7 @@ preprocess_groupclause(PlannerInfo *root, List *force)

     /* If no match at all, no point in reordering GROUP BY */
     if (new_groupclause == NIL)
-        return parse->groupClause;
+        return list_copy(parse->groupClause);

     /*
      * Add any remaining GROUP BY items to the new list, but only if we were
@@ -2861,10 +2867,10 @@ preprocess_groupclause(PlannerInfo *root, List *force)

         if (list_member_ptr(new_groupclause, gc))
             continue;            /* it matched an ORDER BY item */
-        if (partial_match)
-            return parse->groupClause;    /* give up, no common sort possible */
-        if (!OidIsValid(gc->sortop))
-            return parse->groupClause;    /* give up, GROUP BY can't be sorted */
+        if (partial_match)        /* give up, no common sort possible */
+            return list_copy(parse->groupClause);
+        if (!OidIsValid(gc->sortop))    /* give up, GROUP BY can't be sorted */
+            return list_copy(parse->groupClause);
         new_groupclause = lappend(new_groupclause, gc);
     }

@@ -3148,64 +3154,39 @@ reorder_grouping_sets(List *groupingSets, List *sortclause)
 }

 /*
- * make_pathkeys_for_groupagg
- *        Determine the pathkeys for the GROUP BY clause and/or any ordered
- *        aggregates.  We expect at least one of these here.
+ * adjust_group_pathkeys_for_groupagg
+ *        Add pathkeys to root->group_pathkeys to reflect the best set of
+ *        pre-ordered input for ordered aggregates.
  *
- * Building the pathkeys for the GROUP BY is simple.  Most of the complexity
- * involved here comes from calculating the best pathkeys for ordered
- * aggregates.  We define "best" as the pathkeys that suit the most number of
+ * We define "best" as the pathkeys that suit the largest number of
  * aggregate functions.  We find these by looking at the first ORDER BY /
  * DISTINCT aggregate and take the pathkeys for that before searching for
  * other aggregates that require the same or a more strict variation of the
  * same pathkeys.  We then repeat that process for any remaining aggregates
  * with different pathkeys and if we find another set of pathkeys that suits a
- * larger number of aggregates then we return those pathkeys instead.
- *
- * *number_groupby_pathkeys gets set to the number of elements in the returned
- * list that belong to the GROUP BY clause.  Any elements above this number
- * must belong to ORDER BY / DISTINCT aggregates.
+ * larger number of aggregates then we select those pathkeys instead.
  *
  * When the best pathkeys are found we also mark each Aggref that can use
  * those pathkeys as aggpresorted = true.
  */
-static List *
-make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
-                           int *number_groupby_pathkeys)
+static void
+adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
 {
-    List       *grouppathkeys = NIL;
+    List       *grouppathkeys = root->group_pathkeys;
     List       *bestpathkeys;
     Bitmapset  *bestaggs;
     Bitmapset  *unprocessed_aggs;
     ListCell   *lc;
     int            i;

-    Assert(groupClause != NIL || root->numOrderedAggs > 0);
-
-    if (groupClause != NIL)
-    {
-        /* no pathkeys possible if there's an unsortable GROUP BY */
-        if (!grouping_is_sortable(groupClause))
-        {
-            *number_groupby_pathkeys = 0;
-            return NIL;
-        }
-
-        grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
-                                                      tlist);
-        *number_groupby_pathkeys = list_length(grouppathkeys);
-    }
-    else
-        *number_groupby_pathkeys = 0;
+    /* Shouldn't be here if there are grouping sets */
+    Assert(root->parse->groupingSets == NIL);
+    /* Shouldn't be here unless there are some ordered aggregates */
+    Assert(root->numOrderedAggs > 0);

-    /*
-     * We can't add pathkeys for ordered aggregates if there are any grouping
-     * sets.  All handling specific to ordered aggregates must be done by the
-     * executor in that case.
-     */
-    if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL ||
-        !enable_presorted_aggregate)
-        return grouppathkeys;
+    /* Do nothing if disabled */
+    if (!enable_presorted_aggregate)
+        return;

     /*
      * Make a first pass over all AggInfos to collect a Bitmapset containing
@@ -3327,6 +3308,14 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
         }
     }

+    /*
+     * If we found any ordered aggregates, update root->group_pathkeys to add
+     * the best set of aggregate pathkeys.  Note that bestpathkeys includes
+     * the original GROUP BY pathkeys already.
+     */
+    if (bestpathkeys != NIL)
+        root->group_pathkeys = bestpathkeys;
+
     /*
      * Now that we've found the best set of aggregates we can set the
      * presorted flag to indicate to the executor that it needn't bother
@@ -3347,16 +3336,6 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
             aggref->aggpresorted = true;
         }
     }
-
-    /*
-     * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
-     * aggregates, then return bestpathkeys, otherwise return the
-     * grouppathkeys.
-     */
-    if (bestpathkeys != NIL)
-        return bestpathkeys;
-
-    return grouppathkeys;
 }

 /*
@@ -3374,11 +3353,62 @@ standard_qp_callback(PlannerInfo *root, void *extra)
      * Calculate pathkeys that represent grouping/ordering and/or ordered
      * aggregate requirements.
      */
-    if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
-        root->group_pathkeys = make_pathkeys_for_groupagg(root,
-                                                          qp_extra->groupClause,
-                                                          tlist,
-                                                          &root->num_groupby_pathkeys);
+    if (qp_extra->gset_data)
+    {
+        /*
+         * With grouping sets, just use the first RollupData's groupClause. We
+         * don't make any effort to optimize grouping clauses when there are
+         * grouping sets, nor can we combine aggregate ordering keys with
+         * grouping.
+         */
+        List       *rollups = qp_extra->gset_data->rollups;
+        List       *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
+
+        if (grouping_is_sortable(groupClause))
+        {
+            root->group_pathkeys = make_pathkeys_for_sortclauses(root,
+                                                                 groupClause,
+                                                                 tlist);
+            root->num_groupby_pathkeys = list_length(root->group_pathkeys);
+        }
+        else
+        {
+            root->group_pathkeys = NIL;
+            root->num_groupby_pathkeys = 0;
+        }
+    }
+    else if (parse->groupClause || root->numOrderedAggs > 0)
+    {
+        /*
+         * With a plain GROUP BY list, we can remove any grouping items that
+         * are proven redundant by EquivalenceClass processing.  For example,
+         * we can remove y given "WHERE x = y GROUP BY x, y".  These aren't
+         * especially common cases, but they're nearly free to detect.  Note
+         * that we remove redundant items from processed_groupClause but not
+         * the original parse->groupClause.
+         */
+        bool        sortable;
+
+        root->group_pathkeys =
+            make_pathkeys_for_sortclauses_extended(root,
+                                                   &root->processed_groupClause,
+                                                   tlist,
+                                                   true,
+                                                   &sortable);
+        if (!sortable)
+        {
+            /* Can't sort; no point in considering aggregate ordering either */
+            root->group_pathkeys = NIL;
+            root->num_groupby_pathkeys = 0;
+        }
+        else
+        {
+            root->num_groupby_pathkeys = list_length(root->group_pathkeys);
+            /* If we have ordered aggs, consider adding onto group_pathkeys */
+            if (root->numOrderedAggs > 0)
+                adjust_group_pathkeys_for_groupagg(root);
+        }
+    }
     else
     {
         root->group_pathkeys = NIL;
@@ -3531,8 +3561,8 @@ get_number_of_groups(PlannerInfo *root,
         }
         else
         {
-            /* Plain GROUP BY */
-            groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+            /* Plain GROUP BY -- estimate based on optimized groupClause */
+            groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
                                                  target_list);

             dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
@@ -3610,8 +3640,8 @@ create_grouping_paths(PlannerInfo *root,

         /*
          * Determine whether it's possible to perform sort-based
-         * implementations of grouping.  (Note that if groupClause is empty,
-         * grouping_is_sortable() is trivially true, and all the
+         * implementations of grouping.  (Note that if processed_groupClause
+         * is empty, grouping_is_sortable() is trivially true, and all the
          * pathkeys_contained_in() tests will succeed too, so that we'll
          * consider every surviving input path.)
          *
@@ -3620,7 +3650,7 @@ create_grouping_paths(PlannerInfo *root,
          * must consider any sorted-input plan.
          */
         if ((gd && gd->rollups != NIL)
-            || grouping_is_sortable(parse->groupClause))
+            || grouping_is_sortable(root->processed_groupClause))
             flags |= GROUPING_CAN_USE_SORT;

         /*
@@ -3645,7 +3675,7 @@ create_grouping_paths(PlannerInfo *root,
          */
         if ((parse->groupClause != NIL &&
              root->numOrderedAggs == 0 &&
-             (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
+             (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
             flags |= GROUPING_CAN_USE_HASH;

         /*
@@ -3856,6 +3886,9 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
          * partial partitionwise aggregation.  But if partial aggregation is
          * not supported in general then we can't use it for partitionwise
          * aggregation either.
+         *
+         * Check parse->groupClause not processed_groupClause, because it's
+         * okay if some of the partitioning columns were proved redundant.
          */
         if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
             group_by_has_partkey(input_rel, extra->targetList,
@@ -5214,8 +5247,9 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
         Expr       *expr = (Expr *) lfirst(lc);
         Index        sgref = get_pathtarget_sortgroupref(final_target, i);

-        if (sgref && parse->groupClause &&
-            get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+        if (sgref && root->processed_groupClause &&
+            get_sortgroupref_clause_noerr(sgref,
+                                          root->processed_groupClause) != NULL)
         {
             /*
              * It's a grouping column, so add it to the input target as-is.
@@ -5283,7 +5317,6 @@ make_partial_grouping_target(PlannerInfo *root,
                              PathTarget *grouping_target,
                              Node *havingQual)
 {
-    Query       *parse = root->parse;
     PathTarget *partial_target;
     List       *non_group_cols;
     List       *non_group_exprs;
@@ -5299,8 +5332,9 @@ make_partial_grouping_target(PlannerInfo *root,
         Expr       *expr = (Expr *) lfirst(lc);
         Index        sgref = get_pathtarget_sortgroupref(grouping_target, i);

-        if (sgref && parse->groupClause &&
-            get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+        if (sgref && root->processed_groupClause &&
+            get_sortgroupref_clause_noerr(sgref,
+                                          root->processed_groupClause) != NULL)
         {
             /*
              * It's a grouping column, so add it to the partial_target as-is.
@@ -5747,7 +5781,6 @@ make_window_input_target(PlannerInfo *root,
                          PathTarget *final_target,
                          List *activeWindows)
 {
-    Query       *parse = root->parse;
     PathTarget *input_target;
     Bitmapset  *sgrefs;
     List       *flattenable_cols;
@@ -5755,7 +5788,7 @@ make_window_input_target(PlannerInfo *root,
     int            i;
     ListCell   *lc;

-    Assert(parse->hasWindowFuncs);
+    Assert(root->parse->hasWindowFuncs);

     /*
      * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
@@ -5782,7 +5815,7 @@ make_window_input_target(PlannerInfo *root,
     }

     /* Add in sortgroupref numbers of GROUP BY clauses, too */
-    foreach(lc, parse->groupClause)
+    foreach(lc, root->processed_groupClause)
     {
         SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);

@@ -6701,7 +6734,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                          grouped_rel->reltarget,
                                          parse->groupClause ? AGG_SORTED : AGG_PLAIN,
                                          AGGSPLIT_SIMPLE,
-                                         parse->groupClause,
+                                         root->processed_groupClause,
                                          havingQual,
                                          agg_costs,
                                          dNumGroups));
@@ -6716,7 +6749,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                          create_group_path(root,
                                            grouped_rel,
                                            path,
-                                           parse->groupClause,
+                                           root->processed_groupClause,
                                            havingQual,
                                            dNumGroups));
             }
@@ -6785,7 +6818,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                              grouped_rel->reltarget,
                                              parse->groupClause ? AGG_SORTED : AGG_PLAIN,
                                              AGGSPLIT_FINAL_DESERIAL,
-                                             parse->groupClause,
+                                             root->processed_groupClause,
                                              havingQual,
                                              agg_final_costs,
                                              dNumGroups));
@@ -6794,7 +6827,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                              create_group_path(root,
                                                grouped_rel,
                                                path,
-                                               parse->groupClause,
+                                               root->processed_groupClause,
                                                havingQual,
                                                dNumGroups));

@@ -6825,7 +6858,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                      grouped_rel->reltarget,
                                      AGG_HASHED,
                                      AGGSPLIT_SIMPLE,
-                                     parse->groupClause,
+                                     root->processed_groupClause,
                                      havingQual,
                                      agg_costs,
                                      dNumGroups));
@@ -6846,7 +6879,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                      grouped_rel->reltarget,
                                      AGG_HASHED,
                                      AGGSPLIT_FINAL_DESERIAL,
-                                     parse->groupClause,
+                                     root->processed_groupClause,
                                      havingQual,
                                      agg_final_costs,
                                      dNumGroups));
@@ -7048,7 +7081,7 @@ create_partial_grouping_paths(PlannerInfo *root,
                                          partially_grouped_rel->reltarget,
                                          parse->groupClause ? AGG_SORTED : AGG_PLAIN,
                                          AGGSPLIT_INITIAL_SERIAL,
-                                         parse->groupClause,
+                                         root->processed_groupClause,
                                          NIL,
                                          agg_partial_costs,
                                          dNumPartialGroups));
@@ -7057,7 +7090,7 @@ create_partial_grouping_paths(PlannerInfo *root,
                          create_group_path(root,
                                            partially_grouped_rel,
                                            path,
-                                           parse->groupClause,
+                                           root->processed_groupClause,
                                            NIL,
                                            dNumPartialGroups));
         }
@@ -7117,7 +7150,7 @@ create_partial_grouping_paths(PlannerInfo *root,
                                                  partially_grouped_rel->reltarget,
                                                  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
                                                  AGGSPLIT_INITIAL_SERIAL,
-                                                 parse->groupClause,
+                                                 root->processed_groupClause,
                                                  NIL,
                                                  agg_partial_costs,
                                                  dNumPartialPartialGroups));
@@ -7126,7 +7159,7 @@ create_partial_grouping_paths(PlannerInfo *root,
                                  create_group_path(root,
                                                    partially_grouped_rel,
                                                    path,
-                                                   parse->groupClause,
+                                                   root->processed_groupClause,
                                                    NIL,
                                                    dNumPartialPartialGroups));
         }
@@ -7147,7 +7180,7 @@ create_partial_grouping_paths(PlannerInfo *root,
                                  partially_grouped_rel->reltarget,
                                  AGG_HASHED,
                                  AGGSPLIT_INITIAL_SERIAL,
-                                 parse->groupClause,
+                                 root->processed_groupClause,
                                  NIL,
                                  agg_partial_costs,
                                  dNumPartialGroups));
@@ -7165,7 +7198,7 @@ create_partial_grouping_paths(PlannerInfo *root,
                                          partially_grouped_rel->reltarget,
                                          AGG_HASHED,
                                          AGGSPLIT_INITIAL_SERIAL,
-                                         parse->groupClause,
+                                         root->processed_groupClause,
                                          NIL,
                                          agg_partial_costs,
                                          dNumPartialPartialGroups));
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 4cec12ab19..51d545bbd6 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1008,6 +1008,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
     subroot->rowMarks = NIL;
     memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels));
     memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets));
+    subroot->processed_groupClause = NIL;
     subroot->processed_tlist = NIL;
     subroot->update_colnos = NIL;
     subroot->grouping_map = NULL;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 654dba61aa..11c3545637 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -406,6 +406,22 @@ struct PlannerInfo
     /* Result tlists chosen by grouping_planner for upper-stage processing */
     struct PathTarget *upper_targets[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore);

+    /*
+     * The fully-processed groupClause is kept here.  It differs from
+     * parse->groupClause in that we remove any items that we can prove
+     * redundant, so that only the columns named here actually need to be
+     * compared to determine grouping.  Note that it's possible for *all* the
+     * items to be proven redundant, implying that there is only one group
+     * containing all the query's rows.  Hence, if you want to check whether
+     * GROUP BY was specified, test for nonempty parse->groupClause, not for
+     * nonempty processed_groupClause.
+     *
+     * Currently, when grouping sets are specified we do not attempt to
+     * optimize the groupClause, so that processed_groupClause will be
+     * identical to parse->groupClause.
+     */
+    List       *processed_groupClause;
+
     /*
      * The fully-processed targetlist is kept here.  It differs from
      * parse->targetList in that (for INSERT) it's been reordered to match the
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 41f765d342..7719f89122 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -228,6 +228,11 @@ extern List *build_join_pathkeys(PlannerInfo *root,
 extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
                                            List *sortclauses,
                                            List *tlist);
+extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
+                                                    List **sortclauses,
+                                                    List *tlist,
+                                                    bool remove_redundant,
+                                                    bool *sortable);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
                                             RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 309c2dc865..648da0f7c4 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1289,7 +1289,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
                       QUERY PLAN
 ------------------------------------------------------
  HashAggregate
-   Group Key: t1.a, t1.b, t2.x, t2.y
+   Group Key: t1.a, t1.b
    ->  Hash Join
          Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
          ->  Seq Scan on t2
@@ -1304,7 +1304,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
                       QUERY PLAN
 ------------------------------------------------------
  HashAggregate
-   Group Key: t1.a, t1.b, t2.x, t2.z
+   Group Key: t1.a, t1.b, t2.z
    ->  Hash Join
          Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
          ->  Seq Scan on t2
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index fcad5c4093..292196b169 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -2135,7 +2135,6 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
         QUERY PLAN
 ---------------------------
  GroupAggregate
-   Group Key: $2
    InitPlan 1 (returns $1)
      ->  Result
    InitPlan 3 (returns $2)
@@ -2143,7 +2142,7 @@ select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
    ->  Result
    SubPlan 2
      ->  Result
-(9 rows)
+(8 rows)

 select (select grouping(v1)) from (values ((select 1))) v(v1) group by v1;
  grouping
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index a82b8fb8fb..1b900fddf8 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -164,10 +164,9 @@ SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c;
            QUERY PLAN
 --------------------------------
  GroupAggregate
-   Group Key: c
    ->  Result
          One-Time Filter: false
-(4 rows)
+(3 rows)

 SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c;
  c | sum
@@ -805,10 +804,10 @@ SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOI
 -- Empty join relation because of empty outer side, no partitionwise agg plan
 EXPLAIN (COSTS OFF)
 SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y
GROUPBY a.x, a.y ORDER BY 1, 2; 
-              QUERY PLAN
----------------------------------------
+              QUERY PLAN
+--------------------------------------
  GroupAggregate
-   Group Key: pagg_tab1.x, pagg_tab1.y
+   Group Key: pagg_tab1.y
    ->  Sort
          Sort Key: pagg_tab1.y
          ->  Result
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index c59caf1cb3..f2418413dd 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1340,7 +1340,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1484,7 +1484,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t3.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1576,7 +1576,7 @@ SELECT avg(t1.a), avg(t2.b), t1.c, t2.c FROM plt1 t1 RIGHT JOIN plt2 t2 ON t1.c
  Sort
    Sort Key: t1.c
    ->  HashAggregate
-         Group Key: t1.c, t2.c
+         Group Key: t1.c
          ->  Append
                ->  Hash Join
                      Hash Cond: (t2_1.c = t1_1.c)
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index 20141ce7f3..ce4bf1d4e5 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -5366,10 +5366,10 @@ SELECT similarity('Szczecin', 'Warsaw');
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
   FROM restaurants WHERE city % 'Warsaw';
-                            QUERY PLAN
--------------------------------------------------------------------
+                      QUERY PLAN
+-------------------------------------------------------
  HashAggregate
-   Group Key: city, similarity(city, 'Warsaw'::text), show_limit()
+   Group Key: city, similarity(city, 'Warsaw'::text)
    ->  Bitmap Heap Scan on restaurants
          Recheck Cond: (city % 'Warsaw'::text)
          ->  Bitmap Index Scan on restaurants_city_idx
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index befb2fd4d3..f3d1bf6676 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2293,7 +2293,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
                ->  Subquery Scan on q
                      ->  HashAggregate
                            Output: t2.c1, t3.c1
-                           Group Key: t2.c1, t3.c1
+                           Group Key: t2.c1
                            ->  Foreign Scan
                                  Output: t2.c1, t3.c1
                                  Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index bcc10942cc..c7a8ae56a1 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -636,6 +636,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
     memset(root->upper_rels, 0, sizeof(root->upper_rels));
     memset(root->upper_targets, 0, sizeof(root->upper_targets));
     root->processed_groupClause = NIL;
+    root->processed_distinctClause = NIL;
     root->processed_tlist = NIL;
     root->update_colnos = NIL;
     root->grouping_map = NULL;
@@ -3427,12 +3428,27 @@ standard_qp_callback(PlannerInfo *root, void *extra)
     else
         root->window_pathkeys = NIL;

-    if (parse->distinctClause &&
-        grouping_is_sortable(parse->distinctClause))
+    /*
+     * As with GROUP BY, we can discard any DISTINCT items that are proven
+     * redundant by EquivalenceClass processing.  The non-redundant list is
+     * kept in root->processed_distinctClause, leaving the original
+     * parse->distinctClause alone.
+     */
+    if (parse->distinctClause)
+    {
+        bool        sortable;
+
+        /* Make a copy since pathkey processing can modify the list */
+        root->processed_distinctClause = list_copy(parse->distinctClause);
         root->distinct_pathkeys =
-            make_pathkeys_for_sortclauses(root,
-                                          parse->distinctClause,
-                                          tlist);
+            make_pathkeys_for_sortclauses_extended(root,
+                                                   &root->processed_distinctClause,
+                                                   tlist,
+                                                   true,
+                                                   &sortable);
+        if (!sortable)
+            root->distinct_pathkeys = NIL;
+    }
     else
         root->distinct_pathkeys = NIL;

@@ -4679,7 +4695,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,

     cheapest_partial_path = linitial(input_rel->partial_pathlist);

-    distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
+    distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
                                             parse->targetList);

     /* estimate how many distinct rows we'll get from each worker */
@@ -4688,7 +4704,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
                                           NULL, NULL);

     /* first try adding unique paths atop of sorted paths */
-    if (grouping_is_sortable(parse->distinctClause))
+    if (grouping_is_sortable(root->processed_distinctClause))
     {
         foreach(lc, input_rel->partial_pathlist)
         {
@@ -4712,7 +4728,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
      * path here, we treat enable_hashagg as a hard off-switch rather than the
      * slightly softer variant in create_final_distinct_paths.
      */
-    if (enable_hashagg && grouping_is_hashable(parse->distinctClause))
+    if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause))
     {
         add_partial_path(partial_distinct_rel, (Path *)
                          create_agg_path(root,
@@ -4721,7 +4737,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
                                          cheapest_partial_path->pathtarget,
                                          AGG_HASHED,
                                          AGGSPLIT_SIMPLE,
-                                         parse->distinctClause,
+                                         root->processed_distinctClause,
                                          NIL,
                                          NULL,
                                          numDistinctRows));
@@ -4793,7 +4809,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
          */
         List       *distinctExprs;

-        distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
+        distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
                                                 parse->targetList);
         numDistinctRows = estimate_num_groups(root, distinctExprs,
                                               cheapest_input_path->rows,
@@ -4803,7 +4819,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
     /*
      * Consider sort-based implementations of DISTINCT, if possible.
      */
-    if (grouping_is_sortable(parse->distinctClause))
+    if (grouping_is_sortable(root->processed_distinctClause))
     {
         /*
          * First, if we have any adequately-presorted paths, just stick a
@@ -4942,7 +4958,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
     else
         allow_hash = true;        /* default */

-    if (allow_hash && grouping_is_hashable(parse->distinctClause))
+    if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
     {
         /* Generate hashed aggregate path --- no sort needed */
         add_path(distinct_rel, (Path *)
@@ -4952,7 +4968,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
                                  cheapest_input_path->pathtarget,
                                  AGG_HASHED,
                                  AGGSPLIT_SIMPLE,
-                                 parse->distinctClause,
+                                 root->processed_distinctClause,
                                  NIL,
                                  NULL,
                                  numDistinctRows));
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 51d545bbd6..91cb323c54 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1009,6 +1009,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
     memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels));
     memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets));
     subroot->processed_groupClause = NIL;
+    subroot->processed_distinctClause = NIL;
     subroot->processed_tlist = NIL;
     subroot->update_colnos = NIL;
     subroot->grouping_map = NULL;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 11c3545637..923fecef27 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -422,6 +422,18 @@ struct PlannerInfo
      */
     List       *processed_groupClause;

+    /*
+     * The fully-processed distinctClause is kept here.  It differs from
+     * parse->distinctClause in that we remove any items that we can prove
+     * redundant, so that only the columns named here actually need to be
+     * compared to determine uniqueness.  Note that it's possible for *all*
+     * the items to be proven redundant, implying that there should be only
+     * one output row.  Hence, if you want to check whether DISTINCT was
+     * specified, test for nonempty parse->distinctClause, not for nonempty
+     * processed_distinctClause.
+     */
+    List       *processed_distinctClause;
+
     /*
      * The fully-processed targetlist is kept here.  It differs from
      * parse->targetList in that (for INSERT) it's been reordered to match the
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 6ce889d87c..d0c34e1a96 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -136,7 +136,7 @@ SELECT count(*) FROM
    Output: count(*)
    ->  HashAggregate
          Output: tenk1.two, tenk1.four, tenk1.two
-         Group Key: tenk1.two, tenk1.four, tenk1.two
+         Group Key: tenk1.two, tenk1.four
          ->  Seq Scan on public.tenk1
                Output: tenk1.two, tenk1.four, tenk1.two
 (7 rows)
pgsql-hackers by date:
From: Jeff Davis
Date: 30 December 2022, 20:43:04
Subject: Re: New strategies for freezing, advancing relfrozenxid early
From: Thomas Munro
Date: 30 December 2022, 21:06:53
Subject: Re: Is RecoveryConflictInterrupt() entirely safe in a signal handler?
Re: Removing redundant grouping columns - Mailing list pgsql-hackers

Previous

Next