Re: [PATCH] GROUP BY ALL - Mailing list pgsql-hackers

From Tom Lane
Subject Re: [PATCH] GROUP BY ALL
Date
Msg-id 702762.1759011796@sss.pgh.pa.us
Whole thread Raw
In response to Re: [PATCH] GROUP BY ALL  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
Here's a v6 that's rebased up to HEAD and contains fixes for the
semantic issues we discussed.  It still lacks documentation, but
otherwise I think it's about ready to go.

            regards, tom lane

From 5503815351b2e45661262fddaab1271a435dd730 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 27 Sep 2025 18:19:16 -0400
Subject: [PATCH v6] Add GROUP BY ALL.

GROUP BY ALL is a form of GROUP BY that adds any TargetExpr that does
not contain an aggregate or window function into the groupClause of
the query, making it exactly equivalent to specifying those same
expressions in an explicit GROUP BY list.

This feature is useful for certain kinds of data exploration.  It's
already present in some other DBMSes, and the SQL committee recently
accepted it into the standard, so we can be reasonably confident in
the syntax being stable.  We do have to invent part of the semantics,
as the standard doesn't allow for expressions in GROUP BY, so they
haven't specified what to do with window functions.  We assume that
those should be treated like aggregates, i.e., left out of the
constructed GROUP BY list.

Author: David Christensen <david@pgguru.net>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CAHM0NXjz0kDwtzoe-fnHAqPB1qA8_VJN0XAmCgUZ+iPnvP5LbA@mail.gmail.com
---
 src/backend/parser/analyze.c             |   2 +
 src/backend/parser/gram.y                |  14 +++
 src/backend/parser/parse_clause.c        |  65 ++++++++++++-
 src/backend/utils/adt/ruleutils.c        |   4 +-
 src/include/nodes/parsenodes.h           |   4 +-
 src/include/parser/parse_clause.h        |   1 +
 src/test/regress/expected/aggregates.out | 113 +++++++++++++++++++++++
 src/test/regress/sql/aggregates.sql      |  50 ++++++++++
 8 files changed, 250 insertions(+), 3 deletions(-)

diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 5aeb54eb5f6..99ba043961b 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1467,12 +1467,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt,

     qry->groupClause = transformGroupClause(pstate,
                                             stmt->groupClause,
+                                            stmt->groupAll,
                                             &qry->groupingSets,
                                             &qry->targetList,
                                             qry->sortClause,
                                             EXPR_KIND_GROUP_BY,
                                             false /* allow SQL92 rules */ );
     qry->groupDistinct = stmt->groupDistinct;
+    qry->groupAll = stmt->groupAll;

     if (stmt->distinctClause == NIL)
     {
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 9fd48acb1f8..8a0aa1899e2 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -120,6 +120,7 @@ typedef struct SelectLimit
 typedef struct GroupClause
 {
     bool        distinct;
+    bool        all;
     List       *list;
 } GroupClause;

@@ -12993,6 +12994,7 @@ simple_select:
                     n->whereClause = $6;
                     n->groupClause = ($7)->list;
                     n->groupDistinct = ($7)->distinct;
+                    n->groupAll = ($7)->all;
                     n->havingClause = $8;
                     n->windowClause = $9;
                     $$ = (Node *) n;
@@ -13010,6 +13012,7 @@ simple_select:
                     n->whereClause = $6;
                     n->groupClause = ($7)->list;
                     n->groupDistinct = ($7)->distinct;
+                    n->groupAll = ($7)->all;
                     n->havingClause = $8;
                     n->windowClause = $9;
                     $$ = (Node *) n;
@@ -13507,14 +13510,24 @@ group_clause:
                     GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));

                     n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
+                    n->all = false;
                     n->list = $4;
                     $$ = n;
                 }
+            | GROUP_P BY ALL
+                {
+                    GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
+                    n->distinct = false;
+                    n->all = true;
+                    n->list = NIL;
+                    $$ = n;
+                }
             | /*EMPTY*/
                 {
                     GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));

                     n->distinct = false;
+                    n->all = false;
                     n->list = NIL;
                     $$ = n;
                 }
@@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
                     n->whereClause = $4;
                     n->groupClause = ($5)->list;
                     n->groupDistinct = ($5)->distinct;
+                    n->groupAll = ($5)->all;
                     n->havingClause = $6;
                     n->windowClause = $7;
                     n->sortClause = $8;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..ca26f6f61f2 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult,
  * GROUP BY items will be added to the targetlist (as resjunk columns)
  * if not already present, so the targetlist must be passed by reference.
  *
+ * If GROUP BY ALL is specified, the groupClause will be inferred to be all
+ * non-aggregate, non-window expressions in the targetlist.
+ *
  * This is also used for window PARTITION BY clauses (which act almost the
  * same, but are always interpreted per SQL99 rules).
  *
@@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult,
  *
  * pstate        ParseState
  * grouplist    clause to transform
+ * groupByAll    is this a GROUP BY ALL statement?
  * groupingSets reference to list to contain the grouping set tree
  * targetlist    reference to TargetEntry list
  * sortClause    ORDER BY clause (SortGroupClause nodes)
@@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult,
  * useSQL99        SQL99 rather than SQL92 syntax
  */
 List *
-transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
+transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll,
+                     List **groupingSets,
                      List **targetlist, List *sortClause,
                      ParseExprKind exprKind, bool useSQL99)
 {
@@ -2640,6 +2645,63 @@ transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
     bool        hasGroupingSets = false;
     Bitmapset  *seen_local = NULL;

+    /* Handle GROUP BY ALL */
+    if (groupByAll)
+    {
+        /* There cannot have been any explicit grouplist items */
+        Assert(grouplist == NIL);
+
+        /* Iterate over targets, adding acceptable ones to the result list */
+        foreach_ptr(TargetEntry, tle, *targetlist)
+        {
+            /* Ignore junk TLEs */
+            if (tle->resjunk)
+                continue;
+
+            /*
+             * TLEs containing aggregates are not okay to add to GROUP BY
+             * (compare checkTargetlistEntrySQL92).  But the SQL standard
+             * directs us to skip them, so it's fine.
+             */
+            if (pstate->p_hasAggs &&
+                contain_aggs_of_level((Node *) tle->expr, 0))
+                continue;
+
+            /*
+             * Likewise, TLEs containing window functions are not okay to add
+             * to GROUP BY.  At this writing, the SQL standard is silent on
+             * what to do with them, but by analogy to aggregates we'll just
+             * skip them.
+             */
+            if (pstate->p_hasWindowFuncs &&
+                contain_windowfuncs((Node *) tle->expr))
+                continue;
+
+            /*
+             * Otherwise, add the TLE to the result using default sort/group
+             * semantics.  We specify the parse location as the TLE's
+             * location, despite the comment for addTargetToGroupList
+             * discouraging that.  The only other thing we could point to is
+             * the ALL keyword, which seems unhelpful when there are multiple
+             * TLEs.
+             */
+            result = addTargetToGroupList(pstate, tle,
+                                          result, *targetlist,
+                                          exprLocation((Node *) tle->expr));
+        }
+
+        /* If we found any acceptable targets, we're done */
+        if (result != NIL)
+            return result;
+
+        /*
+         * Otherwise, the SQL standard says to treat it like "GROUP BY ()".
+         * Build a representation of that, and let the rest of this function
+         * handle it.
+         */
+        grouplist = list_make1(makeGroupingSet(GROUPING_SET_EMPTY, NIL, -1));
+    }
+
     /*
      * Recursively flatten implicit RowExprs. (Technically this is only needed
      * for GROUP BY, per the syntax rules for grouping sets, but we do it
@@ -2818,6 +2880,7 @@ transformWindowDefinitions(ParseState *pstate,
                                           true /* force SQL99 rules */ );
         partitionClause = transformGroupClause(pstate,
                                                windef->partitionClause,
+                                               false /* not GROUP BY ALL */ ,
                                                NULL,
                                                targetlist,
                                                orderClause,
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index defcdaa8b34..d0c6d3b55bd 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context *context)
         save_ingroupby = context->inGroupBy;
         context->inGroupBy = true;

-        if (query->groupingSets == NIL)
+        if (query->groupAll)
+            appendStringInfoString(buf, "ALL");
+        else if (query->groupingSets == NIL)
         {
             sep = "";
             foreach(l, query->groupClause)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f1706df58fd..aa73f7d76dd 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -214,7 +214,8 @@ typedef struct Query
     List       *returningList;    /* return-values list (of TargetEntry) */

     List       *groupClause;    /* a list of SortGroupClause's */
-    bool        groupDistinct;    /* is the group by clause distinct? */
+    bool        groupDistinct;    /* was GROUP BY DISTINCT used? */
+    bool        groupAll;        /* was GROUP BY ALL used? */

     List       *groupingSets;    /* a list of GroupingSet's if present */

@@ -2192,6 +2193,7 @@ typedef struct SelectStmt
     Node       *whereClause;    /* WHERE qualification */
     List       *groupClause;    /* GROUP BY clauses */
     bool        groupDistinct;    /* Is this GROUP BY DISTINCT? */
+    bool        groupAll;        /* Is this GROUP BY ALL? */
     Node       *havingClause;    /* HAVING conditional-expression */
     List       *windowClause;    /* WINDOW window_name AS (...), ... */

diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 3e9894926de..ede3903d1dd 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause,
                                   ParseExprKind exprKind, const char *constructName,
                                   LimitOption limitOption);
 extern List *transformGroupClause(ParseState *pstate, List *grouplist,
+                                  bool groupByAll,
                                   List **groupingSets,
                                   List **targetlist, List *sortClause,
                                   ParseExprKind exprKind, bool useSQL99);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 1f24f6ffd1f..2cdb7562846 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1557,6 +1557,119 @@ drop table t2;
 drop table t3;
 drop table p_t1;
 --
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+--
+CREATE TEMP TABLE t1 (
+  a int,
+  b int,
+  c int
+);
+-- basic example
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+      QUERY PLAN
+----------------------
+ HashAggregate
+   Group Key: b
+   ->  Seq Scan on t1
+(3 rows)
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+      QUERY PLAN
+----------------------
+ HashAggregate
+   Group Key: a, b
+   ->  Seq Scan on t1
+(3 rows)
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+      QUERY PLAN
+----------------------
+ HashAggregate
+   Group Key: (a + b)
+   ->  Seq Scan on t1
+(3 rows)
+
+-- check we detect a non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+      QUERY PLAN
+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- including grouped column is okay
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+      QUERY PLAN
+----------------------
+ HashAggregate
+   Group Key: a
+   ->  Seq Scan on t1
+(3 rows)
+
+-- including non-grouped column, not so much
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+ERROR:  column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function
+LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL...
+                                               ^
+-- all aggregates, should reduce to GROUP BY ()
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+      QUERY PLAN
+----------------------
+ Aggregate
+   Group Key: ()
+   ->  Seq Scan on t1
+(3 rows)
+
+-- likewise with empty target list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+      QUERY PLAN
+-----------------------
+ Result
+   Replaces: Aggregate
+(2 rows)
+
+-- window functions are not to be included in GROUP BY, either
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1 GROUP BY ALL;
+            QUERY PLAN
+----------------------------------
+ WindowAgg
+   Window: w1 AS (PARTITION BY a)
+   ->  Sort
+         Sort Key: a
+         ->  HashAggregate
+               Group Key: a
+               ->  Seq Scan on t1
+(7 rows)
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+      QUERY PLAN
+----------------------
+ HashAggregate
+   Group Key: a, b, c
+   ->  Seq Scan on t1
+(3 rows)
+
+-- verify deparsing of GROUP BY ALL
+CREATE TEMP VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+    pg_get_viewdef
+-----------------------
+  SELECT b,           +
+     count(*) AS count+
+    FROM t1           +
+   GROUP BY ALL;
+(1 row)
+
+DROP VIEW v1;
+DROP TABLE t1;
+--
 -- Test GROUP BY matching of join columns that are type-coerced due to USING
 --
 create temp table t1(f1 int, f2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 62540b1ffa4..ec41da493ad 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -549,6 +549,56 @@ drop table t2;
 drop table t3;
 drop table p_t1;

+--
+-- Test GROUP BY ALL
+--
+-- We don't care about the data here, just the proper transformation of the
+-- GROUP BY clause, so test some queries and verify the EXPLAIN plans.
+--
+
+CREATE TEMP TABLE t1 (
+  a int,
+  b int,
+  c int
+);
+
+-- basic example
+EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+
+-- multiple columns, non-consecutive order
+EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL;
+
+-- multi columns, no aggregate
+EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL;
+
+-- check we detect a non-top-level aggregate
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL;
+
+-- including grouped column is okay
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL;
+
+-- including non-grouped column, not so much
+EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL;
+
+-- all aggregates, should reduce to GROUP BY ()
+EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL;
+
+-- likewise with empty target list
+EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL;
+
+-- window functions are not to be included in GROUP BY, either
+EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1 GROUP BY ALL;
+
+-- all cols
+EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL;
+
+-- verify deparsing of GROUP BY ALL
+CREATE TEMP VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL;
+SELECT pg_get_viewdef('v1'::regclass);
+
+DROP VIEW v1;
+DROP TABLE t1;
+
 --
 -- Test GROUP BY matching of join columns that are type-coerced due to USING
 --
--
2.43.7


pgsql-hackers by date:

Previous
From: Amit Kapila
Date:
Subject: Re: Proposal: Conflict log history table for Logical Replication