Re: [PATCH] GROUP BY ALL - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: [PATCH] GROUP BY ALL |
Date | |
Msg-id | 31027.1758919078@sss.pgh.pa.us Whole thread Raw |
In response to | Re: [PATCH] GROUP BY ALL (David Christensen <david@pgguru.net>) |
List | pgsql-hackers |
[ trimming the cc: list because gmail decided my last was spam ] David Christensen <david@pgguru.net> writes: > Great feedback, thanks; attached is v4 which should address these comments. I did a pass of cleanup over this --- mostly cosmetic, but not entirely. Along the way I discovered a pre-existing bug: transformSelectStmt does qry->groupDistinct = stmt->groupDistinct; but transformPLAssignStmt fails to, with the result that GROUP BY DISTINCT would misbehave if used in a plpgsql "expression" context. I'm not hugely surprised that no one has reported that from the field, but nonetheless it's broken. In the attached v5 I just quickly added the missing line and moved on, but we'll need to back-patch that bit. (Maybe we'd be well advised to refactor to reduce the amount of duplicated code that needs to be kept in sync?) I have not attempted to address the definitional issues I just queried Peter about. Other open items: * Documentation * The test cases deserve reconsideration now that we think their charter only goes as far as EXPLAIN'ing the results; some of them seem pretty redundant in this context. regards, tom lane diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index b9763ea1714..fe632d3dabf 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -1440,12 +1440,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->groupClause = transformGroupClause(pstate, stmt->groupClause, + stmt->groupAll, &qry->groupingSets, &qry->targetList, qry->sortClause, EXPR_KIND_GROUP_BY, false /* allow SQL92 rules */ ); qry->groupDistinct = stmt->groupDistinct; + qry->groupAll = stmt->groupAll; if (stmt->distinctClause == NIL) { @@ -2930,11 +2932,14 @@ transformPLAssignStmt(ParseState *pstate, PLAssignStmt *stmt) qry->groupClause = transformGroupClause(pstate, sstmt->groupClause, + sstmt->groupAll, &qry->groupingSets, &qry->targetList, qry->sortClause, EXPR_KIND_GROUP_BY, false /* allow SQL92 rules */ ); + qry->groupDistinct = sstmt->groupDistinct; + qry->groupAll = sstmt->groupAll; if (sstmt->distinctClause == NIL) { diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 9fd48acb1f8..8a0aa1899e2 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -120,6 +120,7 @@ typedef struct SelectLimit typedef struct GroupClause { bool distinct; + bool all; List *list; } GroupClause; @@ -12993,6 +12994,7 @@ simple_select: n->whereClause = $6; n->groupClause = ($7)->list; n->groupDistinct = ($7)->distinct; + n->groupAll = ($7)->all; n->havingClause = $8; n->windowClause = $9; $$ = (Node *) n; @@ -13010,6 +13012,7 @@ simple_select: n->whereClause = $6; n->groupClause = ($7)->list; n->groupDistinct = ($7)->distinct; + n->groupAll = ($7)->all; n->havingClause = $8; n->windowClause = $9; $$ = (Node *) n; @@ -13507,14 +13510,24 @@ group_clause: GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause)); n->distinct = $3 == SET_QUANTIFIER_DISTINCT; + n->all = false; n->list = $4; $$ = n; } + | GROUP_P BY ALL + { + GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause)); + n->distinct = false; + n->all = true; + n->list = NIL; + $$ = n; + } | /*EMPTY*/ { GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause)); n->distinct = false; + n->all = false; n->list = NIL; $$ = n; } @@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list n->whereClause = $4; n->groupClause = ($5)->list; n->groupDistinct = ($5)->distinct; + n->groupAll = ($5)->all; n->havingClause = $6; n->windowClause = $7; n->sortClause = $8; diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 9f20a70ce13..562e3e90e34 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult, * GROUP BY items will be added to the targetlist (as resjunk columns) * if not already present, so the targetlist must be passed by reference. * + * If GROUP BY ALL is specified, the groupClause will be inferred to be all + * non-aggregate expressions in the targetlist. + * * This is also used for window PARTITION BY clauses (which act almost the * same, but are always interpreted per SQL99 rules). * @@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult, * * pstate ParseState * grouplist clause to transform + * groupByAll is this a GROUP BY ALL statement? * groupingSets reference to list to contain the grouping set tree * targetlist reference to TargetEntry list * sortClause ORDER BY clause (SortGroupClause nodes) @@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult, * useSQL99 SQL99 rather than SQL92 syntax */ List * -transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets, +transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll, + List **groupingSets, List **targetlist, List *sortClause, ParseExprKind exprKind, bool useSQL99) { @@ -2640,6 +2645,41 @@ transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets, bool hasGroupingSets = false; Bitmapset *seen_local = NULL; + /* Handle GROUP BY ALL */ + if (groupByAll) + { + /* There cannot have been any explicit list items */ + Assert(grouplist == NIL); + + /* + * Iterate over targets, any non-aggregate gets added as a Target. + * Note that it's not enough to check for a top-level Aggref; we need + * to ensure that any sub-expression here does not include an Aggref + * (for instance an expression such as `sum(col) + 4` should not be + * added as a grouping target). + * + * We specify the parse location as the TLE's location, despite the + * comment for addTargetToGroupList discouraging that. The only other + * thing we could point to is the ALL keyword, which seems unhelpful + * when there are multiple TLEs. + */ + foreach_ptr(TargetEntry, tle, *targetlist) + { + /* Ignore junk TLEs */ + if (tle->resjunk) + continue; + + /* Use contain_aggs_of_level to detect Aggrefs in SubLinks */ + if (!(pstate->p_hasAggs && + contain_aggs_of_level((Node *) tle->expr, 0))) + result = addTargetToGroupList(pstate, tle, + result, *targetlist, + exprLocation((Node *) tle->expr)); + } + + return result; + } + /* * Recursively flatten implicit RowExprs. (Technically this is only needed * for GROUP BY, per the syntax rules for grouping sets, but we do it @@ -2818,6 +2858,7 @@ transformWindowDefinitions(ParseState *pstate, true /* force SQL99 rules */ ); partitionClause = transformGroupClause(pstate, windef->partitionClause, + false /* not GROUP BY ALL */ , NULL, targetlist, orderClause, diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index defcdaa8b34..d0c6d3b55bd 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context *context) save_ingroupby = context->inGroupBy; context->inGroupBy = true; - if (query->groupingSets == NIL) + if (query->groupAll) + appendStringInfoString(buf, "ALL"); + else if (query->groupingSets == NIL) { sep = ""; foreach(l, query->groupClause) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4ed14fc5b78..d379879ae80 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -214,7 +214,8 @@ typedef struct Query List *returningList; /* return-values list (of TargetEntry) */ List *groupClause; /* a list of SortGroupClause's */ - bool groupDistinct; /* is the group by clause distinct? */ + bool groupDistinct; /* was GROUP BY DISTINCT used? */ + bool groupAll; /* was GROUP BY ALL used? */ List *groupingSets; /* a list of GroupingSet's if present */ @@ -2192,6 +2193,7 @@ typedef struct SelectStmt Node *whereClause; /* WHERE qualification */ List *groupClause; /* GROUP BY clauses */ bool groupDistinct; /* Is this GROUP BY DISTINCT? */ + bool groupAll; /* Is this GROUP BY ALL? */ Node *havingClause; /* HAVING conditional-expression */ List *windowClause; /* WINDOW window_name AS (...), ... */ diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index 3e9894926de..ede3903d1dd 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause, ParseExprKind exprKind, const char *constructName, LimitOption limitOption); extern List *transformGroupClause(ParseState *pstate, List *grouplist, + bool groupByAll, List **groupingSets, List **targetlist, List *sortClause, ParseExprKind exprKind, bool useSQL99); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 1f24f6ffd1f..aeeb452ec26 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1557,6 +1557,153 @@ drop table t2; drop table t3; drop table p_t1; -- +-- Test GROUP BY ALL +-- +-- We don't care about the data here, just the proper transformation of the +-- GROUP BY clause, so test some queries and verify the EXPLAIN plans. +CREATE TEMP TABLE t1 ( + a int, + b int, + c int +); +-- basic field check +EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: b + -> Seq Scan on t1 +(3 rows) + +-- throw a null in the values too +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- multiple columns, non-consecutive order +EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on t1 +(3 rows) + +-- multi columns, no aggregate +EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: (a + b) + -> Seq Scan on t1 +(3 rows) + +-- non-top-level aggregate +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- including grouped column +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- oops all aggregates +EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + Aggregate + -> Seq Scan on t1 +(2 rows) + +-- empty column list +EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL; + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) + +-- filter +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- all cols +EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, b, c + -> Seq Scan on t1 +(3 rows) + +EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, b, c + -> Seq Scan on t1 +(3 rows) + +-- expression without including aggregate columns +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL; +ERROR: column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function +LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL... + ^ +-- subquery +EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL; + QUERY PLAN +----------------------------------- + GroupAggregate + InitPlan 1 + -> Limit + -> Seq Scan on t1 t1_1 + -> Seq Scan on t1 +(5 rows) + +-- cte +EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL) +SELECT AVG(a), sum_b FROM a_query GROUP BY ALL; + QUERY PLAN +---------------------------- + HashAggregate + Group Key: sum(t1.b) + -> HashAggregate + Group Key: t1.a + -> Seq Scan on t1 +(5 rows) + +-- verify deparse +CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL; +NOTICE: view "v1" will be a temporary view +SELECT pg_get_viewdef('v1'::regclass); + pg_get_viewdef +----------------------- + SELECT b, + + count(*) AS count+ + FROM t1 + + GROUP BY ALL; +(1 row) + +DROP VIEW v1; +DROP TABLE t1; +-- -- Test GROUP BY matching of join columns that are type-coerced due to USING -- create temp table t1(f1 int, f2 int); diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 62540b1ffa4..6b2516cd9b1 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -549,6 +549,68 @@ drop table t2; drop table t3; drop table p_t1; + +-- +-- Test GROUP BY ALL +-- + +-- We don't care about the data here, just the proper transformation of the +-- GROUP BY clause, so test some queries and verify the EXPLAIN plans. + +CREATE TEMP TABLE t1 ( + a int, + b int, + c int +); + +-- basic field check +EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL; + +-- throw a null in the values too +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FROM t1 GROUP BY ALL; + +-- multiple columns, non-consecutive order +EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL; + +-- multi columns, no aggregate +EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL; + +-- non-top-level aggregate +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL; + +-- including grouped column +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL; + +-- oops all aggregates +EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL; + +-- empty column list +EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL; + +-- filter +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) FILTER(WHERE b = 2) FROM t1 GROUP BY ALL; + +-- all cols +EXPLAIN (COSTS OFF) SELECT * FROM t1 GROUP BY ALL; +EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL; + +-- expression without including aggregate columns +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL; + +-- subquery +EXPLAIN (COSTS OFF) SELECT (SELECT a FROM t1 LIMIT 1), SUM(b) FROM t1 GROUP BY ALL; + +-- cte +EXPLAIN (COSTS OFF) WITH a_query AS (SELECT a, SUM(b) AS sum_b FROM t1 GROUP BY ALL) +SELECT AVG(a), sum_b FROM a_query GROUP BY ALL; + +-- verify deparse +CREATE VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL; +SELECT pg_get_viewdef('v1'::regclass); + +DROP VIEW v1; +DROP TABLE t1; + -- -- Test GROUP BY matching of join columns that are type-coerced due to USING --
pgsql-hackers by date: