From ecb14a529bc91a2a806e00f93be6402fef52b879 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=80=E6=8C=83?=
Date: Wed, 6 May 2020 16:32:28 +0800
Subject: [PATCH v8 5/7] If the group by clause is unique and we have an
 aggregation function, we treat the input as sorted without an explicit sort
 since each group has only one row.

---
 src/backend/commands/explain.c           |   4 +
 src/backend/executor/nodeAgg.c           |   6 ++
 src/backend/optimizer/plan/createplan.c  |   3 +-
 src/backend/optimizer/plan/planner.c     | 127 +++++++++++++++--------
 src/include/nodes/nodes.h                |   3 +-
 src/include/nodes/pathnodes.h            |   1 +
 src/include/nodes/plannodes.h            |   1 +
 src/test/regress/expected/aggregates.out |  42 ++++++++
 src/test/regress/sql/aggregates.sql      |  17 +++
 9 files changed, 159 insertions(+), 45 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 5695802081..a7b38cfc08 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1929,6 +1929,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			show_agg_keys(castNode(AggState, planstate), ancestors, es);
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
 			show_hashagg_info((AggState *) planstate, es);
+			if (es->format != EXPLAIN_FORMAT_TEXT ||
+				(es->verbose && ((Agg *) plan)->input_unique))
+				ExplainPropertyBool("Input Unique",
+								   ((Agg *) plan)->input_unique, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 9f4229de60..2d5493c744 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -2145,6 +2145,12 @@ ExecAgg(PlanState *pstate)
 		case AGG_SORTED:
 			result = agg_retrieve_direct(node);
 			break;
+		case AGG_UNIQUE:
+			/* AGG_UNIQUE is translated to AGG_SORTED; handle it here
+			 * to keep the compiler quiet.
+ */ + Assert(false); + break; } if (!TupIsNull(result)) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9941dfe65e..0049d22227 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -6353,7 +6353,7 @@ make_agg(List *tlist, List *qual, /* Reduce to long, but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); - node->aggstrategy = aggstrategy; + node->aggstrategy = aggstrategy == AGG_UNIQUE ? AGG_SORTED : aggstrategy; node->aggsplit = aggsplit; node->numCols = numGroupCols; node->grpColIdx = grpColIdx; @@ -6364,6 +6364,7 @@ make_agg(List *tlist, List *qual, node->aggParams = NULL; /* SS_finalize_plan() will fill this */ node->groupingSets = groupingSets; node->chain = chain; + node->input_unique = aggstrategy == AGG_UNIQUE; plan->qual = qual; plan->targetlist = tlist; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 74dcd0ff3d..1e72411d51 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3852,51 +3852,61 @@ create_grouping_paths(PlannerInfo *root, int flags = 0; GroupPathExtraData extra; - /* - * Determine whether it's possible to perform sort-based - * implementations of grouping. (Note that if groupClause is empty, - * grouping_is_sortable() is trivially true, and all the - * pathkeys_contained_in() tests will succeed too, so that we'll - * consider every surviving input path.) - * - * If we have grouping sets, we might be able to sort some but not all - * of them; in this case, we need can_sort to be true as long as we - * must consider any sorted-input plan. 
- */ - if ((gd && gd->rollups != NIL) - || grouping_is_sortable(parse->groupClause)) - flags |= GROUPING_CAN_USE_SORT; + if (group_unique_input) + { + /* In this case we don't need to set other flags */ + Assert(parse->groupClause != NIL); + Assert(gd == NULL); + flags |= GROUPING_INPUT_UNIQUE; + } + else + { + /* + * Determine whether it's possible to perform sort-based + * implementations of grouping. (Note that if groupClause is empty, + * grouping_is_sortable() is trivially true, and all the + * pathkeys_contained_in() tests will succeed too, so that we'll + * consider every surviving input path.) + * + * If we have grouping sets, we might be able to sort some but not all + * of them; in this case, we need can_sort to be true as long as we + * must consider any sorted-input plan. + */ + if ((gd && gd->rollups != NIL) + || grouping_is_sortable(parse->groupClause)) + flags |= GROUPING_CAN_USE_SORT; - /* - * Determine whether we should consider hash-based implementations of - * grouping. - * - * Hashed aggregation only applies if we're grouping. If we have - * grouping sets, some groups might be hashable but others not; in - * this case we set can_hash true as long as there is nothing globally - * preventing us from hashing (and we should therefore consider plans - * with hashes). - * - * Executor doesn't support hashed aggregation with DISTINCT or ORDER - * BY aggregates. (Doing so would imply storing *all* the input - * values in the hash table, and/or running many sorts in parallel, - * either of which seems like a certain loser.) We similarly don't - * support ordered-set aggregates in hashed aggregation, but that case - * is also included in the numOrderedAggs count. - * - * Note: grouping_is_hashable() is much more expensive to check than - * the other gating conditions, so we want to do it last. - */ - if ((parse->groupClause != NIL && - agg_costs->numOrderedAggs == 0 && - (gd ? 
gd->any_hashable : grouping_is_hashable(parse->groupClause)))) - flags |= GROUPING_CAN_USE_HASH; + /* + * Determine whether we should consider hash-based implementations of + * grouping. + * + * Hashed aggregation only applies if we're grouping. If we have + * grouping sets, some groups might be hashable but others not; in + * this case we set can_hash true as long as there is nothing globally + * preventing us from hashing (and we should therefore consider plans + * with hashes). + * + * Executor doesn't support hashed aggregation with DISTINCT or ORDER + * BY aggregates. (Doing so would imply storing *all* the input + * values in the hash table, and/or running many sorts in parallel, + * either of which seems like a certain loser.) We similarly don't + * support ordered-set aggregates in hashed aggregation, but that case + * is also included in the numOrderedAggs count. + * + * Note: grouping_is_hashable() is much more expensive to check than + * the other gating conditions, so we want to do it last. + */ + if ((parse->groupClause != NIL && + agg_costs->numOrderedAggs == 0 && + (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)))) + flags |= GROUPING_CAN_USE_HASH; - /* - * Determine whether partial aggregation is possible. - */ - if (can_partial_agg(root, agg_costs)) - flags |= GROUPING_CAN_PARTIAL_AGG; + /* + * Determine whether partial aggregation is possible. 
+ */ + if (can_partial_agg(root, agg_costs)) + flags |= GROUPING_CAN_PARTIAL_AGG; + } extra.flags = flags; extra.target_parallel_safe = target_parallel_safe; @@ -6524,9 +6534,40 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, ListCell *lc; bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; + bool group_input_unique = (extra->flags & GROUPING_INPUT_UNIQUE) != 0; List *havingQual = (List *) extra->havingQual; AggClauseCosts *agg_final_costs = &extra->agg_final_costs; + if (group_input_unique) + { + Path *path = input_rel->cheapest_total_path; + add_path(grouped_rel, (Path *) create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + AGG_UNIQUE, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + + if (path != input_rel->cheapest_startup_path) + { + path = input_rel->cheapest_startup_path; + add_path(grouped_rel, (Path *) create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + AGG_UNIQUE, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + return; + } if (can_sort) { /* diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 41110ed888..010266ed4b 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -761,7 +761,8 @@ typedef enum AggStrategy AGG_PLAIN, /* simple agg across all input rows */ AGG_SORTED, /* grouped agg, input must be sorted */ AGG_HASHED, /* grouped agg, use internal hashtable */ - AGG_MIXED /* grouped agg, hash and sort both used */ + AGG_MIXED, /* grouped agg, hash and sort both used */ + AGG_UNIQUE /* grouped agg, the group clause is unique */ } AggStrategy; /* diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 575353d86c..9f0cfaf094 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2475,6 +2475,7 @@ typedef struct JoinPathExtraData #define 
GROUPING_CAN_USE_SORT 0x0001 #define GROUPING_CAN_USE_HASH 0x0002 #define GROUPING_CAN_PARTIAL_AGG 0x0004 +#define GROUPING_INPUT_UNIQUE 0x0008 /* * What kind of partitionwise aggregation is in use? diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 55f363f70c..e8300d9f37 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -828,6 +828,7 @@ typedef struct Agg /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ List *groupingSets; /* grouping sets to use */ List *chain; /* chained Agg/Sort nodes */ + bool input_unique; /* The input is unique already */ } Agg; /* ---------------- diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 42bd180895..65912142f8 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -2544,6 +2544,46 @@ set work_mem to default; ----+----+---- (0 rows) +create table agg_unique_1(pk int primary key, b int); +create table agg_unique_2(a int, unsortable_col xid); +insert into agg_unique_2 values(1, '1'), (2, '2'), (2, '1'); +explain (costs off, verbose) select pk, sum(b) from agg_unique_1 +group by pk; + QUERY PLAN +--------------------------------------- + GroupAggregate + Output: pk, sum(b) + Group Key: agg_unique_1.pk + Input Unique: true + -> Seq Scan on public.agg_unique_1 + Output: pk, b +(6 rows) + +explain (costs off, verbose) select unsortable_col, count(*) +from (select distinct unsortable_col from agg_unique_2) t +group by unsortable_col; + QUERY PLAN +------------------------------------------------------------------- + GroupAggregate + Output: agg_unique_2.unsortable_col, count(*) + Group Key: agg_unique_2.unsortable_col + Input Unique: true + -> HashAggregate + Output: agg_unique_2.unsortable_col + Group Key: agg_unique_2.unsortable_col + -> Seq Scan on public.agg_unique_2 + Output: agg_unique_2.a, agg_unique_2.unsortable_col +(9 rows) + +select 
unsortable_col, count(*) +from (select distinct unsortable_col from agg_unique_2) t +group by unsortable_col; + unsortable_col | count +----------------+------- + 2 | 1 + 1 | 1 +(2 rows) + drop table agg_group_1; drop table agg_group_2; drop table agg_group_3; @@ -2552,3 +2592,5 @@ drop table agg_hash_1; drop table agg_hash_2; drop table agg_hash_3; drop table agg_hash_4; +drop table agg_unique_1; +drop table agg_unique_2; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 3446c3e9fd..3dc40b13d5 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1159,6 +1159,21 @@ set work_mem to default; union all (select * from agg_group_4 except select * from agg_hash_4); +create table agg_unique_1(pk int primary key, b int); +create table agg_unique_2(a int, unsortable_col xid); +insert into agg_unique_2 values(1, '1'), (2, '2'), (2, '1'); + +explain (costs off, verbose) select pk, sum(b) from agg_unique_1 +group by pk; + +explain (costs off, verbose) select unsortable_col, count(*) +from (select distinct unsortable_col from agg_unique_2) t +group by unsortable_col; + +select unsortable_col, count(*) +from (select distinct unsortable_col from agg_unique_2) t +group by unsortable_col; + drop table agg_group_1; drop table agg_group_2; drop table agg_group_3; @@ -1167,3 +1182,5 @@ drop table agg_hash_1; drop table agg_hash_2; drop table agg_hash_3; drop table agg_hash_4; +drop table agg_unique_1; +drop table agg_unique_2; -- 2.21.0