From 9c2c1c6310150e9ac12f16259e16a07616a1f479 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= Date: Wed, 6 May 2020 16:32:28 +0800 Subject: [PATCH v9 5/6] Treat the input as sorted if the GROUP BY clause is already unique. In the previous commit, the Group node is removed entirely when there are no aggregate calls, so this patch only covers the cases where aggregate calls exist. --- src/backend/commands/explain.c | 4 + src/backend/executor/nodeAgg.c | 6 ++ src/backend/optimizer/plan/createplan.c | 3 +- src/backend/optimizer/plan/planner.c | 127 +++++++++++++++-------- src/include/nodes/nodes.h | 3 +- src/include/nodes/pathnodes.h | 1 + src/include/nodes/plannodes.h | 1 + src/test/regress/expected/aggregates.out | 42 ++++++++ src/test/regress/sql/aggregates.sql | 17 +++ 9 files changed, 159 insertions(+), 45 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index a131d15ac0..e122809454 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1930,6 +1930,10 @@ ExplainNode(PlanState *planstate, List *ancestors, show_agg_keys(castNode(AggState, planstate), ancestors, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); show_hashagg_info((AggState *) planstate, es); + if (es->format != EXPLAIN_FORMAT_TEXT || + (es->verbose && ((Agg *) plan)->input_unique)) + ExplainPropertyBool("Input Unique", + ((Agg *) plan)->input_unique, es); if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index a20554ae65..6ecef58b88 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -2139,6 +2139,12 @@ ExecAgg(PlanState *pstate) case AGG_SORTED: result = agg_retrieve_direct(node); break; + case AGG_UNIQUE: + /* AGG_UNIQUE is translated to AGG_SORTED, Handle it here + * to make compiler quiet. 
+ */ + Assert(false); + break; } if (!TupIsNull(result)) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index eb9543f6ad..e6fe2d249b 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -6373,7 +6373,7 @@ make_agg(List *tlist, List *qual, /* Reduce to long, but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); - node->aggstrategy = aggstrategy; + node->aggstrategy = aggstrategy == AGG_UNIQUE ? AGG_SORTED : aggstrategy; node->aggsplit = aggsplit; node->numCols = numGroupCols; node->grpColIdx = grpColIdx; @@ -6384,6 +6384,7 @@ make_agg(List *tlist, List *qual, node->aggParams = NULL; /* SS_finalize_plan() will fill this */ node->groupingSets = groupingSets; node->chain = chain; + node->input_unique = aggstrategy == AGG_UNIQUE; plan->qual = qual; plan->targetlist = tlist; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 999c1250c4..9d9cbdd569 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3852,51 +3852,61 @@ create_grouping_paths(PlannerInfo *root, int flags = 0; GroupPathExtraData extra; - /* - * Determine whether it's possible to perform sort-based - * implementations of grouping. (Note that if groupClause is empty, - * grouping_is_sortable() is trivially true, and all the - * pathkeys_contained_in() tests will succeed too, so that we'll - * consider every surviving input path.) - * - * If we have grouping sets, we might be able to sort some but not all - * of them; in this case, we need can_sort to be true as long as we - * must consider any sorted-input plan. 
- */ - if ((gd && gd->rollups != NIL) - || grouping_is_sortable(parse->groupClause)) - flags |= GROUPING_CAN_USE_SORT; + if (group_unique_input) + { + /* In this case we don't need to set other flags */ + Assert(parse->groupClause != NIL); + Assert(gd == NULL); + flags |= GROUPING_INPUT_UNIQUE; + } + else + { + /* + * Determine whether it's possible to perform sort-based + * implementations of grouping. (Note that if groupClause is empty, + * grouping_is_sortable() is trivially true, and all the + * pathkeys_contained_in() tests will succeed too, so that we'll + * consider every surviving input path.) + * + * If we have grouping sets, we might be able to sort some but not all + * of them; in this case, we need can_sort to be true as long as we + * must consider any sorted-input plan. + */ + if ((gd && gd->rollups != NIL) + || grouping_is_sortable(parse->groupClause)) + flags |= GROUPING_CAN_USE_SORT; - /* - * Determine whether we should consider hash-based implementations of - * grouping. - * - * Hashed aggregation only applies if we're grouping. If we have - * grouping sets, some groups might be hashable but others not; in - * this case we set can_hash true as long as there is nothing globally - * preventing us from hashing (and we should therefore consider plans - * with hashes). - * - * Executor doesn't support hashed aggregation with DISTINCT or ORDER - * BY aggregates. (Doing so would imply storing *all* the input - * values in the hash table, and/or running many sorts in parallel, - * either of which seems like a certain loser.) We similarly don't - * support ordered-set aggregates in hashed aggregation, but that case - * is also included in the numOrderedAggs count. - * - * Note: grouping_is_hashable() is much more expensive to check than - * the other gating conditions, so we want to do it last. - */ - if ((parse->groupClause != NIL && - agg_costs->numOrderedAggs == 0 && - (gd ? 
gd->any_hashable : grouping_is_hashable(parse->groupClause)))) - flags |= GROUPING_CAN_USE_HASH; + /* + * Determine whether we should consider hash-based implementations of + * grouping. + * + * Hashed aggregation only applies if we're grouping. If we have + * grouping sets, some groups might be hashable but others not; in + * this case we set can_hash true as long as there is nothing globally + * preventing us from hashing (and we should therefore consider plans + * with hashes). + * + * Executor doesn't support hashed aggregation with DISTINCT or ORDER + * BY aggregates. (Doing so would imply storing *all* the input + * values in the hash table, and/or running many sorts in parallel, + * either of which seems like a certain loser.) We similarly don't + * support ordered-set aggregates in hashed aggregation, but that case + * is also included in the numOrderedAggs count. + * + * Note: grouping_is_hashable() is much more expensive to check than + * the other gating conditions, so we want to do it last. + */ + if ((parse->groupClause != NIL && + agg_costs->numOrderedAggs == 0 && + (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)))) + flags |= GROUPING_CAN_USE_HASH; - /* - * Determine whether partial aggregation is possible. - */ - if (can_partial_agg(root, agg_costs)) - flags |= GROUPING_CAN_PARTIAL_AGG; + /* + * Determine whether partial aggregation is possible. 
+ */ + if (can_partial_agg(root, agg_costs)) + flags |= GROUPING_CAN_PARTIAL_AGG; + } extra.flags = flags; extra.target_parallel_safe = target_parallel_safe; @@ -6522,9 +6532,40 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, ListCell *lc; bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; + bool group_input_unique = (extra->flags & GROUPING_INPUT_UNIQUE) != 0; List *havingQual = (List *) extra->havingQual; AggClauseCosts *agg_final_costs = &extra->agg_final_costs; + if (group_input_unique) + { + Path *path = input_rel->cheapest_total_path; + add_path(grouped_rel, (Path *) create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + AGG_UNIQUE, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + + if (path != input_rel->cheapest_startup_path) + { + path = input_rel->cheapest_startup_path; + add_path(grouped_rel, (Path *) create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + AGG_UNIQUE, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + return; + } if (can_sort) { /* diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 41110ed888..010266ed4b 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -761,7 +761,8 @@ typedef enum AggStrategy AGG_PLAIN, /* simple agg across all input rows */ AGG_SORTED, /* grouped agg, input must be sorted */ AGG_HASHED, /* grouped agg, use internal hashtable */ - AGG_MIXED /* grouped agg, hash and sort both used */ + AGG_MIXED, /* grouped agg, hash and sort both used */ + AGG_UNIQUE /* grouped agg, the group clause is unique */ } AggStrategy; /* diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 02e4458bef..b4bb4e6267 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2472,6 +2472,7 @@ typedef struct JoinPathExtraData #define 
GROUPING_CAN_USE_SORT 0x0001 #define GROUPING_CAN_USE_HASH 0x0002 #define GROUPING_CAN_PARTIAL_AGG 0x0004 +#define GROUPING_INPUT_UNIQUE 0x0008 /* * What kind of partitionwise aggregation is in use? diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 83e01074ed..be31020a40 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -828,6 +828,7 @@ typedef struct Agg /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ List *groupingSets; /* grouping sets to use */ List *chain; /* chained Agg/Sort nodes */ + bool input_unique; /* The input is unique already */ } Agg; /* ---------------- diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 87fef9f417..582b55b1cf 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -2699,6 +2699,46 @@ set work_mem to default; ----+----+---- (0 rows) +create table agg_unique_1(pk int primary key, b int); +create table agg_unique_2(a int, unsortable_col xid); +insert into agg_unique_2 values(1, '1'), (2, '2'), (2, '1'); +explain (costs off, verbose) select pk, sum(b) from agg_unique_1 +group by pk; + QUERY PLAN +--------------------------------------- + GroupAggregate + Output: pk, sum(b) + Group Key: agg_unique_1.pk + Input Unique: true + -> Seq Scan on public.agg_unique_1 + Output: pk, b +(6 rows) + +explain (costs off, verbose) select unsortable_col, count(*) +from (select distinct unsortable_col from agg_unique_2) t +group by unsortable_col; + QUERY PLAN +--------------------------------------------------- + GroupAggregate + Output: agg_unique_2.unsortable_col, count(*) + Group Key: agg_unique_2.unsortable_col + Input Unique: true + -> HashAggregate + Output: agg_unique_2.unsortable_col + Group Key: agg_unique_2.unsortable_col + -> Seq Scan on public.agg_unique_2 + Output: agg_unique_2.unsortable_col +(9 rows) + +select unsortable_col, count(*) +from (select 
distinct unsortable_col from agg_unique_2) t +group by unsortable_col; + unsortable_col | count +----------------+------- + 2 | 1 + 1 | 1 +(2 rows) + drop table agg_group_1; drop table agg_group_2; drop table agg_group_3; @@ -2707,3 +2747,5 @@ drop table agg_hash_1; drop table agg_hash_2; drop table agg_hash_3; drop table agg_hash_4; +drop table agg_unique_1; +drop table agg_unique_2; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index ad025206fb..439eec18ac 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1202,6 +1202,21 @@ set work_mem to default; union all (select * from agg_group_4 except select * from agg_hash_4); +create table agg_unique_1(pk int primary key, b int); +create table agg_unique_2(a int, unsortable_col xid); +insert into agg_unique_2 values(1, '1'), (2, '2'), (2, '1'); + +explain (costs off, verbose) select pk, sum(b) from agg_unique_1 +group by pk; + +explain (costs off, verbose) select unsortable_col, count(*) +from (select distinct unsortable_col from agg_unique_2) t +group by unsortable_col; + +select unsortable_col, count(*) +from (select distinct unsortable_col from agg_unique_2) t +group by unsortable_col; + drop table agg_group_1; drop table agg_group_2; drop table agg_group_3; @@ -1210,3 +1225,5 @@ drop table agg_hash_1; drop table agg_hash_2; drop table agg_hash_3; drop table agg_hash_4; +drop table agg_unique_1; +drop table agg_unique_2; -- 2.21.0