Re: Question: test "aggregates" failed in 32-bit machine - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Question: test "aggregates" failed in 32-bit machine |
Date | |
Msg-id | 1224783.1664737819@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Question: test "aggregates" failed in 32-bit machine (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: Question: test "aggregates" failed in 32-bit machine
|
List | pgsql-hackers |
I wrote: > I'm just about to throw up my hands and go for reversion in both branches, As attached. regards, tom lane diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 2e4e82a94f..cc9e39c4a5 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2862,13 +2862,16 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i -- GROUP BY clause in various forms, cardinal, alias and constant expression explain (verbose, costs off) select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------- - Foreign Scan + QUERY PLAN +--------------------------------------------------------------------------------------- + Sort Output: (count(c2)), c2, 5, 7.0, 9 - Relations: Aggregate on (public.ft1) - Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST -(4 rows) + Sort Key: ft1.c2 + -> Foreign Scan + Output: (count(c2)), c2, 5, 7.0, 9 + Relations: Aggregate on (public.ft1) + Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 +(7 rows) select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2; w | x | y | z diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d8848bc774..d750290f13 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -5062,20 +5062,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class=" </listitem> </varlistentry> - <varlistentry id="guc-enable-groupby-reordering" xreflabel="enable_group_by_reordering"> - <term><varname>enable_group_by_reordering</varname> (<type>boolean</type>) - <indexterm> - <primary><varname>enable_group_by_reordering</varname> configuration parameter</primary> - </indexterm> - </term> - <listitem> - <para> - Enables or disables reordering of keys in a <literal>GROUP BY</literal> - clause. The default is <literal>on</literal>. - </para> - </listitem> - </varlistentry> - <varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg"> <term><varname>enable_hashagg</varname> (<type>boolean</type>) <indexterm> diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index f486d42441..5ef29eea69 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1814,327 +1814,6 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) rterm->pathtarget->width); } -/* - * is_fake_var - * Workaround for generate_append_tlist() which generates fake Vars with - * varno == 0, that will cause a fail of estimate_num_group() call - * - * XXX Ummm, why would estimate_num_group fail with this? - */ -static bool -is_fake_var(Expr *expr) -{ - if (IsA(expr, RelabelType)) - expr = (Expr *) ((RelabelType *) expr)->arg; - - return (IsA(expr, Var) && ((Var *) expr)->varno == 0); -} - -/* - * get_width_cost_multiplier - * Returns relative complexity of comparing two values based on its width. - * The idea behind is that the comparison becomes more expensive the longer the - * value is. Return value is in cpu_operator_cost units. - */ -static double -get_width_cost_multiplier(PlannerInfo *root, Expr *expr) -{ - double width = -1.0; /* fake value */ - - if (IsA(expr, RelabelType)) - expr = (Expr *) ((RelabelType *) expr)->arg; - - /* Try to find actual stat in corresponding relation */ - if (IsA(expr, Var)) - { - Var *var = (Var *) expr; - - if (var->varno > 0 && var->varno < root->simple_rel_array_size) - { - RelOptInfo *rel = root->simple_rel_array[var->varno]; - - if (rel != NULL && - var->varattno >= rel->min_attr && - var->varattno <= rel->max_attr) - { - int ndx = var->varattno - rel->min_attr; - - if (rel->attr_widths[ndx] > 0) - width = rel->attr_widths[ndx]; - } - } - } - - /* Didn't find any actual stats, try using type width instead. */ - if (width < 0.0) - { - Node *node = (Node *) expr; - - width = get_typavgwidth(exprType(node), exprTypmod(node)); - } - - /* - * Values are passed as Datum type, so comparisons can't be cheaper than - * comparing a Datum value. - * - * FIXME I find this reasoning questionable. We may pass int2, and - * comparing it is probably a bit cheaper than comparing a bigint. - */ - if (width <= sizeof(Datum)) - return 1.0; - - /* - * We consider the cost of a comparison not to be directly proportional to - * width of the argument, because widths of the arguments could be - * slightly different (we only know the average width for the whole - * column). So we use log16(width) as an estimate. - */ - return 1.0 + 0.125 * LOG2(width / sizeof(Datum)); -} - -/* - * compute_cpu_sort_cost - * compute CPU cost of sort (i.e. in-memory) - * - * The main thing we need to calculate to estimate sort CPU costs is the number - * of calls to the comparator functions. The difficulty is that for multi-column - * sorts there may be different data types involved (for some of which the calls - * may be much more expensive). Furthermore, columns may have a very different - * number of distinct values - the higher the number, the fewer comparisons will - * be needed for the following columns. - * - * The algorithm is incremental - we add pathkeys one by one, and at each step we - * estimate the number of necessary comparisons (based on the number of distinct - * groups in the current pathkey prefix and the new pathkey), and the comparison - * costs (which is data type specific). - * - * Estimation of the number of comparisons is based on ideas from: - * - * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002 - * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf] - * - * In term of that paper, let N - number of tuples, Xi - number of identical - * tuples with value Ki, then the estimate of number of comparisons is: - * - * log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi)) - * - * We assume all Xi the same because now we don't have any estimation of - * group sizes, we have only know the estimate of number of groups (distinct - * values). In that case, formula becomes: - * - * N * log(NumberOfGroups) - * - * For multi-column sorts we need to estimate the number of comparisons for - * each individual column - for example with columns (c1, c2, ..., ck) we - * can estimate that number of comparisons on ck is roughly - * - * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1)) - * - * Let k be a column number, Gk - number of groups defined by k columns, and Fk - * the cost of the comparison is - * - * N * sum( Fk * log(Gk) ) - * - * Note: We also consider column width, not just the comparator cost. - * - * NOTE: some callers currently pass NIL for pathkeys because they - * can't conveniently supply the sort keys. In this case, it will fallback to - * simple comparison cost estimate. - */ -static Cost -compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, - Cost comparison_cost, double tuples, double output_tuples, - bool heapSort) -{ - Cost per_tuple_cost = 0.0; - ListCell *lc; - List *pathkeyExprs = NIL; - double tuplesPerPrevGroup = tuples; - double totalFuncCost = 1.0; - bool has_fake_var = false; - int i = 0; - Oid prev_datatype = InvalidOid; - List *cache_varinfos = NIL; - - /* fallback if pathkeys is unknown */ - if (pathkeys == NIL) - { - /* - * If we'll use a bounded heap-sort keeping just K tuples in memory, - * for a total number of tuple comparisons of N log2 K; but the - * constant factor is a bit higher than for quicksort. Tweak it so - * that the cost curve is continuous at the crossover point. - */ - output_tuples = (heapSort) ? 2.0 * output_tuples : tuples; - per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples); - - /* add cost provided by caller */ - per_tuple_cost += comparison_cost; - - return per_tuple_cost * tuples; - } - - /* - * Computing total cost of sorting takes into account the per-column - * comparison function cost. We try to compute the needed number of - * comparisons per column. - */ - foreach(lc, pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(lc); - EquivalenceMember *em; - double nGroups, - correctedNGroups; - Cost funcCost = 1.0; - - /* - * We believe that equivalence members aren't very different, so, to - * estimate cost we consider just the first member. - */ - em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members); - - if (em->em_datatype != InvalidOid) - { - /* do not lookup funcCost if the data type is the same */ - if (prev_datatype != em->em_datatype) - { - Oid sortop; - QualCost cost; - - sortop = get_opfamily_member(pathkey->pk_opfamily, - em->em_datatype, em->em_datatype, - pathkey->pk_strategy); - - cost.startup = 0; - cost.per_tuple = 0; - add_function_cost(root, get_opcode(sortop), NULL, &cost); - - /* - * add_function_cost returns the product of cpu_operator_cost - * and procost, but we need just procost, co undo that. - */ - funcCost = cost.per_tuple / cpu_operator_cost; - - prev_datatype = em->em_datatype; - } - } - - /* factor in the width of the values in this column */ - funcCost *= get_width_cost_multiplier(root, em->em_expr); - - /* now we have per-key cost, so add to the running total */ - totalFuncCost += funcCost; - - /* remember if we have found a fake Var in pathkeys */ - has_fake_var |= is_fake_var(em->em_expr); - pathkeyExprs = lappend(pathkeyExprs, em->em_expr); - - /* - * We need to calculate the number of comparisons for this column, - * which requires knowing the group size. So we estimate the number of - * groups by calling estimate_num_groups_incremental(), which - * estimates the group size for "new" pathkeys. - * - * Note: estimate_num_groups_incremental does not handle fake Vars, so - * use a default estimate otherwise. - */ - if (!has_fake_var) - nGroups = estimate_num_groups_incremental(root, pathkeyExprs, - tuplesPerPrevGroup, NULL, NULL, - &cache_varinfos, - list_length(pathkeyExprs) - 1); - else if (tuples > 4.0) - - /* - * Use geometric mean as estimation if there are no stats. - * - * We don't use DEFAULT_NUM_DISTINCT here, because that's used for - * a single column, but here we're dealing with multiple columns. - */ - nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys)); - else - nGroups = tuples; - - /* - * Presorted keys are not considered in the cost above, but we still - * do have to compare them in the qsort comparator. So make sure to - * factor in the cost in that case. - */ - if (i >= nPresortedKeys) - { - if (heapSort) - { - /* - * have to keep at least one group, and a multiple of group - * size - */ - correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup); - } - else - /* all groups in the input */ - correctedNGroups = nGroups; - - correctedNGroups = Max(1.0, ceil(correctedNGroups)); - - per_tuple_cost += totalFuncCost * LOG2(correctedNGroups); - } - - i++; - - /* - * Uniform distributions with all groups being of the same size are - * the best case, with nice smooth behavior. Real-world distributions - * tend not to be uniform, though, and we don't have any reliable - * easy-to-use information. As a basic defense against skewed - * distributions, we use a 1.5 factor to make the expected group a bit - * larger, but we need to be careful not to make the group larger than - * in the preceding step. - */ - tuplesPerPrevGroup = Min(tuplesPerPrevGroup, - ceil(1.5 * tuplesPerPrevGroup / nGroups)); - - /* - * Once we get single-row group, it means tuples in the group are - * unique and we can skip all remaining columns. - */ - if (tuplesPerPrevGroup <= 1.0) - break; - } - - list_free(pathkeyExprs); - - /* per_tuple_cost is in cpu_operator_cost units */ - per_tuple_cost *= cpu_operator_cost; - - /* - * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles - * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort - * estimation formula has additional term proportional to number of tuples - * (see Chapter 8.2 and Theorem 4.1). That affects cases with a low number - * of tuples, approximately less than 1e4. We could implement it as an - * additional multiplier under the logarithm, but we use a bit more - * complex formula which takes into account the number of unique tuples - * and it's not clear how to combine the multiplier with the number of - * groups. Estimate it as 10 cpu_operator_cost units. - */ - per_tuple_cost += 10 * cpu_operator_cost; - - per_tuple_cost += comparison_cost; - - return tuples * per_tuple_cost; -} - -/* - * simple wrapper just to estimate best sort path - */ -Cost -cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys, - double tuples) -{ - return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys, - 0, tuples, tuples, false); -} - /* * cost_tuplesort * Determines and returns the cost of sorting a relation using tuplesort, @@ -2151,7 +1830,7 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys, * number of initial runs formed and M is the merge order used by tuplesort.c. * Since the average initial run should be about sort_mem, we have * disk traffic = 2 * relsize * ceil(logM(p / sort_mem)) - * and cpu cost (computed by compute_cpu_sort_cost()). + * cpu = comparison_cost * t * log2(t) * * If the sort is bounded (i.e., only the first k result tuples are needed) * and k tuples can fit into sort_mem, we use a heap method that keeps only @@ -2170,11 +1849,9 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys, * 'comparison_cost' is the extra cost per comparison, if any * 'sort_mem' is the number of kilobytes of work memory allowed for the sort * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound - * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should - * be added by caller later */ static void -cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost, +cost_tuplesort(Cost *startup_cost, Cost *run_cost, double tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples) @@ -2191,6 +1868,9 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_ if (tuples < 2.0) tuples = 2.0; + /* Include the default cost-per-comparison */ + comparison_cost += 2.0 * cpu_operator_cost; + /* Do we have a useful LIMIT? */ if (limit_tuples > 0 && limit_tuples < tuples) { @@ -2214,10 +1894,12 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_ double log_runs; double npageaccesses; - /* CPU costs */ - *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, - comparison_cost, tuples, - tuples, false); + /* + * CPU costs + * + * Assume about N log2 N comparisons + */ + *startup_cost = comparison_cost * tuples * LOG2(tuples); /* Disk costs */ @@ -2233,17 +1915,18 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_ } else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes) { - /* We'll use a bounded heap-sort keeping just K tuples in memory. */ - *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, - comparison_cost, tuples, - output_tuples, true); + /* + * We'll use a bounded heap-sort keeping just K tuples in memory, for + * a total number of tuple comparisons of N log2 K; but the constant + * factor is a bit higher than for quicksort. Tweak it so that the + * cost curve is continuous at the crossover point. + */ + *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples); } else { /* We'll use plain quicksort on all the input tuples */ - *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, - comparison_cost, tuples, - tuples, false); + *startup_cost = comparison_cost * tuples * LOG2(tuples); } /* @@ -2276,8 +1959,8 @@ cost_incremental_sort(Path *path, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples) { - Cost startup_cost, - run_cost, + Cost startup_cost = 0, + run_cost = 0, input_run_cost = input_total_cost - input_startup_cost; double group_tuples, input_groups; @@ -2362,7 +2045,7 @@ cost_incremental_sort(Path *path, * pessimistic about incremental sort performance and increase its average * group size by half. */ - cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost, + cost_tuplesort(&group_startup_cost, &group_run_cost, 1.5 * group_tuples, width, comparison_cost, sort_mem, limit_tuples); @@ -2370,7 +2053,7 @@ cost_incremental_sort(Path *path, * Startup cost of incremental sort is the startup cost of its first group * plus the cost of its input. */ - startup_cost = group_startup_cost + startup_cost += group_startup_cost + input_startup_cost + group_input_run_cost; /* @@ -2379,7 +2062,7 @@ cost_incremental_sort(Path *path, * group, plus the total cost to process the remaining groups, plus the * remaining cost of input. */ - run_cost = group_run_cost + run_cost += group_run_cost + (group_run_cost + group_startup_cost) * (input_groups - 1) + group_input_run_cost * (input_groups - 1); @@ -2419,7 +2102,7 @@ cost_sort(Path *path, PlannerInfo *root, Cost startup_cost; Cost run_cost; - cost_tuplesort(root, pathkeys, &startup_cost, &run_cost, + cost_tuplesort(&startup_cost, &run_cost, tuples, width, comparison_cost, sort_mem, limit_tuples); @@ -2517,7 +2200,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers) * Determines and returns the cost of an Append node. */ void -cost_append(AppendPath *apath, PlannerInfo *root) +cost_append(AppendPath *apath) { ListCell *l; @@ -2585,7 +2268,7 @@ cost_append(AppendPath *apath, PlannerInfo *root) * any child. */ cost_sort(&sort_path, - root, + NULL, /* doesn't currently need root */ pathkeys, subpath->total_cost, subpath->rows, diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 799bdc91d0..f962ff82ad 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -681,18 +681,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, if (opcintype == cur_em->em_datatype && equal(expr, cur_em->em_expr)) - { - /* - * Match! - * - * Copy the sortref if it wasn't set yet. That may happen if - * the ec was constructed from WHERE clause, i.e. it doesn't - * have a target reference at all. - */ - if (cur_ec->ec_sortref == 0 && sortref > 0) - cur_ec->ec_sortref = sortref; - return cur_ec; - } + return cur_ec; /* Match! */ } } diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index e2fdcd3163..a9943cd6e0 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -17,24 +17,17 @@ */ #include "postgres.h" -#include <float.h> - -#include "miscadmin.h" #include "access/stratnum.h" #include "catalog/pg_opfamily.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" -#include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" #include "utils/lsyscache.h" -#include "utils/selfuncs.h" -/* Consider reordering of GROUP BY keys? */ -bool enable_group_by_reordering = true; static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, @@ -363,540 +356,6 @@ pathkeys_contained_in(List *keys1, List *keys2) return false; } -/* - * group_keys_reorder_by_pathkeys - * Reorder GROUP BY keys to match pathkeys of input path. - * - * Function returns new lists (pathkeys and clauses), original GROUP BY lists - * stay untouched. - * - * Returns the number of GROUP BY keys with a matching pathkey. - */ -int -group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, - List **group_clauses) -{ - List *new_group_pathkeys = NIL, - *new_group_clauses = NIL; - ListCell *lc; - int n; - - if (pathkeys == NIL || *group_pathkeys == NIL) - return 0; - - /* - * Walk the pathkeys (determining ordering of the input path) and see if - * there's a matching GROUP BY key. If we find one, we append it to the - * list, and do the same for the clauses. - * - * Once we find the first pathkey without a matching GROUP BY key, the - * rest of the pathkeys are useless and can't be used to evaluate the - * grouping, so we abort the loop and ignore the remaining pathkeys. - * - * XXX Pathkeys are built in a way to allow simply comparing pointers. - */ - foreach(lc, pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(lc); - SortGroupClause *sgc; - - /* abort on first mismatch */ - if (!list_member_ptr(*group_pathkeys, pathkey)) - break; - - new_group_pathkeys = lappend(new_group_pathkeys, pathkey); - - sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, - *group_clauses); - - new_group_clauses = lappend(new_group_clauses, sgc); - } - - /* remember the number of pathkeys with a matching GROUP BY key */ - n = list_length(new_group_pathkeys); - - /* append the remaining group pathkeys (will be treated as not sorted) */ - *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys, - *group_pathkeys); - *group_clauses = list_concat_unique_ptr(new_group_clauses, - *group_clauses); - - return n; -} - -/* - * Used to generate all permutations of a pathkey list. - */ -typedef struct PathkeyMutatorState -{ - List *elemsList; - ListCell **elemCells; - void **elems; - int *positions; - int mutatorNColumns; - int count; -} PathkeyMutatorState; - - -/* - * PathkeyMutatorInit - * Initialize state of the permutation generator. - * - * We want to generate permutations of elements in the "elems" list. We may want - * to skip some number of elements at the beginning (when treating as presorted) - * or at the end (we only permute a limited number of group keys). - * - * The list is decomposed into elements, and we also keep pointers to individual - * cells. This allows us to build the permuted list quickly and cheaply, without - * creating any copies. - */ -static void -PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end) -{ - int i; - int n = end - start; - ListCell *lc; - - memset(state, 0, sizeof(*state)); - - state->mutatorNColumns = n; - - state->elemsList = list_copy(elems); - - state->elems = palloc(sizeof(void *) * n); - state->elemCells = palloc(sizeof(ListCell *) * n); - state->positions = palloc(sizeof(int) * n); - - i = 0; - for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start)) - { - state->elemCells[i] = lc; - state->elems[i] = lfirst(lc); - state->positions[i] = i + 1; - i++; - - if (i >= n) - break; - } -} - -/* Swap two elements of an array. */ -static void -PathkeyMutatorSwap(int *a, int i, int j) -{ - int s = a[i]; - - a[i] = a[j]; - a[j] = s; -} - -/* - * Generate the next permutation of elements. - */ -static bool -PathkeyMutatorNextSet(int *a, int n) -{ - int j, - k, - l, - r; - - j = n - 2; - - while (j >= 0 && a[j] >= a[j + 1]) - j--; - - if (j < 0) - return false; - - k = n - 1; - - while (k >= 0 && a[j] >= a[k]) - k--; - - PathkeyMutatorSwap(a, j, k); - - l = j + 1; - r = n - 1; - - while (l < r) - PathkeyMutatorSwap(a, l++, r--); - - return true; -} - -/* - * PathkeyMutatorNext - * Generate the next permutation of list of elements. - * - * Returns the next permutation (as a list of elements) or NIL if there are no - * more permutations. - */ -static List * -PathkeyMutatorNext(PathkeyMutatorState *state) -{ - int i; - - state->count++; - - /* first permutation is original list */ - if (state->count == 1) - return state->elemsList; - - /* when there are no more permutations, return NIL */ - if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns)) - { - pfree(state->elems); - pfree(state->elemCells); - pfree(state->positions); - - list_free(state->elemsList); - - return NIL; - } - - /* update the list cells to point to the right elements */ - for (i = 0; i < state->mutatorNColumns; i++) - lfirst(state->elemCells[i]) = - (void *) state->elems[state->positions[i] - 1]; - - return state->elemsList; -} - -/* - * Cost of comparing pathkeys. - */ -typedef struct PathkeySortCost -{ - Cost cost; - PathKey *pathkey; -} PathkeySortCost; - -static int -pathkey_sort_cost_comparator(const void *_a, const void *_b) -{ - const PathkeySortCost *a = (PathkeySortCost *) _a; - const PathkeySortCost *b = (PathkeySortCost *) _b; - - if (a->cost < b->cost) - return -1; - else if (a->cost == b->cost) - return 0; - return 1; -} - -/* - * get_cheapest_group_keys_order - * Reorders the group pathkeys / clauses to minimize the comparison cost. - * - * Given the list of pathkeys in '*group_pathkeys', we try to arrange these - * in an order that minimizes the sort costs that will be incurred by the - * GROUP BY. The costs mainly depend on the cost of the sort comparator - * function(s) and the number of distinct values in each column of the GROUP - * BY clause (*group_clauses). Sorting on subsequent columns is only required - * for tiebreak situations where two values sort equally. - * - * In case the input is partially sorted, only the remaining pathkeys are - * considered. 'n_preordered' denotes how many of the leading *group_pathkeys - * the input is presorted by. - * - * Returns true and sets *group_pathkeys and *group_clauses to the newly - * ordered versions of the lists that were passed in via these parameters. - * If no reordering was deemed necessary then we return false, in which case - * the *group_pathkeys and *group_clauses lists are left untouched. The - * original *group_pathkeys and *group_clauses parameter values are never - * destructively modified in place. - */ -static bool -get_cheapest_group_keys_order(PlannerInfo *root, double nrows, - List **group_pathkeys, List **group_clauses, - int n_preordered) -{ - List *new_group_pathkeys = NIL, - *new_group_clauses = NIL, - *var_group_pathkeys; - - ListCell *cell; - PathkeyMutatorState mstate; - double cheapest_sort_cost = DBL_MAX; - - int nFreeKeys; - int nToPermute; - - /* If there are less than 2 unsorted pathkeys, we're done. */ - if (list_length(*group_pathkeys) - n_preordered < 2) - return false; - - /* - * We could exhaustively cost all possible orderings of the pathkeys, but - * for a large number of pathkeys it might be prohibitively expensive. So - * we try to apply simple cheap heuristics first - we sort the pathkeys by - * sort cost (as if the pathkey was sorted independently) and then check - * only the four cheapest pathkeys. The remaining pathkeys are kept - * ordered by cost. - * - * XXX This is a very simple heuristics, but likely to work fine for most - * cases (because the number of GROUP BY clauses tends to be lower than - * 4). But it ignores how the number of distinct values in each pathkey - * affects the following steps. It might be better to use "more expensive" - * pathkey first if it has many distinct values, because it then limits - * the number of comparisons for the remaining pathkeys. But evaluating - * that is likely quite the expensive. - */ - nFreeKeys = list_length(*group_pathkeys) - n_preordered; - nToPermute = 4; - if (nFreeKeys > nToPermute) - { - PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys); - PathkeySortCost *cost = costs; - - /* - * Estimate cost for sorting individual pathkeys skipping the - * pre-ordered pathkeys. - */ - for_each_from(cell, *group_pathkeys, n_preordered) - { - PathKey *pathkey = (PathKey *) lfirst(cell); - List *to_cost = list_make1(pathkey); - - cost->pathkey = pathkey; - cost->cost = cost_sort_estimate(root, to_cost, 0, nrows); - cost++; - - list_free(to_cost); - } - - /* sort the pathkeys by sort cost in ascending order */ - qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator); - - /* - * Rebuild the list of pathkeys - first the preordered ones, then the - * rest ordered by cost. - */ - new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered); - - for (int i = 0; i < nFreeKeys; i++) - new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey); - - pfree(costs); - } - else - { - /* Copy the list, so that we can free the new list by list_free. */ - new_group_pathkeys = list_copy(*group_pathkeys); - nToPermute = nFreeKeys; - } - - Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys)); - - /* - * Generate pathkey lists with permutations of the first nToPermute - * pathkeys. - * - * XXX We simply calculate sort cost for each individual pathkey list, but - * there's room for two dynamic programming optimizations here. Firstly, - * we may pass the current "best" cost to cost_sort_estimate so that it - * can "abort" if the estimated pathkeys list exceeds it. Secondly, it - * could pass the return information about the position when it exceeded - * the cost, and we could skip all permutations with the same prefix. - * - * Imagine we've already found ordering with cost C1, and we're evaluating - * another ordering - cost_sort_estimate() calculates cost by adding the - * pathkeys one by one (more or less), and the cost only grows. If at any - * point it exceeds C1, it can't possibly be "better" so we can discard - * it. But we also know that we can discard all ordering with the same - * prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b) - * then the same thing will happen for any ordering with this prefix. - */ - PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute); - - while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL) - { - Cost cost; - - cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows); - - if (cost < cheapest_sort_cost) - { - list_free(new_group_pathkeys); - new_group_pathkeys = list_copy(var_group_pathkeys); - cheapest_sort_cost = cost; - } - } - - /* Reorder the group clauses according to the reordered pathkeys. */ - foreach(cell, new_group_pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(cell); - - new_group_clauses = lappend(new_group_clauses, - get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, - *group_clauses)); - } - - /* Just append the rest GROUP BY clauses */ - new_group_clauses = list_concat_unique_ptr(new_group_clauses, - *group_clauses); - - *group_pathkeys = new_group_pathkeys; - *group_clauses = new_group_clauses; - - return true; -} - -/* - * get_useful_group_keys_orderings - * Determine which orderings of GROUP BY keys are potentially interesting. - * - * Returns list of PathKeyInfo items, each representing an interesting ordering - * of GROUP BY keys. Each item stores pathkeys and clauses in matching order. - * - * The function considers (and keeps) multiple group by orderings: - * - * - the original ordering, as specified by the GROUP BY clause - * - * - GROUP BY keys reordered to minimize the sort cost - * - * - GROUP BY keys reordered to match path ordering (as much as possible), with - * the tail reordered to minimize the sort cost - * - * - GROUP BY keys to match target ORDER BY clause (as much as possible), with - * the tail reordered to minimize the sort cost - * - * There are other potentially interesting orderings (e.g. it might be best to - * match the first ORDER BY key, order the remaining keys differently and then - * rely on the incremental sort to fix this), but we ignore those for now. To - * make this work we'd have to pretty much generate all possible permutations. - */ -List * -get_useful_group_keys_orderings(PlannerInfo *root, double nrows, - List *path_pathkeys, - List *group_pathkeys, List *group_clauses, - List *aggregate_pathkeys) -{ - Query *parse = root->parse; - List *infos = NIL; - PathKeyInfo *info; - int n_preordered = 0; - - List *pathkeys = group_pathkeys; - List *clauses = group_clauses; - - /* always return at least the original pathkeys/clauses */ - info = makeNode(PathKeyInfo); - if (aggregate_pathkeys != NIL) - info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys); - else - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - - /* - * Should we try generating alternative orderings of the group keys? If - * not, we produce only the order specified in the query, i.e. the - * optimization is effectively disabled. - */ - if (!enable_group_by_reordering) - return infos; - - /* for grouping sets we can't do any reordering */ - if (parse->groupingSets) - return infos; - - /* - * Try reordering pathkeys to minimize the sort cost, ignoring both the - * target ordering (ORDER BY) and ordering of the input path. - */ - if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, - n_preordered)) - { - info = makeNode(PathKeyInfo); - if (aggregate_pathkeys != NIL) - info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys); - else - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - } - - /* - * If the path is sorted in some way, try reordering the group keys to - * match as much of the ordering as possible - we get this sort for free - * (mostly). - * - * We must not do this when there are no grouping sets, because those use - * more complex logic to decide the ordering. - * - * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's - * more a complement, because it allows benefiting from incremental sort - * as much as possible. - * - * XXX This does nothing if (n_preordered == 0). We shouldn't create the - * info in this case. - */ - if (path_pathkeys) - { - n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys, - &pathkeys, - &clauses); - - /* reorder the tail to minimize sort cost */ - get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, - n_preordered); - - /* - * reorder the tail to minimize sort cost - * - * XXX Ignore the return value - there may be nothing to reorder, in - * which case get_cheapest_group_keys_order returns false. But we - * still want to keep the keys reordered to path_pathkeys. - */ - info = makeNode(PathKeyInfo); - if (aggregate_pathkeys != NIL) - info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys); - else - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - } - - /* - * Try reordering pathkeys to minimize the sort cost (this time consider - * the ORDER BY clause, but only if set debug_group_by_match_order_by). - */ - if (root->sort_pathkeys) - { - n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys, - &pathkeys, - &clauses); - - /* - * reorder the tail to minimize sort cost - * - * XXX Ignore the return value - there may be nothing to reorder, in - * which case get_cheapest_group_keys_order returns false. But we - * still want to keep the keys reordered to sort_pathkeys. - */ - get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, - n_preordered); - - /* keep the group keys reordered to match ordering of input path */ - info = makeNode(PathKeyInfo); - if (aggregate_pathkeys != NIL) - info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys); - else - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - } - - return infos; -} - /* * pathkeys_count_contained_in * Same as pathkeys_contained_in, but also sets length of longest @@ -2456,54 +1915,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return n_common_pathkeys; } -/* - * pathkeys_useful_for_grouping - * Count the number of pathkeys that are useful for grouping (instead of - * explicit sort) - * - * Group pathkeys could be reordered to benefit from the ordering. The - * ordering may not be "complete" and may require incremental sort, but that's - * fine. So we simply count prefix pathkeys with a matching group key, and - * stop once we find the first pathkey without a match. - * - * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b) - * pathkeys are useful for grouping, and we might do incremental sort to get - * path ordered by (a,b,e). - * - * This logic is necessary to retain paths with ordering not matching grouping - * keys directly, without the reordering. - * - * Returns the length of pathkey prefix with matching group keys. - */ -static int -pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) -{ - ListCell *key; - int n = 0; - - /* no special ordering requested for grouping */ - if (root->group_pathkeys == NIL) - return 0; - - /* unordered path */ - if (pathkeys == NIL) - return 0; - - /* walk the pathkeys and search for matching group key */ - foreach(key, pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(key); - - /* no matching group key, we're done */ - if (!list_member_ptr(root->group_pathkeys, pathkey)) - break; - - n++; - } - - return n; -} - /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. @@ -2518,9 +1929,6 @@ truncate_useless_pathkeys(PlannerInfo *root, nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); - if (nuseful2 > nuseful) - nuseful = nuseful2; - nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; @@ -2556,8 +1964,6 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) { if (rel->joininfo != NIL || rel->has_eclass_joins) return true; /* might be able to use pathkeys for merging */ - if (root->group_pathkeys != NIL) - return true; /* might be able to use pathkeys for grouping */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ return false; /* definitely useless */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 8014d1fd25..9c5836683c 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2758,9 +2758,8 @@ remove_useless_groupby_columns(PlannerInfo *root) * * In principle it might be interesting to consider other orderings of the * GROUP BY elements, which could match the sort ordering of other - * possible plans (eg an indexscan) and thereby reduce cost. However, we - * don't yet have sufficient information to do that here, so that's left until - * later in planning. See get_useful_group_keys_orderings(). + * possible plans (eg an indexscan) and thereby reduce cost. We don't + * bother with that, though. Hashed grouping will frequently win anyway. * * Note: we need no comparable processing of the distinctClause because * the parser already enforced that that matches ORDER BY. @@ -6433,148 +6432,30 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, if (can_sort) { - List *group_pathkeys; - List *orderAggPathkeys; - int numAggPathkeys; - - numAggPathkeys = list_length(root->group_pathkeys) - - root->num_groupby_pathkeys; - - if (numAggPathkeys > 0) - { - group_pathkeys = list_copy_head(root->group_pathkeys, - root->num_groupby_pathkeys); - orderAggPathkeys = list_copy_tail(root->group_pathkeys, - root->num_groupby_pathkeys); - } - else - { - group_pathkeys = root->group_pathkeys; - orderAggPathkeys = NIL; - } - /* * Use any available suitably-sorted path as input, and also consider * sorting the cheapest-total path. */ foreach(lc, input_rel->pathlist) { - ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; + bool is_sorted; + int presorted_keys; - List *pathkey_orderings = NIL; - - List *group_clauses = parse->groupClause; - - /* generate alternative group orderings that might be useful */ - pathkey_orderings = get_useful_group_keys_orderings(root, - path->rows, - path->pathkeys, - group_pathkeys, - group_clauses, - orderAggPathkeys); - - Assert(pathkey_orderings != NIL); + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); - /* process all potentially interesting grouping reorderings */ - foreach(lc2, pathkey_orderings) + if (path == cheapest_path || is_sorted) { - bool is_sorted; - int presorted_keys = 0; - PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - - /* restore the path (we replace it in the loop) */ - path = path_original; - - is_sorted = pathkeys_count_contained_in(info->pathkeys, - path->pathkeys, - &presorted_keys); - - if (path == cheapest_path || is_sorted) - { - /* Sort the cheapest-total path if it isn't already sorted */ - if (!is_sorted) - path = (Path *) create_sort_path(root, - grouped_rel, - path, - info->pathkeys, - -1.0); - - /* Now decide what to stick atop it */ - if (parse->groupingSets) - { - consider_groupingsets_paths(root, grouped_rel, - path, true, can_hash, - gd, agg_costs, dNumGroups); - } - else if (parse->hasAggs) - { - /* - * We have aggregation, possibly with plain GROUP BY. - * Make an AggPath. - */ - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_SIMPLE, - info->clauses, - havingQual, - agg_costs, - dNumGroups)); - } - else if (group_clauses) - { - /* - * We have GROUP BY without aggregation or grouping - * sets. Make a GroupPath. - */ - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - info->clauses, - havingQual, - dNumGroups)); - } - else - { - /* Other cases should have been handled above */ - Assert(false); - } - } - - /* - * Now we may consider incremental sort on this path, but only - * when the path is not already sorted and when incremental - * sort is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; - - /* Restore the input path (we might have added Sort on top). */ - path = path_original; - - /* no shared prefix, no point in building incremental sort */ - if (presorted_keys == 0) - continue; - - /* - * We should have already excluded pathkeys of length 1 - * because then presorted_keys > 0 would imply is_sorted was - * true. - */ - Assert(list_length(root->group_pathkeys) != 1); - - path = (Path *) create_incremental_sort_path(root, - grouped_rel, - path, - info->pathkeys, - presorted_keys, - -1.0); + /* Sort the cheapest-total path if it isn't already sorted */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); /* Now decide what to stick atop it */ if (parse->groupingSets) @@ -6594,9 +6475,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel, path, grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_SIMPLE, - info->clauses, + parse->groupClause, havingQual, agg_costs, dNumGroups)); @@ -6611,7 +6492,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, create_group_path(root, grouped_rel, path, - info->clauses, + parse->groupClause, havingQual, dNumGroups)); } @@ -6621,6 +6502,79 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, Assert(false); } } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, no point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 because + * then presorted_keys > 0 would imply is_sorted was true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make an + * AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. Make + * a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } } /* @@ -6631,128 +6585,100 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, { foreach(lc, partially_grouped_rel->pathlist) { - ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; - List *pathkey_orderings = NIL; - List *group_clauses = parse->groupClause; - - /* generate alternative group orderings that might be useful */ - pathkey_orderings = get_useful_group_keys_orderings(root, - path->rows, - path->pathkeys, - group_pathkeys, - group_clauses, - orderAggPathkeys); + bool is_sorted; + int presorted_keys; - Assert(pathkey_orderings != NIL); + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); - /* process all potentially interesting grouping reorderings */ - foreach(lc2, pathkey_orderings) + /* + * Insert a Sort node, if required. But there's no point in + * sorting anything but the cheapest path. + */ + if (!is_sorted) { - bool is_sorted; - int presorted_keys = 0; - PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - - /* restore the path (we replace it in the loop) */ - path = path_original; - - is_sorted = pathkeys_count_contained_in(info->pathkeys, - path->pathkeys, - &presorted_keys); - - /* - * Insert a Sort node, if required. But there's no point - * in sorting anything but the cheapest path. - */ - if (!is_sorted) - { - if (path != partially_grouped_rel->cheapest_total_path) - continue; - path = (Path *) create_sort_path(root, - grouped_rel, - path, - info->pathkeys, - -1.0); - } + if (path != partially_grouped_rel->cheapest_total_path) + continue; + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); + } - if (parse->hasAggs) - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_FINAL_DESERIAL, - info->clauses, - havingQual, - agg_final_costs, - dNumGroups)); - else - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - info->clauses, - havingQual, - dNumGroups)); + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); - /* - * Now we may consider incremental sort on this path, but - * only when the path is not already sorted and when - * incremental sort is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental + * sort is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; - /* - * Restore the input path (we might have added Sort on - * top). - */ - path = path_original; + /* Restore the input path (we might have added Sort on top). */ + path = path_original; - /* - * no shared prefix, not point in building incremental - * sort - */ - if (presorted_keys == 0) - continue; + /* no shared prefix, not point in building incremental sort */ + if (presorted_keys == 0) + continue; - /* - * We should have already excluded pathkeys of length 1 - * because then presorted_keys > 0 would imply is_sorted - * was true. - */ - Assert(list_length(root->group_pathkeys) != 1); + /* + * We should have already excluded pathkeys of length 1 + * because then presorted_keys > 0 would imply is_sorted was + * true. + */ + Assert(list_length(root->group_pathkeys) != 1); - path = (Path *) create_incremental_sort_path(root, - grouped_rel, - path, - info->pathkeys, - presorted_keys, - -1.0); + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); - if (parse->hasAggs) - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_FINAL_DESERIAL, - info->clauses, - havingQual, - agg_final_costs, - dNumGroups)); - else - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - info->clauses, - havingQual, - dNumGroups)); - } + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); } } } @@ -6946,26 +6872,6 @@ create_partial_grouping_paths(PlannerInfo *root, if (can_sort && cheapest_total_path != NULL) { - List *group_pathkeys; - List *orderAggPathkeys; - int numAggPathkeys; - - numAggPathkeys = list_length(root->group_pathkeys) - - root->num_groupby_pathkeys; - - if (numAggPathkeys > 0) - { - group_pathkeys = list_copy_head(root->group_pathkeys, - root->num_groupby_pathkeys); - orderAggPathkeys = list_copy_tail(root->group_pathkeys, - root->num_groupby_pathkeys); - } - else - { - group_pathkeys = root->group_pathkeys; - orderAggPathkeys = NIL; - } - /* This should have been checked previously */ Assert(parse->hasAggs || parse->groupClause); @@ -6975,69 +6881,41 @@ create_partial_grouping_paths(PlannerInfo *root, */ foreach(lc, input_rel->pathlist) { - ListCell *lc2; Path *path = (Path *) lfirst(lc); - Path *path_save = path; - List *pathkey_orderings = NIL; - List *group_clauses = parse->groupClause; - - /* generate alternative group orderings that might be useful */ - pathkey_orderings = get_useful_group_keys_orderings(root, - path->rows, - path->pathkeys, - group_pathkeys, - group_clauses, - orderAggPathkeys); - - Assert(pathkey_orderings != NIL); - - /* process all potentially interesting grouping reorderings */ - foreach(lc2, pathkey_orderings) - { - bool is_sorted; - int presorted_keys = 0; - PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - - /* restore the path (we replace it in the loop) */ - path = path_save; - - is_sorted = pathkeys_count_contained_in(info->pathkeys, - path->pathkeys, - &presorted_keys); + bool is_sorted; - if (path == cheapest_total_path || is_sorted) - { - /* Sort the cheapest partial path, if it isn't already */ - if (!is_sorted) - { - path = (Path *) create_sort_path(root, - partially_grouped_rel, - path, - info->pathkeys, - -1.0); - } + is_sorted = pathkeys_contained_in(root->group_pathkeys, + path->pathkeys); + if (path == cheapest_total_path || is_sorted) + { + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + -1.0); - if (parse->hasAggs) - add_path(partially_grouped_rel, (Path *) - create_agg_path(root, - partially_grouped_rel, - path, - partially_grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_INITIAL_SERIAL, - info->clauses, - NIL, - agg_partial_costs, - dNumPartialGroups)); - else - add_path(partially_grouped_rel, (Path *) - create_group_path(root, - partially_grouped_rel, - path, - info->clauses, - NIL, - dNumPartialGroups)); - } + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialGroups)); } } @@ -7047,8 +6925,6 @@ create_partial_grouping_paths(PlannerInfo *root, * We can also skip the entire loop when we only have a single-item * group_pathkeys because then we can't possibly have a presorted * prefix of the list without having the list be fully sorted. - * - * XXX Shouldn't this also consider the group-key-reordering? */ if (enable_incremental_sort && list_length(root->group_pathkeys) > 1) { @@ -7103,122 +6979,27 @@ create_partial_grouping_paths(PlannerInfo *root, if (can_sort && cheapest_partial_path != NULL) { - List *group_pathkeys; - List *orderAggPathkeys; - int numAggPathkeys; - - numAggPathkeys = list_length(root->group_pathkeys) - - root->num_groupby_pathkeys; - - if (numAggPathkeys > 0) - { - group_pathkeys = list_copy_head(root->group_pathkeys, - root->num_groupby_pathkeys); - orderAggPathkeys = list_copy_tail(root->group_pathkeys, - root->num_groupby_pathkeys); - } - else - { - group_pathkeys = root->group_pathkeys; - orderAggPathkeys = NIL; - } - /* Similar to above logic, but for partial paths. */ foreach(lc, input_rel->partial_pathlist) { - ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; - List *pathkey_orderings = NIL; - List *group_clauses = parse->groupClause; - - /* generate alternative group orderings that might be useful */ - pathkey_orderings = get_useful_group_keys_orderings(root, - path->rows, - path->pathkeys, - group_pathkeys, - group_clauses, - orderAggPathkeys); + bool is_sorted; + int presorted_keys; - Assert(pathkey_orderings != NIL); + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); - /* process all potentially interesting grouping reorderings */ - foreach(lc2, pathkey_orderings) + if (path == cheapest_partial_path || is_sorted) { - bool is_sorted; - int presorted_keys = 0; - PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - - /* restore the path (we replace it in the loop) */ - path = path_original; - - is_sorted = pathkeys_count_contained_in(info->pathkeys, - path->pathkeys, - &presorted_keys); - - if (path == cheapest_partial_path || is_sorted) - { - - /* Sort the cheapest partial path, if it isn't already */ - if (!is_sorted) - { - path = (Path *) create_sort_path(root, - partially_grouped_rel, - path, - info->pathkeys, - -1.0); - } - - if (parse->hasAggs) - add_partial_path(partially_grouped_rel, (Path *) - create_agg_path(root, - partially_grouped_rel, - path, - partially_grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_INITIAL_SERIAL, - info->clauses, - NIL, - agg_partial_costs, - dNumPartialPartialGroups)); - else - add_partial_path(partially_grouped_rel, (Path *) - create_group_path(root, - partially_grouped_rel, - path, - info->clauses, - NIL, - dNumPartialPartialGroups)); - } - - /* - * Now we may consider incremental sort on this path, but only - * when the path is not already sorted and when incremental - * sort is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; - - /* Restore the input path (we might have added Sort on top). */ - path = path_original; - - /* no shared prefix, not point in building incremental sort */ - if (presorted_keys == 0) - continue; - - /* - * We should have already excluded pathkeys of length 1 - * because then presorted_keys > 0 would imply is_sorted was - * true. - */ - Assert(list_length(root->group_pathkeys) != 1); - - path = (Path *) create_incremental_sort_path(root, - partially_grouped_rel, - path, - info->pathkeys, - presorted_keys, - -1.0); + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + -1.0); if (parse->hasAggs) add_partial_path(partially_grouped_rel, (Path *) @@ -7226,9 +7007,9 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel, path, partially_grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_INITIAL_SERIAL, - info->clauses, + parse->groupClause, NIL, agg_partial_costs, dNumPartialPartialGroups)); @@ -7237,10 +7018,59 @@ create_partial_grouping_paths(PlannerInfo *root, create_group_path(root, partially_grouped_rel, path, - info->clauses, + parse->groupClause, NIL, dNumPartialPartialGroups)); } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, not point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 because + * then presorted_keys > 0 would imply is_sorted was true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + else + add_partial_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialPartialGroups)); } } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e10561d843..70f61ae7b1 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1346,12 +1346,12 @@ create_append_path(PlannerInfo *root, pathnode->path.total_cost = child->total_cost; } else - cost_append(pathnode, root); + cost_append(pathnode); /* Must do this last, else cost_append complains */ pathnode->path.pathkeys = child->pathkeys; } else - cost_append(pathnode, root); + cost_append(pathnode); /* If the caller provided a row estimate, override the computed value. */ if (rows >= 0) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1808388397..234fb66580 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3369,28 +3369,11 @@ double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo) { - return estimate_num_groups_incremental(root, groupExprs, - input_rows, pgset, estinfo, - NULL, 0); -} - -/* - * estimate_num_groups_incremental - * An estimate_num_groups variant, optimized for cases that are adding the - * expressions incrementally (e.g. one by one). - */ -double -estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, - double input_rows, - List **pgset, EstimationInfo *estinfo, - List **cache_varinfos, int prevNExprs) -{ - List *varinfos = (cache_varinfos) ? *cache_varinfos : NIL; + List *varinfos = NIL; double srf_multiplier = 1.0; double numdistinct; ListCell *l; - int i, - j; + int i; /* Zero the estinfo output parameter, if non-NULL */ if (estinfo != NULL) @@ -3421,7 +3404,7 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, */ numdistinct = 1.0; - i = j = 0; + i = 0; foreach(l, groupExprs) { Node *groupexpr = (Node *) lfirst(l); @@ -3430,14 +3413,6 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, List *varshere; ListCell *l2; - /* was done on previous call */ - if (cache_varinfos && j++ < prevNExprs) - { - if (pgset) - i++; /* to keep in sync with lines below */ - continue; - } - /* is expression in this grouping set? */ if (pgset && !list_member_int(*pgset, i++)) continue; @@ -3507,11 +3482,7 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, if (varshere == NIL) { if (contain_volatile_functions(groupexpr)) - { - if (cache_varinfos) - *cache_varinfos = varinfos; return input_rows; - } continue; } @@ -3528,9 +3499,6 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, } } - if (cache_varinfos) - *cache_varinfos = varinfos; - /* * If now no Vars, we must have an all-constant or all-boolean GROUP BY * list. diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index fda3f9befb..7ff653b517 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -967,16 +967,6 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, - { - {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD, - gettext_noop("Enables reordering of GROUP BY keys."), - NULL, - GUC_EXPLAIN - }, - &enable_group_by_reordering, - true, - NULL, NULL, NULL - }, { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Enables genetic query optimization."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 2ae76e5cfb..868d21c351 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -388,7 +388,6 @@ #enable_seqscan = on #enable_sort = on #enable_tidscan = on -#enable_group_by_reordering = on # - Planner Cost Constants - diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 294cfe9c47..6bda383bea 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1372,18 +1372,6 @@ typedef struct PathKey bool pk_nulls_first; /* do NULLs come before normal values? */ } PathKey; -/* - * Combines information about pathkeys and the associated clauses. - */ -typedef struct PathKeyInfo -{ - pg_node_attr(no_read) - - NodeTag type; - List *pathkeys; - List *clauses; -} PathKeyInfo; - /* * VolatileFunctionStatus -- allows nodes to cache their * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index f27d11eaa9..204e94b6d1 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -115,9 +115,7 @@ extern void cost_incremental_sort(Path *path, Cost input_startup_cost, Cost input_total_cost, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples); -extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys, - int nPresortedKeys, double tuples); -extern void cost_append(AppendPath *apath, PlannerInfo *root); +extern void cost_append(AppendPath *apath); extern void cost_merge_append(Path *path, PlannerInfo *root, List *pathkeys, int n_streams, Cost input_startup_cost, Cost input_total_cost, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 881386997c..41f765d342 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -24,7 +24,6 @@ extern PGDLLIMPORT bool enable_geqo; extern PGDLLIMPORT int geqo_threshold; extern PGDLLIMPORT int min_parallel_table_scan_size; extern PGDLLIMPORT int min_parallel_index_scan_size; -extern PGDLLIMPORT bool enable_group_by_reordering; /* Hook for plugins to get control in set_rel_pathlist() */ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root, @@ -203,13 +202,6 @@ typedef enum extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common); -extern int group_keys_reorder_by_pathkeys(List *pathkeys, - List **group_pathkeys, - List **group_clauses); -extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows, - List *path_pathkeys, - List *group_pathkeys, List *group_clauses, - List *aggregate_pathkeys); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index d485b9bfcd..8f3d73edfb 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -214,11 +214,6 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo); -extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, - double input_rows, List **pgset, - EstimationInfo *estinfo, - List **cache_varinfos, int prevNExprs); - extern void estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index b2198724e3..fc2bd40be2 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1210,8 +1210,7 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN --------------------------------------------------------------------------------------------- - HashAggregate - Group Key: $0, $1 + Unique InitPlan 1 (returns $0) -> Limit -> Merge Append @@ -1234,8 +1233,10 @@ explain (costs off) -> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8 Index Cond: (f1 IS NOT NULL) -> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9 - -> Result -(25 rows) + -> Sort + Sort Key: ($0), ($1) + -> Result +(26 rows) select distinct min(f1), max(f1) from minmaxtest; min | max @@ -2525,241 +2526,6 @@ SELECT balk(hundred) FROM tenk1; (1 row) ROLLBACK; --- GROUP BY optimization by reorder columns -SELECT - i AS id, - i/2 AS p, - format('%60s', i%2) AS v, - i/4 AS c, - i/8 AS d, - (random() * (10000/8))::int as e --the same as d but no correlation with p - INTO btg -FROM - generate_series(1, 10000) i; -VACUUM btg; -ANALYZE btg; --- GROUP BY optimization by reorder columns by frequency -SET enable_hashagg=off; -SET max_parallel_workers= 0; -SET max_parallel_workers_per_gather = 0; -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, v - -> Sort - Sort Key: p, v - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, v - -> Sort - Sort Key: p, v - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, c, v - -> Sort - Sort Key: p, c, v - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: v, p, c - -> Sort - Sort Key: v, p, c - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c; - QUERY PLAN ------------------------------- - GroupAggregate - Group Key: p, d, c, v - -> Sort - Sort Key: p, d, c, v - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; - QUERY PLAN ------------------------------- - GroupAggregate - Group Key: v, p, d, c - -> Sort - Sort Key: v, p, d, c - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; - QUERY PLAN ------------------------------- - GroupAggregate - Group Key: p, v, d, c - -> Sort - Sort Key: p, v, d, c - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, d, e; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, d, e - -> Sort - Sort Key: p, d, e - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, e, d; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, e, d - -> Sort - Sort Key: p, e, d - -> Seq Scan on btg -(5 rows) - -CREATE STATISTICS btg_dep ON d, e, p FROM btg; -ANALYZE btg; -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, d, e; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, d, e - -> Sort - Sort Key: p, d, e - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, e, d; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, e, d - -> Sort - Sort Key: p, e, d - -> Seq Scan on btg -(5 rows) - --- GROUP BY optimization by reorder columns by index scan -CREATE INDEX ON btg(p, v); -SET enable_seqscan=off; -SET enable_bitmapscan=off; -VACUUM btg; -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v; - QUERY PLAN ------------------------------------------------- - GroupAggregate - Group Key: p, v - -> Index Only Scan using btg_p_v_idx on btg -(3 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; - QUERY PLAN ------------------------------------------------- - GroupAggregate - Group Key: p, v - -> Index Only Scan using btg_p_v_idx on btg -(3 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p; - QUERY PLAN ------------------------------------------------- - GroupAggregate - Group Key: p, v - -> Index Only Scan using btg_p_v_idx on btg -(3 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; - QUERY PLAN ------------------------------------------------- - GroupAggregate - Group Key: p, v - -> Index Only Scan using btg_p_v_idx on btg -(3 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c; - QUERY PLAN -------------------------------------------------- - GroupAggregate - Group Key: p, c, v - -> Incremental Sort - Sort Key: p, c, v - Presorted Key: p - -> Index Scan using btg_p_v_idx on btg -(6 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; - QUERY PLAN -------------------------------------------------- - GroupAggregate - Group Key: p, v, c - -> Incremental Sort - Sort Key: p, v, c - Presorted Key: p, v - -> Index Scan using btg_p_v_idx on btg -(6 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, c, p, d; - QUERY PLAN -------------------------------------------------- - GroupAggregate - Group Key: p, c, d, v - -> Incremental Sort - Sort Key: p, c, d, v - Presorted Key: p - -> Index Scan using btg_p_v_idx on btg -(6 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; - QUERY PLAN -------------------------------------------------- - GroupAggregate - Group Key: p, v, c, d - -> Incremental Sort - Sort Key: p, v, c, d - Presorted Key: p, v - -> Index Scan using btg_p_v_idx on btg -(6 rows) - -DROP TABLE btg; -RESET enable_hashagg; -RESET max_parallel_workers; -RESET max_parallel_workers_per_gather; -RESET enable_seqscan; -RESET enable_bitmapscan; -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 49953eaade..0a631124c2 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0; set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); create index on t (a); analyze t; set enable_incremental_sort = off; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 2ed2e542a4..08334761ae 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -1984,8 +1984,8 @@ USING (name); ------+----+---- bb | 12 | 13 cc | 22 | 23 - ee | 42 | dd | | 33 + ee | 42 | (4 rows) -- Cases with non-nullable expressions in subquery results; @@ -2019,8 +2019,8 @@ NATURAL FULL JOIN ------+------+------+------+------ bb | 12 | 2 | 13 | 3 cc | 22 | 2 | 23 | 3 - ee | 42 | 2 | | dd | | | 33 | 3 + ee | 42 | 2 | | (4 rows) SELECT * FROM @@ -4676,20 +4676,18 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN ---------------------------------------------- - Merge Left Join - Merge Cond: (d.a = s.id) + QUERY PLAN +-------------------------------------- + Merge Right Join + Merge Cond: (b.id = d.a) + -> Unique + -> Sort + Sort Key: b.id, b.c_id + -> Seq Scan on b -> Sort Sort Key: d.a -> Seq Scan on d - -> Sort - Sort Key: s.id - -> Subquery Scan on s - -> HashAggregate - Group Key: b.id, b.c_id - -> Seq Scan on b -(11 rows) +(9 rows) -- check join removal works when uniqueness of the join condition is enforced -- by a UNION @@ -6399,39 +6397,44 @@ select * from j1 natural join j2; explain (verbose, costs off) select * from j1 inner join (select distinct id from j3) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------ + QUERY PLAN +----------------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> HashAggregate + -> Unique Output: j3.id - Group Key: j3.id - -> Seq Scan on public.j3 + -> Sort Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(11 rows) +(13 rows) -- ensure group by clause allows the inner to become unique explain (verbose, costs off) select * from j1 inner join (select id from j3 group by id) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------ + QUERY PLAN +----------------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> HashAggregate + -> Group Output: j3.id Group Key: j3.id - -> Seq Scan on public.j3 + -> Sort Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(11 rows) +(14 rows) drop table j1; drop table j2; diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index 4047c3e761..787af41dfe 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -1460,15 +1460,18 @@ WHEN MATCHED AND t.a < 10 THEN explain_merge -------------------------------------------------------------------- Merge on ex_mtarget t (actual rows=0 loops=1) - -> Hash Join (actual rows=0 loops=1) - Hash Cond: (s.a = t.a) - -> Seq Scan on ex_msource s (actual rows=1 loops=1) - -> Hash (actual rows=0 loops=1) - Buckets: xxx Batches: xxx Memory Usage: xxx + -> Merge Join (actual rows=0 loops=1) + Merge Cond: (t.a = s.a) + -> Sort (actual rows=0 loops=1) + Sort Key: t.a + Sort Method: quicksort Memory: xxx -> Seq Scan on ex_mtarget t (actual rows=0 loops=1) Filter: (a < '-1000'::integer) Rows Removed by Filter: 54 -(9 rows) + -> Sort (never executed) + Sort Key: s.a + -> Seq Scan on ex_msource s (never executed) +(12 rows) DROP TABLE ex_msource, ex_mtarget; DROP FUNCTION explain_merge(text); diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index db36e3a150..a82b8fb8fb 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -949,12 +949,12 @@ SET parallel_setup_cost = 0; -- is not partial agg safe. EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Merge - Workers Planned: 2 - -> Sort - Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) + -> Gather + Workers Planned: 2 -> Parallel Append -> GroupAggregate Group Key: pagg_tab_ml.a @@ -1381,26 +1381,28 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; - QUERY PLAN -------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x)) - -> Finalize HashAggregate + -> Finalize GroupAggregate Group Key: pagg_tab_para.y Filter: (avg(pagg_tab_para.x) < '12'::numeric) - -> Gather + -> Gather Merge Workers Planned: 2 - -> Parallel Append - -> Partial HashAggregate - Group Key: pagg_tab_para.y - -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para - -> Partial HashAggregate - Group Key: pagg_tab_para_1.y - -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 - -> Partial HashAggregate - Group Key: pagg_tab_para_2.y - -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 -(17 rows) + -> Sort + Sort Key: pagg_tab_para.y + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para.y + -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para + -> Partial HashAggregate + Group Key: pagg_tab_para_1.y + -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 + -> Partial HashAggregate + Group Key: pagg_tab_para_2.y + -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 +(19 rows) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; y | sum | avg | count diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 03926a8413..bb5b7c47a4 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -466,41 +466,52 @@ EXPLAIN (COSTS OFF) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 GROUP BY 1, 2 ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- Group Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Sort + -> Merge Append Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Append - -> Merge Full Join - Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) - Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) - -> Sort - Sort Key: prt1_1.a, prt1_1.b - -> Seq Scan on prt1_p1 prt1_1 - -> Sort - Sort Key: p2_1.a, p2_1.b - -> Seq Scan on prt2_p1 p2_1 - -> Merge Full Join - Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) - Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) - -> Sort - Sort Key: prt1_2.a, prt1_2.b - -> Seq Scan on prt1_p2 prt1_2 - -> Sort - Sort Key: p2_2.a, p2_2.b - -> Seq Scan on prt2_p2 p2_2 - -> Merge Full Join - Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a)) - Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510)) - -> Sort - Sort Key: prt1_3.b, prt1_3.a - -> Seq Scan on prt1_p3 prt1_3 - -> Sort - Sort Key: p2_3.b, p2_3.a - -> Seq Scan on prt2_p3 p2_3 -(32 rows) + -> Group + Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) + -> Sort + Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) + -> Merge Full Join + Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b)) + Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510)) + -> Sort + Sort Key: prt1.a, prt1.b + -> Seq Scan on prt1_p1 prt1 + -> Sort + Sort Key: p2.a, p2.b + -> Seq Scan on prt2_p1 p2 + -> Group + Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) + -> Sort + Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) + -> Merge Full Join + Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) + Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) + -> Sort + Sort Key: prt1_1.a, prt1_1.b + -> Seq Scan on prt1_p2 prt1_1 + -> Sort + Sort Key: p2_1.a, p2_1.b + -> Seq Scan on prt2_p2 p2_1 + -> Group + Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) + -> Sort + Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) + -> Merge Full Join + Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) + Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) + -> Sort + Sort Key: prt1_2.a, prt1_2.b + -> Seq Scan on prt1_p3 prt1_2 + -> Sort + Sort Key: p2_2.a, p2_2.b + -> Seq Scan on prt2_p3 p2_2 +(43 rows) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 4e775af175..579b861d84 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -114,7 +114,6 @@ select name, setting from pg_settings where name like 'enable%'; enable_async_append | on enable_bitmapscan | on enable_gathermerge | on - enable_group_by_reordering | on enable_hashagg | on enable_hashjoin | on enable_incremental_sort | on @@ -132,7 +131,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(21 rows) +(20 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 7ac4a9380e..dece7310cf 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1303,22 +1303,24 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where q2 = q2; - QUERY PLAN ----------------------------------------------------- - HashAggregate - Group Key: "*SELECT* 1".q1 - -> Append + QUERY PLAN +---------------------------------------------------------- + Unique + -> Merge Append + Sort Key: "*SELECT* 1".q1 -> Subquery Scan on "*SELECT* 1" - -> HashAggregate - Group Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: (q2 IS NOT NULL) + -> Unique + -> Sort + Sort Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: (q2 IS NOT NULL) -> Subquery Scan on "*SELECT* 2" - -> HashAggregate - Group Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: (q2 IS NOT NULL) -(13 rows) + -> Unique + -> Sort + Sort Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: (q2 IS NOT NULL) +(15 rows) select distinct q1 from (select distinct * from int8_tbl i81 @@ -1337,22 +1339,24 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where -q1 = q2; - QUERY PLAN --------------------------------------------------- - HashAggregate - Group Key: "*SELECT* 1".q1 - -> Append + QUERY PLAN +-------------------------------------------------------- + Unique + -> Merge Append + Sort Key: "*SELECT* 1".q1 -> Subquery Scan on "*SELECT* 1" - -> HashAggregate - Group Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: ((- q1) = q2) + -> Unique + -> Sort + Sort Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: ((- q1) = q2) -> Subquery Scan on "*SELECT* 2" - -> HashAggregate - Group Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: ((- q1) = q2) -(13 rows) + -> Unique + -> Sort + Sort Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: ((- q1) = q2) +(15 rows) select distinct q1 from (select distinct * from int8_tbl i81 diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 4540a06f45..a4c00ff7a9 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1068,105 +1068,6 @@ SELECT balk(hundred) FROM tenk1; ROLLBACK; --- GROUP BY optimization by reorder columns - -SELECT - i AS id, - i/2 AS p, - format('%60s', i%2) AS v, - i/4 AS c, - i/8 AS d, - (random() * (10000/8))::int as e --the same as d but no correlation with p - INTO btg -FROM - generate_series(1, 10000) i; - -VACUUM btg; -ANALYZE btg; - --- GROUP BY optimization by reorder columns by frequency - -SET enable_hashagg=off; -SET max_parallel_workers= 0; -SET max_parallel_workers_per_gather = 0; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, d, e; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, e, d; - -CREATE STATISTICS btg_dep ON d, e, p FROM btg; -ANALYZE btg; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, d, e; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, e, d; - - --- GROUP BY optimization by reorder columns by index scan - -CREATE INDEX ON btg(p, v); -SET enable_seqscan=off; -SET enable_bitmapscan=off; -VACUUM btg; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, c, p, d; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; - -DROP TABLE btg; - -RESET enable_hashagg; -RESET max_parallel_workers; -RESET max_parallel_workers_per_gather; -RESET enable_seqscan; -RESET enable_bitmapscan; - - -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index 6a0e87c7f6..284a354dbb 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -213,7 +213,7 @@ set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); create index on t (a); analyze t; diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 6e581899ed..1ca248f337 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2840,13 +2840,16 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i -- GROUP BY clause in various forms, cardinal, alias and constant expression explain (verbose, costs off) select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------- - Foreign Scan + QUERY PLAN +--------------------------------------------------------------------------------------- + Sort Output: (count(c2)), c2, 5, 7.0, 9 - Relations: Aggregate on (public.ft1) - Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST -(4 rows) + Sort Key: ft1.c2 + -> Foreign Scan + Output: (count(c2)), c2, 5, 7.0, 9 + Relations: Aggregate on (public.ft1) + Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 +(7 rows) select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2; w | x | y | z diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 2fe902eed2..331836ba5d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -5055,20 +5055,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class=" </listitem> </varlistentry> - <varlistentry id="guc-enable-groupby-reordering" xreflabel="enable_group_by_reordering"> - <term><varname>enable_group_by_reordering</varname> (<type>boolean</type>) - <indexterm> - <primary><varname>enable_group_by_reordering</varname> configuration parameter</primary> - </indexterm> - </term> - <listitem> - <para> - Enables or disables reordering of keys in a <literal>GROUP BY</literal> - clause. The default is <literal>on</literal>. - </para> - </listitem> - </varlistentry> - <varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg"> <term><varname>enable_hashagg</varname> (<type>boolean</type>) <indexterm> diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 8a7f61b0ae..0ba26b207b 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1796,327 +1796,6 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) rterm->pathtarget->width); } -/* - * is_fake_var - * Workaround for generate_append_tlist() which generates fake Vars with - * varno == 0, that will cause a fail of estimate_num_group() call - * - * XXX Ummm, why would estimate_num_group fail with this? - */ -static bool -is_fake_var(Expr *expr) -{ - if (IsA(expr, RelabelType)) - expr = (Expr *) ((RelabelType *) expr)->arg; - - return (IsA(expr, Var) && ((Var *) expr)->varno == 0); -} - -/* - * get_width_cost_multiplier - * Returns relative complexity of comparing two values based on its width. - * The idea behind is that the comparison becomes more expensive the longer the - * value is. Return value is in cpu_operator_cost units. - */ -static double -get_width_cost_multiplier(PlannerInfo *root, Expr *expr) -{ - double width = -1.0; /* fake value */ - - if (IsA(expr, RelabelType)) - expr = (Expr *) ((RelabelType *) expr)->arg; - - /* Try to find actual stat in corresponding relation */ - if (IsA(expr, Var)) - { - Var *var = (Var *) expr; - - if (var->varno > 0 && var->varno < root->simple_rel_array_size) - { - RelOptInfo *rel = root->simple_rel_array[var->varno]; - - if (rel != NULL && - var->varattno >= rel->min_attr && - var->varattno <= rel->max_attr) - { - int ndx = var->varattno - rel->min_attr; - - if (rel->attr_widths[ndx] > 0) - width = rel->attr_widths[ndx]; - } - } - } - - /* Didn't find any actual stats, try using type width instead. */ - if (width < 0.0) - { - Node *node = (Node *) expr; - - width = get_typavgwidth(exprType(node), exprTypmod(node)); - } - - /* - * Values are passed as Datum type, so comparisons can't be cheaper than - * comparing a Datum value. - * - * FIXME I find this reasoning questionable. We may pass int2, and - * comparing it is probably a bit cheaper than comparing a bigint. - */ - if (width <= sizeof(Datum)) - return 1.0; - - /* - * We consider the cost of a comparison not to be directly proportional to - * width of the argument, because widths of the arguments could be - * slightly different (we only know the average width for the whole - * column). So we use log16(width) as an estimate. - */ - return 1.0 + 0.125 * LOG2(width / sizeof(Datum)); -} - -/* - * compute_cpu_sort_cost - * compute CPU cost of sort (i.e. in-memory) - * - * The main thing we need to calculate to estimate sort CPU costs is the number - * of calls to the comparator functions. The difficulty is that for multi-column - * sorts there may be different data types involved (for some of which the calls - * may be much more expensive). Furthermore, columns may have a very different - * number of distinct values - the higher the number, the fewer comparisons will - * be needed for the following columns. - * - * The algorithm is incremental - we add pathkeys one by one, and at each step we - * estimate the number of necessary comparisons (based on the number of distinct - * groups in the current pathkey prefix and the new pathkey), and the comparison - * costs (which is data type specific). - * - * Estimation of the number of comparisons is based on ideas from: - * - * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002 - * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf] - * - * In term of that paper, let N - number of tuples, Xi - number of identical - * tuples with value Ki, then the estimate of number of comparisons is: - * - * log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi)) - * - * We assume all Xi the same because now we don't have any estimation of - * group sizes, we have only know the estimate of number of groups (distinct - * values). In that case, formula becomes: - * - * N * log(NumberOfGroups) - * - * For multi-column sorts we need to estimate the number of comparisons for - * each individual column - for example with columns (c1, c2, ..., ck) we - * can estimate that number of comparisons on ck is roughly - * - * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1)) - * - * Let k be a column number, Gk - number of groups defined by k columns, and Fk - * the cost of the comparison is - * - * N * sum( Fk * log(Gk) ) - * - * Note: We also consider column width, not just the comparator cost. - * - * NOTE: some callers currently pass NIL for pathkeys because they - * can't conveniently supply the sort keys. In this case, it will fallback to - * simple comparison cost estimate. - */ -static Cost -compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, - Cost comparison_cost, double tuples, double output_tuples, - bool heapSort) -{ - Cost per_tuple_cost = 0.0; - ListCell *lc; - List *pathkeyExprs = NIL; - double tuplesPerPrevGroup = tuples; - double totalFuncCost = 1.0; - bool has_fake_var = false; - int i = 0; - Oid prev_datatype = InvalidOid; - List *cache_varinfos = NIL; - - /* fallback if pathkeys is unknown */ - if (list_length(pathkeys) == 0) - { - /* - * If we'll use a bounded heap-sort keeping just K tuples in memory, - * for a total number of tuple comparisons of N log2 K; but the - * constant factor is a bit higher than for quicksort. Tweak it so - * that the cost curve is continuous at the crossover point. - */ - output_tuples = (heapSort) ? 2.0 * output_tuples : tuples; - per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples); - - /* add cost provided by caller */ - per_tuple_cost += comparison_cost; - - return per_tuple_cost * tuples; - } - - /* - * Computing total cost of sorting takes into account the per-column - * comparison function cost. We try to compute the needed number of - * comparisons per column. - */ - foreach(lc, pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(lc); - EquivalenceMember *em; - double nGroups, - correctedNGroups; - Cost funcCost = 1.0; - - /* - * We believe that equivalence members aren't very different, so, to - * estimate cost we consider just the first member. - */ - em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members); - - if (em->em_datatype != InvalidOid) - { - /* do not lookup funcCost if the data type is the same */ - if (prev_datatype != em->em_datatype) - { - Oid sortop; - QualCost cost; - - sortop = get_opfamily_member(pathkey->pk_opfamily, - em->em_datatype, em->em_datatype, - pathkey->pk_strategy); - - cost.startup = 0; - cost.per_tuple = 0; - add_function_cost(root, get_opcode(sortop), NULL, &cost); - - /* - * add_function_cost returns the product of cpu_operator_cost - * and procost, but we need just procost, co undo that. - */ - funcCost = cost.per_tuple / cpu_operator_cost; - - prev_datatype = em->em_datatype; - } - } - - /* factor in the width of the values in this column */ - funcCost *= get_width_cost_multiplier(root, em->em_expr); - - /* now we have per-key cost, so add to the running total */ - totalFuncCost += funcCost; - - /* remember if we have found a fake Var in pathkeys */ - has_fake_var |= is_fake_var(em->em_expr); - pathkeyExprs = lappend(pathkeyExprs, em->em_expr); - - /* - * We need to calculate the number of comparisons for this column, - * which requires knowing the group size. So we estimate the number of - * groups by calling estimate_num_groups_incremental(), which - * estimates the group size for "new" pathkeys. - * - * Note: estimate_num_groups_incremental does not handle fake Vars, so - * use a default estimate otherwise. - */ - if (!has_fake_var) - nGroups = estimate_num_groups_incremental(root, pathkeyExprs, - tuplesPerPrevGroup, NULL, NULL, - &cache_varinfos, - list_length(pathkeyExprs) - 1); - else if (tuples > 4.0) - - /* - * Use geometric mean as estimation if there are no stats. - * - * We don't use DEFAULT_NUM_DISTINCT here, because that's used for - * a single column, but here we're dealing with multiple columns. - */ - nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys)); - else - nGroups = tuples; - - /* - * Presorted keys are not considered in the cost above, but we still - * do have to compare them in the qsort comparator. So make sure to - * factor in the cost in that case. - */ - if (i >= nPresortedKeys) - { - if (heapSort) - { - /* - * have to keep at least one group, and a multiple of group - * size - */ - correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup); - } - else - /* all groups in the input */ - correctedNGroups = nGroups; - - correctedNGroups = Max(1.0, ceil(correctedNGroups)); - - per_tuple_cost += totalFuncCost * LOG2(correctedNGroups); - } - - i++; - - /* - * Uniform distributions with all groups being of the same size are - * the best case, with nice smooth behavior. Real-world distributions - * tend not to be uniform, though, and we don't have any reliable - * easy-to-use information. As a basic defense against skewed - * distributions, we use a 1.5 factor to make the expected group a bit - * larger, but we need to be careful not to make the group larger than - * in the preceding step. - */ - tuplesPerPrevGroup = Min(tuplesPerPrevGroup, - ceil(1.5 * tuplesPerPrevGroup / nGroups)); - - /* - * Once we get single-row group, it means tuples in the group are - * unique and we can skip all remaining columns. - */ - if (tuplesPerPrevGroup <= 1.0) - break; - } - - list_free(pathkeyExprs); - - /* per_tuple_cost is in cpu_operator_cost units */ - per_tuple_cost *= cpu_operator_cost; - - /* - * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles - * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort - * estimation formula has additional term proportional to number of tuples - * (see Chapter 8.2 and Theorem 4.1). That affects cases with a low number - * of tuples, approximately less than 1e4. We could implement it as an - * additional multiplier under the logarithm, but we use a bit more - * complex formula which takes into account the number of unique tuples - * and it's not clear how to combine the multiplier with the number of - * groups. Estimate it as 10 cpu_operator_cost units. - */ - per_tuple_cost += 10 * cpu_operator_cost; - - per_tuple_cost += comparison_cost; - - return tuples * per_tuple_cost; -} - -/* - * simple wrapper just to estimate best sort path - */ -Cost -cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys, - double tuples) -{ - return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys, - 0, tuples, tuples, false); -} - /* * cost_tuplesort * Determines and returns the cost of sorting a relation using tuplesort, @@ -2133,7 +1812,7 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys, * number of initial runs formed and M is the merge order used by tuplesort.c. * Since the average initial run should be about sort_mem, we have * disk traffic = 2 * relsize * ceil(logM(p / sort_mem)) - * and cpu cost (computed by compute_cpu_sort_cost()). + * cpu = comparison_cost * t * log2(t) * * If the sort is bounded (i.e., only the first k result tuples are needed) * and k tuples can fit into sort_mem, we use a heap method that keeps only @@ -2152,11 +1831,9 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys, * 'comparison_cost' is the extra cost per comparison, if any * 'sort_mem' is the number of kilobytes of work memory allowed for the sort * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound - * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should - * be added by caller later */ static void -cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost, +cost_tuplesort(Cost *startup_cost, Cost *run_cost, double tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples) @@ -2173,6 +1850,9 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_ if (tuples < 2.0) tuples = 2.0; + /* Include the default cost-per-comparison */ + comparison_cost += 2.0 * cpu_operator_cost; + /* Do we have a useful LIMIT? */ if (limit_tuples > 0 && limit_tuples < tuples) { @@ -2196,10 +1876,12 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_ double log_runs; double npageaccesses; - /* CPU costs */ - *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, - comparison_cost, tuples, - tuples, false); + /* + * CPU costs + * + * Assume about N log2 N comparisons + */ + *startup_cost = comparison_cost * tuples * LOG2(tuples); /* Disk costs */ @@ -2215,17 +1897,18 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_ } else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes) { - /* We'll use a bounded heap-sort keeping just K tuples in memory. */ - *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, - comparison_cost, tuples, - output_tuples, true); + /* + * We'll use a bounded heap-sort keeping just K tuples in memory, for + * a total number of tuple comparisons of N log2 K; but the constant + * factor is a bit higher than for quicksort. Tweak it so that the + * cost curve is continuous at the crossover point. + */ + *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples); } else { /* We'll use plain quicksort on all the input tuples */ - *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, - comparison_cost, tuples, - tuples, false); + *startup_cost = comparison_cost * tuples * LOG2(tuples); } /* @@ -2258,8 +1941,8 @@ cost_incremental_sort(Path *path, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples) { - Cost startup_cost, - run_cost, + Cost startup_cost = 0, + run_cost = 0, input_run_cost = input_total_cost - input_startup_cost; double group_tuples, input_groups; @@ -2344,7 +2027,7 @@ cost_incremental_sort(Path *path, * pessimistic about incremental sort performance and increase its average * group size by half. */ - cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost, + cost_tuplesort(&group_startup_cost, &group_run_cost, 1.5 * group_tuples, width, comparison_cost, sort_mem, limit_tuples); @@ -2352,7 +2035,7 @@ cost_incremental_sort(Path *path, * Startup cost of incremental sort is the startup cost of its first group * plus the cost of its input. */ - startup_cost = group_startup_cost + startup_cost += group_startup_cost + input_startup_cost + group_input_run_cost; /* @@ -2361,7 +2044,7 @@ cost_incremental_sort(Path *path, * group, plus the total cost to process the remaining groups, plus the * remaining cost of input. */ - run_cost = group_run_cost + run_cost += group_run_cost + (group_run_cost + group_startup_cost) * (input_groups - 1) + group_input_run_cost * (input_groups - 1); @@ -2401,7 +2084,7 @@ cost_sort(Path *path, PlannerInfo *root, Cost startup_cost; Cost run_cost; - cost_tuplesort(root, pathkeys, &startup_cost, &run_cost, + cost_tuplesort(&startup_cost, &run_cost, tuples, width, comparison_cost, sort_mem, limit_tuples); @@ -2499,7 +2182,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers) * Determines and returns the cost of an Append node. */ void -cost_append(AppendPath *apath, PlannerInfo *root) +cost_append(AppendPath *apath) { ListCell *l; @@ -2567,7 +2250,7 @@ cost_append(AppendPath *apath, PlannerInfo *root) * any child. */ cost_sort(&sort_path, - root, + NULL, /* doesn't currently need root */ pathkeys, subpath->total_cost, subpath->rows, diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 7991295548..9f39f4661a 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -681,18 +681,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, if (opcintype == cur_em->em_datatype && equal(expr, cur_em->em_expr)) - { - /* - * Match! - * - * Copy the sortref if it wasn't set yet. That may happen if - * the ec was constructed from WHERE clause, i.e. it doesn't - * have a target reference at all. - */ - if (cur_ec->ec_sortref == 0 && sortref > 0) - cur_ec->ec_sortref = sortref; - return cur_ec; - } + return cur_ec; /* Match! */ } } diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 1d21215f85..86a35cdef1 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -17,24 +17,17 @@ */ #include "postgres.h" -#include <float.h> - -#include "miscadmin.h" #include "access/stratnum.h" #include "catalog/pg_opfamily.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" -#include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" #include "utils/lsyscache.h" -#include "utils/selfuncs.h" -/* Consider reordering of GROUP BY keys? */ -bool enable_group_by_reordering = true; static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, @@ -341,527 +334,6 @@ pathkeys_contained_in(List *keys1, List *keys2) return false; } -/* - * group_keys_reorder_by_pathkeys - * Reorder GROUP BY keys to match pathkeys of input path. - * - * Function returns new lists (pathkeys and clauses), original GROUP BY lists - * stay untouched. - * - * Returns the number of GROUP BY keys with a matching pathkey. - */ -int -group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, - List **group_clauses) -{ - List *new_group_pathkeys = NIL, - *new_group_clauses = NIL; - ListCell *lc; - int n; - - if (pathkeys == NIL || *group_pathkeys == NIL) - return 0; - - /* - * Walk the pathkeys (determining ordering of the input path) and see if - * there's a matching GROUP BY key. If we find one, we append it to the - * list, and do the same for the clauses. - * - * Once we find the first pathkey without a matching GROUP BY key, the - * rest of the pathkeys are useless and can't be used to evaluate the - * grouping, so we abort the loop and ignore the remaining pathkeys. - * - * XXX Pathkeys are built in a way to allow simply comparing pointers. - */ - foreach(lc, pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(lc); - SortGroupClause *sgc; - - /* abort on first mismatch */ - if (!list_member_ptr(*group_pathkeys, pathkey)) - break; - - new_group_pathkeys = lappend(new_group_pathkeys, pathkey); - - sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, - *group_clauses); - - new_group_clauses = lappend(new_group_clauses, sgc); - } - - /* remember the number of pathkeys with a matching GROUP BY key */ - n = list_length(new_group_pathkeys); - - /* append the remaining group pathkeys (will be treated as not sorted) */ - *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys, - *group_pathkeys); - *group_clauses = list_concat_unique_ptr(new_group_clauses, - *group_clauses); - - return n; -} - -/* - * Used to generate all permutations of a pathkey list. - */ -typedef struct PathkeyMutatorState -{ - List *elemsList; - ListCell **elemCells; - void **elems; - int *positions; - int mutatorNColumns; - int count; -} PathkeyMutatorState; - - -/* - * PathkeyMutatorInit - * Initialize state of the permutation generator. - * - * We want to generate permutations of elements in the "elems" list. We may want - * to skip some number of elements at the beginning (when treating as presorted) - * or at the end (we only permute a limited number of group keys). - * - * The list is decomposed into elements, and we also keep pointers to individual - * cells. This allows us to build the permuted list quickly and cheaply, without - * creating any copies. - */ -static void -PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end) -{ - int i; - int n = end - start; - ListCell *lc; - - memset(state, 0, sizeof(*state)); - - state->mutatorNColumns = n; - - state->elemsList = list_copy(elems); - - state->elems = palloc(sizeof(void *) * n); - state->elemCells = palloc(sizeof(ListCell *) * n); - state->positions = palloc(sizeof(int) * n); - - i = 0; - for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start)) - { - state->elemCells[i] = lc; - state->elems[i] = lfirst(lc); - state->positions[i] = i + 1; - i++; - - if (i >= n) - break; - } -} - -/* Swap two elements of an array. */ -static void -PathkeyMutatorSwap(int *a, int i, int j) -{ - int s = a[i]; - - a[i] = a[j]; - a[j] = s; -} - -/* - * Generate the next permutation of elements. - */ -static bool -PathkeyMutatorNextSet(int *a, int n) -{ - int j, - k, - l, - r; - - j = n - 2; - - while (j >= 0 && a[j] >= a[j + 1]) - j--; - - if (j < 0) - return false; - - k = n - 1; - - while (k >= 0 && a[j] >= a[k]) - k--; - - PathkeyMutatorSwap(a, j, k); - - l = j + 1; - r = n - 1; - - while (l < r) - PathkeyMutatorSwap(a, l++, r--); - - return true; -} - -/* - * PathkeyMutatorNext - * Generate the next permutation of list of elements. - * - * Returns the next permutation (as a list of elements) or NIL if there are no - * more permutations. - */ -static List * -PathkeyMutatorNext(PathkeyMutatorState *state) -{ - int i; - - state->count++; - - /* first permutation is original list */ - if (state->count == 1) - return state->elemsList; - - /* when there are no more permutations, return NIL */ - if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns)) - { - pfree(state->elems); - pfree(state->elemCells); - pfree(state->positions); - - list_free(state->elemsList); - - return NIL; - } - - /* update the list cells to point to the right elements */ - for (i = 0; i < state->mutatorNColumns; i++) - lfirst(state->elemCells[i]) = - (void *) state->elems[state->positions[i] - 1]; - - return state->elemsList; -} - -/* - * Cost of comparing pathkeys. - */ -typedef struct PathkeySortCost -{ - Cost cost; - PathKey *pathkey; -} PathkeySortCost; - -static int -pathkey_sort_cost_comparator(const void *_a, const void *_b) -{ - const PathkeySortCost *a = (PathkeySortCost *) _a; - const PathkeySortCost *b = (PathkeySortCost *) _b; - - if (a->cost < b->cost) - return -1; - else if (a->cost == b->cost) - return 0; - return 1; -} - -/* - * get_cheapest_group_keys_order - * Reorders the group pathkeys / clauses to minimize the comparison cost. - * - * Given the list of pathkeys in '*group_pathkeys', we try to arrange these - * in an order that minimizes the sort costs that will be incurred by the - * GROUP BY. The costs mainly depend on the cost of the sort comparator - * function(s) and the number of distinct values in each column of the GROUP - * BY clause (*group_clauses). Sorting on subsequent columns is only required - * for tiebreak situations where two values sort equally. - * - * In case the input is partially sorted, only the remaining pathkeys are - * considered. 'n_preordered' denotes how many of the leading *group_pathkeys - * the input is presorted by. - * - * Returns true and sets *group_pathkeys and *group_clauses to the newly - * ordered versions of the lists that were passed in via these parameters. - * If no reordering was deemed necessary then we return false, in which case - * the *group_pathkeys and *group_clauses lists are left untouched. The - * original *group_pathkeys and *group_clauses parameter values are never - * destructively modified in place. - */ -static bool -get_cheapest_group_keys_order(PlannerInfo *root, double nrows, - List **group_pathkeys, List **group_clauses, - int n_preordered) -{ - List *new_group_pathkeys = NIL, - *new_group_clauses = NIL, - *var_group_pathkeys; - - ListCell *cell; - PathkeyMutatorState mstate; - double cheapest_sort_cost = DBL_MAX; - - int nFreeKeys; - int nToPermute; - - /* If there are less than 2 unsorted pathkeys, we're done. */ - if (list_length(*group_pathkeys) - n_preordered < 2) - return false; - - /* - * We could exhaustively cost all possible orderings of the pathkeys, but - * for a large number of pathkeys it might be prohibitively expensive. So - * we try to apply simple cheap heuristics first - we sort the pathkeys by - * sort cost (as if the pathkey was sorted independently) and then check - * only the four cheapest pathkeys. The remaining pathkeys are kept - * ordered by cost. - * - * XXX This is a very simple heuristics, but likely to work fine for most - * cases (because the number of GROUP BY clauses tends to be lower than - * 4). But it ignores how the number of distinct values in each pathkey - * affects the following steps. It might be better to use "more expensive" - * pathkey first if it has many distinct values, because it then limits - * the number of comparisons for the remaining pathkeys. But evaluating - * that is likely quite the expensive. - */ - nFreeKeys = list_length(*group_pathkeys) - n_preordered; - nToPermute = 4; - if (nFreeKeys > nToPermute) - { - PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys); - PathkeySortCost *cost = costs; - - /* - * Estimate cost for sorting individual pathkeys skipping the - * pre-ordered pathkeys. - */ - for_each_from(cell, *group_pathkeys, n_preordered) - { - PathKey *pathkey = (PathKey *) lfirst(cell); - List *to_cost = list_make1(pathkey); - - cost->pathkey = pathkey; - cost->cost = cost_sort_estimate(root, to_cost, 0, nrows); - cost++; - - list_free(to_cost); - } - - /* sort the pathkeys by sort cost in ascending order */ - qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator); - - /* - * Rebuild the list of pathkeys - first the preordered ones, then the - * rest ordered by cost. - */ - new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered); - - for (int i = 0; i < nFreeKeys; i++) - new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey); - - pfree(costs); - } - else - { - /* Copy the list, so that we can free the new list by list_free. */ - new_group_pathkeys = list_copy(*group_pathkeys); - nToPermute = nFreeKeys; - } - - Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys)); - - /* - * Generate pathkey lists with permutations of the first nToPermute - * pathkeys. - * - * XXX We simply calculate sort cost for each individual pathkey list, but - * there's room for two dynamic programming optimizations here. Firstly, - * we may pass the current "best" cost to cost_sort_estimate so that it - * can "abort" if the estimated pathkeys list exceeds it. Secondly, it - * could pass the return information about the position when it exceeded - * the cost, and we could skip all permutations with the same prefix. - * - * Imagine we've already found ordering with cost C1, and we're evaluating - * another ordering - cost_sort_estimate() calculates cost by adding the - * pathkeys one by one (more or less), and the cost only grows. If at any - * point it exceeds C1, it can't possibly be "better" so we can discard - * it. But we also know that we can discard all ordering with the same - * prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b) - * then the same thing will happen for any ordering with this prefix. - */ - PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute); - - while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL) - { - Cost cost; - - cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows); - - if (cost < cheapest_sort_cost) - { - list_free(new_group_pathkeys); - new_group_pathkeys = list_copy(var_group_pathkeys); - cheapest_sort_cost = cost; - } - } - - /* Reorder the group clauses according to the reordered pathkeys. */ - foreach(cell, new_group_pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(cell); - - new_group_clauses = lappend(new_group_clauses, - get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, - *group_clauses)); - } - - /* Just append the rest GROUP BY clauses */ - new_group_clauses = list_concat_unique_ptr(new_group_clauses, - *group_clauses); - - *group_pathkeys = new_group_pathkeys; - *group_clauses = new_group_clauses; - - return true; -} - -/* - * get_useful_group_keys_orderings - * Determine which orderings of GROUP BY keys are potentially interesting. - * - * Returns list of PathKeyInfo items, each representing an interesting ordering - * of GROUP BY keys. Each item stores pathkeys and clauses in matching order. - * - * The function considers (and keeps) multiple group by orderings: - * - * - the original ordering, as specified by the GROUP BY clause - * - * - GROUP BY keys reordered to minimize the sort cost - * - * - GROUP BY keys reordered to match path ordering (as much as possible), with - * the tail reordered to minimize the sort cost - * - * - GROUP BY keys to match target ORDER BY clause (as much as possible), with - * the tail reordered to minimize the sort cost - * - * There are other potentially interesting orderings (e.g. it might be best to - * match the first ORDER BY key, order the remaining keys differently and then - * rely on the incremental sort to fix this), but we ignore those for now. To - * make this work we'd have to pretty much generate all possible permutations. - */ -List * -get_useful_group_keys_orderings(PlannerInfo *root, double nrows, - List *path_pathkeys, - List *group_pathkeys, List *group_clauses) -{ - Query *parse = root->parse; - List *infos = NIL; - PathKeyInfo *info; - int n_preordered = 0; - - List *pathkeys = group_pathkeys; - List *clauses = group_clauses; - - /* always return at least the original pathkeys/clauses */ - info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - - /* - * Should we try generating alternative orderings of the group keys? If - * not, we produce only the order specified in the query, i.e. the - * optimization is effectively disabled. - */ - if (!enable_group_by_reordering) - return infos; - - /* for grouping sets we can't do any reordering */ - if (parse->groupingSets) - return infos; - - /* - * Try reordering pathkeys to minimize the sort cost, ignoring both the - * target ordering (ORDER BY) and ordering of the input path. - */ - if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, - n_preordered)) - { - info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - } - - /* - * If the path is sorted in some way, try reordering the group keys to - * match as much of the ordering as possible - we get this sort for free - * (mostly). - * - * We must not do this when there are no grouping sets, because those use - * more complex logic to decide the ordering. - * - * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's - * more a complement, because it allows benefiting from incremental sort - * as much as possible. - * - * XXX This does nothing if (n_preordered == 0). We shouldn't create the - * info in this case. - */ - if (path_pathkeys) - { - n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys, - &pathkeys, - &clauses); - - /* reorder the tail to minimize sort cost */ - get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, - n_preordered); - - /* - * reorder the tail to minimize sort cost - * - * XXX Ignore the return value - there may be nothing to reorder, in - * which case get_cheapest_group_keys_order returns false. But we - * still want to keep the keys reordered to path_pathkeys. - */ - info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - } - - /* - * Try reordering pathkeys to minimize the sort cost (this time consider - * the ORDER BY clause, but only if set debug_group_by_match_order_by). - */ - if (root->sort_pathkeys) - { - n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys, - &pathkeys, - &clauses); - - /* - * reorder the tail to minimize sort cost - * - * XXX Ignore the return value - there may be nothing to reorder, in - * which case get_cheapest_group_keys_order returns false. But we - * still want to keep the keys reordered to sort_pathkeys. - */ - get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, - n_preordered); - - /* keep the group keys reordered to match ordering of input path */ - info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; - info->clauses = clauses; - - infos = lappend(infos, info); - } - - return infos; -} - /* * pathkeys_count_contained_in * Same as pathkeys_contained_in, but also sets length of longest @@ -2390,54 +1862,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return n_common_pathkeys; } -/* - * pathkeys_useful_for_grouping - * Count the number of pathkeys that are useful for grouping (instead of - * explicit sort) - * - * Group pathkeys could be reordered to benefit from the ordering. The - * ordering may not be "complete" and may require incremental sort, but that's - * fine. So we simply count prefix pathkeys with a matching group key, and - * stop once we find the first pathkey without a match. - * - * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b) - * pathkeys are useful for grouping, and we might do incremental sort to get - * path ordered by (a,b,e). - * - * This logic is necessary to retain paths with ordering not matching grouping - * keys directly, without the reordering. - * - * Returns the length of pathkey prefix with matching group keys. - */ -static int -pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) -{ - ListCell *key; - int n = 0; - - /* no special ordering requested for grouping */ - if (root->group_pathkeys == NIL) - return 0; - - /* unordered path */ - if (pathkeys == NIL) - return 0; - - /* walk the pathkeys and search for matching group key */ - foreach(key, pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(key); - - /* no matching group key, we're done */ - if (!list_member_ptr(root->group_pathkeys, pathkey)) - break; - - n++; - } - - return n; -} - /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. @@ -2452,9 +1876,6 @@ truncate_useless_pathkeys(PlannerInfo *root, nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); - if (nuseful2 > nuseful) - nuseful = nuseful2; - nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; @@ -2490,8 +1911,6 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) { if (rel->joininfo != NIL || rel->has_eclass_joins) return true; /* might be able to use pathkeys for merging */ - if (root->group_pathkeys != NIL) - return true; /* might be able to use pathkeys for grouping */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ return false; /* definitely useless */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d8e8f607b2..468105d91e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2756,9 +2756,8 @@ remove_useless_groupby_columns(PlannerInfo *root) * * In principle it might be interesting to consider other orderings of the * GROUP BY elements, which could match the sort ordering of other - * possible plans (eg an indexscan) and thereby reduce cost. However, we - * don't yet have sufficient information to do that here, so that's left until - * later in planning. See get_useful_group_keys_orderings(). + * possible plans (eg an indexscan) and thereby reduce cost. We don't + * bother with that, though. Hashed grouping will frequently win anyway. * * Note: we need no comparable processing of the distinctClause because * the parser already enforced that that matches ORDER BY. @@ -6232,122 +6231,24 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, */ foreach(lc, input_rel->pathlist) { - ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; + bool is_sorted; + int presorted_keys; - List *pathkey_orderings = NIL; - - List *group_pathkeys = root->group_pathkeys; - List *group_clauses = parse->groupClause; - - /* generate alternative group orderings that might be useful */ - pathkey_orderings = get_useful_group_keys_orderings(root, - path->rows, - path->pathkeys, - group_pathkeys, - group_clauses); + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); - Assert(list_length(pathkey_orderings) > 0); - - /* process all potentially interesting grouping reorderings */ - foreach(lc2, pathkey_orderings) + if (path == cheapest_path || is_sorted) { - bool is_sorted; - int presorted_keys = 0; - PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - - /* restore the path (we replace it in the loop) */ - path = path_original; - - is_sorted = pathkeys_count_contained_in(info->pathkeys, - path->pathkeys, - &presorted_keys); - - if (path == cheapest_path || is_sorted) - { - /* Sort the cheapest-total path if it isn't already sorted */ - if (!is_sorted) - path = (Path *) create_sort_path(root, - grouped_rel, - path, - info->pathkeys, - -1.0); - - /* Now decide what to stick atop it */ - if (parse->groupingSets) - { - consider_groupingsets_paths(root, grouped_rel, - path, true, can_hash, - gd, agg_costs, dNumGroups); - } - else if (parse->hasAggs) - { - /* - * We have aggregation, possibly with plain GROUP BY. - * Make an AggPath. - */ - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_SIMPLE, - info->clauses, - havingQual, - agg_costs, - dNumGroups)); - } - else if (group_clauses) - { - /* - * We have GROUP BY without aggregation or grouping - * sets. Make a GroupPath. - */ - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - info->clauses, - havingQual, - dNumGroups)); - } - else - { - /* Other cases should have been handled above */ - Assert(false); - } - } - - /* - * Now we may consider incremental sort on this path, but only - * when the path is not already sorted and when incremental - * sort is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; - - /* Restore the input path (we might have added Sort on top). */ - path = path_original; - - /* no shared prefix, no point in building incremental sort */ - if (presorted_keys == 0) - continue; - - /* - * We should have already excluded pathkeys of length 1 - * because then presorted_keys > 0 would imply is_sorted was - * true. - */ - Assert(list_length(root->group_pathkeys) != 1); - - path = (Path *) create_incremental_sort_path(root, - grouped_rel, - path, - info->pathkeys, - presorted_keys, - -1.0); + /* Sort the cheapest-total path if it isn't already sorted */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); /* Now decide what to stick atop it */ if (parse->groupingSets) @@ -6367,9 +6268,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel, path, grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_SIMPLE, - info->clauses, + parse->groupClause, havingQual, agg_costs, dNumGroups)); @@ -6384,7 +6285,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, create_group_path(root, grouped_rel, path, - info->clauses, + parse->groupClause, havingQual, dNumGroups)); } @@ -6394,6 +6295,79 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, Assert(false); } } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, no point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 because + * then presorted_keys > 0 would imply is_sorted was true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make an + * AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. Make + * a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } } /* @@ -6404,130 +6378,100 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, { foreach(lc, partially_grouped_rel->pathlist) { - ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; + bool is_sorted; + int presorted_keys; - List *pathkey_orderings = NIL; - - List *group_pathkeys = root->group_pathkeys; - List *group_clauses = parse->groupClause; - - /* generate alternative group orderings that might be useful */ - pathkey_orderings = get_useful_group_keys_orderings(root, - path->rows, - path->pathkeys, - group_pathkeys, - group_clauses); - - Assert(list_length(pathkey_orderings) > 0); + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); - /* process all potentially interesting grouping reorderings */ - foreach(lc2, pathkey_orderings) + /* + * Insert a Sort node, if required. But there's no point in + * sorting anything but the cheapest path. + */ + if (!is_sorted) { - bool is_sorted; - int presorted_keys = 0; - PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - - /* restore the path (we replace it in the loop) */ - path = path_original; - - is_sorted = pathkeys_count_contained_in(info->pathkeys, - path->pathkeys, - &presorted_keys); - - /* - * Insert a Sort node, if required. But there's no point - * in sorting anything but the cheapest path. - */ - if (!is_sorted) - { - if (path != partially_grouped_rel->cheapest_total_path) - continue; - path = (Path *) create_sort_path(root, - grouped_rel, - path, - info->pathkeys, - -1.0); - } + if (path != partially_grouped_rel->cheapest_total_path) + continue; + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); + } - if (parse->hasAggs) - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_FINAL_DESERIAL, - info->clauses, - havingQual, - agg_final_costs, - dNumGroups)); - else - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - info->clauses, - havingQual, - dNumGroups)); + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); - /* - * Now we may consider incremental sort on this path, but - * only when the path is not already sorted and when - * incremental sort is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental + * sort is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; - /* - * Restore the input path (we might have added Sort on - * top). - */ - path = path_original; + /* Restore the input path (we might have added Sort on top). */ + path = path_original; - /* - * no shared prefix, not point in building incremental - * sort - */ - if (presorted_keys == 0) - continue; + /* no shared prefix, not point in building incremental sort */ + if (presorted_keys == 0) + continue; - /* - * We should have already excluded pathkeys of length 1 - * because then presorted_keys > 0 would imply is_sorted - * was true. - */ - Assert(list_length(root->group_pathkeys) != 1); + /* + * We should have already excluded pathkeys of length 1 + * because then presorted_keys > 0 would imply is_sorted was + * true. + */ + Assert(list_length(root->group_pathkeys) != 1); - path = (Path *) create_incremental_sort_path(root, - grouped_rel, - path, - info->pathkeys, - presorted_keys, - -1.0); + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); - if (parse->hasAggs) - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_FINAL_DESERIAL, - info->clauses, - havingQual, - agg_final_costs, - dNumGroups)); - else - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - info->clauses, - havingQual, - dNumGroups)); - } + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); } } } @@ -6730,71 +6674,41 @@ create_partial_grouping_paths(PlannerInfo *root, */ foreach(lc, input_rel->pathlist) { - ListCell *lc2; Path *path = (Path *) lfirst(lc); - Path *path_save = path; - - List *pathkey_orderings = NIL; - - List *group_pathkeys = root->group_pathkeys; - List *group_clauses = parse->groupClause; + bool is_sorted; - /* generate alternative group orderings that might be useful */ - pathkey_orderings = get_useful_group_keys_orderings(root, - path->rows, - path->pathkeys, - group_pathkeys, - group_clauses); - - Assert(list_length(pathkey_orderings) > 0); - - /* process all potentially interesting grouping reorderings */ - foreach(lc2, pathkey_orderings) + is_sorted = pathkeys_contained_in(root->group_pathkeys, + path->pathkeys); + if (path == cheapest_total_path || is_sorted) { - bool is_sorted; - int presorted_keys = 0; - PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - - /* restore the path (we replace it in the loop) */ - path = path_save; - - is_sorted = pathkeys_count_contained_in(info->pathkeys, - path->pathkeys, - &presorted_keys); - - if (path == cheapest_total_path || is_sorted) - { - /* Sort the cheapest partial path, if it isn't already */ - if (!is_sorted) - { - path = (Path *) create_sort_path(root, - partially_grouped_rel, - path, - info->pathkeys, - -1.0); - } + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + -1.0); - if (parse->hasAggs) - add_path(partially_grouped_rel, (Path *) - create_agg_path(root, - partially_grouped_rel, - path, - partially_grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_INITIAL_SERIAL, - info->clauses, - NIL, - agg_partial_costs, - dNumPartialGroups)); - else - add_path(partially_grouped_rel, (Path *) - create_group_path(root, - partially_grouped_rel, - path, - info->clauses, - NIL, - dNumPartialGroups)); - } + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialGroups)); } } @@ -6804,8 +6718,6 @@ create_partial_grouping_paths(PlannerInfo *root, * We can also skip the entire loop when we only have a single-item * group_pathkeys because then we can't possibly have a presorted * prefix of the list without having the list be fully sorted. - * - * XXX Shouldn't this also consider the group-key-reordering? */ if (enable_incremental_sort && list_length(root->group_pathkeys) > 1) { @@ -6863,101 +6775,24 @@ create_partial_grouping_paths(PlannerInfo *root, /* Similar to above logic, but for partial paths. */ foreach(lc, input_rel->partial_pathlist) { - ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; + bool is_sorted; + int presorted_keys; - List *pathkey_orderings = NIL; - - List *group_pathkeys = root->group_pathkeys; - List *group_clauses = parse->groupClause; + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); - /* generate alternative group orderings that might be useful */ - pathkey_orderings = get_useful_group_keys_orderings(root, - path->rows, - path->pathkeys, - group_pathkeys, - group_clauses); - - Assert(list_length(pathkey_orderings) > 0); - - /* process all potentially interesting grouping reorderings */ - foreach(lc2, pathkey_orderings) + if (path == cheapest_partial_path || is_sorted) { - bool is_sorted; - int presorted_keys = 0; - PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - - /* restore the path (we replace it in the loop) */ - path = path_original; - - is_sorted = pathkeys_count_contained_in(info->pathkeys, - path->pathkeys, - &presorted_keys); - - if (path == cheapest_partial_path || is_sorted) - { - - /* Sort the cheapest partial path, if it isn't already */ - if (!is_sorted) - { - path = (Path *) create_sort_path(root, - partially_grouped_rel, - path, - info->pathkeys, - -1.0); - } - - if (parse->hasAggs) - add_partial_path(partially_grouped_rel, (Path *) - create_agg_path(root, - partially_grouped_rel, - path, - partially_grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_INITIAL_SERIAL, - info->clauses, - NIL, - agg_partial_costs, - dNumPartialPartialGroups)); - else - add_partial_path(partially_grouped_rel, (Path *) - create_group_path(root, - partially_grouped_rel, - path, - info->clauses, - NIL, - dNumPartialPartialGroups)); - } - - /* - * Now we may consider incremental sort on this path, but only - * when the path is not already sorted and when incremental - * sort is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; - - /* Restore the input path (we might have added Sort on top). */ - path = path_original; - - /* no shared prefix, not point in building incremental sort */ - if (presorted_keys == 0) - continue; - - /* - * We should have already excluded pathkeys of length 1 - * because then presorted_keys > 0 would imply is_sorted was - * true. - */ - Assert(list_length(root->group_pathkeys) != 1); - - path = (Path *) create_incremental_sort_path(root, - partially_grouped_rel, - path, - info->pathkeys, - presorted_keys, - -1.0); + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + -1.0); if (parse->hasAggs) add_partial_path(partially_grouped_rel, (Path *) @@ -6965,9 +6800,9 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel, path, partially_grouped_rel->reltarget, - info->clauses ? AGG_SORTED : AGG_PLAIN, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_INITIAL_SERIAL, - info->clauses, + parse->groupClause, NIL, agg_partial_costs, dNumPartialPartialGroups)); @@ -6976,10 +6811,59 @@ create_partial_grouping_paths(PlannerInfo *root, create_group_path(root, partially_grouped_rel, path, - info->clauses, + parse->groupClause, NIL, dNumPartialPartialGroups)); } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incremental_sort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + /* no shared prefix, not point in building incremental sort */ + if (presorted_keys == 0) + continue; + + /* + * We should have already excluded pathkeys of length 1 because + * then presorted_keys > 0 would imply is_sorted was true. + */ + Assert(list_length(root->group_pathkeys) != 1); + + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + else + add_partial_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialPartialGroups)); } } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e2a3c110ce..2de5b0c836 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root, pathnode->path.pathkeys = child->pathkeys; } else - cost_append(pathnode, root); + cost_append(pathnode); /* If the caller provided a row estimate, override the computed value. */ if (rows >= 0) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1884918318..8d1b374bdf 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3368,28 +3368,11 @@ double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo) { - return estimate_num_groups_incremental(root, groupExprs, - input_rows, pgset, estinfo, - NULL, 0); -} - -/* - * estimate_num_groups_incremental - * An estimate_num_groups variant, optimized for cases that are adding the - * expressions incrementally (e.g. one by one). - */ -double -estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, - double input_rows, - List **pgset, EstimationInfo *estinfo, - List **cache_varinfos, int prevNExprs) -{ - List *varinfos = (cache_varinfos) ? *cache_varinfos : NIL; + List *varinfos = NIL; double srf_multiplier = 1.0; double numdistinct; ListCell *l; - int i, - j; + int i; /* Zero the estinfo output parameter, if non-NULL */ if (estinfo != NULL) @@ -3420,7 +3403,7 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, */ numdistinct = 1.0; - i = j = 0; + i = 0; foreach(l, groupExprs) { Node *groupexpr = (Node *) lfirst(l); @@ -3429,14 +3412,6 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, List *varshere; ListCell *l2; - /* was done on previous call */ - if (cache_varinfos && j++ < prevNExprs) - { - if (pgset) - i++; /* to keep in sync with lines below */ - continue; - } - /* is expression in this grouping set? */ if (pgset && !list_member_int(*pgset, i++)) continue; @@ -3506,11 +3481,7 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, if (varshere == NIL) { if (contain_volatile_functions(groupexpr)) - { - if (cache_varinfos) - *cache_varinfos = varinfos; return input_rows; - } continue; } @@ -3527,9 +3498,6 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, } } - if (cache_varinfos) - *cache_varinfos = varinfos; - /* * If now no Vars, we must have an all-constant or all-boolean GROUP BY * list. diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index be7bf9d218..328aab0771 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1214,16 +1214,6 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, - { - {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD, - gettext_noop("Enables reordering of GROUP BY keys."), - NULL, - GUC_EXPLAIN - }, - &enable_group_by_reordering, - true, - NULL, NULL, NULL - }, { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Enables genetic query optimization."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b4bc06e5f5..f92ff4cc29 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -388,7 +388,6 @@ #enable_seqscan = on #enable_sort = on #enable_tidscan = on -#enable_group_by_reordering = on # - Planner Cost Constants - diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index a6e5db4eec..8556b2ffe7 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1071,16 +1071,6 @@ typedef struct PathKey bool pk_nulls_first; /* do NULLs come before normal values? */ } PathKey; -/* - * Combines information about pathkeys and the associated clauses. - */ -typedef struct PathKeyInfo -{ - NodeTag type; - List *pathkeys; - List *clauses; -} PathKeyInfo; - /* * VolatileFunctionStatus -- allows nodes to cache their * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index dc7fc17411..bc12071af6 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -114,9 +114,7 @@ extern void cost_incremental_sort(Path *path, Cost input_startup_cost, Cost input_total_cost, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples); -extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys, - int nPresortedKeys, double tuples); -extern void cost_append(AppendPath *path, PlannerInfo *root); +extern void cost_append(AppendPath *path); extern void cost_merge_append(Path *path, PlannerInfo *root, List *pathkeys, int n_streams, Cost input_startup_cost, Cost input_total_cost, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 54ab709c67..e313eb2138 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -24,7 +24,6 @@ extern PGDLLIMPORT bool enable_geqo; extern PGDLLIMPORT int geqo_threshold; extern PGDLLIMPORT int min_parallel_table_scan_size; extern PGDLLIMPORT int min_parallel_index_scan_size; -extern PGDLLIMPORT bool enable_group_by_reordering; /* Hook for plugins to get control in set_rel_pathlist() */ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root, @@ -203,12 +202,6 @@ typedef enum extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common); -extern int group_keys_reorder_by_pathkeys(List *pathkeys, - List **group_pathkeys, - List **group_clauses); -extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows, - List *path_pathkeys, - List *group_pathkeys, List *group_clauses); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index d485b9bfcd..8f3d73edfb 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -214,11 +214,6 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo); -extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, - double input_rows, List **pgset, - EstimationInfo *estinfo, - List **cache_varinfos, int prevNExprs); - extern void estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 601047fa3d..0a23a39aa2 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1210,8 +1210,7 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN --------------------------------------------------------------------------------------------- - HashAggregate - Group Key: $0, $1 + Unique InitPlan 1 (returns $0) -> Limit -> Merge Append @@ -1234,8 +1233,10 @@ explain (costs off) -> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8 Index Cond: (f1 IS NOT NULL) -> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9 - -> Result -(25 rows) + -> Sort + Sort Key: ($0), ($1) + -> Result +(26 rows) select distinct min(f1), max(f1) from minmaxtest; min | max @@ -2447,241 +2448,6 @@ SELECT balk(hundred) FROM tenk1; (1 row) ROLLBACK; --- GROUP BY optimization by reorder columns -SELECT - i AS id, - i/2 AS p, - format('%60s', i%2) AS v, - i/4 AS c, - i/8 AS d, - (random() * (10000/8))::int as e --the same as d but no correlation with p - INTO btg -FROM - generate_series(1, 10000) i; -VACUUM btg; -ANALYZE btg; --- GROUP BY optimization by reorder columns by frequency -SET enable_hashagg=off; -SET max_parallel_workers= 0; -SET max_parallel_workers_per_gather = 0; -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, v - -> Sort - Sort Key: p, v - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, v - -> Sort - Sort Key: p, v - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, c, v - -> Sort - Sort Key: p, c, v - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: v, p, c - -> Sort - Sort Key: v, p, c - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c; - QUERY PLAN ------------------------------- - GroupAggregate - Group Key: p, d, c, v - -> Sort - Sort Key: p, d, c, v - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; - QUERY PLAN ------------------------------- - GroupAggregate - Group Key: v, p, d, c - -> Sort - Sort Key: v, p, d, c - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; - QUERY PLAN ------------------------------- - GroupAggregate - Group Key: p, v, d, c - -> Sort - Sort Key: p, v, d, c - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, d, e; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, d, e - -> Sort - Sort Key: p, d, e - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, e, d; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, e, d - -> Sort - Sort Key: p, e, d - -> Seq Scan on btg -(5 rows) - -CREATE STATISTICS btg_dep ON d, e, p FROM btg; -ANALYZE btg; -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, d, e; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, d, e - -> Sort - Sort Key: p, d, e - -> Seq Scan on btg -(5 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, e, d; - QUERY PLAN ------------------------------ - GroupAggregate - Group Key: p, e, d - -> Sort - Sort Key: p, e, d - -> Seq Scan on btg -(5 rows) - --- GROUP BY optimization by reorder columns by index scan -CREATE INDEX ON btg(p, v); -SET enable_seqscan=off; -SET enable_bitmapscan=off; -VACUUM btg; -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v; - QUERY PLAN ------------------------------------------------- - GroupAggregate - Group Key: p, v - -> Index Only Scan using btg_p_v_idx on btg -(3 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; - QUERY PLAN ------------------------------------------------- - GroupAggregate - Group Key: p, v - -> Index Only Scan using btg_p_v_idx on btg -(3 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p; - QUERY PLAN ------------------------------------------------- - GroupAggregate - Group Key: p, v - -> Index Only Scan using btg_p_v_idx on btg -(3 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; - QUERY PLAN ------------------------------------------------- - GroupAggregate - Group Key: p, v - -> Index Only Scan using btg_p_v_idx on btg -(3 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c; - QUERY PLAN -------------------------------------------------- - GroupAggregate - Group Key: p, c, v - -> Incremental Sort - Sort Key: p, c, v - Presorted Key: p - -> Index Scan using btg_p_v_idx on btg -(6 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; - QUERY PLAN -------------------------------------------------- - GroupAggregate - Group Key: p, v, c - -> Incremental Sort - Sort Key: p, v, c - Presorted Key: p, v - -> Index Scan using btg_p_v_idx on btg -(6 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, c, p, d; - QUERY PLAN -------------------------------------------------- - GroupAggregate - Group Key: p, c, d, v - -> Incremental Sort - Sort Key: p, c, d, v - Presorted Key: p - -> Index Scan using btg_p_v_idx on btg -(6 rows) - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; - QUERY PLAN -------------------------------------------------- - GroupAggregate - Group Key: p, v, c, d - -> Incremental Sort - Sort Key: p, v, c, d - Presorted Key: p, v - -> Index Scan using btg_p_v_idx on btg -(6 rows) - -DROP TABLE btg; -RESET enable_hashagg; -RESET max_parallel_workers; -RESET max_parallel_workers_per_gather; -RESET enable_seqscan; -RESET enable_bitmapscan; -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 49953eaade..0a631124c2 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0; set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); create index on t (a); analyze t; set enable_incremental_sort = off; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 1f0df6b7d9..517c7b2ee2 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -1984,8 +1984,8 @@ USING (name); ------+----+---- bb | 12 | 13 cc | 22 | 23 - ee | 42 | dd | | 33 + ee | 42 | (4 rows) -- Cases with non-nullable expressions in subquery results; @@ -2019,8 +2019,8 @@ NATURAL FULL JOIN ------+------+------+------+------ bb | 12 | 2 | 13 | 3 cc | 22 | 2 | 23 | 3 - ee | 42 | 2 | | dd | | | 33 | 3 + ee | 42 | 2 | | (4 rows) SELECT * FROM @@ -4645,20 +4645,18 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN ---------------------------------------------- - Merge Left Join - Merge Cond: (d.a = s.id) + QUERY PLAN +-------------------------------------- + Merge Right Join + Merge Cond: (b.id = d.a) + -> Unique + -> Sort + Sort Key: b.id, b.c_id + -> Seq Scan on b -> Sort Sort Key: d.a -> Seq Scan on d - -> Sort - Sort Key: s.id - -> Subquery Scan on s - -> HashAggregate - Group Key: b.id, b.c_id - -> Seq Scan on b -(11 rows) +(9 rows) -- check join removal works when uniqueness of the join condition is enforced -- by a UNION @@ -6368,39 +6366,44 @@ select * from j1 natural join j2; explain (verbose, costs off) select * from j1 inner join (select distinct id from j3) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------ + QUERY PLAN +----------------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> HashAggregate + -> Unique Output: j3.id - Group Key: j3.id - -> Seq Scan on public.j3 + -> Sort Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(11 rows) +(13 rows) -- ensure group by clause allows the inner to become unique explain (verbose, costs off) select * from j1 inner join (select id from j3 group by id) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------ + QUERY PLAN +----------------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> HashAggregate + -> Group Output: j3.id Group Key: j3.id - -> Seq Scan on public.j3 + -> Sort Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(11 rows) +(14 rows) drop table j1; drop table j2; diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index 4047c3e761..787af41dfe 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -1460,15 +1460,18 @@ WHEN MATCHED AND t.a < 10 THEN explain_merge -------------------------------------------------------------------- Merge on ex_mtarget t (actual rows=0 loops=1) - -> Hash Join (actual rows=0 loops=1) - Hash Cond: (s.a = t.a) - -> Seq Scan on ex_msource s (actual rows=1 loops=1) - -> Hash (actual rows=0 loops=1) - Buckets: xxx Batches: xxx Memory Usage: xxx + -> Merge Join (actual rows=0 loops=1) + Merge Cond: (t.a = s.a) + -> Sort (actual rows=0 loops=1) + Sort Key: t.a + Sort Method: quicksort Memory: xxx -> Seq Scan on ex_mtarget t (actual rows=0 loops=1) Filter: (a < '-1000'::integer) Rows Removed by Filter: 54 -(9 rows) + -> Sort (never executed) + Sort Key: s.a + -> Seq Scan on ex_msource s (never executed) +(12 rows) DROP TABLE ex_msource, ex_mtarget; DROP FUNCTION explain_merge(text); diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index 0dc6d63347..0d69619a2f 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -949,12 +949,12 @@ SET parallel_setup_cost = 0; -- is not partial agg safe. EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; - QUERY PLAN --------------------------------------------------------------------------------------------- - Gather Merge - Workers Planned: 2 - -> Sort - Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) + -> Gather + Workers Planned: 2 -> Parallel Append -> GroupAggregate Group Key: pagg_tab_ml.a @@ -1381,26 +1381,28 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; - QUERY PLAN -------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x)) - -> Finalize HashAggregate + -> Finalize GroupAggregate Group Key: pagg_tab_para.y Filter: (avg(pagg_tab_para.x) < '12'::numeric) - -> Gather + -> Gather Merge Workers Planned: 2 - -> Parallel Append - -> Partial HashAggregate - Group Key: pagg_tab_para.y - -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para - -> Partial HashAggregate - Group Key: pagg_tab_para_1.y - -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 - -> Partial HashAggregate - Group Key: pagg_tab_para_2.y - -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 -(17 rows) + -> Sort + Sort Key: pagg_tab_para.y + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para.y + -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para + -> Partial HashAggregate + Group Key: pagg_tab_para_1.y + -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 + -> Partial HashAggregate + Group Key: pagg_tab_para_2.y + -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 +(19 rows) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; y | sum | avg | count diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 03926a8413..bb5b7c47a4 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -466,41 +466,52 @@ EXPLAIN (COSTS OFF) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 GROUP BY 1, 2 ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- Group Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Sort + -> Merge Append Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Append - -> Merge Full Join - Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) - Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) - -> Sort - Sort Key: prt1_1.a, prt1_1.b - -> Seq Scan on prt1_p1 prt1_1 - -> Sort - Sort Key: p2_1.a, p2_1.b - -> Seq Scan on prt2_p1 p2_1 - -> Merge Full Join - Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) - Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) - -> Sort - Sort Key: prt1_2.a, prt1_2.b - -> Seq Scan on prt1_p2 prt1_2 - -> Sort - Sort Key: p2_2.a, p2_2.b - -> Seq Scan on prt2_p2 p2_2 - -> Merge Full Join - Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a)) - Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510)) - -> Sort - Sort Key: prt1_3.b, prt1_3.a - -> Seq Scan on prt1_p3 prt1_3 - -> Sort - Sort Key: p2_3.b, p2_3.a - -> Seq Scan on prt2_p3 p2_3 -(32 rows) + -> Group + Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) + -> Sort + Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) + -> Merge Full Join + Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b)) + Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510)) + -> Sort + Sort Key: prt1.a, prt1.b + -> Seq Scan on prt1_p1 prt1 + -> Sort + Sort Key: p2.a, p2.b + -> Seq Scan on prt2_p1 p2 + -> Group + Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) + -> Sort + Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) + -> Merge Full Join + Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) + Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) + -> Sort + Sort Key: prt1_1.a, prt1_1.b + -> Seq Scan on prt1_p2 prt1_1 + -> Sort + Sort Key: p2_1.a, p2_1.b + -> Seq Scan on prt2_p2 p2_1 + -> Group + Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) + -> Sort + Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) + -> Merge Full Join + Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) + Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) + -> Sort + Sort Key: prt1_2.a, prt1_2.b + -> Seq Scan on prt1_p3 prt1_2 + -> Sort + Sort Key: p2_2.a, p2_2.b + -> Seq Scan on prt2_p3 p2_2 +(43 rows) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 4e775af175..579b861d84 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -114,7 +114,6 @@ select name, setting from pg_settings where name like 'enable%'; enable_async_append | on enable_bitmapscan | on enable_gathermerge | on - enable_group_by_reordering | on enable_hashagg | on enable_hashjoin | on enable_incremental_sort | on @@ -132,7 +131,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(21 rows) +(20 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 7ac4a9380e..dece7310cf 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1303,22 +1303,24 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where q2 = q2; - QUERY PLAN ----------------------------------------------------- - HashAggregate - Group Key: "*SELECT* 1".q1 - -> Append + QUERY PLAN +---------------------------------------------------------- + Unique + -> Merge Append + Sort Key: "*SELECT* 1".q1 -> Subquery Scan on "*SELECT* 1" - -> HashAggregate - Group Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: (q2 IS NOT NULL) + -> Unique + -> Sort + Sort Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: (q2 IS NOT NULL) -> Subquery Scan on "*SELECT* 2" - -> HashAggregate - Group Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: (q2 IS NOT NULL) -(13 rows) + -> Unique + -> Sort + Sort Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: (q2 IS NOT NULL) +(15 rows) select distinct q1 from (select distinct * from int8_tbl i81 @@ -1337,22 +1339,24 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where -q1 = q2; - QUERY PLAN --------------------------------------------------- - HashAggregate - Group Key: "*SELECT* 1".q1 - -> Append + QUERY PLAN +-------------------------------------------------------- + Unique + -> Merge Append + Sort Key: "*SELECT* 1".q1 -> Subquery Scan on "*SELECT* 1" - -> HashAggregate - Group Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: ((- q1) = q2) + -> Unique + -> Sort + Sort Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: ((- q1) = q2) -> Subquery Scan on "*SELECT* 2" - -> HashAggregate - Group Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: ((- q1) = q2) -(13 rows) + -> Unique + -> Sort + Sort Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: ((- q1) = q2) +(15 rows) select distinct q1 from (select distinct * from int8_tbl i81 diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index c6e0d7ba2b..2f5d0e00f3 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1025,105 +1025,6 @@ SELECT balk(hundred) FROM tenk1; ROLLBACK; --- GROUP BY optimization by reorder columns - -SELECT - i AS id, - i/2 AS p, - format('%60s', i%2) AS v, - i/4 AS c, - i/8 AS d, - (random() * (10000/8))::int as e --the same as d but no correlation with p - INTO btg -FROM - generate_series(1, 10000) i; - -VACUUM btg; -ANALYZE btg; - --- GROUP BY optimization by reorder columns by frequency - -SET enable_hashagg=off; -SET max_parallel_workers= 0; -SET max_parallel_workers_per_gather = 0; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, d, e; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, e, d; - -CREATE STATISTICS btg_dep ON d, e, p FROM btg; -ANALYZE btg; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, d, e; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, e, d; - - --- GROUP BY optimization by reorder columns by index scan - -CREATE INDEX ON btg(p, v); -SET enable_seqscan=off; -SET enable_bitmapscan=off; -VACUUM btg; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, c, p, d; - -EXPLAIN (COSTS off) -SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; - -DROP TABLE btg; - -RESET enable_hashagg; -RESET max_parallel_workers; -RESET max_parallel_workers_per_gather; -RESET enable_seqscan; -RESET enable_bitmapscan; - - -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index 6a0e87c7f6..284a354dbb 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -213,7 +213,7 @@ set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); create index on t (a); analyze t;
pgsql-hackers by date: