commit d07b4086918a0d255e02985a1172bcca471efa8b Author: jcoleman Date: Sat Jul 20 14:09:20 2019 +0000 WIP: Parallel + more create_incremental_sort_paths() diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 3efc807164..c4c6714218 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -2730,6 +2730,229 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) } } +/* + * Find an equivalence class member expression, all of whose Vars come from + * the indicated relation. Returns NULL if no member qualifies. + */ +static Expr * +find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) +{ + ListCell *lc_em; + + foreach(lc_em, ec->ec_members) + { + EquivalenceMember *em = lfirst(lc_em); + + if (bms_is_subset(em->em_relids, rel->relids) && + !bms_is_empty(em->em_relids)) + { + /* + * If there is more than one equivalence member whose Vars are + * taken entirely from this relation, we'll be content to choose + * any one of those. + */ + return em->em_expr; + } + } + + /* We didn't find any suitable equivalence class expression */ + return NULL; +} + +/* + * get_useful_pathkeys_for_relation + * Determine which orderings of a relation might be useful. + * + * Getting data in sorted order can be useful either because the requested + * order matches the final output ordering for the overall query we're + * planning, or because it enables an efficient merge join. Here, we try + * to figure out which pathkeys to consider. + */ +static List * +get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) +{ + List *useful_pathkeys_list = NIL; + ListCell *lc; + + /* + * Considering the query_pathkeys is always worthwhile, because it + * might let us avoid a full sort of the final result. 
+ */ + if (root->query_pathkeys) + { + bool query_pathkeys_ok = true; + + foreach(lc, root->query_pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + + /* + * Only propose the query pathkeys when every one of them can + * be evaluated from this relation: the equivalence class must + * not be volatile and must have a member whose Vars all come + * from rel. If any pathkey fails that test, give up on the + * whole list rather than returning a prefix of it. + */ + if (pathkey_ec->ec_has_volatile || + find_em_expr_for_rel(pathkey_ec, rel) == NULL) + { + query_pathkeys_ok = false; + break; + } + } + + /* + * This ends up allowing us to do incremental sort on top of + * an index scan all parallelized under a gather merge node. + */ + if (query_pathkeys_ok) + useful_pathkeys_list = list_make1(list_copy(root->query_pathkeys)); + } + + return useful_pathkeys_list; +} + +/* + * generate_useful_gather_paths + * Generate parallel access paths for a relation by pushing a Gather or + * Gather Merge on top of a partial path. + * + * Unlike generate_gather_paths, this does not look just at the pathkeys of + * the input paths (aiming to preserve the ordering). It also considers + * orderings that might be useful to nodes above the gather merge node, and + * tries to add a sort (regular or incremental) to provide them. + * + * When override_rows is true, each Gather Merge path built here is given a + * row estimate computed from its input path (rows * parallel_workers) + * instead of the rel's rowcount estimate. + */ +void +generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) +{ + ListCell *lc; + double rows; + double *rowsp = NULL; + List *useful_pathkeys_list = NIL; + Path *cheapest_partial_path = NULL; + + /* If there are no partial paths, there's nothing to do here. */ + if (rel->partial_pathlist == NIL) + return; + + /* Should we override the rel's rowcount estimate? 
*/ + if (override_rows) + rowsp = &rows; + + /* generate the regular gather merge paths */ + generate_gather_paths(root, rel, override_rows); + + /* consider incremental sort for interesting orderings */ + useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel); + + /* used for explicit sort paths */ + cheapest_partial_path = linitial(rel->partial_pathlist); + + /* + * Consider incremental sort paths for each interesting ordering. + * + * XXX I wonder if we need to consider adding a projection here, as + * create_ordered_paths does. + */ + foreach(lc, useful_pathkeys_list) + { + List *useful_pathkeys = lfirst(lc); + ListCell *lc2; + bool is_sorted; + int presorted_keys; + + foreach(lc2, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc2); + GatherMergePath *path; + + /* + * path has no ordering at all, can't use incremental sort + * + * XXX this also skips the explicit-sort case below when the + * cheapest partial path is unsorted; confirm that is intended. + */ + if (subpath->pathkeys == NIL) + continue; + + is_sorted = pathkeys_common_contained_in(useful_pathkeys, + subpath->pathkeys, + &presorted_keys); + + if (is_sorted) + { + /* rowsp may point at rows, so set it before building the path */ + rows = subpath->rows * subpath->parallel_workers; + path = create_gather_merge_path(root, rel, subpath, rel->reltarget, + subpath->pathkeys, NULL, rowsp); + + add_path(rel, &path->path); + continue; + } + + /* now we know is_sorted == false */ + + /* + * consider regular sort for cheapest partial path (for each + * useful pathkeys) + */ + if (cheapest_partial_path == subpath) + { + Path *tmp; + + tmp = (Path *) create_sort_path(root, + rel, + subpath, + useful_pathkeys, + -1.0); + + rows = tmp->rows * tmp->parallel_workers; + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + + /* continue */ + } + + /* finally, consider incremental sort */ + if (presorted_keys > 0) + { + Path *tmp; + + /* Also consider incremental sort. 
*/ + tmp = (Path *) create_incremental_sort_path(root, + rel, + subpath, + useful_pathkeys, + presorted_keys, + -1.0); + + rows = tmp->rows * tmp->parallel_workers; + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + } + } + } +} + /* * make_rel_from_joinlist * Build access paths using a "joinlist" to guide the join path search. @@ -2902,7 +3125,7 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * once we know the final targetlist (see grouping_planner). */ if (lev < levels_needed) - generate_gather_paths(root, rel, false); + generate_useful_gather_paths(root, rel, false); /* Find and save the cheapest paths for this rel */ set_cheapest(rel); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index bfb52f21ab..c2877942cb 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -5932,7 +5932,11 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, } } if (!j) - elog(ERROR, "could not find pathkey item to sort"); + { + /* WIP: trap in assert-enabled builds, but stay a hard error otherwise */ + Assert(false); + elog(ERROR, "could not find pathkey item to sort"); + } /* * Do we need to insert a Result node? @@ -6491,7 +6495,11 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) } if (!tle) - elog(ERROR, "could not find pathkey item to sort"); + { + /* WIP: trap in assert-enabled builds, but stay a hard error otherwise */ + Assert(false); + elog(ERROR, "could not find pathkey item to sort"); + } /* * Look up the correct equality operator from the PathKey's slightly diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 16996b1bc2..54b244b158 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4922,8 +4922,9 @@ create_distinct_paths(PlannerInfo *root, * Build a new upperrel containing Paths for ORDER BY evaluation. * * All paths in the result must satisfy the ORDER BY ordering. 
- * The only new paths we need consider is an explicit full or - * incremental sort on the cheapest-total existing path. + * The only new paths we need consider are an explicit full sort + * on the cheapest-total existing path and an incremental sort on + * partially presorted paths. * * input_rel: contains the source-data Paths * target: the output tlist the result Paths must emit @@ -5001,7 +5002,12 @@ create_ordered_paths(PlannerInfo *root, } if (presorted_keys > 0) { - /* Also consider incremental sort. */ + /* + * Also consider incremental sort. Unlike standard sort, + * we don't care what the cheapest input path is; we're + * concerned only with whether the input path is already + * usefully, but partially, sorted. + */ sorted_path = (Path *) create_incremental_sort_path(root, ordered_rel, input_path, @@ -5068,6 +5074,62 @@ create_ordered_paths(PlannerInfo *root, add_path(ordered_rel, path); } + + /* also consider incremental sorts on all partial paths */ + { + ListCell *lc; + foreach (lc, input_rel->partial_pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *sorted_path = input_path; + bool is_sorted; + int presorted_keys; + double total_groups; + + /* already handled above */ + /* if (input_path == cheapest_partial_path) */ + /* continue; */ + + is_sorted = pathkeys_common_contained_in(root->sort_pathkeys, + input_path->pathkeys, &presorted_keys); + + /* also ignore already sorted paths */ + if (is_sorted) + continue; + + if (presorted_keys > 0) + { + /* Also consider incremental sort. 
*/ + sorted_path = (Path *) create_incremental_sort_path(root, + ordered_rel, + input_path, + root->sort_pathkeys, + presorted_keys, + limit_tuples); + total_groups = input_path->rows * + input_path->parallel_workers; + sorted_path = (Path *) + create_gather_merge_path(root, ordered_rel, + sorted_path, + sorted_path->pathtarget, + root->sort_pathkeys, NULL, + &total_groups); + + /* Add projection step if needed */ + if (sorted_path->pathtarget != target) + sorted_path = apply_projection_to_path(root, ordered_rel, + sorted_path, target); + + /* + * XXX: what case does this cover? + * (or is it entirely duplicative of generate_useful_gather_paths() + * in apply_scanjoin_target_to_paths()) + */ + add_path(ordered_rel, sorted_path); + } + } + + } } /* @@ -6484,6 +6546,80 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, } } + + /* + * Use any available partially-sorted path as input, with an incremental + * sort on top of it. + */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make + * an AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. + * Make a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } + } + /* * Instead of operating directly on the input relation, we can * consider finalizing a partially aggregated path. @@ -6530,6 +6666,53 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, havingQual, dNumGroups)); } + + /* incremental sort */ + foreach(lc, partially_grouped_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + } } @@ -6798,6 +6981,57 @@ create_partial_grouping_paths(PlannerInfo *root, dNumPartialGroups)); } } + + /* + * Use any available partially-sorted path as input, adding an + * incremental sort on top of it. 
+ */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + /* also ignore already sorted paths */ + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + /* add incremental sort */ + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialGroups)); + } } if (can_sort && cheapest_partial_path != NULL) @@ -6842,6 +7076,52 @@ create_partial_grouping_paths(PlannerInfo *root, dNumPartialPartialGroups)); } } + + /* consider incremental sort */ + foreach(lc, input_rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + else + add_partial_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialPartialGroups)); + } } if (can_hash && cheapest_total_path != NULL) @@ -6938,6 +7218,7 @@ create_partial_grouping_paths(PlannerInfo *root, static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) { + ListCell *lc; Path *cheapest_partial_path; /* Try Gather for unordered paths and Gather Merge for ordered ones. */ @@ -6967,6 +7248,47 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) add_path(rel, path); } + + /* also consider incremental sort on all partial paths */ + foreach (lc, rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + double total_groups; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + /* row estimate handed to Gather Merge: input rows times worker count */ + total_groups = path->rows * path->parallel_workers; + + path = (Path *) create_incremental_sort_path(root, + rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + path = (Path *) + create_gather_merge_path(root, + rel, + path, + rel->reltarget, + root->group_pathkeys, + NULL, + &total_groups); + + add_path(rel, path); + } + } /* @@ -7222,7 +7544,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root, * one of the generated paths may turn out to be the cheapest one. 
*/ if (rel->consider_parallel && !IS_OTHER_REL(rel)) - generate_gather_paths(root, rel, false); + generate_useful_gather_paths(root, rel, false); /* * Reassess which paths are the cheapest, now that we've potentially added diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index e7a40cec3f..20fa94281b 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -54,6 +54,8 @@ extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed, extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows); +extern void generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, + bool override_rows); extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages, int max_workers); extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 9775cc898c..b687c062ec 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -940,6 +940,64 @@ explain (costs off) Index Cond: (unique1 = 1) (5 rows) +ROLLBACK TO SAVEPOINT settings; +SAVEPOINT settings; +set local max_parallel_workers_per_gather=4; +set local min_parallel_table_scan_size=0; +set local parallel_tuple_cost=0; +set local parallel_setup_cost=0; +-- incremental sort tests +-- without generate_useful_gather_paths() in apply_scanjoin_target_to_paths() +-- we don't get the following plan (though regardless of that choice, with +-- enable_sort=off we get a similar plan, but using: +-- Finalize GroupAggregate, +-- -> Gather Merge +-- -> Partial GroupAggregate +-- instead of: +-- GroupAggregate, +-- -> Gather Merge +explain (costs off) select hundred, thousand, sum(twenty) from tenk1 group by 1,2 order by 1,2,3 limit 1; + QUERY PLAN +-------------------------------------------------------------------------------- + Limit + -> 
Incremental Sort + Sort Key: hundred, thousand, (sum(twenty)) + Presorted Key: hundred, thousand + -> GroupAggregate + Group Key: hundred, thousand + -> Gather Merge + Workers Planned: 4 + -> Incremental Sort + Sort Key: hundred, thousand + Presorted Key: hundred + -> Parallel Index Scan using tenk1_hundred on tenk1 +(12 rows) + +-- without generate_useful_gather_paths() in standard_join_search() +-- we don't get the following plan +explain (costs off) select * from tenk1 t1 join tenk1 t2 on t1.hundred = t2.hundred join tenk1 t3 on t1.hundred = t3.hundred order by t1.hundred, t1.twenty limit 50; + QUERY PLAN +------------------------------------------------------------------- + Limit + -> Merge Join + Merge Cond: (t1.hundred = t3.hundred) + -> Gather Merge + Workers Planned: 4 + -> Incremental Sort + Sort Key: t1.hundred, t1.twenty + Presorted Key: t1.hundred + -> Merge Join + Merge Cond: (t1.hundred = t2.hundred) + -> Sort + Sort Key: t1.hundred + -> Parallel Seq Scan on tenk1 t1 + -> Sort + Sort Key: t2.hundred + -> Seq Scan on tenk1 t2 + -> Materialize + -> Index Scan using tenk1_hundred on tenk1 t3 +(18 rows) + ROLLBACK TO SAVEPOINT settings; -- exercise record typmod remapping between backends CREATE FUNCTION make_record(n int) diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index f96812b550..9b4d5a5cd8 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -339,6 +339,30 @@ explain (costs off) select stringu1::int2 from tenk1 where unique1 = 1; ROLLBACK TO SAVEPOINT settings; + +SAVEPOINT settings; +set local max_parallel_workers_per_gather=4; +set local min_parallel_table_scan_size=0; +set local parallel_tuple_cost=0; +set local parallel_setup_cost=0; + +-- incremental sort tests + +-- without generate_useful_gather_paths() in apply_scanjoin_target_to_paths() +-- we don't get the following plan (though regardless of that choice, with +-- enable_sort=off we get a 
similar plan, but using: +-- Finalize GroupAggregate, +-- -> Gather Merge +-- -> Partial GroupAggregate +-- instead of: +-- GroupAggregate, +-- -> Gather Merge +explain (costs off) select hundred, thousand, sum(twenty) from tenk1 group by 1,2 order by 1,2,3 limit 1; +-- without generate_useful_gather_paths() in standard_join_search() +-- we don't get the following plan +explain (costs off) select * from tenk1 t1 join tenk1 t2 on t1.hundred = t2.hundred join tenk1 t3 on t1.hundred = t3.hundred order by t1.hundred, t1.twenty limit 50; +ROLLBACK TO SAVEPOINT settings; + -- exercise record typmod remapping between backends CREATE FUNCTION make_record(n int) RETURNS RECORD LANGUAGE plpgsql PARALLEL SAFE AS