From 4ff3140640a2cba300dfe4993a16932bf6e96ba9 Mon Sep 17 00:00:00 2001 From: James Coleman Date: Fri, 20 Nov 2020 20:29:51 -0500 Subject: [PATCH v2 2/5] Enforce parallel safety of pathkeys in generate_useful_gather_paths If, for example, a pathkey expression (e.g., a correlated subquery which is currently always treated this way) is unsafe for parallel query then we must not generate a gather merge path with either incremental or full sort including that pathkey. --- src/backend/optimizer/path/allpaths.c | 13 ++++-- src/backend/optimizer/path/equivclass.c | 7 +++- src/include/optimizer/paths.h | 2 +- .../regress/expected/incremental_sort.out | 40 +++++++++++++++++++ src/test/regress/sql/incremental_sort.sql | 11 +++++ 5 files changed, 68 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 672a20760c..ac21b08801 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -2802,6 +2802,9 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) * This allows us to do incremental sort on top of an index scan under a gather * merge node, i.e. parallelized. * + * If the parallel argument is true then we'll require that pathkey expressions + * be parallel safe. + * * XXX At the moment this can only ever return a list with a single element, * because it looks at query_pathkeys only. So we might return the pathkeys * directly, but it seems plausible we'll want to consider other orderings @@ -2809,7 +2812,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) * merge joins. */ static List * -get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) +get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel, bool parallel) { List *useful_pathkeys_list = NIL; @@ -2839,8 +2842,12 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) * meet criteria of EC membership in the current relation, we * enable not just an incremental sort on the entirety of * query_pathkeys but also incremental sort below a JOIN. + * + * By passing true for the final argument + * find_em_expr_usable_for_sorting_rel will ensure the chosen + * expression is parallel safe. */ - if (!find_em_expr_usable_for_sorting_rel(pathkey_ec, rel)) + if (!find_em_expr_usable_for_sorting_rel(root, pathkey_ec, rel, true)) break; npathkeys++; @@ -2894,7 +2901,7 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_r generate_gather_paths(root, rel, override_rows); /* consider incremental sort for interesting orderings */ - useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel); + useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel, true); /* used for explicit (full) sort paths */ cheapest_partial_path = linitial(rel->partial_pathlist); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index f98fd7b3eb..e47a16827d 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -801,9 +801,12 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) * Find an equivalence class member expression that can be safely used by a * sort node on top of the provided relation. The rules here must match those * applied in prepare_sort_from_pathkeys. + * + * If parallel is set to true, then we'll also enforce that the chosen + * expression is parallel safe. */ Expr * -find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel) +find_em_expr_usable_for_sorting_rel(PlannerInfo *root, EquivalenceClass *ec, RelOptInfo *rel, bool parallel) { ListCell *lc_em; @@ -833,6 +836,8 @@ find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel) if (!bms_is_subset(em->em_relids, rel->relids)) continue; + if (parallel && !is_parallel_safe(root, (Node *) em_expr)) + continue; /* * As long as the expression isn't volatile then * prepare_sort_from_pathkeys is able to generate a new target entry, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 8a4c6f8b59..afe1b41a4d 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -135,7 +135,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Relids rel, bool create_it); extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); -extern Expr *find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel); +extern Expr *find_em_expr_usable_for_sorting_rel(PlannerInfo *root, EquivalenceClass *ec, RelOptInfo *rel, bool parallel); extern void generate_base_implied_equalities(PlannerInfo *root); extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 51471ae92d..d316a7c4b9 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1551,6 +1551,46 @@ order by 1, 2; -> Function Scan on generate_series (7 rows) +-- Parallel sort but with expression (correlated subquery) that +-- is prohibited in parallel plans. +explain (costs off) select distinct + unique1, + (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1) +from tenk1 t, generate_series(1, 1000); + QUERY PLAN +--------------------------------------------------------------------------------- + Unique + -> Sort + Sort Key: t.unique1, ((SubPlan 1)) + -> Gather + Workers Planned: 2 + -> Nested Loop + -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t + -> Function Scan on generate_series + SubPlan 1 + -> Index Only Scan using tenk1_unique1 on tenk1 + Index Cond: (unique1 = t.unique1) +(11 rows) + +explain (costs off) select + unique1, + (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1) +from tenk1 t, generate_series(1, 1000) +order by 1, 2; + QUERY PLAN +--------------------------------------------------------------------------- + Sort + Sort Key: t.unique1, ((SubPlan 1)) + -> Gather + Workers Planned: 2 + -> Nested Loop + -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t + -> Function Scan on generate_series + SubPlan 1 + -> Index Only Scan using tenk1_unique1 on tenk1 + Index Cond: (unique1 = t.unique1) +(10 rows) + -- Parallel sort but with expression not available until the upper rel. explain (costs off) select distinct sub.unique1, stringu1 || random()::text from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index cb48f200ce..ff3af0fd16 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -250,6 +250,17 @@ from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; explain (costs off) select sub.unique1, md5(stringu1) from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub order by 1, 2; +-- Parallel sort but with expression (correlated subquery) that +-- is prohibited in parallel plans. +explain (costs off) select distinct + unique1, + (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1) +from tenk1 t, generate_series(1, 1000); +explain (costs off) select + unique1, + (select t.unique1 from tenk1 where tenk1.unique1 = t.unique1) +from tenk1 t, generate_series(1, 1000) +order by 1, 2; -- Parallel sort but with expression not available until the upper rel. explain (costs off) select distinct sub.unique1, stringu1 || random()::text from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; -- 2.17.1