From 8f7fcf0ffc12407f48c12761a3b4686239d5716d Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 26 Dec 2023 17:30:36 +0800 Subject: [PATCH v1] Improve parallel DISTINCT Commit 22c4e88ebf introduced parallel DISTINCT, which is implemented by adding unique/aggregate paths on top of the input rel's partial paths, and then adding Gather/GatherMerge paths on top and adding a final unique/aggregate path at last. The Gather/GatherMerge paths are added by generate_gather_paths(), which does not consider ordering that might be useful above the GatherMerge node. This can be improved by using generate_useful_gather_paths() instead. As the plan change in regression test shows, this change improves query plans. In passing, this patch also avoids the use of root->upper_targets to set target for partial_distinct_rel, because root->upper_targets is not supposed to be used by the core code. --- src/backend/optimizer/path/allpaths.c | 8 ++++---- src/backend/optimizer/plan/planner.c | 13 ++++++++++--- src/test/regress/expected/select_distinct.out | 8 ++++---- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 67921a0826..b84bf7ee8b 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -3053,10 +3053,10 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * * If we're generating paths for a scan or join relation, override_rows will * be false, and we'll just use the relation's size estimate. When we're - * being called for a partially-grouped path, though, we need to override - * the rowcount estimate. (It's not clear that the particular value we're - * using here is actually best, but the underlying rel has no estimate so - * we must do something.) + * being called for a partially-grouped or partially-distinct path, though, we + * need to override the rowcount estimate. (It's not clear that the + * particular value we're using here is actually best, but the underlying rel + * has no estimate so we must do something.) */ void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 6f45efde21..c4698eb951 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1644,8 +1644,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ root->upper_targets[UPPERREL_FINAL] = final_target; root->upper_targets[UPPERREL_ORDERED] = final_target; - root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target; root->upper_targets[UPPERREL_DISTINCT] = sort_input_target; + root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target; root->upper_targets[UPPERREL_WINDOW] = sort_input_target; root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target; @@ -4751,7 +4751,6 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT, NULL); - partial_distinct_rel->reltarget = root->upper_targets[UPPERREL_PARTIAL_DISTINCT]; partial_distinct_rel->consider_parallel = input_rel->consider_parallel; /* @@ -4872,7 +4871,15 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, if (partial_distinct_rel->partial_pathlist != NIL) { - generate_gather_paths(root, partial_distinct_rel, true); + /* + * Set target for partial_distinct_rel as generate_useful_gather_paths + * requires that the input rel has a valid reltarget. + */ + partial_distinct_rel->reltarget = cheapest_partial_path->pathtarget; + + /* Consider generating Gather or Gather Merge paths */ + generate_useful_gather_paths(root, partial_distinct_rel, true); + set_cheapest(partial_distinct_rel); /* diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 9d44ea8056..f32d39f76c 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -235,10 +235,10 @@ SELECT DISTINCT four FROM tenk1; QUERY PLAN ---------------------------------------------------- Unique - -> Sort - Sort Key: four - -> Gather - Workers Planned: 2 + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: four -> HashAggregate Group Key: four -> Parallel Seq Scan on tenk1 -- 2.31.0