From 37fbcef2c8b0e1e150f5d938d8ebf20658fba1f2 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Fri, 26 Jan 2024 14:39:43 +0800 Subject: [PATCH v1] Apply the "LIMIT 1" optimization to partial DISTINCT --- src/backend/optimizer/plan/planner.c | 38 ++++++++++++++++--- src/test/regress/expected/select_distinct.out | 27 +++++++++++++ src/test/regress/sql/select_distinct.sql | 16 ++++++++ 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 2e2458b128..247902fba7 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4737,11 +4737,39 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, -1.0); } - add_partial_path(partial_distinct_rel, (Path *) - create_upper_unique_path(root, partial_distinct_rel, - sorted_path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + /* + * See comments in create_final_distinct_paths(). + */ + if (root->distinct_pathkeys == NIL) + { + Node *limitCount; + + limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, + sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); + + /* + * If the query already has a LIMIT clause, then we could end + * up with a duplicate LimitPath in the final plan. That does + * not seem worth troubling over too much. + */ + add_partial_path(partial_distinct_rel, (Path *) + create_limit_path(root, partial_distinct_rel, + sorted_path, + NULL, + limitCount, + LIMIT_OPTION_COUNT, + 0, 1)); + } + else + { + add_partial_path(partial_distinct_rel, (Path *) + create_upper_unique_path(root, partial_distinct_rel, + sorted_path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } } diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 9d44ea8056..0d4cdbe67b 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -348,6 +348,33 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; 0 | 1 | 2 | 3 (1 row) +-- Ensure we get a plan with a Limit 1 in both partial distinct and final +-- distinct +SET parallel_tuple_cost=0; +SET parallel_setup_cost=0; +SET min_parallel_table_scan_size=0; +SET max_parallel_workers_per_gather=2; +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 4; + QUERY PLAN +---------------------------------------------- + Limit + -> Gather + Workers Planned: 2 + -> Limit + -> Parallel Seq Scan on tenk1 + Filter: (four = 4) +(6 rows) + +SELECT DISTINCT four FROM tenk1 WHERE four = 4; + four +------ +(0 rows) + +RESET max_parallel_workers_per_gather; +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; -- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 1643526d99..4e073c590e 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -180,6 +180,22 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; -- Ensure we only get 1 row SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; +-- Ensure we get a plan with a Limit 1 in both partial distinct and final +-- distinct +SET parallel_tuple_cost=0; +SET parallel_setup_cost=0; +SET min_parallel_table_scan_size=0; +SET max_parallel_workers_per_gather=2; + +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 4; +SELECT DISTINCT four FROM tenk1 WHERE four = 4; + +RESET max_parallel_workers_per_gather; +RESET min_parallel_table_scan_size; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; + -- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. -- 2.31.0