From 37fbcef2c8b0e1e150f5d938d8ebf20658fba1f2 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Fri, 26 Jan 2024 14:39:43 +0800
Subject: [PATCH v1] Apply the "LIMIT 1" optimization to partial DISTINCT

---
 src/backend/optimizer/plan/planner.c          | 38 ++++++++++++++++---
 src/test/regress/expected/select_distinct.out | 27 +++++++++++++
 src/test/regress/sql/select_distinct.sql      | 16 ++++++++
 3 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 2e2458b128..247902fba7 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4737,11 +4737,39 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 																		-1.0);
 			}
 
-			add_partial_path(partial_distinct_rel, (Path *)
-							 create_upper_unique_path(root, partial_distinct_rel,
-													  sorted_path,
-													  list_length(root->distinct_pathkeys),
-													  numDistinctRows));
+			/*
+			 * See comments in create_final_distinct_paths().
+			 */
+			if (root->distinct_pathkeys == NIL)
+			{
+				Node	   *limitCount;
+
+				limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+												sizeof(int64),
+												Int64GetDatum(1), false,
+												FLOAT8PASSBYVAL);
+
+				/*
+				 * If the query already has a LIMIT clause, then we could end
+				 * up with a duplicate LimitPath in the final plan. That does
+				 * not seem worth troubling over too much.
+				 */
+				add_partial_path(partial_distinct_rel, (Path *)
+								 create_limit_path(root, partial_distinct_rel,
+												   sorted_path,
+												   NULL,
+												   limitCount,
+												   LIMIT_OPTION_COUNT,
+												   0, 1));
+			}
+			else
+			{
+				add_partial_path(partial_distinct_rel, (Path *)
+								 create_upper_unique_path(root, partial_distinct_rel,
+														  sorted_path,
+														  list_length(root->distinct_pathkeys),
+														  numDistinctRows));
+			}
 		}
 	}
 
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 9d44ea8056..0d4cdbe67b 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -348,6 +348,33 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
     0 |        1 |        2 |        3
 (1 row)
 
+-- Ensure we get a plan with a Limit 1 in both partial distinct and final
+-- distinct
+SET parallel_tuple_cost=0;
+SET parallel_setup_cost=0;
+SET min_parallel_table_scan_size=0;
+SET max_parallel_workers_per_gather=2;
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 4;
+                  QUERY PLAN                  
+----------------------------------------------
+ Limit
+   ->  Gather
+         Workers Planned: 2
+         ->  Limit
+               ->  Parallel Seq Scan on tenk1
+                     Filter: (four = 4)
+(6 rows)
+
+SELECT DISTINCT four FROM tenk1 WHERE four = 4;
+ four 
+------
+(0 rows)
+
+RESET max_parallel_workers_per_gather;
+RESET min_parallel_table_scan_size;
+RESET parallel_setup_cost;
+RESET parallel_tuple_cost;
 --
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index 1643526d99..4e073c590e 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -180,6 +180,22 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
 -- Ensure we only get 1 row
 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
 
+-- Ensure we get a plan with a Limit 1 in both partial distinct and final
+-- distinct
+SET parallel_tuple_cost=0;
+SET parallel_setup_cost=0;
+SET min_parallel_table_scan_size=0;
+SET max_parallel_workers_per_gather=2;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE four = 4;
+SELECT DISTINCT four FROM tenk1 WHERE four = 4;
+
+RESET max_parallel_workers_per_gather;
+RESET min_parallel_table_scan_size;
+RESET parallel_setup_cost;
+RESET parallel_tuple_cost;
+
 --
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.
-- 
2.31.0