From 94edaece73503572d4489d41e6644efe1e13f652 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= <yizhi.fzh@alibaba-inc.com>
Date: Fri, 31 Jan 2020 19:38:05 +0800
Subject: [PATCH] Erase the distinctClause if the result is unique by
 definition

For a single relation, we can tell it by any one of the following
is true:
1. The pk is in the target list.
2. The uk is in the target list and the columns is not null
3. The columns in group-by clause is also in the target list

for relation join, we can tell it by:
if every relation in the jointree yield a unique result set,then
the final result is unique as well regardless the join method.
for semi/anti join, we don't have such restriction on the right
table.
---
 src/backend/nodes/bitmapset.c                 |  40 +++
 src/backend/optimizer/path/costsize.c         |   1 +
 src/backend/optimizer/plan/planner.c          | 292 ++++++++++++++++++
 src/backend/utils/cache/relcache.c            |  23 ++
 src/backend/utils/misc/guc.c                  |  10 +
 src/include/nodes/bitmapset.h                 |   2 +
 src/include/optimizer/cost.h                  |   1 +
 src/include/utils/rel.h                       |   3 +
 src/test/regress/expected/join.out            |  16 +-
 .../regress/expected/select_distinct_2.out    | 141 +++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/select_distinct_2.sql    |  42 +++
 12 files changed, 565 insertions(+), 9 deletions(-)
 create mode 100644 src/test/regress/expected/select_distinct_2.out
 create mode 100644 src/test/regress/sql/select_distinct_2.sql

diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 648cc1a7eb..76ce9b526e 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -1167,3 +1167,43 @@ bms_hash_value(const Bitmapset *a)
 	return DatumGetUInt32(hash_any((const unsigned char *) a->words,
 								   (lastword + 1) * sizeof(bitmapword)));
 }
+
+/*
+ * bms_array_copy --
+ *
+ * copy the bms data in the newly palloc memory
+ */
+
+Bitmapset**
+bms_array_copy(Bitmapset **bms_array, int len)
+{
+	Bitmapset **res;
+	int i;
+	if (bms_array == NULL || len < 1)
+		return NULL;
+
+	res = palloc(sizeof(Bitmapset*) * len);
+	for(i = 0; i < len; i++)
+	{
+		res[i] = bms_copy(bms_array[i]);
+	}
+	return res;
+}
+
+/*
+ * bms_array_free
+ *
+ * free the element in the array one by one, free the array as well at last
+ */
+void
+bms_array_free(Bitmapset **bms_array,  int len)
+{
+	int idx;
+	if (bms_array == NULL)
+		return;
+	for(idx = 0 ; idx < len; idx++)
+	{
+		bms_free(bms_array[idx]);
+	}
+	pfree(bms_array);
+}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b5a0033721..dde16b5d44 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -138,6 +138,7 @@ bool		enable_partitionwise_aggregate = false;
 bool		enable_parallel_append = true;
 bool		enable_parallel_hash = true;
 bool		enable_partition_pruning = true;
+bool		enable_distinct_elimination = true;
 
 typedef struct
 {
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index d6f2153593..6f7d85f96e 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -22,8 +22,10 @@
 #include "access/htup_details.h"
 #include "access/parallel.h"
 #include "access/sysattr.h"
+#include "access/relation.h"
 #include "access/table.h"
 #include "access/xact.h"
+#include "catalog/index.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_proc.h"
@@ -35,6 +37,7 @@
 #include "lib/bipartite_match.h"
 #include "lib/knapsack.h"
 #include "miscadmin.h"
+#include "nodes/bitmapset.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #ifdef OPTIMIZER_DEBUG
@@ -248,6 +251,7 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
 								 List *targetList,
 								 List *groupClause);
 static int	common_prefix_cmp(const void *a, const void *b);
+static void	preprocess_distinct_node(PlannerInfo *root);
 
 
 /*****************************************************************************
@@ -989,6 +993,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	/* Remove any redundant GROUP BY columns */
 	remove_useless_groupby_columns(root);
 
+	if (enable_distinct_elimination)
+		preprocess_distinct_node(root);
+
 	/*
 	 * If we have any outer joins, try to reduce them to plain inner joins.
 	 * This step is most easily done after we've done expression
@@ -7409,3 +7416,288 @@ group_by_has_partkey(RelOptInfo *input_rel,
 
 	return true;
 }
+
+/*
+ * is_unique_result_already
+ *
+ * Given a relation, we can know its primary key + unique key information
+ * unique target is the target list of distinct/distinct on target.
+ * not_null_columns is a union of not null columns based on catalog and quals.
+ * then we can know the result is unique already before executing it if
+ * the primary key or uk + not null in target list.
+ */
+static bool
+is_unique_result_already(Relation relation,
+						 Bitmapset *unique_target,
+						 Bitmapset *not_null_columns)
+{
+	int i;
+	Bitmapset *pkattr = RelationGetIndexAttrBitmap(relation,
+												   INDEX_ATTR_BITMAP_PRIMARY_KEY);
+
+	/*
+	 * if the pk is in the target list,
+	 * the result set is unique for this relation
+	 */
+	if (pkattr != NULL &&
+		!bms_is_empty(pkattr) &&
+		bms_is_subset(pkattr, unique_target))
+	{
+		return true;
+	}
+
+	/*
+	 * check if the pk is in the unique index
+	 */
+	for (i = 0; i < relation->rd_plain_ukcount; i++)
+	{
+		Bitmapset *ukattr = relation->rd_plain_ukattrs[i];
+		if (!bms_is_empty(ukattr)
+			&& bms_is_subset(ukattr, unique_target)
+			&& bms_is_subset(ukattr, not_null_columns))
+			return true;
+	}
+
+	/*
+	 * If a unique index is in the target list, and the columns are not null
+	 * the result set is unique as well
+	 */
+
+	return false;
+}
+
+
+/*
+ * scan_non_semi_anti_relids
+ *
+ * scan jointree to get non-semi/anti join rtindex.
+ */
+static void
+scan_non_semi_anti_relids(Node* jtnode, Relids* relids)
+{
+	if (jtnode == NULL)
+		return;
+
+	if (IsA(jtnode, RangeTblRef))
+	{
+		int			varno = ((RangeTblRef *) jtnode)->rtindex;
+
+		*relids = bms_add_member(*relids, varno);
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		ListCell   *l;
+
+		foreach(l, f->fromlist)
+			scan_non_semi_anti_relids(lfirst(l), relids);
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+
+		scan_non_semi_anti_relids(j->larg, relids);
+		if (j->jointype != JOIN_SEMI && j->jointype != JOIN_ANTI)
+		{
+			scan_non_semi_anti_relids(j->rarg, relids);
+		}
+	}
+	else
+		elog(ERROR, "unrecognized node type: %d",
+			 (int) nodeTag(jtnode));
+
+}
+
+/*
+ * preprocess_distinct_node
+ *
+ * remove the distinctClause if it is not necessary by definition
+ */
+static void
+preprocess_distinct_node(PlannerInfo *root)
+{
+	Query *query = root->parse;
+	ListCell *lc;
+	int num_of_rtables;
+	Bitmapset **targetlist_by_table = NULL;
+    Bitmapset **notnullcolumns = NULL;
+	Index rel_idx = 0;
+	bool should_distinct_elimination = false;
+	Relids non_semi_anti_relids = NULL;
+
+	if (query->distinctClause == NIL)
+		return;
+
+	scan_non_semi_anti_relids((Node*)query->jointree, &non_semi_anti_relids);
+
+	foreach(lc, query->rtable)
+	{
+		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
+		rel_idx++;
+		if (!bms_is_member(rel_idx, non_semi_anti_relids))
+			continue;
+		/* we only handle the basic Relation for now */
+		if (rte->rtekind != RTE_RELATION)
+			return;
+	}
+
+	num_of_rtables = bms_num_members(non_semi_anti_relids);
+
+	/*
+	 * If the columns in group clause is in the target list
+	 * we don't need distinct
+	 */
+	if (query->groupClause != NIL)
+	{
+		Bitmapset *groupclause_bm = NULL;
+		Bitmapset *groupclause_in_targetlist_bm = NULL;
+		ListCell *lc;
+		foreach(lc, query->groupClause)
+			groupclause_bm = bms_add_member(groupclause_bm,
+												lfirst_node(SortGroupClause, lc)->tleSortGroupRef);
+
+		foreach(lc, query->targetList)
+		{
+			TargetEntry *te = lfirst_node(TargetEntry, lc);
+			if (te->resjunk)
+				continue;
+			groupclause_in_targetlist_bm = bms_add_member(groupclause_in_targetlist_bm,
+														  te->ressortgroupref);
+		}
+
+		should_distinct_elimination = bms_is_subset(groupclause_bm,
+													groupclause_in_targetlist_bm);
+		bms_free(groupclause_bm);
+		bms_free(groupclause_in_targetlist_bm);
+		if (should_distinct_elimination)
+			goto ret;
+	}
+
+	targetlist_by_table = palloc0(sizeof(Bitmapset*) * num_of_rtables);
+	notnullcolumns = palloc0(sizeof(Bitmapset* ) * num_of_rtables);
+
+	/* build the targetlist_by_table */
+	foreach(lc, query->targetList)
+	{
+		TargetEntry *te = lfirst_node(TargetEntry, lc);
+		Expr *expr = te->expr;
+		Var *var;
+		Bitmapset **target_column_per_rel;
+		int target_attno;
+
+		if (!IsA(expr, Var))
+			continue;
+
+		var = (Var *)(expr);
+		if (var->varlevelsup != 0)
+			continue;
+
+		target_column_per_rel = &targetlist_by_table[var->varno - 1];
+		target_attno = var->varattno - FirstLowInvalidHeapAttributeNumber;
+
+		/*
+		 * for distinct On (..), we only count the field in .. rather than
+		 * all the entries in target list
+		 */
+		if (query->hasDistinctOn)
+		{
+			Index ref = te->ressortgroupref;
+			ListCell *lc;
+
+			/*
+			 * A fastpath to know if the targetEntry is in the distinctClause
+			 */
+			if (ref == 0)
+				continue;
+
+			/*
+			 * Even the ref is not zero, it may be in sort as well, so we
+			 * need dobule check.
+			 */
+			foreach(lc, query->distinctClause)
+			{
+				if (ref == lfirst_node(SortGroupClause, lc)->tleSortGroupRef)
+					*target_column_per_rel = bms_add_member(*target_column_per_rel,
+															target_attno);
+			}
+		}
+		else
+			*target_column_per_rel = bms_add_member(*target_column_per_rel,
+													target_attno);
+	}
+
+	/* find out nonnull columns from qual via find_nonnullable_vars */
+	foreach(lc, find_nonnullable_vars(query->jointree->quals))
+	{
+		Var *not_null_var;
+		Bitmapset **notnullcolumns_per_rel;
+		int notnull_attno;
+		if (!IsA(lfirst(lc), Var))
+			continue;
+		not_null_var = lfirst_node(Var, lc);
+		if (not_null_var->varno == INNER_VAR ||
+			not_null_var->varno == OUTER_VAR ||
+			not_null_var->varno == INDEX_VAR)
+			continue;
+		notnullcolumns_per_rel = &notnullcolumns[not_null_var->varno - 1];
+		notnull_attno = not_null_var->varattno - FirstLowInvalidHeapAttributeNumber;
+		*notnullcolumns_per_rel = bms_add_member(*notnullcolumns_per_rel,
+												 notnull_attno);
+	}
+
+	/* Check if each related rtable can yield a unique result set */
+	rel_idx = 0;
+	foreach(lc, query->rtable)
+	{
+		Relation relation;
+		TupleDesc desc;
+		RangeTblEntry *rte;
+		int attr_idx;
+
+		if (!bms_is_member(rel_idx+1, non_semi_anti_relids))
+			continue;
+
+		rte = lfirst_node(RangeTblEntry, lc);
+		Assert(rte->rtekind == RTE_RELATION);
+		Assert(rte->relid != InvalidOid);
+
+	    relation = relation_open(rte->relid, RowExclusiveLock);
+		desc = relation->rd_att;
+		attr_idx = 0;
+
+		/* Add the notnullcolumns based on catalog */
+		for(; attr_idx < desc->natts; attr_idx++)
+		{
+			int notnull_attno;
+			if (!desc->attrs[attr_idx].attnotnull)
+				continue;
+			notnull_attno = attr_idx + 1 - FirstLowInvalidHeapAttributeNumber;
+			notnullcolumns[rel_idx] = bms_add_member(notnullcolumns[rel_idx],
+													 notnull_attno);
+		}
+
+		/* check non-nullable in qual, only col is not null checked now */
+		if (!is_unique_result_already(relation,
+									  targetlist_by_table[rel_idx],
+									  notnullcolumns[rel_idx]))
+		{
+			RelationClose(relation);
+			goto ret;
+		}
+		RelationClose(relation);
+		rel_idx++;
+	}
+
+	should_distinct_elimination = true;
+
+ ret:
+	bms_array_free(notnullcolumns, num_of_rtables);
+	bms_array_free(targetlist_by_table, num_of_rtables);
+	bms_free(non_semi_anti_relids);
+
+	if (should_distinct_elimination)
+	{
+		query->distinctClause = NIL;
+		query->hasDistinctOn = false;
+	}
+}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index df025a5a30..d8a76a2273 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -2346,6 +2346,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
 	bms_free(relation->rd_keyattr);
 	bms_free(relation->rd_pkattr);
 	bms_free(relation->rd_idattr);
+	if (relation->rd_plain_ukattrs)
+		bms_array_free(relation->rd_plain_ukattrs, relation->rd_plain_ukcount);
 	if (relation->rd_pubactions)
 		pfree(relation->rd_pubactions);
 	if (relation->rd_options)
@@ -4762,6 +4764,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
 	Bitmapset  *indexattrs;		/* indexed columns */
 	Bitmapset  *uindexattrs;	/* columns in unique indexes */
 	Bitmapset  *pkindexattrs;	/* columns in the primary index */
+	Bitmapset  **ukindexattrs = NULL; /* columns in the unique indexes */
 	Bitmapset  *idindexattrs;	/* columns in the replica identity */
 	List	   *indexoidlist;
 	List	   *newindexoidlist;
@@ -4769,6 +4772,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
 	Oid			relreplindex;
 	ListCell   *l;
 	MemoryContext oldcxt;
+	int plain_uk_index_count = 0, index_count = 0, indexno = 0;
 
 	/* Quick exit if we already computed the result. */
 	if (relation->rd_indexattr != NULL)
@@ -4826,6 +4830,9 @@ restart:
 	uindexattrs = NULL;
 	pkindexattrs = NULL;
 	idindexattrs = NULL;
+	index_count = list_length(indexoidlist);
+	ukindexattrs = palloc0(sizeof(Bitmapset *) * index_count);
+
 	foreach(l, indexoidlist)
 	{
 		Oid			indexOid = lfirst_oid(l);
@@ -4875,6 +4882,9 @@ restart:
 		/* Is this index the configured (or default) replica identity? */
 		isIDKey = (indexOid == relreplindex);
 
+		if (isKey)
+			plain_uk_index_count++;
+
 		/* Collect simple attribute references */
 		for (i = 0; i < indexDesc->rd_index->indnatts; i++)
 		{
@@ -4904,6 +4914,11 @@ restart:
 				if (isIDKey && i < indexDesc->rd_index->indnkeyatts)
 					idindexattrs = bms_add_member(idindexattrs,
 												  attrnum - FirstLowInvalidHeapAttributeNumber);
+
+				if (isKey)
+					ukindexattrs[indexno] = bms_add_member(ukindexattrs[indexno],
+														   attrnum - FirstLowInvalidHeapAttributeNumber);
+
 			}
 		}
 
@@ -4914,6 +4929,7 @@ restart:
 		pull_varattnos(indexPredicate, 1, &indexattrs);
 
 		index_close(indexDesc, AccessShareLock);
+		indexno++;
 	}
 
 	/*
@@ -4940,6 +4956,7 @@ restart:
 		bms_free(pkindexattrs);
 		bms_free(idindexattrs);
 		bms_free(indexattrs);
+		bms_array_free(ukindexattrs, index_count);
 
 		goto restart;
 	}
@@ -4953,6 +4970,8 @@ restart:
 	relation->rd_pkattr = NULL;
 	bms_free(relation->rd_idattr);
 	relation->rd_idattr = NULL;
+	bms_array_free(relation->rd_plain_ukattrs, relation->rd_plain_ukcount);
+	relation->rd_plain_ukattrs = NULL;
 
 	/*
 	 * Now save copies of the bitmaps in the relcache entry.  We intentionally
@@ -4966,6 +4985,8 @@ restart:
 	relation->rd_pkattr = bms_copy(pkindexattrs);
 	relation->rd_idattr = bms_copy(idindexattrs);
 	relation->rd_indexattr = bms_copy(indexattrs);
+	relation->rd_plain_ukattrs = bms_array_copy(ukindexattrs, index_count);
+	relation->rd_plain_ukcount = plain_uk_index_count;
 	MemoryContextSwitchTo(oldcxt);
 
 	/* We return our original working copy for caller to play with */
@@ -5618,6 +5639,8 @@ load_relcache_init_file(bool shared)
 		rel->rd_keyattr = NULL;
 		rel->rd_pkattr = NULL;
 		rel->rd_idattr = NULL;
+		rel->rd_plain_ukattrs = NULL;
+		rel->rd_plain_ukcount = 0;
 		rel->rd_pubactions = NULL;
 		rel->rd_statvalid = false;
 		rel->rd_statlist = NIL;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index e44f71e991..fa798dd564 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1064,6 +1064,16 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_distinct_elimination", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables plan-time and run-time unique elimination."),
+		    gettext_noop("Allows the query planner to remove the uncecessary distinct clause."), 
+			GUC_EXPLAIN
+		},
+		&enable_distinct_elimination,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index b7b18a0b68..ff30feb521 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -117,4 +117,6 @@ extern int	bms_prev_member(const Bitmapset *a, int prevbit);
 /* support for hashtables using Bitmapsets as keys: */
 extern uint32 bms_hash_value(const Bitmapset *a);
 
+extern Bitmapset **bms_array_copy(Bitmapset **bms_array, int len);
+extern void bms_array_free(Bitmapset **bms_array,  int len);
 #endif							/* BITMAPSET_H */
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index cb012ba198..4fa5d32df6 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -64,6 +64,7 @@ extern PGDLLIMPORT bool enable_partitionwise_aggregate;
 extern PGDLLIMPORT bool enable_parallel_append;
 extern PGDLLIMPORT bool enable_parallel_hash;
 extern PGDLLIMPORT bool enable_partition_pruning;
+extern PGDLLIMPORT bool enable_distinct_elimination;
 extern PGDLLIMPORT int constraint_exclusion;
 
 extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 44ed04dd3f..7c5a6d65b6 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -120,6 +120,9 @@ typedef struct RelationData
 	Bitmapset  *rd_indexattr;	/* identifies columns used in indexes */
 	Bitmapset  *rd_keyattr;		/* cols that can be ref'd by foreign keys */
 	Bitmapset  *rd_pkattr;		/* cols included in primary key */
+	Bitmapset  **rd_plain_ukattrs;    /* cols included in the plain unique indexes,
+                                   only non-expression, non-partical columns are count */
+	int        rd_plain_ukcount;  /* the no. of uk count */
 	Bitmapset  *rd_idattr;		/* included in replica identity index */
 
 	PublicationActions *rd_pubactions;	/* publication actions */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 761376b007..3f6595d53b 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4433,17 +4433,17 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
-              QUERY PLAN              
---------------------------------------
- Merge Right Join
-   Merge Cond: (b.id = d.a)
-   ->  Unique
-         ->  Sort
-               Sort Key: b.id, b.c_id
-               ->  Seq Scan on b
+           QUERY PLAN            
+---------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
    ->  Sort
          Sort Key: d.a
          ->  Seq Scan on d
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  Seq Scan on b
 (9 rows)
 
 -- check join removal works when uniqueness of the join condition is enforced
diff --git a/src/test/regress/expected/select_distinct_2.out b/src/test/regress/expected/select_distinct_2.out
new file mode 100644
index 0000000000..9c0dd564c8
--- /dev/null
+++ b/src/test/regress/expected/select_distinct_2.out
@@ -0,0 +1,141 @@
+create table select_distinct_a(a int, b char(20),  c char(20) not null,  d int, e int, primary key(a, b));
+set enable_mergejoin to off;
+set enable_hashjoin to off;
+-- no node for distinct.
+explain (costs off) select distinct * from select_distinct_a;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on select_distinct_a
+(1 row)
+
+explain (costs off) select distinct b,c,d,e from select_distinct_a;
+             QUERY PLAN              
+-------------------------------------
+ HashAggregate
+   Group Key: b, c, d, e
+   ->  Seq Scan on select_distinct_a
+(3 rows)
+
+create unique index select_distinct_a_uk on select_distinct_a(c, d);
+explain (costs off) select distinct b,c,d,e from select_distinct_a where c is not null;
+             QUERY PLAN              
+-------------------------------------
+ HashAggregate
+   Group Key: b, c, d, e
+   ->  Seq Scan on select_distinct_a
+         Filter: (c IS NOT NULL)
+(4 rows)
+
+explain (costs off) select distinct b,c,d,e from select_distinct_a where c is not null and d is not null;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Seq Scan on select_distinct_a
+   Filter: ((c IS NOT NULL) AND (d IS NOT NULL))
+(2 rows)
+
+explain select distinct d, e from select_distinct_a group by d, e;
+                                QUERY PLAN                                
+--------------------------------------------------------------------------
+ HashAggregate  (cost=15.85..17.85 rows=200 width=8)
+   Group Key: d, e
+   ->  Seq Scan on select_distinct_a  (cost=0.00..13.90 rows=390 width=8)
+(3 rows)
+
+create table select_distinct_b(a int, b char(20),  c char(20) not null,  d int, e int, primary key(a, b));
+explain (costs off) select distinct * from select_distinct_a a, select_distinct_b b;
+                 QUERY PLAN                  
+---------------------------------------------
+ Nested Loop
+   ->  Seq Scan on select_distinct_a a
+   ->  Materialize
+         ->  Seq Scan on select_distinct_b b
+(4 rows)
+
+explain (costs off) select distinct a.b, a.c, b.a, b.b from select_distinct_a a, select_distinct_b b;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: a.b, a.c, b.a, b.b
+         ->  Nested Loop
+               ->  Seq Scan on select_distinct_a a
+               ->  Materialize
+                     ->  Seq Scan on select_distinct_b b
+(7 rows)
+
+explain (costs off) select distinct a.d, a.c, b.a, b.b from select_distinct_a a, select_distinct_b b where a.d is not null;
+                 QUERY PLAN                  
+---------------------------------------------
+ Nested Loop
+   ->  Seq Scan on select_distinct_b b
+   ->  Materialize
+         ->  Seq Scan on select_distinct_a a
+               Filter: (d IS NOT NULL)
+(5 rows)
+
+explain (costs off) select distinct a.d, b.a from select_distinct_a a, select_distinct_b b group by a.d, b.a;
+                    QUERY PLAN                     
+---------------------------------------------------
+ HashAggregate
+   Group Key: a.d, b.a
+   ->  Nested Loop
+         ->  Seq Scan on select_distinct_a a
+         ->  Materialize
+               ->  Seq Scan on select_distinct_b b
+(6 rows)
+
+explain (costs off) select distinct a, b from select_distinct_a where a in (select a from select_distinct_b);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on select_distinct_a
+   ->  Index Only Scan using select_distinct_b_pkey on select_distinct_b
+         Index Cond: (a = select_distinct_a.a)
+(4 rows)
+
+explain (costs off) select distinct a, b from select_distinct_a where a not in (select a from select_distinct_b);
+              QUERY PLAN               
+---------------------------------------
+ Seq Scan on select_distinct_a
+   Filter: (NOT (hashed SubPlan 1))
+   SubPlan 1
+     ->  Seq Scan on select_distinct_b
+(4 rows)
+
+explain (costs off) select distinct * from select_distinct_a a, (select a, max(b) as b from select_distinct_b group by a) b
+where a.a in (select a from select_distinct_b)
+and a.b = b.b;
+                                           QUERY PLAN                                            
+-------------------------------------------------------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: a.a, a.b, a.c, a.d, a.e, select_distinct_b_1.a
+         ->  Nested Loop
+               Join Filter: (a.b = (max(select_distinct_b_1.b)))
+               ->  HashAggregate
+                     Group Key: select_distinct_b_1.a
+                     ->  Seq Scan on select_distinct_b select_distinct_b_1
+               ->  Materialize
+                     ->  Nested Loop Semi Join
+                           ->  Seq Scan on select_distinct_a a
+                           ->  Index Only Scan using select_distinct_b_pkey on select_distinct_b
+                                 Index Cond: (a = a.a)
+(13 rows)
+
+explain (costs off) select distinct on(a) a, b from select_distinct_a;
+                QUERY PLAN                 
+-------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: a
+         ->  Seq Scan on select_distinct_a
+(4 rows)
+
+explain (costs off) select distinct on(a, b) a, b from select_distinct_a;
+          QUERY PLAN           
+-------------------------------
+ Seq Scan on select_distinct_a
+(1 row)
+
+drop table select_distinct_a;
+drop table select_distinct_b;
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index a1c90eb905..e053214f9d 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -73,6 +73,7 @@ select name, setting from pg_settings where name like 'enable%';
               name              | setting 
 --------------------------------+---------
  enable_bitmapscan              | on
+ enable_distinct_elimination    | on
  enable_gathermerge             | on
  enable_hashagg                 | on
  enable_hashjoin                | on
@@ -89,7 +90,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(17 rows)
+(18 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
diff --git a/src/test/regress/sql/select_distinct_2.sql b/src/test/regress/sql/select_distinct_2.sql
new file mode 100644
index 0000000000..cad8d40dc6
--- /dev/null
+++ b/src/test/regress/sql/select_distinct_2.sql
@@ -0,0 +1,42 @@
+create table select_distinct_a(a int, b char(20),  c char(20) not null,  d int, e int, primary key(a, b));
+
+set enable_mergejoin to off;
+set enable_hashjoin to off;
+
+-- no node for distinct.
+explain (costs off) select distinct * from select_distinct_a;
+
+explain (costs off) select distinct b,c,d,e from select_distinct_a;
+
+create unique index select_distinct_a_uk on select_distinct_a(c, d);
+
+explain (costs off) select distinct b,c,d,e from select_distinct_a where c is not null;
+
+explain (costs off) select distinct b,c,d,e from select_distinct_a where c is not null and d is not null;
+
+explain select distinct d, e from select_distinct_a group by d, e;
+
+
+create table select_distinct_b(a int, b char(20),  c char(20) not null,  d int, e int, primary key(a, b));
+
+explain (costs off) select distinct * from select_distinct_a a, select_distinct_b b;
+
+explain (costs off) select distinct a.b, a.c, b.a, b.b from select_distinct_a a, select_distinct_b b;
+
+explain (costs off) select distinct a.d, a.c, b.a, b.b from select_distinct_a a, select_distinct_b b where a.d is not null;
+
+explain (costs off) select distinct a.d, b.a from select_distinct_a a, select_distinct_b b group by a.d, b.a;
+
+explain (costs off) select distinct a, b from select_distinct_a where a in (select a from select_distinct_b);
+
+explain (costs off) select distinct a, b from select_distinct_a where a not in (select a from select_distinct_b);
+
+explain (costs off) select distinct * from select_distinct_a a, (select a, max(b) as b from select_distinct_b group by a) b
+where a.a in (select a from select_distinct_b)
+and a.b = b.b;
+
+explain (costs off) select distinct on(a) a, b from select_distinct_a;
+explain (costs off) select distinct on(a, b) a, b from select_distinct_a;
+
+drop table select_distinct_a;
+drop table select_distinct_b;
-- 
2.20.1 (Apple Git-117)