From 94edaece73503572d4489d41e6644efe1e13f652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= Date: Fri, 31 Jan 2020 19:38:05 +0800 Subject: [PATCH] Erase the distinctClause if the result is unique by definition For a single relation, we can tell it by any one of the following is true: 1. The pk is in the target list. 2. The uk is in the target list and the columns is not null 3. The columns in group-by clause is also in the target list for relation join, we can tell it by: if every relation in the jointree yield a unique result set,then the final result is unique as well regardless the join method. for semi/anti join, we don't have such restriction on the right table. --- src/backend/nodes/bitmapset.c | 40 +++ src/backend/optimizer/path/costsize.c | 1 + src/backend/optimizer/plan/planner.c | 292 ++++++++++++++++++ src/backend/utils/cache/relcache.c | 23 ++ src/backend/utils/misc/guc.c | 10 + src/include/nodes/bitmapset.h | 2 + src/include/optimizer/cost.h | 1 + src/include/utils/rel.h | 3 + src/test/regress/expected/join.out | 16 +- .../regress/expected/select_distinct_2.out | 141 +++++++++ src/test/regress/expected/sysviews.out | 3 +- src/test/regress/sql/select_distinct_2.sql | 42 +++ 12 files changed, 565 insertions(+), 9 deletions(-) create mode 100644 src/test/regress/expected/select_distinct_2.out create mode 100644 src/test/regress/sql/select_distinct_2.sql diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 648cc1a7eb..76ce9b526e 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -1167,3 +1167,43 @@ bms_hash_value(const Bitmapset *a) return DatumGetUInt32(hash_any((const unsigned char *) a->words, (lastword + 1) * sizeof(bitmapword))); } + +/* + * bms_array_copy -- + * + * copy the bms data in the newly palloc memory + */ + +Bitmapset** +bms_array_copy(Bitmapset **bms_array, int len) +{ + Bitmapset **res; + int i; + if (bms_array == NULL || len < 1) + return NULL; + + res = palloc(sizeof(Bitmapset*) * len); + for(i = 0; i < len; i++) + { + res[i] = bms_copy(bms_array[i]); + } + return res; +} + +/* + * bms_array_free + * + * free the element in the array one by one, free the array as well at last + */ +void +bms_array_free(Bitmapset **bms_array, int len) +{ + int idx; + if (bms_array == NULL) + return; + for(idx = 0 ; idx < len; idx++) + { + bms_free(bms_array[idx]); + } + pfree(bms_array); +} diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b5a0033721..dde16b5d44 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -138,6 +138,7 @@ bool enable_partitionwise_aggregate = false; bool enable_parallel_append = true; bool enable_parallel_hash = true; bool enable_partition_pruning = true; +bool enable_distinct_elimination = true; typedef struct { diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d6f2153593..6f7d85f96e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -22,8 +22,10 @@ #include "access/htup_details.h" #include "access/parallel.h" #include "access/sysattr.h" +#include "access/relation.h" #include "access/table.h" #include "access/xact.h" +#include "catalog/index.h" #include "catalog/pg_constraint.h" #include "catalog/pg_inherits.h" #include "catalog/pg_proc.h" @@ -35,6 +37,7 @@ #include "lib/bipartite_match.h" #include "lib/knapsack.h" #include "miscadmin.h" +#include "nodes/bitmapset.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #ifdef OPTIMIZER_DEBUG @@ -248,6 +251,7 @@ static bool group_by_has_partkey(RelOptInfo *input_rel, List *targetList, List *groupClause); static int common_prefix_cmp(const void *a, const void *b); +static void preprocess_distinct_node(PlannerInfo *root); /***************************************************************************** @@ -989,6 +993,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse, /* Remove any redundant GROUP BY columns */ remove_useless_groupby_columns(root); + if (enable_distinct_elimination) + preprocess_distinct_node(root); + /* * If we have any outer joins, try to reduce them to plain inner joins. * This step is most easily done after we've done expression @@ -7409,3 +7416,288 @@ group_by_has_partkey(RelOptInfo *input_rel, return true; } + +/* + * is_unique_result_already + * + * Given a relation, we can know its primary key + unique key information + * unique target is the target list of distinct/distinct on target. + * not_null_columns is a union of not null columns based on catalog and quals. + * then we can know the result is unique already before executing it if + * the primary key or uk + not null in target list. + */ +static bool +is_unique_result_already(Relation relation, + Bitmapset *unique_target, + Bitmapset *not_null_columns) +{ + int i; + Bitmapset *pkattr = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_PRIMARY_KEY); + + /* + * if the pk is in the target list, + * the result set is unique for this relation + */ + if (pkattr != NULL && + !bms_is_empty(pkattr) && + bms_is_subset(pkattr, unique_target)) + { + return true; + } + + /* + * check if the pk is in the unique index + */ + for (i = 0; i < relation->rd_plain_ukcount; i++) + { + Bitmapset *ukattr = relation->rd_plain_ukattrs[i]; + if (!bms_is_empty(ukattr) + && bms_is_subset(ukattr, unique_target) + && bms_is_subset(ukattr, not_null_columns)) + return true; + } + + /* + * If a unique index is in the target list, and the columns are not null + * the result set is unique as well + */ + + return false; +} + + +/* + * scan_non_semi_anti_relids + * + * scan jointree to get non-semi/anti join rtindex. + */ +static void +scan_non_semi_anti_relids(Node* jtnode, Relids* relids) +{ + if (jtnode == NULL) + return; + + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + *relids = bms_add_member(*relids, varno); + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + foreach(l, f->fromlist) + scan_non_semi_anti_relids(lfirst(l), relids); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + scan_non_semi_anti_relids(j->larg, relids); + if (j->jointype != JOIN_SEMI && j->jointype != JOIN_ANTI) + { + scan_non_semi_anti_relids(j->rarg, relids); + } + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); + +} + +/* + * preprocess_distinct_node + * + * remove the distinctClause if it is not necessary by definition + */ +static void +preprocess_distinct_node(PlannerInfo *root) +{ + Query *query = root->parse; + ListCell *lc; + int num_of_rtables; + Bitmapset **targetlist_by_table = NULL; + Bitmapset **notnullcolumns = NULL; + Index rel_idx = 0; + bool should_distinct_elimination = false; + Relids non_semi_anti_relids = NULL; + + if (query->distinctClause == NIL) + return; + + scan_non_semi_anti_relids((Node*)query->jointree, &non_semi_anti_relids); + + foreach(lc, query->rtable) + { + RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc); + rel_idx++; + if (!bms_is_member(rel_idx, non_semi_anti_relids)) + continue; + /* we only handle the basic Relation for now */ + if (rte->rtekind != RTE_RELATION) + return; + } + + num_of_rtables = bms_num_members(non_semi_anti_relids); + + /* + * If the columns in group clause is in the target list + * we don't need distinct + */ + if (query->groupClause != NIL) + { + Bitmapset *groupclause_bm = NULL; + Bitmapset *groupclause_in_targetlist_bm = NULL; + ListCell *lc; + foreach(lc, query->groupClause) + groupclause_bm = bms_add_member(groupclause_bm, + lfirst_node(SortGroupClause, lc)->tleSortGroupRef); + + foreach(lc, query->targetList) + { + TargetEntry *te = lfirst_node(TargetEntry, lc); + if (te->resjunk) + continue; + groupclause_in_targetlist_bm = bms_add_member(groupclause_in_targetlist_bm, + te->ressortgroupref); + } + + should_distinct_elimination = bms_is_subset(groupclause_bm, + groupclause_in_targetlist_bm); + bms_free(groupclause_bm); + bms_free(groupclause_in_targetlist_bm); + if (should_distinct_elimination) + goto ret; + } + + targetlist_by_table = palloc0(sizeof(Bitmapset*) * num_of_rtables); + notnullcolumns = palloc0(sizeof(Bitmapset* ) * num_of_rtables); + + /* build the targetlist_by_table */ + foreach(lc, query->targetList) + { + TargetEntry *te = lfirst_node(TargetEntry, lc); + Expr *expr = te->expr; + Var *var; + Bitmapset **target_column_per_rel; + int target_attno; + + if (!IsA(expr, Var)) + continue; + + var = (Var *)(expr); + if (var->varlevelsup != 0) + continue; + + target_column_per_rel = &targetlist_by_table[var->varno - 1]; + target_attno = var->varattno - FirstLowInvalidHeapAttributeNumber; + + /* + * for distinct On (..), we only count the field in .. rather than + * all the entries in target list + */ + if (query->hasDistinctOn) + { + Index ref = te->ressortgroupref; + ListCell *lc; + + /* + * A fastpath to know if the targetEntry is in the distinctClause + */ + if (ref == 0) + continue; + + /* + * Even the ref is not zero, it may be in sort as well, so we + * need dobule check. + */ + foreach(lc, query->distinctClause) + { + if (ref == lfirst_node(SortGroupClause, lc)->tleSortGroupRef) + *target_column_per_rel = bms_add_member(*target_column_per_rel, + target_attno); + } + } + else + *target_column_per_rel = bms_add_member(*target_column_per_rel, + target_attno); + } + + /* find out nonnull columns from qual via find_nonnullable_vars */ + foreach(lc, find_nonnullable_vars(query->jointree->quals)) + { + Var *not_null_var; + Bitmapset **notnullcolumns_per_rel; + int notnull_attno; + if (!IsA(lfirst(lc), Var)) + continue; + not_null_var = lfirst_node(Var, lc); + if (not_null_var->varno == INNER_VAR || + not_null_var->varno == OUTER_VAR || + not_null_var->varno == INDEX_VAR) + continue; + notnullcolumns_per_rel = ¬nullcolumns[not_null_var->varno - 1]; + notnull_attno = not_null_var->varattno - FirstLowInvalidHeapAttributeNumber; + *notnullcolumns_per_rel = bms_add_member(*notnullcolumns_per_rel, + notnull_attno); + } + + /* Check if each related rtable can yield a unique result set */ + rel_idx = 0; + foreach(lc, query->rtable) + { + Relation relation; + TupleDesc desc; + RangeTblEntry *rte; + int attr_idx; + + if (!bms_is_member(rel_idx+1, non_semi_anti_relids)) + continue; + + rte = lfirst_node(RangeTblEntry, lc); + Assert(rte->rtekind == RTE_RELATION); + Assert(rte->relid != InvalidOid); + + relation = relation_open(rte->relid, RowExclusiveLock); + desc = relation->rd_att; + attr_idx = 0; + + /* Add the notnullcolumns based on catalog */ + for(; attr_idx < desc->natts; attr_idx++) + { + int notnull_attno; + if (!desc->attrs[attr_idx].attnotnull) + continue; + notnull_attno = attr_idx + 1 - FirstLowInvalidHeapAttributeNumber; + notnullcolumns[rel_idx] = bms_add_member(notnullcolumns[rel_idx], + notnull_attno); + } + + /* check non-nullable in qual, only col is not null checked now */ + if (!is_unique_result_already(relation, + targetlist_by_table[rel_idx], + notnullcolumns[rel_idx])) + { + RelationClose(relation); + goto ret; + } + RelationClose(relation); + rel_idx++; + } + + should_distinct_elimination = true; + + ret: + bms_array_free(notnullcolumns, num_of_rtables); + bms_array_free(targetlist_by_table, num_of_rtables); + bms_free(non_semi_anti_relids); + + if (should_distinct_elimination) + { + query->distinctClause = NIL; + query->hasDistinctOn = false; + } +} diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index df025a5a30..d8a76a2273 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2346,6 +2346,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) bms_free(relation->rd_keyattr); bms_free(relation->rd_pkattr); bms_free(relation->rd_idattr); + if (relation->rd_plain_ukattrs) + bms_array_free(relation->rd_plain_ukattrs, relation->rd_plain_ukcount); if (relation->rd_pubactions) pfree(relation->rd_pubactions); if (relation->rd_options) @@ -4762,6 +4764,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) Bitmapset *indexattrs; /* indexed columns */ Bitmapset *uindexattrs; /* columns in unique indexes */ Bitmapset *pkindexattrs; /* columns in the primary index */ + Bitmapset **ukindexattrs = NULL; /* columns in the unique indexes */ Bitmapset *idindexattrs; /* columns in the replica identity */ List *indexoidlist; List *newindexoidlist; @@ -4769,6 +4772,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) Oid relreplindex; ListCell *l; MemoryContext oldcxt; + int plain_uk_index_count = 0, index_count = 0, indexno = 0; /* Quick exit if we already computed the result. */ if (relation->rd_indexattr != NULL) @@ -4826,6 +4830,9 @@ restart: uindexattrs = NULL; pkindexattrs = NULL; idindexattrs = NULL; + index_count = list_length(indexoidlist); + ukindexattrs = palloc0(sizeof(Bitmapset *) * index_count); + foreach(l, indexoidlist) { Oid indexOid = lfirst_oid(l); @@ -4875,6 +4882,9 @@ restart: /* Is this index the configured (or default) replica identity? */ isIDKey = (indexOid == relreplindex); + if (isKey) + plain_uk_index_count++; + /* Collect simple attribute references */ for (i = 0; i < indexDesc->rd_index->indnatts; i++) { @@ -4904,6 +4914,11 @@ restart: if (isIDKey && i < indexDesc->rd_index->indnkeyatts) idindexattrs = bms_add_member(idindexattrs, attrnum - FirstLowInvalidHeapAttributeNumber); + + if (isKey) + ukindexattrs[indexno] = bms_add_member(ukindexattrs[indexno], + attrnum - FirstLowInvalidHeapAttributeNumber); + } } @@ -4914,6 +4929,7 @@ restart: pull_varattnos(indexPredicate, 1, &indexattrs); index_close(indexDesc, AccessShareLock); + indexno++; } /* @@ -4940,6 +4956,7 @@ restart: bms_free(pkindexattrs); bms_free(idindexattrs); bms_free(indexattrs); + bms_array_free(ukindexattrs, index_count); goto restart; } @@ -4953,6 +4970,8 @@ restart: relation->rd_pkattr = NULL; bms_free(relation->rd_idattr); relation->rd_idattr = NULL; + bms_array_free(relation->rd_plain_ukattrs, relation->rd_plain_ukcount); + relation->rd_plain_ukattrs = NULL; /* * Now save copies of the bitmaps in the relcache entry. We intentionally @@ -4966,6 +4985,8 @@ restart: relation->rd_pkattr = bms_copy(pkindexattrs); relation->rd_idattr = bms_copy(idindexattrs); relation->rd_indexattr = bms_copy(indexattrs); + relation->rd_plain_ukattrs = bms_array_copy(ukindexattrs, index_count); + relation->rd_plain_ukcount = plain_uk_index_count; MemoryContextSwitchTo(oldcxt); /* We return our original working copy for caller to play with */ @@ -5618,6 +5639,8 @@ load_relcache_init_file(bool shared) rel->rd_keyattr = NULL; rel->rd_pkattr = NULL; rel->rd_idattr = NULL; + rel->rd_plain_ukattrs = NULL; + rel->rd_plain_ukcount = 0; rel->rd_pubactions = NULL; rel->rd_statvalid = false; rel->rd_statlist = NIL; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index e44f71e991..fa798dd564 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1064,6 +1064,16 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_distinct_elimination", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables plan-time and run-time unique elimination."), + gettext_noop("Allows the query planner to remove the uncecessary distinct clause."), + GUC_EXPLAIN + }, + &enable_distinct_elimination, + true, + NULL, NULL, NULL + }, { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Enables genetic query optimization."), diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index b7b18a0b68..ff30feb521 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -117,4 +117,6 @@ extern int bms_prev_member(const Bitmapset *a, int prevbit); /* support for hashtables using Bitmapsets as keys: */ extern uint32 bms_hash_value(const Bitmapset *a); +extern Bitmapset **bms_array_copy(Bitmapset **bms_array, int len); +extern void bms_array_free(Bitmapset **bms_array, int len); #endif /* BITMAPSET_H */ diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index cb012ba198..4fa5d32df6 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -64,6 +64,7 @@ extern PGDLLIMPORT bool enable_partitionwise_aggregate; extern PGDLLIMPORT bool enable_parallel_append; extern PGDLLIMPORT bool enable_parallel_hash; extern PGDLLIMPORT bool enable_partition_pruning; +extern PGDLLIMPORT bool enable_distinct_elimination; extern PGDLLIMPORT int constraint_exclusion; extern double index_pages_fetched(double tuples_fetched, BlockNumber pages, diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 44ed04dd3f..7c5a6d65b6 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -120,6 +120,9 @@ typedef struct RelationData Bitmapset *rd_indexattr; /* identifies columns used in indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ Bitmapset *rd_pkattr; /* cols included in primary key */ + Bitmapset **rd_plain_ukattrs; /* cols included in the plain unique indexes, + only non-expression, non-partical columns are count */ + int rd_plain_ukcount; /* the no. of uk count */ Bitmapset *rd_idattr; /* included in replica identity index */ PublicationActions *rd_pubactions; /* publication actions */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 761376b007..3f6595d53b 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4433,17 +4433,17 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b + QUERY PLAN +--------------------------------- + Merge Left Join + Merge Cond: (d.a = s.id) -> Sort Sort Key: d.a -> Seq Scan on d + -> Sort + Sort Key: s.id + -> Subquery Scan on s + -> Seq Scan on b (9 rows) -- check join removal works when uniqueness of the join condition is enforced diff --git a/src/test/regress/expected/select_distinct_2.out b/src/test/regress/expected/select_distinct_2.out new file mode 100644 index 0000000000..9c0dd564c8 --- /dev/null +++ b/src/test/regress/expected/select_distinct_2.out @@ -0,0 +1,141 @@ +create table select_distinct_a(a int, b char(20), c char(20) not null, d int, e int, primary key(a, b)); +set enable_mergejoin to off; +set enable_hashjoin to off; +-- no node for distinct. +explain (costs off) select distinct * from select_distinct_a; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a +(1 row) + +explain (costs off) select distinct b,c,d,e from select_distinct_a; + QUERY PLAN +------------------------------------- + HashAggregate + Group Key: b, c, d, e + -> Seq Scan on select_distinct_a +(3 rows) + +create unique index select_distinct_a_uk on select_distinct_a(c, d); +explain (costs off) select distinct b,c,d,e from select_distinct_a where c is not null; + QUERY PLAN +------------------------------------- + HashAggregate + Group Key: b, c, d, e + -> Seq Scan on select_distinct_a + Filter: (c IS NOT NULL) +(4 rows) + +explain (costs off) select distinct b,c,d,e from select_distinct_a where c is not null and d is not null; + QUERY PLAN +------------------------------------------------- + Seq Scan on select_distinct_a + Filter: ((c IS NOT NULL) AND (d IS NOT NULL)) +(2 rows) + +explain select distinct d, e from select_distinct_a group by d, e; + QUERY PLAN +-------------------------------------------------------------------------- + HashAggregate (cost=15.85..17.85 rows=200 width=8) + Group Key: d, e + -> Seq Scan on select_distinct_a (cost=0.00..13.90 rows=390 width=8) +(3 rows) + +create table select_distinct_b(a int, b char(20), c char(20) not null, d int, e int, primary key(a, b)); +explain (costs off) select distinct * from select_distinct_a a, select_distinct_b b; + QUERY PLAN +--------------------------------------------- + Nested Loop + -> Seq Scan on select_distinct_a a + -> Materialize + -> Seq Scan on select_distinct_b b +(4 rows) + +explain (costs off) select distinct a.b, a.c, b.a, b.b from select_distinct_a a, select_distinct_b b; + QUERY PLAN +--------------------------------------------------------- + Unique + -> Sort + Sort Key: a.b, a.c, b.a, b.b + -> Nested Loop + -> Seq Scan on select_distinct_a a + -> Materialize + -> Seq Scan on select_distinct_b b +(7 rows) + +explain (costs off) select distinct a.d, a.c, b.a, b.b from select_distinct_a a, select_distinct_b b where a.d is not null; + QUERY PLAN +--------------------------------------------- + Nested Loop + -> Seq Scan on select_distinct_b b + -> Materialize + -> Seq Scan on select_distinct_a a + Filter: (d IS NOT NULL) +(5 rows) + +explain (costs off) select distinct a.d, b.a from select_distinct_a a, select_distinct_b b group by a.d, b.a; + QUERY PLAN +--------------------------------------------------- + HashAggregate + Group Key: a.d, b.a + -> Nested Loop + -> Seq Scan on select_distinct_a a + -> Materialize + -> Seq Scan on select_distinct_b b +(6 rows) + +explain (costs off) select distinct a, b from select_distinct_a where a in (select a from select_distinct_b); + QUERY PLAN +------------------------------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on select_distinct_a + -> Index Only Scan using select_distinct_b_pkey on select_distinct_b + Index Cond: (a = select_distinct_a.a) +(4 rows) + +explain (costs off) select distinct a, b from select_distinct_a where a not in (select a from select_distinct_b); + QUERY PLAN +--------------------------------------- + Seq Scan on select_distinct_a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on select_distinct_b +(4 rows) + +explain (costs off) select distinct * from select_distinct_a a, (select a, max(b) as b from select_distinct_b group by a) b +where a.a in (select a from select_distinct_b) +and a.b = b.b; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Unique + -> Sort + Sort Key: a.a, a.b, a.c, a.d, a.e, select_distinct_b_1.a + -> Nested Loop + Join Filter: (a.b = (max(select_distinct_b_1.b))) + -> HashAggregate + Group Key: select_distinct_b_1.a + -> Seq Scan on select_distinct_b select_distinct_b_1 + -> Materialize + -> Nested Loop Semi Join + -> Seq Scan on select_distinct_a a + -> Index Only Scan using select_distinct_b_pkey on select_distinct_b + Index Cond: (a = a.a) +(13 rows) + +explain (costs off) select distinct on(a) a, b from select_distinct_a; + QUERY PLAN +------------------------------------------- + Unique + -> Sort + Sort Key: a + -> Seq Scan on select_distinct_a +(4 rows) + +explain (costs off) select distinct on(a, b) a, b from select_distinct_a; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a +(1 row) + +drop table select_distinct_a; +drop table select_distinct_b; diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index a1c90eb905..e053214f9d 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -73,6 +73,7 @@ select name, setting from pg_settings where name like 'enable%'; name | setting --------------------------------+--------- enable_bitmapscan | on + enable_distinct_elimination | on enable_gathermerge | on enable_hashagg | on enable_hashjoin | on @@ -89,7 +90,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(17 rows) +(18 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/sql/select_distinct_2.sql b/src/test/regress/sql/select_distinct_2.sql new file mode 100644 index 0000000000..cad8d40dc6 --- /dev/null +++ b/src/test/regress/sql/select_distinct_2.sql @@ -0,0 +1,42 @@ +create table select_distinct_a(a int, b char(20), c char(20) not null, d int, e int, primary key(a, b)); + +set enable_mergejoin to off; +set enable_hashjoin to off; + +-- no node for distinct. +explain (costs off) select distinct * from select_distinct_a; + +explain (costs off) select distinct b,c,d,e from select_distinct_a; + +create unique index select_distinct_a_uk on select_distinct_a(c, d); + +explain (costs off) select distinct b,c,d,e from select_distinct_a where c is not null; + +explain (costs off) select distinct b,c,d,e from select_distinct_a where c is not null and d is not null; + +explain select distinct d, e from select_distinct_a group by d, e; + + +create table select_distinct_b(a int, b char(20), c char(20) not null, d int, e int, primary key(a, b)); + +explain (costs off) select distinct * from select_distinct_a a, select_distinct_b b; + +explain (costs off) select distinct a.b, a.c, b.a, b.b from select_distinct_a a, select_distinct_b b; + +explain (costs off) select distinct a.d, a.c, b.a, b.b from select_distinct_a a, select_distinct_b b where a.d is not null; + +explain (costs off) select distinct a.d, b.a from select_distinct_a a, select_distinct_b b group by a.d, b.a; + +explain (costs off) select distinct a, b from select_distinct_a where a in (select a from select_distinct_b); + +explain (costs off) select distinct a, b from select_distinct_a where a not in (select a from select_distinct_b); + +explain (costs off) select distinct * from select_distinct_a a, (select a, max(b) as b from select_distinct_b group by a) b +where a.a in (select a from select_distinct_b) +and a.b = b.b; + +explain (costs off) select distinct on(a) a, b from select_distinct_a; +explain (costs off) select distinct on(a, b) a, b from select_distinct_a; + +drop table select_distinct_a; +drop table select_distinct_b; -- 2.20.1 (Apple Git-117)