From 7799b005726b542cffa6e9a19704cabf235b214e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= Date: Wed, 7 Jul 2021 16:02:01 +0800 Subject: [PATCH v1] design uniquekey v2 --- src/backend/nodes/list.c | 16 + src/backend/optimizer/path/Makefile | 3 +- src/backend/optimizer/path/allpaths.c | 7 +- src/backend/optimizer/path/uniquekey.c | 276 ++++++++++++++++++ src/backend/optimizer/plan/planner.c | 3 + src/backend/optimizer/util/plancat.c | 17 ++ src/include/nodes/nodes.h | 3 +- src/include/nodes/pathnodes.h | 16 + src/include/nodes/pg_list.h | 2 + src/include/optimizer/paths.h | 4 + src/include/optimizer/plancat.h | 2 + src/test/regress/expected/join.out | 11 +- src/test/regress/expected/select_distinct.out | 50 ++++ src/test/regress/sql/select_distinct.sql | 20 ++ 14 files changed, 420 insertions(+), 10 deletions(-) create mode 100644 src/backend/optimizer/path/uniquekey.c diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index 94fb236daf..3053ce38ea 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -702,6 +702,22 @@ list_member_oid(const List *list, Oid datum) return false; } +/* + * list_is_subset_ptr - is every member of A also a member of B, as tested by list_member_ptr? + */ +bool +list_is_subset_ptr(const List *a, const List *b) +{ + ListCell *lc; + foreach(lc, a) + { + if (!list_member_ptr(b, lfirst(lc))) + return false; + } + return true; +} + + /* * Delete the n'th cell (counting from 0) in list. 
* diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 1e199ff66f..63cc1505d9 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -21,6 +21,7 @@ OBJS = \ joinpath.o \ joinrels.o \ pathkeys.o \ - tidpath.o + tidpath.o \ + uniquekey.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 0dcb9e2337..f5d8a443f9 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -396,6 +396,9 @@ static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { + /* Set the notnull attrs before populating the UniqueKeys */ + set_baserel_notnull_attrs(rel); + if (rel->reloptkind == RELOPT_BASEREL && relation_excluded_by_constraints(root, rel, rte)) { @@ -491,7 +494,7 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, } } - set_baserel_notnull_attrs(rel); + /* * We insist that all non-dummy rels have a nonzero rowcount estimate. @@ -616,6 +619,8 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) */ check_index_predicates(root, rel); + populate_baserel_uniquekeys(root, rel); + /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); } diff --git a/src/backend/optimizer/path/uniquekey.c b/src/backend/optimizer/path/uniquekey.c new file mode 100644 index 0000000000..6111e9fa00 --- /dev/null +++ b/src/backend/optimizer/path/uniquekey.c @@ -0,0 +1,276 @@ +/*------------------------------------------------------------------------- + * + * uniquekey.c + * Utilities for maintaining uniquekey. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/path/uniquekey.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/sysattr.h" +#include "nodes/nodeFuncs.h" +#include "nodes/pathnodes.h" +#include "optimizer/paths.h" + + +/* + * print_uniquekey + * Used for easier review; should be removed before commit. + */ +static void +print_uniquekey(PlannerInfo *root, RelOptInfo *rel) +{ + if (false) /* debugging output is compiled in but disabled */ + { + ListCell *lc; + elog(INFO, "Rel = %s", bmsToString(rel->relids)); + foreach(lc, rel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + int i = -1; + elog(INFO, "UNIQUEKEY{indexes=%s, multinull=%d}", + bmsToString(ukey->unique_expr_indexes), + ukey->multi_nulls + ); + + while ((i = bms_next_member(ukey->unique_expr_indexes, i)) >= 0) + { + Node *node = (Node *) list_nth(root->unique_exprs, i); + if (IsA(node, SingleRow)) + elog(INFO, + "Expr(%d) SingleRow{relid = %d}", + i, castNode(SingleRow, node)->relid); + else + elog(INFO, + "EC(%d), %s", i, nodeToString(node) + ); + } + } + } +} + +static UniqueKey * +make_uniquekey(Bitmapset *unique_expr_indexes, bool multi_null) +{ + UniqueKey *ukey = makeNode(UniqueKey); + ukey->unique_expr_indexes = unique_expr_indexes; + ukey->multi_nulls = multi_null; + return ukey; +} + + +static PathKey * +find_matching_pathkey_expr(Expr *expr, List *pathkeys, Relids relids) +{ + ListCell *lc; + foreach(lc, pathkeys) + { + PathKey * pathkey = lfirst_node(PathKey, lc); + if (find_ec_member_matching_expr(pathkey->pk_eclass, expr, relids)) + return pathkey; + } + return NULL; /* no pathkey's EC has a member matching expr */ +} + +static bool +add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index, + List *mergeable_const_peer, List *expr_opfamilies) +{ + + List *unique_ecs = NIL; + ListCell *indexpr_item; + int c = 0; + 
RelOptInfo *rel = unique_index->rel; + PathKey *pathkey; + bool multinull = false; + + indexpr_item = list_head(unique_index->indexprs); + for (c = 0; c < unique_index->nkeycolumns; c++) + { + int attr = unique_index->indexkeys[c]; + Expr *expr; + bool matched_const = false; + ListCell *lc1, *lc2; + if (attr > 0) + { + Var *var; + expr = list_nth_node(TargetEntry, unique_index->indextlist, c)->expr; + var = castNode(Var, expr); + Assert(IsA(expr, Var)); + if (!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, + rel->notnull_attrs[0])) + multinull = true; /* column is not in the rel's notnull_attrs */ + } + else if (attr == 0) + { + /* Expression index */ + expr = lfirst(indexpr_item); + indexpr_item = lnext(unique_index->indexprs, indexpr_item); + /* We can't guarantee a FuncExpr will not return NULLs */ + multinull = true; + } + else /* attr < 0 */ + { + /* Index on OID is possible, but we don't handle it for now. */ + return false; + } + + /* + * Check for the index_col = Const case, taking the opfamily into account. + * If so, we can omit the index_col from the final UniqueKey->exprs. + */ + forboth(lc1, mergeable_const_peer, lc2, expr_opfamilies) + { + if (list_member_oid((List *) lfirst(lc2), unique_index->opfamily[c]) && + match_index_to_operand((Node *) lfirst(lc1), c, unique_index)) + { + matched_const = true; + break; + } + } + + if (matched_const) + continue; + + /* Check if this expr exist in distinct_pathkey. 
*/ + pathkey = find_matching_pathkey_expr(expr, root->distinct_pathkeys, rel->relids); + if (!pathkey) + return false; /* column matches no distinct pathkey: give up on this index */ + unique_ecs = lappend(unique_ecs, pathkey->pk_eclass); + } + + { + Bitmapset *unique_exprs_index = bms_make_singleton(list_length(root->unique_exprs)); + if (unique_ecs == NIL) /* no column was left after the Const matching */ + { + SingleRow *singlerow = makeNode(SingleRow); + singlerow->relid = rel->relid; + rel->uniquekeys = list_make1(make_uniquekey(unique_exprs_index, false /* multi-null */)); + root->unique_exprs = lappend(root->unique_exprs, singlerow); + return true; /* SingleRow recorded: the caller stops scanning indexes */ + } + else + { + UniqueKey *ukey = make_uniquekey(unique_exprs_index, multinull); + root->unique_exprs = lappend(root->unique_exprs, unique_ecs); + rel->uniquekeys = lappend(rel->uniquekeys, ukey); + return false; + } + } + return false; /* not reached: both branches above return */ +} + +void +populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *lc; + List *mergeable_const_peer = NIL, *expr_opfamilies = NIL; + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + if (rinfo->mergeopfamilies == NIL) + continue; + + if (bms_is_empty(rinfo->left_relids)) /* left side references no rel: keep the right operand */ + mergeable_const_peer = lappend(mergeable_const_peer, get_rightop(rinfo->clause)); + else if (bms_is_empty(rinfo->right_relids)) /* right side references no rel: keep the left operand */ + mergeable_const_peer = lappend(mergeable_const_peer, get_leftop(rinfo->clause)); + else + continue; + expr_opfamilies = lappend(expr_opfamilies, rinfo->mergeopfamilies); /* kept parallel to mergeable_const_peer */ + } + + foreach(lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *)lfirst(lc); + if (!index->unique || !index->immediate || + (index->indpred != NIL && !index->predOK)) + continue; + + if (add_uniquekey_for_uniqueindex(root, index, + mergeable_const_peer, + expr_opfamilies)) + return; + } + + print_uniquekey(root, rel); +} + + +static bool +uniquekey_contains_in(PlannerInfo *root, UniqueKey *ukey, List *lecs, Relids relids) +{ + int i = -1; + while ((i = bms_next_member(ukey->unique_expr_indexes, i)) >= 0) + { + Node *expr = 
list_nth(root->unique_exprs, i); + if (IsA(expr, SingleRow)) + { + /* Any column of the rel identified by SingleRow.relid is OK */ + if (!bms_is_member(castNode(SingleRow, expr)->relid, relids)) + return false; + } + else if (IsA(expr, List)) + { + /* unique_expr is a List of EquivalenceClass pointers; all must be in lecs */ + if (!list_is_subset_ptr((List*)expr, lecs)) + return false; + } + else + { + /* Should never get here */ + Assert(false); + return false; + } + } + return true; +} + + +bool +relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *distinct_pathkey) +{ + ListCell *lc; + List *lecs = NIL; + Relids relids = NULL; + foreach(lc, distinct_pathkey) + { + PathKey *pathkey = lfirst(lc); + lecs = lappend(lecs, pathkey->pk_eclass); + /* + * Note that ec_relids doesn't include child members, but + * DISTINCT does not operate on child rels either. + */ + relids = bms_union(relids, pathkey->pk_eclass->ec_relids); + } + + foreach(lc, rel->uniquekeys) + { + UniqueKey *ukey = lfirst(lc); + if (ukey->multi_nulls) + continue; /* skip keys flagged multi_nulls */ + + if (uniquekey_contains_in(root, ukey, lecs, relids)) + return true; + } + + return false; + +} + + +static UniqueKey* +__attribute__ ((unused)) +build_composited_uniquekey(UniqueKey *ukey1, UniqueKey *ukey2, bool multi_null) +{ + Bitmapset *unique_expr = bms_union(ukey1->unique_expr_indexes, ukey2->unique_expr_indexes); + return make_uniquekey(unique_expr, multi_null); + +} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1868c4eff4..b9d4e6395e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4238,6 +4238,9 @@ create_distinct_paths(PlannerInfo *root, Path *path; ListCell *lc; + if (relation_is_distinct_for(root, input_rel, root->distinct_pathkeys)) + return input_rel; + /* For now, do all work in the (DISTINCT, NULL) upperrel */ distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); diff --git a/src/backend/optimizer/util/plancat.c 
b/src/backend/optimizer/util/plancat.c index 7d3b40090e..4852d88c52 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -2079,6 +2079,23 @@ get_function_rows(PlannerInfo *root, Oid funcid, Node *node) return result; } +List * +get_unique_indexes(RelOptInfo *rel) +{ + List *res = NIL; /* unique, immediate indexes with no predicate or with predOK set */ + ListCell *lc; + foreach(lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + + if (index->unique && index->immediate && + (index->indpred == NIL || index->predOK)) + res = lappend(res, index); + } + return res; +} + + /* * has_unique_index * diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index d9e417bcd7..7a76413ba1 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -280,7 +280,8 @@ typedef enum NodeTag T_RollupData, T_GroupingSetData, T_StatisticExtInfo, - + T_UniqueKey, + T_SingleRow, /* * TAGS FOR MEMORY NODES (memnodes.h) */ diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index d6758f21e1..2ee9e0885b 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -246,6 +246,7 @@ struct PlannerInfo * subquery outputs */ List *eq_classes; /* list of active EquivalenceClasses */ + List *unique_exprs; /* list of unique exprs; each item is a SingleRow node or a List of EquivalenceClasses */ bool ec_merging_done; /* set true once ECs are canonical */ @@ -691,6 +692,7 @@ typedef struct RelOptInfo * the len would always 1. and for others the array * index is relid from relids. */ + List *uniquekeys; /* list of UniqueKey nodes for this rel */ /* materialization information */ List *pathlist; /* Path structures */ @@ -1067,6 +1069,20 @@ typedef struct PathKey bool pk_nulls_first; /* do NULLs come before normal values? 
*/ } PathKey; + +typedef struct UniqueKey +{ + NodeTag type; + Bitmapset *unique_expr_indexes; /* indexes into PlannerInfo.unique_exprs */ + bool multi_nulls; /* true if some key column may be NULL */ +} UniqueKey; + +typedef struct SingleRow +{ + NodeTag type; + Index relid; /* relid of the base rel this node was built for */ +} SingleRow; + /* * VolatileFunctionStatus -- allows nodes to cache their * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 30f98c4595..bbe0209d7e 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -558,6 +558,8 @@ extern bool list_member_ptr(const List *list, const void *datum); extern bool list_member_int(const List *list, int datum); extern bool list_member_oid(const List *list, Oid datum); +extern bool list_is_subset_ptr(const List *a, const List *b); + extern pg_nodiscard List *list_delete(List *list, void *datum); extern pg_nodiscard List *list_delete_ptr(List *list, void *datum); extern pg_nodiscard List *list_delete_int(List *list, int datum); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index f1d111063c..07a51d737a 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -255,4 +255,8 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root, extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +extern void populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel); +extern bool relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, + List *distinct_pathkey); #endif /* PATHS_H */ diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index 8d1d6c1b42..2dd4f525e6 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -41,6 +41,8 @@ extern bool relation_excluded_by_constraints(PlannerInfo *root, extern List *build_physical_tlist(PlannerInfo *root, RelOptInfo *rel); +extern List *get_unique_indexes(RelOptInfo *rel); +extern List *get_exprs_from_index(IndexOptInfo *indinfo, bool 
contains_includekey); extern bool has_unique_index(RelOptInfo *rel, AttrNumber attno); extern Selectivity restriction_selectivity(PlannerInfo *root, diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index fec0325e73..0631e8076d 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4604,18 +4604,15 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- + QUERY PLAN +------------------------------------ Merge Right Join Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b + -> Index Scan using b_pkey on b -> Sort Sort Key: d.a -> Seq Scan on d -(9 rows) +(6 rows) -- check join removal works when uniqueness of the join condition is enforced -- by a UNION diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 11c6f50fbf..c5d32cec3a 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -306,3 +306,53 @@ SELECT null IS NOT DISTINCT FROM null as "yes"; t (1 row) +-- uniquekey test +CREATE TABLE uktest(a int, b int, c int not null, d int, e int, f int, PRIMARY KEY(a, b)); +CREATE UNIQUE INDEX on uktest(c, d); +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uktest; + QUERY PLAN +-------------------- + Seq Scan on uktest +(1 row) + +EXPLAIN (COSTS OFF) SELECT DISTINCT c FROM uktest; + QUERY PLAN +-------------------------- + HashAggregate + Group Key: c + -> Seq Scan on uktest +(3 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uktest; + QUERY PLAN +-------------------------- + HashAggregate + Group Key: c, d + -> Seq Scan on uktest +(3 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uktest WHERE d > 10; + QUERY PLAN +-------------------- + Seq Scan on uktest + Filter: (d > 10) 
+(2 rows) + +-- UniqueKey expressions are reduced due to a = Const. +EXPLAIN (COSTS OFF) SELECT DISTINCT b FROM uktest where a = 1; + QUERY PLAN +---------------------------------------- + Bitmap Heap Scan on uktest + Recheck Cond: (a = 1) + -> Bitmap Index Scan on uktest_pkey + Index Cond: (a = 1) +(4 rows) + +-- Single row case. +EXPLAIN (COSTS OFF) SELECT DISTINCT c FROM uktest where a = 1 and b = 1; + QUERY PLAN +---------------------------------------- + Index Scan using uktest_pkey on uktest + Index Cond: ((a = 1) AND (b = 1)) +(2 rows) + diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 33102744eb..706bcf37df 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -135,3 +135,23 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no"; SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; SELECT 2 IS NOT DISTINCT FROM null as "no"; SELECT null IS NOT DISTINCT FROM null as "yes"; + +-- uniquekey test +CREATE TABLE uktest(a int, b int, c int not null, d int, e int, f int, PRIMARY KEY(a, b)); +CREATE UNIQUE INDEX on uktest(c, d); + +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uktest; + +EXPLAIN (COSTS OFF) SELECT DISTINCT c FROM uktest; + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uktest; + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uktest WHERE d > 10; + +-- UniqueKey expressions are reduced due to a = Const. +EXPLAIN (COSTS OFF) SELECT DISTINCT b FROM uktest where a = 1; + +-- Single row case. +EXPLAIN (COSTS OFF) SELECT DISTINCT c FROM uktest where a = 1 and b = 1; + + -- 2.21.0