diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
index 1e199ff66f..7b9820c25f 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -21,6 +21,7 @@ OBJS = \
 	joinpath.o \
 	joinrels.o \
 	pathkeys.o \
-	tidpath.o
+	tidpath.o \
+	uniquekeys.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 905bbe77d8..10bcc0e4fa 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -579,6 +579,12 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	 */
 	check_index_predicates(root, rel);
 
+	/*
+	 * Now that we've marked which partial indexes are suitable, we can now
+	 * build the relation's unique keys.
+	 */
+	populate_baserel_uniquekeys(root, rel);
+
 	/* Mark rel with estimated output rows, width, etc */
 	set_baserel_size_estimates(root, rel);
 }
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index a21c295b99..3dd060f926 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -752,6 +752,8 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 		return joinrel;
 	}
 
+	propagate_unique_keys_to_joinrel(root, joinrel, rel1, rel2, restrictlist);
+
 	/* Add paths to the join relation. */
 	populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
 								restrictlist);
diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c
new file mode 100644
index 0000000000..40261db1b3
--- /dev/null
+++ b/src/backend/optimizer/path/uniquekeys.c
@@ -0,0 +1,328 @@
+/*-------------------------------------------------------------------------
+ *
+ * uniquekeys.c
+ *	  Utilities for matching and building unique keys
+ *
+ * Portions Copyright (c) 2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/optimizer/path/uniquekeys.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+
+/*
+ * populate_baserel_uniquekeys
+ *		Populate 'baserel' uniquekeys list by looking at the rel's unique
+ *		indexes.
+ *
+ * One UniqueKeySet is appended to baserel->uniquekeys for each usable unique
+ * index, holding one UniqueKey per index key column.
+ */
+void
+populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *baserel)
+{
+	ListCell   *lc;
+
+	Assert(baserel->rtekind == RTE_RELATION);
+
+	foreach(lc, baserel->indexlist)
+	{
+		IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
+		UniqueKeySet *keyset;
+		List	   *keys = NIL;
+		int			c;
+
+		/*
+		 * If the index is not unique, or not immediately enforced, or if it's
+		 * a partial index that doesn't match the query, it's useless here.
+		 */
+		if (!ind->unique || !ind->immediate ||
+			(ind->indpred != NIL && !ind->predOK))
+			continue;
+
+		for (c = 0; c < ind->nkeycolumns; c++)
+		{
+			UniqueKey  *key = makeNode(UniqueKey);
+
+			key->uk_collation = ind->indexcollations[c];
+			key->uk_opfamily = ind->opfamily[c];
+
+			/*
+			 * XXX is this too lazy?  Should I be building my own Var here
+			 * from indexkeys[c]?
+			 */
+			key->uk_expr = copyObject(((TargetEntry *) list_nth(ind->indextlist, c))->expr);
+
+			keys = lappend(keys, key);
+		}
+
+		keyset = makeNode(UniqueKeySet);
+		/* XXX check and update the non_null_keys for NOT NULL Vars */
+		keyset->non_null_keys = NULL;
+		keyset->keys = keys;
+
+		baserel->uniquekeys = lappend(baserel->uniquekeys, keyset);
+	}
+}
+
+/*
+ * relation_is_unique_for_keys
+ *		Returns true if every key expression in 'keyset' is equal() to some
+ *		member of 'exprs', i.e. 'exprs' covers the whole unique key.
+ */
+static bool
+relation_is_unique_for_keys(PlannerInfo *root, UniqueKeySet *keyset, List *exprs)
+{
+	ListCell   *lc;
+
+	foreach(lc, keyset->keys)
+	{
+		UniqueKey  *key = (UniqueKey *) lfirst(lc);
+		ListCell   *lc2;
+		bool		found = false;
+
+		foreach(lc2, exprs)
+		{
+			Expr	   *expr = (Expr *) lfirst(lc2);
+
+			/* XXX check collation */
+			if (equal(key->uk_expr, expr))
+			{
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * relation_has_uniquekeys_for
+ *		Returns true if we have proofs that 'rel' cannot return multiple rows
+ *		with the same values in each of 'exprs'.  Otherwise returns false.
+ *
+ * If 'req_nonnull' is true, only key sets whose keys are all marked in
+ * non_null_keys are considered.
+ *
+ * XXX populate_baserel_uniquekeys() does not fill in non_null_keys yet, so
+ * passing req_nonnull = true currently rejects every non-empty key set.
+ */
+bool
+relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel, List *exprs,
+							bool req_nonnull)
+{
+	ListCell   *lc;
+
+	foreach(lc, rel->uniquekeys)
+	{
+		UniqueKeySet *keyset = (UniqueKeySet *) lfirst(lc);
+
+		/*
+		 * When we require the keys cannot produce NULL values, skip over sets
+		 * where not all keys are marked as non-null.
+		 */
+		if (req_nonnull &&
+			bms_num_members(keyset->non_null_keys) < list_length(keyset->keys))
+			continue;
+
+		if (relation_is_unique_for_keys(root, keyset, exprs))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * clause_sides_match_join
+ *	  Determine whether a join clause is of the right form to use in this join.
+ *
+ * We already know that the clause is a binary opclause referencing only the
+ * rels in the current join.  The point here is to check whether it has the
+ * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
+ * rather than mixing outer and inner vars on either side.  If it matches,
+ * we set the transient flag outer_is_left to identify which side is which.
+ */
+static inline bool
+clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
+						Relids innerrelids)
+{
+	if (bms_is_subset(rinfo->left_relids, outerrelids) &&
+		bms_is_subset(rinfo->right_relids, innerrelids))
+	{
+		/* lefthand side is outer */
+		rinfo->outer_is_left = true;
+		return true;
+	}
+	else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
+			 bms_is_subset(rinfo->right_relids, outerrelids))
+	{
+		/* righthand side is outer */
+		rinfo->outer_is_left = false;
+		return true;
+	}
+	return false;				/* no good for these input relations */
+}
+
+/*
+ * clauselist_matches_uniquekeys
+ *		Returns true if every key in 'keyset' is matched by some clause in
+ *		'clause_list'.
+ *
+ * The clauses must already have had outer_is_left set by
+ * clause_sides_match_join().  'outer_side' is true when 'keyset' belongs to
+ * the rel that was passed as the outer rel there, false for the inner rel.
+ */
+static bool
+clauselist_matches_uniquekeys(List *clause_list, UniqueKeySet *keyset, bool outer_side)
+{
+	ListCell   *lc;
+
+	foreach(lc, keyset->keys)
+	{
+		UniqueKey  *key = (UniqueKey *) lfirst(lc);
+		ListCell   *lc2;
+		bool		matched_expr = false;
+
+		foreach(lc2, clause_list)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
+			Node	   *rexpr;
+
+			/*
+			 * The condition's equality operator must be a member of the
+			 * unique key's opfamily, else it is not asserting the right kind
+			 * of equality behavior for this key.  We check this first since
+			 * it's probably cheaper than comparing expressions with equal().
+			 */
+			if (!list_member_oid(rinfo->mergeopfamilies, key->uk_opfamily))
+				continue;
+
+			/*
+			 * XXX at some point we may need to check collations here too.
+			 * For the moment we assume all collations reduce to the same
+			 * notion of equality.
+			 */
+
+			/* OK, see if the operand on our side matches the unique key */
+			if (rinfo->outer_is_left != outer_side)
+				rexpr = get_rightop(rinfo->clause);
+			else
+				rexpr = get_leftop(rinfo->clause);
+
+			if (IsA(rexpr, RelabelType))
+				rexpr = (Node *) ((RelabelType *) rexpr)->arg;
+
+			if (equal(rexpr, key->uk_expr))
+			{
+				matched_expr = true;
+				break;
+			}
+		}
+
+		if (!matched_expr)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * rel_is_unique_for_clauses
+ *		Returns true if any of rel's unique key sets is fully matched by
+ *		'clause_list'.  'outer_side' is as for clauselist_matches_uniquekeys.
+ */
+static bool
+rel_is_unique_for_clauses(RelOptInfo *rel, List *clause_list, bool outer_side)
+{
+	ListCell   *lc;
+
+	foreach(lc, rel->uniquekeys)
+	{
+		UniqueKeySet *keys = (UniqueKeySet *) lfirst(lc);
+
+		/* XXX need to think about how to update the not-null bits here */
+		if (clauselist_matches_uniquekeys(clause_list, keys, outer_side))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * propagate_unique_keys_to_joinrel
+ *		Using 'restrictlist' determine if rel2 can duplicate rows in rel1 and
+ *		vice-versa.  If the relation at the other side of the join cannot
+ *		cause row duplication, then tag the uniquekeys for the relation onto
+ *		'joinrel's uniquekey list.
+ *
+ * make_join_rel() may call this more than once for the same 'joinrel' with
+ * different input rel pairs, so key sets already present on the joinrel are
+ * not added again.
+ */
+void
+propagate_unique_keys_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
+								 RelOptInfo *rel1, RelOptInfo *rel2,
+								 List *restrictlist)
+{
+	ListCell   *lc;
+	List	   *clause_list = NIL;
+
+	/*
+	 * XXX what about base quals being compared to Consts?  We're not looking
+	 * at those here at all.  We'd need to split the joinrel into base rel
+	 * components and tag on the base quals to clause_list, as, of course, a
+	 * join rel does not contain any base quals.
+	 */
+	foreach(lc, restrictlist)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
+
+		/*
+		 * XXX what do we do about pushed-down quals, i.e. those for which
+		 * RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids) is true?  We
+		 * don't know the join type yet, so for now they are not skipped.
+		 */
+
+		/* Ignore if it's not a mergejoinable clause */
+		if (!restrictinfo->can_join ||
+			restrictinfo->mergeopfamilies == NIL)
+			continue;			/* not mergejoinable */
+
+		/*
+		 * Check if clause has the form "outer op inner" or "inner op outer",
+		 * and if so mark which side is inner.
+		 */
+		if (!clause_sides_match_join(restrictinfo, rel1->relids, rel2->relids))
+			continue;			/* no good for these input relations */
+
+		/* OK, add to list */
+		clause_list = lappend(clause_list, restrictinfo);
+	}
+
+	/*
+	 * If rel1 is unique for the join clauses, each rel2 row can match at most
+	 * one rel1 row, so rel2's unique keys still hold for the join rel.
+	 * Likewise for rel1's keys when rel2 is unique for the join clauses.
+	 *
+	 * XXX the join type is not considered here; NULL-extended rows of outer
+	 * joins still need thought.
+	 */
+	if (rel_is_unique_for_clauses(rel1, clause_list, true))
+		joinrel->uniquekeys = list_concat_unique_ptr(joinrel->uniquekeys,
+													 rel2->uniquekeys);
+
+	if (rel_is_unique_for_clauses(rel2, clause_list, false))
+		joinrel->uniquekeys = list_concat_unique_ptr(joinrel->uniquekeys,
+													 rel1->uniquekeys);
+}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index b44efd6314..06846b1b83 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4757,6 +4757,27 @@ create_distinct_paths(PlannerInfo *root,
 	distinct_rel->useridiscurrent = input_rel->useridiscurrent;
 	distinct_rel->fdwroutine = input_rel->fdwroutine;
 
+	/*
+	 * XXX just doing this in a really hacky way to see if it works...
+	 *
+	 * XXX req_nonnull is passed as false, but a unique index allows multiple
+	 * NULLs, which DISTINCT would collapse into one row; this needs to be
+	 * true once non_null_keys gets populated.
+	 */
+	if (relation_has_uniquekeys_for(root, input_rel, get_sortgrouplist_exprs(parse->distinctClause, parse->targetList), false))
+	{
+
+		add_path(distinct_rel, (Path *)cheapest_input_path);
+
+		/* XXX yeah yeah, need to call the hooks etc. */
+
+		/* Now choose the best path(s) */
+		set_cheapest(distinct_rel);
+
+		return distinct_rel;
+	}
+
+
 	/* Estimate number of distinct rows there will be */
 	if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
 		root->hasHavingQual)
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 8a76afe8cc..4db76fdb28 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -260,6 +260,8 @@ typedef enum NodeTag
 	T_EquivalenceClass,
 	T_EquivalenceMember,
 	T_PathKey,
+	T_UniqueKeySet,
+	T_UniqueKey,
 	T_PathTarget,
 	T_RestrictInfo,
 	T_IndexClause,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 0ceb809644..5fc725e2e7 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -706,6 +706,7 @@ typedef struct RelOptInfo
 	QualCost	baserestrictcost;	/* cost of evaluating the above */
 	Index		baserestrict_min_security;	/* min security_level found in
 											 * baserestrictinfo */
+	List	   *uniquekeys;		/* list of UniqueKeySets */
 	List	   *joininfo;		/* RestrictInfo structures for join clauses
 								 * involving this rel */
 	bool		has_eclass_joins;	/* T means joininfo is incomplete */
@@ -1016,6 +1017,32 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * UniqueKeySet
+ *
+ * Represents a set of unique keys
+ */
+typedef struct UniqueKeySet
+{
+	NodeTag		type;
+
+	Bitmapset  *non_null_keys;	/* indexes of 'keys' proved non-null */
+	List	   *keys;			/* list of UniqueKeys */
+} UniqueKeySet;
+
+/*
+ * UniqueKey
+ *
+ * Represents the unique properties held by a RelOptInfo or a Path
+ */
+typedef struct UniqueKey
+{
+	NodeTag		type;
+
+	Oid			uk_collation;	/* collation, if datatypes are collatable */
+	Oid			uk_opfamily;	/* btree opfamily defining the ordering */
+	Expr	   *uk_expr;		/* unique key expression */
+} UniqueKey;
 
 /*
  * PathTarget
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 9ab73bd20c..16c1faa41e 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -240,4 +240,18 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 									List *live_childrels);
 
+/*
+ * uniquekeys.c
+ *	  Utilities for matching and building unique keys
+ */
+extern void populate_baserel_uniquekeys(PlannerInfo *root,
+										RelOptInfo *baserel);
+extern bool relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel,
+										List *exprs, bool req_nonnull);
+extern void propagate_unique_keys_to_joinrel(PlannerInfo *root,
+											 RelOptInfo *joinrel,
+											 RelOptInfo *rel1,
+											 RelOptInfo *rel2,
+											 List *restrictlist);
+
 #endif							/* PATHS_H */