diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 315a528..ca4d912 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1151,9 +1151,16 @@ ExplainNode(PlanState *planstate, List *ancestors, appendStringInfo(es->str, " %s Join", jointype); else if (!IsA(plan, NestLoop)) appendStringInfoString(es->str, " Join"); + if (((Join *)plan)->unique_inner) + appendStringInfoString(es->str, "(inner unique)"); + } else + { ExplainPropertyText("Join Type", jointype, es); + ExplainPropertyText("Inner unique", + ((Join *)plan)->unique_inner ? "true" : "false", es); + } } break; case T_SetOp: diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 1d78cdf..f2471aa 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -306,10 +306,12 @@ ExecHashJoin(HashJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * We'll consider returning the first match if the inner + * is unique, but after that we're done with this outer + * tuple. For the case of SEMI joins, we want to skip to + * the next outer row after having matched 1 inner row. 
*/ - if (node->js.jointype == JOIN_SEMI) + if (node->js.unique_inner) node->hj_JoinState = HJ_NEED_NEW_OUTER; if (otherqual == NIL || @@ -451,6 +453,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) hjstate = makeNode(HashJoinState); hjstate->js.ps.plan = (Plan *) node; hjstate->js.ps.state = estate; + hjstate->js.unique_inner = node->join.unique_inner; /* * Miscellaneous initialization @@ -498,8 +501,10 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) /* set up null tuples for outer joins, if needed */ switch (node->join.jointype) { - case JOIN_INNER: case JOIN_SEMI: + hjstate->js.unique_inner = true; + /* fall through */ + case JOIN_INNER: break; case JOIN_LEFT: case JOIN_ANTI: diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 15742c5..f6f0559 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -840,10 +840,12 @@ ExecMergeJoin(MergeJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * We'll consider returning the first match if the inner + * is unique, but after that we're done with this outer + * tuple. For the case of SEMI joins, we want to skip to + * the next outer row after having matched 1 inner row. 
*/ - if (node->js.jointype == JOIN_SEMI) + if (node->js.unique_inner) node->mj_JoinState = EXEC_MJ_NEXTOUTER; qualResult = (otherqual == NIL || @@ -1486,6 +1488,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) mergestate->js.ps.plan = (Plan *) node; mergestate->js.ps.state = estate; + mergestate->js.unique_inner = node->join.unique_inner; + /* * Miscellaneous initialization * @@ -1553,8 +1557,10 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) switch (node->join.jointype) { - case JOIN_INNER: case JOIN_SEMI: + mergestate->js.unique_inner = true; + /* fall through */ + case JOIN_INNER: mergestate->mj_FillOuter = false; mergestate->mj_FillInner = false; break; diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index e66bcda..b5e6de1 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -247,10 +247,12 @@ ExecNestLoop(NestLoopState *node) } /* - * In a semijoin, we'll consider returning the first match, but - * after that we're done with this outer tuple. + * We'll consider returning the first match if the inner is + * unique, but after that we're done with this outer tuple. + * For the case of SEMI joins, we want to skip to the next outer + * row after having matched 1 inner row. 
*/ - if (node->js.jointype == JOIN_SEMI) + if (node->js.unique_inner) node->nl_NeedNewOuter = true; if (otherqual == NIL || ExecQual(otherqual, econtext, false)) @@ -310,6 +312,8 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) nlstate->js.ps.plan = (Plan *) node; nlstate->js.ps.state = estate; + nlstate->js.unique_inner = node->join.unique_inner; + /* * Miscellaneous initialization * @@ -354,8 +358,10 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) switch (node->join.jointype) { - case JOIN_INNER: case JOIN_SEMI: + nlstate->js.unique_inner = true; + /* fall through */ + case JOIN_INNER: break; case JOIN_LEFT: case JOIN_ANTI: diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 029761e..4008328 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1944,6 +1944,7 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from) COPY_SCALAR_FIELD(jointype); COPY_SCALAR_FIELD(lhs_strict); COPY_SCALAR_FIELD(delay_upper_joins); + COPY_SCALAR_FIELD(is_unique_join); COPY_SCALAR_FIELD(semi_can_btree); COPY_SCALAR_FIELD(semi_can_hash); COPY_NODE_FIELD(semi_operators); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 190e50a..25885df 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -798,6 +798,7 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b) COMPARE_SCALAR_FIELD(jointype); COMPARE_SCALAR_FIELD(lhs_strict); COMPARE_SCALAR_FIELD(delay_upper_joins); + COMPARE_SCALAR_FIELD(is_unique_join); COMPARE_SCALAR_FIELD(semi_can_btree); COMPARE_SCALAR_FIELD(semi_can_hash); COMPARE_NODE_FIELD(semi_operators); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 385b289..69e7353 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1948,6 +1948,7 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node) WRITE_ENUM_FIELD(jointype, JoinType); 
WRITE_BOOL_FIELD(lhs_strict); WRITE_BOOL_FIELD(delay_upper_joins); + WRITE_BOOL_FIELD(is_unique_join); WRITE_BOOL_FIELD(semi_can_btree); WRITE_BOOL_FIELD(semi_can_hash); WRITE_NODE_FIELD(semi_operators); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1a0d358..df34d71 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1712,7 +1712,7 @@ cost_group(Path *path, PlannerInfo *root, */ void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, - JoinType jointype, + JoinType jointype, bool unique_inner, Path *outer_path, Path *inner_path, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors) @@ -1746,7 +1746,9 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, inner_run_cost = inner_path->total_cost - inner_path->startup_cost; inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost; - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + if (jointype == JOIN_SEMI || + jointype == JOIN_ANTI || + unique_inner) { double outer_matched_rows; Selectivity inner_scan_frac; @@ -1847,7 +1849,9 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* cost of source data */ - if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI) + if (path->jointype == JOIN_SEMI || + path->jointype == JOIN_ANTI || + path->unique_inner) { double outer_matched_rows = workspace->outer_matched_rows; Selectivity inner_scan_frac = workspace->inner_scan_frac; @@ -2658,7 +2662,9 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ - if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) + if (path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + path->jpath.unique_inner) { double outer_matched_rows; Selectivity inner_scan_frac; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 1da953f..af9214d 100644 --- 
a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -18,6 +18,7 @@ #include "executor/executor.h" #include "optimizer/cost.h" +#include "optimizer/planmain.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -28,18 +29,19 @@ static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, - JoinType jointype, SpecialJoinInfo *sjinfo, - Relids param_source_rels, Relids extra_lateral_rels); + JoinType jointype, bool unique_inner, + SpecialJoinInfo *sjinfo, Relids param_source_rels, + Relids extra_lateral_rels); static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, - JoinType jointype, SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors, + JoinType jointype, bool unique_inner, + SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, Relids extra_lateral_rels); static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, - List *restrictlist, - JoinType jointype, SpecialJoinInfo *sjinfo, + List *restrictlist, JoinType jointype, + bool unique_inner, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, Relids extra_lateral_rels); static List *select_mergejoin_clauses(PlannerInfo *root, @@ -49,7 +51,8 @@ static List *select_mergejoin_clauses(PlannerInfo *root, List *restrictlist, JoinType jointype, bool *mergejoin_allowed); - +static inline bool clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, + RelOptInfo *innerrel); /* * add_paths_to_joinrel @@ -89,6 +92,38 @@ add_paths_to_joinrel(PlannerInfo *root, Relids param_source_rels = NULL; Relids extra_lateral_rels = NULL; ListCell *lc; + bool unique_inner; + + /* left joins have already been analyzed for uniqueness in mark_unique_joins()
*/ + if (jointype == JOIN_LEFT) + unique_inner = sjinfo->is_unique_join; + else if (jointype == JOIN_INNER && + restrictlist != NIL && + rel_supports_distinctness(root, innerrel)) + { + /* + * remember the number of items that were in the restrictlist as + * the call to relation_has_unique_index_for may add more items + * which we'll need to remove later. + */ + int org_len = list_length(restrictlist); + + /* + * rel_is_distinct_for requires restrict infos to have the + * correct clause direction info + */ + foreach(lc, restrictlist) + { + clause_sides_match_join((RestrictInfo *)lfirst(lc), + outerrel, innerrel); + } + unique_inner = rel_is_distinct_for(root, innerrel, restrictlist); + + /* Remove any list items added by rel_is_distinct_for */ + list_truncate(restrictlist, org_len); + } + else + unique_inner = false; /* we can't prove uniqueness */ /* * Find potential mergejoin clauses. We can skip this if we are not @@ -214,7 +249,7 @@ add_paths_to_joinrel(PlannerInfo *root, if (mergejoin_allowed) sort_inner_and_outer(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype, - sjinfo, + unique_inner, sjinfo, param_source_rels, extra_lateral_rels); /* @@ -227,7 +262,7 @@ add_paths_to_joinrel(PlannerInfo *root, if (mergejoin_allowed) match_unsorted_outer(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype, - sjinfo, &semifactors, + unique_inner, sjinfo, &semifactors, param_source_rels, extra_lateral_rels); #ifdef NOT_USED @@ -257,11 +292,12 @@ add_paths_to_joinrel(PlannerInfo *root, */ if (enable_hashjoin || jointype == JOIN_FULL) hash_inner_and_outer(root, joinrel, outerrel, innerrel, - restrictlist, jointype, + restrictlist, jointype, unique_inner, sjinfo, &semifactors, param_source_rels, extra_lateral_rels); } + /* * try_nestloop_path * Consider a nestloop join path; if it appears useful, push it into @@ -271,6 +307,7 @@ static void try_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool 
unique_inner, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, @@ -332,7 +369,7 @@ try_nestloop_path(PlannerInfo *root, * The latter two steps are expensive enough to make this two-phase * methodology worthwhile. */ - initial_cost_nestloop(root, &workspace, jointype, + initial_cost_nestloop(root, &workspace, jointype, unique_inner, outer_path, inner_path, sjinfo, semifactors); @@ -344,6 +381,7 @@ try_nestloop_path(PlannerInfo *root, create_nestloop_path(root, joinrel, jointype, + unique_inner, &workspace, sjinfo, semifactors, @@ -369,6 +407,7 @@ static void try_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, SpecialJoinInfo *sjinfo, Relids param_source_rels, Relids extra_lateral_rels, @@ -430,6 +469,7 @@ try_mergejoin_path(PlannerInfo *root, create_mergejoin_path(root, joinrel, jointype, + unique_inner, &workspace, sjinfo, outer_path, @@ -457,6 +497,7 @@ static void try_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, @@ -505,6 +546,7 @@ try_hashjoin_path(PlannerInfo *root, create_hashjoin_path(root, joinrel, jointype, + unique_inner, &workspace, sjinfo, semifactors, @@ -565,6 +607,7 @@ clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join * 'jointype' is the type of join to do + * 'unique_inner' is the inner side of the join unique on the join condition * 'sjinfo' is extra info about the join for selectivity estimation * 'param_source_rels' are OK targets for parameterization of result paths * 'extra_lateral_rels' are additional parameterization for result paths @@ -577,6 +620,7 @@ sort_inner_and_outer(PlannerInfo *root, List *restrictlist, List *mergeclause_list, JoinType jointype, + bool unique_inner, SpecialJoinInfo *sjinfo, Relids 
param_source_rels, Relids extra_lateral_rels) @@ -707,6 +751,7 @@ sort_inner_and_outer(PlannerInfo *root, try_mergejoin_path(root, joinrel, jointype, + unique_inner, sjinfo, param_source_rels, extra_lateral_rels, @@ -752,6 +797,7 @@ sort_inner_and_outer(PlannerInfo *root, * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join * 'jointype' is the type of join to do + * 'unique_inner' is the inner side of the join unique on the join condition * 'sjinfo' is extra info about the join for selectivity estimation * 'semifactors' contains valid data if jointype is SEMI or ANTI * 'param_source_rels' are OK targets for parameterization of result paths @@ -765,6 +811,7 @@ match_unsorted_outer(PlannerInfo *root, List *restrictlist, List *mergeclause_list, JoinType jointype, + bool unique_inner, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, @@ -895,6 +942,7 @@ match_unsorted_outer(PlannerInfo *root, try_nestloop_path(root, joinrel, jointype, + unique_inner, sjinfo, semifactors, param_source_rels, @@ -921,6 +969,7 @@ match_unsorted_outer(PlannerInfo *root, try_nestloop_path(root, joinrel, jointype, + unique_inner, sjinfo, semifactors, param_source_rels, @@ -936,6 +985,7 @@ match_unsorted_outer(PlannerInfo *root, try_nestloop_path(root, joinrel, jointype, + unique_inner, sjinfo, semifactors, param_source_rels, @@ -993,6 +1043,7 @@ match_unsorted_outer(PlannerInfo *root, try_mergejoin_path(root, joinrel, jointype, + unique_inner, sjinfo, param_source_rels, extra_lateral_rels, @@ -1092,6 +1143,7 @@ match_unsorted_outer(PlannerInfo *root, try_mergejoin_path(root, joinrel, jointype, + unique_inner, sjinfo, param_source_rels, extra_lateral_rels, @@ -1138,6 +1190,7 @@ match_unsorted_outer(PlannerInfo *root, try_mergejoin_path(root, joinrel, jointype, + unique_inner, sjinfo, param_source_rels, extra_lateral_rels, @@ -1172,6 +1225,7 @@ match_unsorted_outer(PlannerInfo *root, * 'restrictlist' contains all 
of the RestrictInfo nodes for restriction * clauses that apply to this join * 'jointype' is the type of join to do + * 'unique_inner' is the inner side of the join unique on the join condition * 'sjinfo' is extra info about the join for selectivity estimation * 'semifactors' contains valid data if jointype is SEMI or ANTI * 'param_source_rels' are OK targets for parameterization of result paths @@ -1184,6 +1238,7 @@ hash_inner_and_outer(PlannerInfo *root, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, + bool unique_inner, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Relids param_source_rels, @@ -1258,6 +1313,7 @@ hash_inner_and_outer(PlannerInfo *root, try_hashjoin_path(root, joinrel, jointype, + unique_inner, sjinfo, semifactors, param_source_rels, @@ -1278,6 +1334,7 @@ hash_inner_and_outer(PlannerInfo *root, try_hashjoin_path(root, joinrel, jointype, + unique_inner, sjinfo, semifactors, param_source_rels, @@ -1291,6 +1348,7 @@ hash_inner_and_outer(PlannerInfo *root, try_hashjoin_path(root, joinrel, jointype, + unique_inner, sjinfo, semifactors, param_source_rels, @@ -1316,6 +1374,7 @@ hash_inner_and_outer(PlannerInfo *root, try_hashjoin_path(root, joinrel, jointype, + unique_inner, sjinfo, semifactors, param_source_rels, @@ -1354,6 +1413,7 @@ hash_inner_and_outer(PlannerInfo *root, try_hashjoin_path(root, joinrel, jointype, + unique_inner, sjinfo, semifactors, param_source_rels, diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index fe9fd57..a79c194 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -624,6 +624,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) /* we don't bother trying to make the remaining fields valid */ sjinfo->lhs_strict = false; sjinfo->delay_upper_joins = false; + sjinfo->is_unique_join = false; sjinfo->semi_can_btree = false; sjinfo->semi_can_hash = false; sjinfo->semi_operators = NIL; diff --git 
a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 11d3933..13a561a 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -33,11 +33,37 @@ /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool specialjoin_is_unique_join(PlannerInfo *root, + SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static Oid distinct_col_search(int colno, List *colnos, List *opids); +/* + * mark_unique_joins + Analyze joins in order to determine if their inner side is unique based + on the join condition. + */ +void +mark_unique_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + + /* + * Currently we're only interested in LEFT JOINs that have not already + * been marked as unique by a previous call. + */ + if (sjinfo->jointype == JOIN_LEFT && + !sjinfo->is_unique_join && + specialjoin_is_unique_join(root, sjinfo)) + sjinfo->is_unique_join = true; + } +} /* * remove_useless_joins @@ -91,6 +117,12 @@ restart: root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); /* + * We may now be able to mark some joins as unique which we could + * not do before + */ + mark_unique_joins(root, joinlist); + + /* * Restart the scan. 
This is necessary to ensure we find all * removable joins independently of ordering of the join_info_list * (note that removal of attr_needed bits may make a join appear @@ -151,17 +183,17 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; - Query *subquery = NULL; Relids joinrelids; - List *clause_list = NIL; - ListCell *l; int attroff; + ListCell *l; /* - * Must be a non-delaying left join to a single baserel, else we aren't - * going to be able to do anything with it. + * Join must not duplicate its outer side and must be a non-delaying left + * join to a single baserel, else we aren't going to be able to do anything + * with it. */ - if (sjinfo->jointype != JOIN_LEFT || + if (!sjinfo->is_unique_join || + sjinfo->jointype != JOIN_LEFT || sjinfo->delay_upper_joins) return false; @@ -170,38 +202,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) innerrel = find_base_rel(root, innerrelid); - if (innerrel->reloptkind != RELOPT_BASEREL) - return false; - - /* - * Before we go to the effort of checking whether any innerrel variables - * are needed above the join, make a quick check to eliminate cases in - * which we will surely be unable to prove uniqueness of the innerrel. - */ - if (innerrel->rtekind == RTE_RELATION) - { - /* - * For a plain-relation innerrel, we only know how to prove uniqueness - * by reference to unique indexes. If there are no indexes then - * there's certainly no unique indexes so there's no point in going - * further. - */ - if (innerrel->indexlist == NIL) - return false; - } - else if (innerrel->rtekind == RTE_SUBQUERY) - { - subquery = root->simple_rte_array[innerrelid]->subquery; - - /* - * If the subquery has no qualities that support distinctness proofs - * then there's no point in going further. 
- */ - if (!query_supports_distinctness(subquery)) - return false; - } - else - return false; /* unsupported rtekind */ + Assert(innerrel->reloptkind == RELOPT_BASEREL); /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -212,7 +213,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * * Note that this test only detects use of inner-rel attributes in higher * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. + * pushed-down conditions at this join, too, but in this case the join + * would not have been marked as unique. * * As a micro-optimization, it seems better to start with max_attr and * count down rather than starting with min_attr and counting up, on the @@ -253,6 +255,45 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; /* it does reference innerrel */ } + return true; +} + +/* + * specialjoin_is_unique_join + * True if it can be proved that this special join can only ever match at + * most 1 inner row for any single outer row. False is returned if there's + * insufficient evidence to prove the join is unique. + */ +static bool +specialjoin_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo) +{ + int innerrelid; + RelOptInfo *innerrel; + Query *subquery = NULL; + Relids joinrelids; + ListCell *l; + List *clause_list = NIL; + + /* if there's more than 1 relation involved then punt */ + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + return false; + + innerrel = find_base_rel(root, innerrelid); + + if (innerrel->reloptkind != RELOPT_BASEREL) + return false; + + /* + * Before we go to the effort of pulling out the join condition's columns, + * make a quick check to eliminate cases in which we will surely be unable + * to prove uniqueness of the innerrel. 
+ */ + if (!rel_supports_distinctness(root, innerrel)) + return false; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + /* * Search for mergejoinable clauses that constrain the inner rel against * either the outer rel or a pseudoconstant. If an operator is @@ -274,10 +315,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) !bms_equal(restrictinfo->required_relids, joinrelids)) { /* - * If such a clause actually references the inner rel then join - * removal has to be disallowed. We have to check this despite - * the previous attr_needed checks because of the possibility of - * pushed-down clauses referencing the rel. + * If such a clause actually references the inner rel then we can't + * mark the join as unique. */ if (bms_is_member(innerrelid, restrictinfo->clause_relids)) return false; @@ -300,71 +339,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) clause_list = lappend(clause_list, restrictinfo); } - /* - * relation_has_unique_index_for automatically adds any usable restriction - * clauses for the innerrel, so we needn't do that here. (XXX we are not - * considering restriction clauses for subqueries; is that worth doing?) - */ - - if (innerrel->rtekind == RTE_RELATION) - { - /* Now examine the indexes to see if we have a matching unique index */ - if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL)) - return true; - } - else /* innerrel->rtekind == RTE_SUBQUERY */ - { - List *colnos = NIL; - List *opids = NIL; - - /* - * Build the argument lists for query_is_distinct_for: a list of - * output column numbers that the query needs to be distinct over, and - * a list of equality operators that the output columns need to be - * distinct according to. 
- */ - foreach(l, clause_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Oid op; - Var *var; - - /* - * Get the equality operator we need uniqueness according to. - * (This might be a cross-type operator and thus not exactly the - * same operator the subquery would consider; that's all right - * since query_is_distinct_for can resolve such cases.) The - * mergejoinability test above should have selected only OpExprs. - */ - Assert(IsA(rinfo->clause, OpExpr)); - op = ((OpExpr *) rinfo->clause)->opno; - - /* clause_sides_match_join identified the inner side for us */ - if (rinfo->outer_is_left) - var = (Var *) get_rightop(rinfo->clause); - else - var = (Var *) get_leftop(rinfo->clause); - - /* - * If inner side isn't a Var referencing a subquery output column, - * this clause doesn't help us. - */ - if (!var || !IsA(var, Var) || - var->varno != innerrelid || var->varlevelsup != 0) - continue; - - colnos = lappend_int(colnos, var->varattno); - opids = lappend_oid(opids, op); - } - - if (query_is_distinct_for(subquery, colnos, opids)) - return true; - } + if (rel_is_distinct_for(root, innerrel, clause_list)) + return true; - /* - * Some day it would be nice to check for other methods of establishing - * distinctness. - */ return false; } @@ -564,6 +541,125 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) return result; } +/* + * rel_is_distinct_for + * Returns True if rel can be proved to be distinct over clause_list + * + * Note: We expect clause_list to be already processed to check if the + * RestrictInfos are in the form "outerrel_expr op innerrel_expr" or + * "innerrel_expr op outerrel_expr". + * + * Note: this method may add items to clause_list, callers should either + * make a copy of the list or trim it back to its original length after + * calling this function.
+ */ +bool +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +{ + int relid = rel->relid; + + /* + * relation_has_unique_index_for automatically adds any usable restriction + * clauses for the rel, so we needn't do that here. (XXX we are not + * considering restriction clauses for subqueries; is that worth doing?) + */ + if (rel->rtekind == RTE_RELATION) + { + /* Now examine the indexes to see if we have a matching unique index */ + if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + List *colnos = NIL; + List *opids = NIL; + ListCell *l; + Query *subquery = root->simple_rte_array[relid]->subquery; + + /* + * Build the argument lists for query_is_distinct_for: a list of + * output column numbers that the query needs to be distinct over, and + * a list of equality operators that the output columns need to be + * distinct according to. + */ + foreach(l, clause_list) + { + RestrictInfo *rinfo = (RestrictInfo *)lfirst(l); + Oid op; + Var *var; + + if (!IsA(rinfo->clause, OpExpr)) + continue; + + /* + * Get the equality operator we need uniqueness according to. + * (This might be a cross-type operator and thus not exactly the + * same operator the subquery would consider; that's all right + * since query_is_distinct_for can resolve such cases.) The + * mergejoinability test above should have selected only OpExprs. + */ + op = ((OpExpr *)rinfo->clause)->opno; + + /* clause_sides_match_join identified the inner side for us */ + if (rinfo->outer_is_left) + var = (Var *)get_rightop(rinfo->clause); + else + var = (Var *)get_leftop(rinfo->clause); + + /* + * If inner side isn't a Var referencing a subquery output column, + * this clause doesn't help us. 
+ */ + if (!var || !IsA(var, Var) || + var->varno != relid || var->varlevelsup != 0) + continue; + + colnos = lappend_int(colnos, var->varattno); + opids = lappend_oid(opids, op); + } + + if (query_is_distinct_for(subquery, colnos, opids)) + return true; + } + return false; /* can't prove rel to be distinct over clause_list */ +} +/* + * rel_supports_distinctness + * Returns true if rel has some properties which can prove the relation + * to be unique over some set of columns. + * + * This is effectively a pre-checking function for rel_is_distinct_for(). + * It must return TRUE if rel_is_distinct_for() could possibly return TRUE + */ +bool +rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) +{ + if (rel->rtekind == RTE_RELATION) + { + /* + * For a plain-relation, we only know how to prove uniqueness + * by reference to unique indexes. If there are no indexes then + * there's certainly no unique indexes so there's nothing to prove + * uniqueness on the relation. + */ + if (rel->indexlist != NIL) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + Query *subquery = root->simple_rte_array[rel->relid]->subquery; + + /* Check if the subquery has any qualities that support distinctness */ + if (query_supports_distinctness(subquery)) + return true; + } + + /* + * Some day it would be nice to check for other methods of establishing + * distinctness. 
+ */ + return false; +} /* * query_supports_distinctness - could the query possibly be proven distinct diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index cb69c03..02899de 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -131,13 +131,12 @@ static BitmapAnd *make_bitmap_and(List *bitmapplans); static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, List *joinclauses, List *otherclauses, List *nestParams, - Plan *lefttree, Plan *righttree, - JoinType jointype); + Plan *lefttree, Plan *righttree, JoinPath *jpath); static HashJoin *make_hashjoin(List *tlist, List *joinclauses, List *otherclauses, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinPath *jpath); static Hash *make_hash(Plan *lefttree, Oid skewTable, AttrNumber skewColumn, @@ -152,7 +151,7 @@ static MergeJoin *make_mergejoin(List *tlist, int *mergestrategies, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinPath *jpath); static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, Oid *collations, bool *nullsFirst, @@ -2192,7 +2191,7 @@ create_nestloop_plan(PlannerInfo *root, nestParams, outer_plan, inner_plan, - best_path->jointype); + best_path); copy_path_costsize(&join_plan->join.plan, &best_path->path); @@ -2486,7 +2485,7 @@ create_mergejoin_plan(PlannerInfo *root, mergenullsfirst, outer_plan, inner_plan, - best_path->jpath.jointype); + &best_path->jpath); /* Costs of sort and material steps are included in path cost already */ copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); @@ -2612,7 +2611,7 @@ create_hashjoin_plan(PlannerInfo *root, hashclauses, outer_plan, (Plan *) hash_plan, - best_path->jpath.jointype); + &best_path->jpath); copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); @@ -3717,7 +3716,7 @@ 
make_nestloop(List *tlist, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { NestLoop *node = makeNode(NestLoop); Plan *plan = &node->join.plan; @@ -3727,8 +3726,9 @@ make_nestloop(List *tlist, plan->qual = otherclauses; plan->lefttree = lefttree; plan->righttree = righttree; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.unique_inner = jpath->unique_inner; node->nestParams = nestParams; return node; @@ -3741,7 +3741,7 @@ make_hashjoin(List *tlist, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { HashJoin *node = makeNode(HashJoin); Plan *plan = &node->join.plan; @@ -3752,8 +3752,9 @@ make_hashjoin(List *tlist, plan->lefttree = lefttree; plan->righttree = righttree; node->hashclauses = hashclauses; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.unique_inner = jpath->unique_inner; return node; } @@ -3801,7 +3802,7 @@ make_mergejoin(List *tlist, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { MergeJoin *node = makeNode(MergeJoin); Plan *plan = &node->join.plan; @@ -3816,8 +3817,9 @@ make_mergejoin(List *tlist, node->mergeCollations = mergecollations; node->mergeStrategies = mergestrategies; node->mergeNullsFirst = mergenullsfirst; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.unique_inner = jpath->unique_inner; return node; } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index a7655e4..8094880 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1087,6 +1087,7 @@ make_outerjoininfo(PlannerInfo *root, sjinfo->jointype = jointype; /* this always starts out false */ sjinfo->delay_upper_joins = false; + 
sjinfo->is_unique_join = false; compute_semijoin_info(sjinfo, clause); diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 848df97..55310d8 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -174,6 +174,9 @@ query_planner(PlannerInfo *root, List *tlist, */ fix_placeholder_input_needed_levels(root); + /* Analyze joins to find out which ones have a unique inner side */ + mark_unique_joins(root, joinlist); + /* * Remove any useless outer joins. Ideally this would be done during * jointree preprocessing, but the necessary information isn't available diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index faca30b..3cb8644 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1514,6 +1514,7 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path) * * 'joinrel' is the join relation. * 'jointype' is the type of join required + * 'unique_inner' is true if the inner rel is unique on the join condition * 'workspace' is the result from initial_cost_nestloop * 'sjinfo' is extra info about the join for selectivity estimation * 'semifactors' contains valid data if jointype is SEMI or ANTI @@ -1529,6 +1530,7 @@ NestPath * create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, @@ -1581,6 +1583,7 @@ create_nestloop_path(PlannerInfo *root, pathnode->jointype = jointype; pathnode->outerjoinpath = outer_path; pathnode->innerjoinpath = inner_path; + pathnode->unique_inner = unique_inner; pathnode->joinrestrictinfo = restrict_clauses; final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors); @@ -1595,6 +1598,7 @@ create_nestloop_path(PlannerInfo *root, * * 'joinrel' is the join relation * 'jointype' is the type of join required + * 'unique_inner' is true if the
inner rel is unique on the join condition * 'workspace' is the result from initial_cost_mergejoin * 'sjinfo' is extra info about the join for selectivity estimation * 'outer_path' is the outer path @@ -1611,6 +1615,7 @@ MergePath * create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, Path *outer_path, @@ -1638,6 +1643,7 @@ create_mergejoin_path(PlannerInfo *root, pathnode->jpath.jointype = jointype; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; + pathnode->jpath.unique_inner = unique_inner; pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->path_mergeclauses = mergeclauses; pathnode->outersortkeys = outersortkeys; @@ -1655,6 +1661,7 @@ create_mergejoin_path(PlannerInfo *root, * * 'joinrel' is the join relation * 'jointype' is the type of join required + * 'unique_inner' is true if the inner rel is unique on the join condition * 'workspace' is the result from initial_cost_hashjoin * 'sjinfo' is extra info about the join for selectivity estimation * 'semifactors' contains valid data if jointype is SEMI or ANTI @@ -1669,6 +1676,7 @@ HashPath * create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, @@ -1706,6 +1714,7 @@ create_hashjoin_path(PlannerInfo *root, pathnode->jpath.jointype = jointype; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; + pathnode->jpath.unique_inner = unique_inner; pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->path_hashclauses = hashclauses; /* final_cost_hashjoin will fill in pathnode->num_batches */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index ac75f86..e79cf19 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1565,6
+1565,7 @@ typedef struct JoinState PlanState ps; JoinType jointype; List *joinqual; /* JOIN quals (in addition to ps.qual) */ + bool unique_inner; /* inner rel is unique on the join condition */ } JoinState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 21cbfa8..f093541 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -543,6 +543,7 @@ typedef struct Join Plan plan; JoinType jointype; List *joinqual; /* JOIN quals (in addition to plan.qual) */ + bool unique_inner; /* inner rel is unique on the join condition */ } Join; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 401a686..a628e4f 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1031,6 +1031,7 @@ typedef struct JoinPath Path *outerjoinpath; /* path for the outer side of the join */ Path *innerjoinpath; /* path for the inner side of the join */ + bool unique_inner; /* inner rel is unique on the join condition */ List *joinrestrictinfo; /* RestrictInfos to apply to join */ @@ -1407,6 +1408,7 @@ typedef struct SpecialJoinInfo JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */ bool lhs_strict; /* joinclause is strict for some LHS rel */ bool delay_upper_joins; /* can't commute with upper RHS */ + bool is_unique_join; /* matches a max of 1 row per outer join row */ /* Remaining fields are set only for JOIN_SEMI jointype: */ bool semi_can_btree; /* true if semi_operators are all btree */ bool semi_can_hash; /* true if semi_operators are all hash */ diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 9c2000b..307977e 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -113,7 +113,7 @@ extern void cost_group(Path *path, PlannerInfo *root, double input_tuples); extern void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, - JoinType jointype, + JoinType jointype, 
bool unique_inner, Path *outer_path, Path *inner_path, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 9923f0e..adebd70 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -89,6 +89,7 @@ extern Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_pat extern NestPath *create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, @@ -101,6 +102,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root, extern MergePath *create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, Path *outer_path, @@ -115,6 +117,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root, extern HashPath *create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index fa72918..7a85227 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -122,7 +122,11 @@ extern RestrictInfo *build_implied_join_equality(Oid opno, /* * prototypes for plan/analyzejoins.c */ +extern void mark_unique_joins(PlannerInfo *root, List *joinlist); extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); +extern bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, + List *clause_list); +extern bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); diff --git a/src/test/regress/expected/equivclass.out 
b/src/test/regress/expected/equivclass.out index dfae84e..ad1d673 100644 --- a/src/test/regress/expected/equivclass.out +++ b/src/test/regress/expected/equivclass.out @@ -186,7 +186,7 @@ explain (costs off) select * from ec1, ec2 where ff = x1 and x1 = '42'::int8alias2; QUERY PLAN ----------------------------------------- - Nested Loop + Nested Loop(inner unique) -> Seq Scan on ec2 Filter: (x1 = '42'::int8alias2) -> Index Scan using ec1_pkey on ec1 @@ -310,7 +310,7 @@ explain (costs off) -> Index Scan using ec1_expr3 on ec1 ec1_5 -> Index Scan using ec1_expr4 on ec1 ec1_6 -> Materialize - -> Merge Join + -> Merge Join(inner unique) Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1) -> Merge Append Sort Key: (((ec1_1.ff + 2) + 1)) @@ -365,7 +365,7 @@ explain (costs off) where ss1.x = ec1.f1 and ec1.ff = 42::int8; QUERY PLAN ----------------------------------------------------- - Merge Join + Merge Join(inner unique) Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1) -> Merge Append Sort Key: (((ec1_1.ff + 2) + 1)) diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 57fc910..6140eba 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2614,8 +2614,8 @@ from nt3 as nt3 where nt3.id = 1 and ss2.b3; QUERY PLAN ----------------------------------------------- - Nested Loop - -> Nested Loop + Nested Loop(inner unique) + -> Nested Loop(inner unique) -> Index Scan using nt3_pkey on nt3 Index Cond: (id = 1) -> Index Scan using nt2_pkey on nt2 @@ -3338,7 +3338,7 @@ explain (costs off) on (p.k = ss.k); QUERY PLAN --------------------------------- - Hash Left Join + Hash Left Join(inner unique) Hash Cond: (p.k = c.k) -> Seq Scan on parent p -> Hash @@ -4416,3 +4416,247 @@ ERROR: invalid reference to FROM-clause entry for table "xx1" LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss; ^ HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. 
+-- +-- test planner's ability to mark joins as unique. +-- +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); +analyze j1; +analyze j2; +analyze j3; +-- Ensure join is marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Join(inner unique) + Output: j1.id, j2.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- Ensure join not marked as unique when not using = +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + Join Filter: (j1.id > j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id, j3.id + Hash Cond: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(9 rows) + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join(inner unique) + Output: j1.id, j2.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join(inner unique) + 
Output: j1.id, j2.id + Hash Cond: (j2.id = j1.id) + -> Seq Scan on public.j2 + Output: j2.id + -> Hash + Output: j1.id + -> Seq Scan on public.j1 + Output: j1.id +(9 rows) + +-- cross joins can't be proved unique +explain (verbose, costs off) +select * from j1 cross join j2; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(8 rows) + +-- ensure natural join is marked as unique +explain (verbose, costs off) +select * from j1 natural join j2; + QUERY PLAN +----------------------------------- + Hash Join(inner unique) + Output: j1.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- ensure distinct clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------- + Nested Loop(inner unique) + Output: j1.id, j3.id + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> HashAggregate + Output: j3.id + Group Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(12 rows) + +-- ensure group by clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------- + Nested Loop(inner unique) + Output: j1.id, j3.id + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> HashAggregate + Output: j3.id + Group Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(12 rows) + +-- a subquery with an empty FROM clause should be marked as unique. 
+explain (verbose, costs off) +select * from j1 +inner join (select 1 id offset 0) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------- + Hash Join + Output: j1.id, (1) + Hash Cond: (j1.id = (1)) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: (1) + -> Result + Output: 1 +(9 rows) + +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Full Join + Output: j1.id, j2.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +drop table j1; +drop table j2; +drop table j3; +-- test a more complex permutations of unique joins +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); +analyze j1; +analyze j2; +analyze j3; +-- ensure no unique joins when not all columns which are part of +-- the unique index are part of the join clause. 
+explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + QUERY PLAN +------------------------------------------ + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Join Filter: (j1.id1 = j2.id1) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 +(7 rows) + +-- ensure unique joins work with multiple columns +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + QUERY PLAN +---------------------------------------------------------- + Nested Loop(inner unique) + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2)) + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 + -> Materialize + Output: j2.id1, j2.id2 + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 +(9 rows) + +drop table j1; +drop table j2; +drop table j3; diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 44e8dab..9517f9b 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -248,7 +248,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle); EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle); QUERY PLAN ---------------------------------------------------- - Nested Loop + Nested Loop(inner unique) -> Subquery Scan on document Filter: f_leak(document.dtitle) -> Seq Scan on document document_1 diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out index 82d510d..01b8b45 100644 --- a/src/test/regress/expected/select_views.out +++ b/src/test/regress/expected/select_views.out @@ -1365,7 +1365,7 @@ NOTICE: f_leak => 9801-2345-6789-0123 EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_normal WHERE f_leak(cnum); QUERY PLAN --------------------------------------------------------- - Hash Join + Hash Join(inner unique) Hash Cond: (r.cid = 
l.cid) -> Seq Scan on credit_card r Filter: f_leak(cnum) @@ -1386,7 +1386,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_secure WHERE f_leak(cnum); --------------------------------------------------------------- Subquery Scan on my_credit_card_secure Filter: f_leak(my_credit_card_secure.cnum) - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r.cid = l.cid) -> Seq Scan on credit_card r -> Hash @@ -1420,7 +1420,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_normal -> Materialize -> Subquery Scan on l Filter: f_leak(l.cnum) - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r_1.cid = l_1.cid) -> Seq Scan on credit_card r_1 -> Hash @@ -1451,7 +1451,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_secure -> Seq Scan on credit_usage r Filter: ((ymd >= '10-01-2011'::date) AND (ymd < '11-01-2011'::date)) -> Materialize - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r_1.cid = l.cid) -> Seq Scan on credit_card r_1 -> Hash diff --git a/src/test/regress/expected/select_views_1.out b/src/test/regress/expected/select_views_1.out index ce22bfa..a37bde4 100644 --- a/src/test/regress/expected/select_views_1.out +++ b/src/test/regress/expected/select_views_1.out @@ -1365,7 +1365,7 @@ NOTICE: f_leak => 9801-2345-6789-0123 EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_normal WHERE f_leak(cnum); QUERY PLAN --------------------------------------------------------- - Hash Join + Hash Join(inner unique) Hash Cond: (r.cid = l.cid) -> Seq Scan on credit_card r Filter: f_leak(cnum) @@ -1386,7 +1386,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_secure WHERE f_leak(cnum); --------------------------------------------------------------- Subquery Scan on my_credit_card_secure Filter: f_leak(my_credit_card_secure.cnum) - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r.cid = l.cid) -> Seq Scan on credit_card r -> Hash @@ -1420,7 +1420,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_normal -> Materialize -> Subquery 
Scan on l Filter: f_leak(l.cnum) - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r_1.cid = l_1.cid) -> Seq Scan on credit_card r_1 -> Hash @@ -1451,7 +1451,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_secure -> Seq Scan on credit_usage r Filter: ((ymd >= '10-01-2011'::date) AND (ymd < '11-01-2011'::date)) -> Materialize - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r_1.cid = l.cid) -> Seq Scan on credit_card r_1 -> Hash diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 06a27ea..9d27d1e 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1307,3 +1307,98 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1) delete from xx1 using (select * from int4_tbl where f1 = x1) ss; delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss; delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss; + +-- +-- test planner's ability to mark joins as unique. 
+-- + +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); + +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); + +analyze j1; +analyze j2; +analyze j3; + +-- Ensure join is marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + +-- Ensure join not marked as unique when not using = +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + +-- j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + +-- cross joins can't be proved unique +explain (verbose, costs off) +select * from j1 cross join j2; + +-- ensure natural join is marked as unique +explain (verbose, costs off) +select * from j1 natural join j2; + +-- ensure distinct clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + +-- ensure group by clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + +-- a subquery with an empty FROM clause should be marked as unique. 
+explain (verbose, costs off) +select * from j1 +inner join (select 1 id offset 0) j3 on j1.id = j3.id; + +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + +drop table j1; +drop table j2; +drop table j3; + +-- test a more complex permutations of unique joins + +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); + +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); + +analyze j1; +analyze j2; +analyze j3; + +-- ensure no unique joins when not all columns which are part of +-- the unique index are part of the join clause. +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + +-- ensure unique joins work with multiple columns +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + +drop table j1; +drop table j2; +drop table j3;