diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index ee13136..9af036d 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1245,6 +1245,24 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->verbose) show_plan_tlist(planstate, ancestors, es); + /* unique join */ + if (es->verbose || es->format != EXPLAIN_FORMAT_TEXT) + { + switch (nodeTag(plan)) + { + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + { + const char *val = ((Join *)plan)->unique_inner ? "Yes" : "No"; + ExplainPropertyText("Unique Inner", val, es); + break; + } + default: + break; + } + } + /* quals, sort keys, etc */ switch (nodeTag(plan)) { diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 369e666..27b732b 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -306,10 +306,10 @@ ExecHashJoin(HashJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * Skip to the next outer tuple if we only need 1 inner + * tuple to match. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.match_first_tuple_only) node->hj_JoinState = HJ_NEED_NEW_OUTER; if (otherqual == NIL || @@ -453,6 +453,14 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) hjstate->js.ps.state = estate; /* + * When the planner was able to determine that the inner side of the join + * will at most contain a single tuple for each outer tuple, then we can + * optimize the join by skipping to the next outer tuple after we find the + * first matching inner tuple. 
+ */ + hjstate->js.match_first_tuple_only = node->join.unique_inner; + + /* * Miscellaneous initialization * * create expression context for node @@ -498,8 +506,11 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) /* set up null tuples for outer joins, if needed */ switch (node->join.jointype) { - case JOIN_INNER: case JOIN_SEMI: + /* for semi joins we match to the first tuple only */ + hjstate->js.match_first_tuple_only = true; + /* fall through */ + case JOIN_INNER: break; case JOIN_LEFT: case JOIN_ANTI: diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 6db09b8..341cd4a 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -840,10 +840,10 @@ ExecMergeJoin(MergeJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * Skip to the next outer tuple if we only need 1 inner + * tuple to match. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.match_first_tuple_only) node->mj_JoinState = EXEC_MJ_NEXTOUTER; qualResult = (otherqual == NIL || @@ -1487,6 +1487,14 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) mergestate->js.ps.state = estate; /* + * When the planner was able to determine that the inner side of the join + * will at most contain a single tuple for each outer tuple, then we can + * optimize the join by skipping to the next outer tuple after we find the + * first matching inner tuple. 
+ */ + mergestate->js.match_first_tuple_only = node->join.unique_inner; + + /* * Miscellaneous initialization * * create expression context for node @@ -1553,8 +1561,11 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) switch (node->join.jointype) { - case JOIN_INNER: case JOIN_SEMI: + /* for semi joins we match to the first tuple only */ + mergestate->js.match_first_tuple_only = true; + /* fall through */ + case JOIN_INNER: mergestate->mj_FillOuter = false; mergestate->mj_FillInner = false; break; diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index 555fa09..0313171 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -247,10 +247,10 @@ ExecNestLoop(NestLoopState *node) } /* - * In a semijoin, we'll consider returning the first match, but - * after that we're done with this outer tuple. + * Skip to the next outer tuple if we only need 1 inner tuple to + * match. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.match_first_tuple_only) node->nl_NeedNewOuter = true; if (otherqual == NIL || ExecQual(otherqual, econtext, false)) @@ -311,6 +311,14 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) nlstate->js.ps.state = estate; /* + * When the planner was able to determine that the inner side of the join + * will at most contain a single tuple for each outer tuple, then we can + * optimize the join by skipping to the next outer tuple after we find the + * first matching inner tuple. 
+ */ + nlstate->js.match_first_tuple_only = node->join.unique_inner; + + /* * Miscellaneous initialization * * create expression context for node @@ -354,8 +362,11 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) switch (node->join.jointype) { - case JOIN_INNER: case JOIN_SEMI: + /* for semi joins we match to the first tuple only */ + nlstate->js.match_first_tuple_only = true; + /* fall through */ + case JOIN_INNER: break; case JOIN_LEFT: case JOIN_ANTI: diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index df7c2fa..1f4b77e 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2063,6 +2063,7 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from) COPY_SCALAR_FIELD(jointype); COPY_SCALAR_FIELD(lhs_strict); COPY_SCALAR_FIELD(delay_upper_joins); + COPY_SCALAR_FIELD(is_unique_join); COPY_SCALAR_FIELD(semi_can_btree); COPY_SCALAR_FIELD(semi_can_hash); COPY_NODE_FIELD(semi_operators); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index b9c3959..f374c36 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -838,6 +838,7 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b) COMPARE_SCALAR_FIELD(jointype); COMPARE_SCALAR_FIELD(lhs_strict); COMPARE_SCALAR_FIELD(delay_upper_joins); + COMPARE_SCALAR_FIELD(is_unique_join); COMPARE_SCALAR_FIELD(semi_can_btree); COMPARE_SCALAR_FIELD(semi_can_hash); COMPARE_NODE_FIELD(semi_operators); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 3119b9e..c1bda16 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2265,6 +2265,7 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node) WRITE_ENUM_FIELD(jointype, JoinType); WRITE_BOOL_FIELD(lhs_strict); WRITE_BOOL_FIELD(delay_upper_joins); + WRITE_BOOL_FIELD(is_unique_join); WRITE_BOOL_FIELD(semi_can_btree); WRITE_BOOL_FIELD(semi_can_hash); WRITE_NODE_FIELD(semi_operators); diff 
--git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 5350329..4f60c3e 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1894,7 +1894,7 @@ cost_group(Path *path, PlannerInfo *root, */ void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, - JoinType jointype, + JoinType jointype, bool unique_inner, Path *outer_path, Path *inner_path, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors) @@ -1928,7 +1928,9 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, inner_run_cost = inner_path->total_cost - inner_path->startup_cost; inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost; - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + if (jointype == JOIN_SEMI || + jointype == JOIN_ANTI || + unique_inner) { /* * SEMI or ANTI join: executor will stop after first match. @@ -1999,7 +2001,9 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* cost of inner-relation source data (we already dealt with outer rel) */ - if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI) + if (path->jointype == JOIN_SEMI || + path->jointype == JOIN_ANTI || + path->unique_inner) { /* * SEMI or ANTI join: executor will stop after first match. 
@@ -2878,7 +2882,9 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ - if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) + if (path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + path->jpath.unique_inner) { double outer_matched_rows; Selectivity inner_scan_frac; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 3b898da..8a71337 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -19,6 +19,7 @@ #include "executor/executor.h" #include "foreign/fdwapi.h" #include "optimizer/cost.h" +#include "optimizer/planmain.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -50,7 +51,8 @@ static List *select_mergejoin_clauses(PlannerInfo *root, List *restrictlist, JoinType jointype, bool *mergejoin_allowed); - +static inline bool clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, + RelOptInfo *innerrel); /* * add_paths_to_joinrel @@ -87,11 +89,44 @@ add_paths_to_joinrel(PlannerInfo *root, JoinPathExtraData extra; bool mergejoin_allowed = true; ListCell *lc; + bool unique_inner; + + /* left joins were already analyzed for uniqueness in mark_unique_joins() */ + if (jointype == JOIN_LEFT) + unique_inner = sjinfo->is_unique_join; + else if (jointype == JOIN_INNER && + restrictlist != NIL && + rel_supports_distinctness(root, innerrel)) + { + /* + * remember the number of items that were in the restrictlist as + * the call to relation_has_unique_index_for may add more items + * which we'll need to remove later. 
+ */ + int org_len = list_length(restrictlist); + + /* + * rel_is_distinct_for needs correct clause direction info, so set it. + * NOTE(review): clause_sides_match_join's result is ignored; confirm. + */ + foreach(lc, restrictlist) + { + clause_sides_match_join((RestrictInfo *)lfirst(lc), + outerrel, innerrel); + } + unique_inner = rel_is_distinct_for(root, innerrel, restrictlist); + + /* Remove any list items added by rel_is_distinct_for */ + list_truncate(restrictlist, org_len); + } + else + unique_inner = false; /* we can't prove uniqueness */ extra.restrictlist = restrictlist; extra.mergeclause_list = NIL; extra.sjinfo = sjinfo; extra.param_source_rels = NULL; + extra.unique_inner = unique_inner; /* * Find potential mergejoin clauses. We can skip this if we are not @@ -312,7 +347,7 @@ try_nestloop_path(PlannerInfo *root, * The latter two steps are expensive enough to make this two-phase * methodology worthwhile. */ - initial_cost_nestloop(root, &workspace, jointype, + initial_cost_nestloop(root, &workspace, jointype, extra->unique_inner, outer_path, inner_path, extra->sjinfo, &extra->semifactors); @@ -324,6 +359,7 @@ try_nestloop_path(PlannerInfo *root, create_nestloop_path(root, joinrel, jointype, + extra->unique_inner, &workspace, extra->sjinfo, &extra->semifactors, @@ -375,7 +411,7 @@ try_partial_nestloop_path(PlannerInfo *root, * Before creating a path, get a quick lower bound on what it is likely * to cost. Bail out right away if it looks terrible. 
*/ - initial_cost_nestloop(root, &workspace, jointype, + initial_cost_nestloop(root, &workspace, jointype, extra->unique_inner, outer_path, inner_path, extra->sjinfo, &extra->semifactors); if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) @@ -386,6 +422,7 @@ try_partial_nestloop_path(PlannerInfo *root, create_nestloop_path(root, joinrel, jointype, + extra->unique_inner, &workspace, extra->sjinfo, &extra->semifactors, @@ -457,6 +494,7 @@ try_mergejoin_path(PlannerInfo *root, create_mergejoin_path(root, joinrel, jointype, + extra->unique_inner, &workspace, extra->sjinfo, outer_path, @@ -522,6 +560,7 @@ try_hashjoin_path(PlannerInfo *root, create_hashjoin_path(root, joinrel, jointype, + extra->unique_inner, &workspace, extra->sjinfo, &extra->semifactors, @@ -584,6 +623,7 @@ try_partial_hashjoin_path(PlannerInfo *root, create_hashjoin_path(root, joinrel, jointype, + extra->unique_inner, &workspace, extra->sjinfo, &extra->semifactors, diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 01d4fea..72be870 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -701,6 +701,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) /* we don't bother trying to make the remaining fields valid */ sjinfo->lhs_strict = false; sjinfo->delay_upper_joins = false; + sjinfo->is_unique_join = false; sjinfo->semi_can_btree = false; sjinfo->semi_can_hash = false; sjinfo->semi_operators = NIL; diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index d682db4..74d7fb0 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -33,11 +33,37 @@ /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool specialjoin_is_unique_join(PlannerInfo *root, + SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo 
*root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static Oid distinct_col_search(int colno, List *colnos, List *opids); +/* + * mark_unique_joins + * Set is_unique_join on each LEFT JOIN in root->join_info_list whose + * inner side is proved unique; 'joinlist' is not referenced here. + */ +void +mark_unique_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + + /* + * Currently we're only interested in LEFT JOINs that have not already + * been marked as unique by a previous call. + */ + if (sjinfo->jointype == JOIN_LEFT && + !sjinfo->is_unique_join && + specialjoin_is_unique_join(root, sjinfo)) + sjinfo->is_unique_join = true; + } +} /* * remove_useless_joins @@ -91,6 +117,12 @@ restart: root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); /* + * We may now be able to mark some joins as unique which we could + * not do before. + */ + mark_unique_joins(root, joinlist); + + /* * Restart the scan. This is necessary to ensure we find all * removable joins independently of ordering of the join_info_list * (note that removal of attr_needed bits may make a join appear @@ -151,17 +183,17 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; - Query *subquery = NULL; Relids joinrelids; - List *clause_list = NIL; - ListCell *l; int attroff; + ListCell *l; /* - * Must be a non-delaying left join to a single baserel, else we aren't - * going to be able to do anything with it. + * Join must not duplicate its outer side and must be a non-delaying left + * join to a single baserel, else we aren't going to be able to do anything + * with it. 
*/ - if (sjinfo->jointype != JOIN_LEFT || + if (!sjinfo->is_unique_join || + sjinfo->jointype != JOIN_LEFT || sjinfo->delay_upper_joins) return false; @@ -170,38 +202,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) innerrel = find_base_rel(root, innerrelid); - if (innerrel->reloptkind != RELOPT_BASEREL) - return false; - - /* - * Before we go to the effort of checking whether any innerrel variables - * are needed above the join, make a quick check to eliminate cases in - * which we will surely be unable to prove uniqueness of the innerrel. - */ - if (innerrel->rtekind == RTE_RELATION) - { - /* - * For a plain-relation innerrel, we only know how to prove uniqueness - * by reference to unique indexes. If there are no indexes then - * there's certainly no unique indexes so there's no point in going - * further. - */ - if (innerrel->indexlist == NIL) - return false; - } - else if (innerrel->rtekind == RTE_SUBQUERY) - { - subquery = root->simple_rte_array[innerrelid]->subquery; - - /* - * If the subquery has no qualities that support distinctness proofs - * then there's no point in going further. - */ - if (!query_supports_distinctness(subquery)) - return false; - } - else - return false; /* unsupported rtekind */ + /* Must be true as is_unique_join can only be set to true for base rels */ + Assert(innerrel->reloptkind == RELOPT_BASEREL); /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -212,7 +214,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * * Note that this test only detects use of inner-rel attributes in higher * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. + * pushed-down conditions at this join, too, but in this case the join + * would not have been marked as unique. 
* * As a micro-optimization, it seems better to start with max_attr and * count down rather than starting with min_attr and counting up, on the @@ -253,6 +256,44 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; /* it does reference innerrel */ } + return true; +} + +/* + * specialjoin_is_unique_join + * True if it can be proved that this special join can only ever match at + * most 1 inner row for any single outer row. False is returned if there's + * insufficient evidence to prove the join is unique. + */ +static bool +specialjoin_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo) +{ + int innerrelid; + RelOptInfo *innerrel; + Relids joinrelids; + ListCell *l; + List *clause_list = NIL; + + /* if there's more than 1 relation involved then punt */ + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + return false; + + innerrel = find_base_rel(root, innerrelid); + + if (innerrel->reloptkind != RELOPT_BASEREL) + return false; + + /* + * Before we go to the effort of pulling out the join condition's columns, + * make a quick check to eliminate cases in which we will surely be unable + * to prove uniqueness of the innerrel. + */ + if (!rel_supports_distinctness(root, innerrel)) + return false; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + /* * Search for mergejoinable clauses that constrain the inner rel against * either the outer rel or a pseudoconstant. If an operator is @@ -274,10 +315,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) !bms_equal(restrictinfo->required_relids, joinrelids)) { /* - * If such a clause actually references the inner rel then join - * removal has to be disallowed. We have to check this despite - * the previous attr_needed checks because of the possibility of - * pushed-down clauses referencing the rel. 
+ * If such a clause actually references the inner rel then we can't + * mark the join as unique. */ if (bms_is_member(innerrelid, restrictinfo->clause_relids)) return false; @@ -300,71 +339,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) clause_list = lappend(clause_list, restrictinfo); } - /* - * relation_has_unique_index_for automatically adds any usable restriction - * clauses for the innerrel, so we needn't do that here. (XXX we are not - * considering restriction clauses for subqueries; is that worth doing?) - */ - - if (innerrel->rtekind == RTE_RELATION) - { - /* Now examine the indexes to see if we have a matching unique index */ - if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL)) - return true; - } - else /* innerrel->rtekind == RTE_SUBQUERY */ - { - List *colnos = NIL; - List *opids = NIL; - - /* - * Build the argument lists for query_is_distinct_for: a list of - * output column numbers that the query needs to be distinct over, and - * a list of equality operators that the output columns need to be - * distinct according to. - */ - foreach(l, clause_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Oid op; - Var *var; - - /* - * Get the equality operator we need uniqueness according to. - * (This might be a cross-type operator and thus not exactly the - * same operator the subquery would consider; that's all right - * since query_is_distinct_for can resolve such cases.) The - * mergejoinability test above should have selected only OpExprs. - */ - Assert(IsA(rinfo->clause, OpExpr)); - op = ((OpExpr *) rinfo->clause)->opno; - - /* clause_sides_match_join identified the inner side for us */ - if (rinfo->outer_is_left) - var = (Var *) get_rightop(rinfo->clause); - else - var = (Var *) get_leftop(rinfo->clause); - - /* - * If inner side isn't a Var referencing a subquery output column, - * this clause doesn't help us. 
- */ - if (!var || !IsA(var, Var) || - var->varno != innerrelid || var->varlevelsup != 0) - continue; - - colnos = lappend_int(colnos, var->varattno); - opids = lappend_oid(opids, op); - } - - if (query_is_distinct_for(subquery, colnos, opids)) - return true; - } + if (rel_is_distinct_for(root, innerrel, clause_list)) + return true; - /* - * Some day it would be nice to check for other methods of establishing - * distinctness. - */ return false; } @@ -559,6 +536,126 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) return result; } +/* + * rel_is_distinct_for + * Returns true if rel can be proved distinct over clause_list, else false + * + * Note: We expect clause_list to be already processed to check if the + * RestrictInfos are in the form "outerrel_expr op innerrel_expr" or + * "innerrel_expr op outerrel_expr". + * + * Note: this function may add items to clause_list; callers should either + * make a copy of the list or trim it back to its original length after + * calling this function. + */ +bool +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +{ + int relid = rel->relid; + + /* + * relation_has_unique_index_for automatically adds any usable restriction + * clauses for the rel, so we needn't do that here. (XXX we are not + * considering restriction clauses for subqueries; is that worth doing?) + */ + if (rel->rtekind == RTE_RELATION) + { + /* Now examine the indexes to see if we have a matching unique index */ + if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + List *colnos = NIL; + List *opids = NIL; + ListCell *l; + Query *subquery = root->simple_rte_array[relid]->subquery; + + /* + * Build the argument lists for query_is_distinct_for: a list of + * output column numbers that the query needs to be distinct over, and + * a list of equality operators that the output columns need to be + * distinct according to. 
+ */ + foreach(l, clause_list) + { + RestrictInfo *rinfo = (RestrictInfo *)lfirst(l); + Oid op; + Var *var; + + if (!IsA(rinfo->clause, OpExpr)) + continue; + + /* + * Get the equality operator we need uniqueness according to. + * (This might be a cross-type operator and thus not exactly the + * same operator the subquery would consider; that's all right + * since query_is_distinct_for can resolve such cases.) Clauses + * that are not OpExprs were already skipped by the check above. + */ + op = ((OpExpr *)rinfo->clause)->opno; + + /* outer_is_left, expected from the caller, identifies the inner side */ + if (rinfo->outer_is_left) + var = (Var *)get_rightop(rinfo->clause); + else + var = (Var *)get_leftop(rinfo->clause); + + /* + * If inner side isn't a Var referencing a subquery output column, + * this clause doesn't help us. + */ + if (!var || !IsA(var, Var) || + var->varno != relid || var->varlevelsup != 0) + continue; + + colnos = lappend_int(colnos, var->varattno); + opids = lappend_oid(opids, op); + } + + if (query_is_distinct_for(subquery, colnos, opids)) + return true; + } + return false; /* can't prove rel to be distinct over clause_list */ +} + +/* + * rel_supports_distinctness + * Returns true if rel has some properties which can prove the relation + * to be unique over some set of columns. + * + * This is effectively a pre-checking function for rel_is_distinct_for(). + * It must return true whenever rel_is_distinct_for() could return true. + */ +bool +rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) +{ + if (rel->rtekind == RTE_RELATION) + { + /* + * For a plain relation, we only know how to prove uniqueness + * by reference to unique indexes. If there are no indexes then + * there are certainly no unique indexes, so there's nothing to prove + * uniqueness of the relation with. 
+ */ + if (rel->indexlist != NIL) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + Query *subquery = root->simple_rte_array[rel->relid]->subquery; + + /* Check if the subquery has any qualities that support distinctness */ + if (query_supports_distinctness(subquery)) + return true; + } + + /* + * Some day it would be nice to check for other methods of establishing + * distinctness. + */ + return false; +} /* * query_supports_distinctness - could the query possibly be proven distinct diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 3024ff9..91d8e50 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -204,13 +204,12 @@ static BitmapAnd *make_bitmap_and(List *bitmapplans); static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, List *joinclauses, List *otherclauses, List *nestParams, - Plan *lefttree, Plan *righttree, - JoinType jointype); + Plan *lefttree, Plan *righttree, JoinPath *jpath); static HashJoin *make_hashjoin(List *tlist, List *joinclauses, List *otherclauses, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinPath *jpath); static Hash *make_hash(Plan *lefttree, Oid skewTable, AttrNumber skewColumn, @@ -225,7 +224,7 @@ static MergeJoin *make_mergejoin(List *tlist, int *mergestrategies, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinPath *jpath); static Sort *make_sort(Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, Oid *collations, bool *nullsFirst); @@ -3480,7 +3479,7 @@ create_nestloop_plan(PlannerInfo *root, nestParams, outer_plan, inner_plan, - best_path->jointype); + best_path); copy_generic_path_info(&join_plan->join.plan, &best_path->path); @@ -3783,7 +3782,7 @@ create_mergejoin_plan(PlannerInfo *root, mergenullsfirst, outer_plan, inner_plan, - best_path->jpath.jointype); + &best_path->jpath); /* Costs 
of sort and material steps are included in path cost already */ copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); @@ -3923,7 +3922,7 @@ create_hashjoin_plan(PlannerInfo *root, hashclauses, outer_plan, (Plan *) hash_plan, - best_path->jpath.jointype); + &best_path->jpath); copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); @@ -5022,7 +5021,7 @@ make_nestloop(List *tlist, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { NestLoop *node = makeNode(NestLoop); Plan *plan = &node->join.plan; @@ -5031,8 +5030,9 @@ make_nestloop(List *tlist, plan->qual = otherclauses; plan->lefttree = lefttree; plan->righttree = righttree; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.unique_inner = jpath->unique_inner; node->nestParams = nestParams; return node; @@ -5045,7 +5045,7 @@ make_hashjoin(List *tlist, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { HashJoin *node = makeNode(HashJoin); Plan *plan = &node->join.plan; @@ -5055,8 +5055,9 @@ make_hashjoin(List *tlist, plan->lefttree = lefttree; plan->righttree = righttree; node->hashclauses = hashclauses; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.unique_inner = jpath->unique_inner; return node; } @@ -5097,7 +5098,7 @@ make_mergejoin(List *tlist, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { MergeJoin *node = makeNode(MergeJoin); Plan *plan = &node->join.plan; @@ -5111,8 +5112,9 @@ make_mergejoin(List *tlist, node->mergeCollations = mergecollations; node->mergeStrategies = mergestrategies; node->mergeNullsFirst = mergenullsfirst; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.unique_inner = jpath->unique_inner; return node; } 
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 37fb586..7ea7984 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1128,6 +1128,7 @@ make_outerjoininfo(PlannerInfo *root, sjinfo->jointype = jointype; /* this always starts out false */ sjinfo->delay_upper_joins = false; + sjinfo->is_unique_join = false; compute_semijoin_info(sjinfo, clause); diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 443e64e..885746b 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -185,6 +185,9 @@ query_planner(PlannerInfo *root, List *tlist, */ fix_placeholder_input_needed_levels(root); + /* Analyze joins to find out which ones have a unique inner side */ + mark_unique_joins(root, joinlist); + /* * Remove any useless outer joins. Ideally this would be done during * jointree preprocessing, but the necessary information isn't available diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index fe5e830..60d774c 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1911,6 +1911,7 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path) * * 'joinrel' is the join relation. 
* 'jointype' is the type of join required + * 'unique_inner' is the inner side of the join unique on the join condition * 'workspace' is the result from initial_cost_nestloop * 'sjinfo' is extra info about the join for selectivity estimation * 'semifactors' contains valid data if jointype is SEMI or ANTI @@ -1926,6 +1927,7 @@ NestPath * create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, @@ -1984,6 +1986,7 @@ create_nestloop_path(PlannerInfo *root, pathnode->jointype = jointype; pathnode->outerjoinpath = outer_path; pathnode->innerjoinpath = inner_path; + pathnode->unique_inner = unique_inner; pathnode->joinrestrictinfo = restrict_clauses; final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors); @@ -1998,6 +2001,7 @@ create_nestloop_path(PlannerInfo *root, * * 'joinrel' is the join relation * 'jointype' is the type of join required + * 'unique_inner' is the inner side of the join unique on the join condition * 'workspace' is the result from initial_cost_mergejoin * 'sjinfo' is extra info about the join for selectivity estimation * 'outer_path' is the outer path @@ -2014,6 +2018,7 @@ MergePath * create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, Path *outer_path, @@ -2047,6 +2052,7 @@ create_mergejoin_path(PlannerInfo *root, pathnode->jpath.jointype = jointype; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; + pathnode->jpath.unique_inner = unique_inner; pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->path_mergeclauses = mergeclauses; pathnode->outersortkeys = outersortkeys; @@ -2064,6 +2070,7 @@ create_mergejoin_path(PlannerInfo *root, * * 'joinrel' is the join relation * 'jointype' is the type of join required + * 'unique_inner' is the inner side of the join 
unique on the join condition * 'workspace' is the result from initial_cost_hashjoin * 'sjinfo' is extra info about the join for selectivity estimation * 'semifactors' contains valid data if jointype is SEMI or ANTI @@ -2078,6 +2085,7 @@ HashPath * create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, @@ -2121,6 +2129,7 @@ create_hashjoin_path(PlannerInfo *root, pathnode->jpath.jointype = jointype; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; + pathnode->jpath.unique_inner = unique_inner; pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->path_hashclauses = hashclauses; /* final_cost_hashjoin will fill in pathnode->num_batches */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 064a050..a31e758 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1662,6 +1662,9 @@ typedef struct JoinState PlanState ps; JoinType jointype; List *joinqual; /* JOIN quals (in addition to ps.qual) */ + bool match_first_tuple_only; /* True if we should move to the next + * outer tuple after matching first + * inner tuple */ } JoinState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 5961f2c..c5d4f0c 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -605,6 +605,7 @@ typedef struct Join Plan plan; JoinType jointype; List *joinqual; /* JOIN quals (in addition to plan.qual) */ + bool unique_inner; /* inner rel is unique on the join condition */ } Join; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 098a486..98f7ed6 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1158,6 +1158,7 @@ typedef struct JoinPath Path *outerjoinpath; /* path for the outer side of the join */ Path 
*innerjoinpath; /* path for the inner side of the join */ + bool unique_inner; /* inner rel is unique on the join condition */ List *joinrestrictinfo; /* RestrictInfos to apply to join */ @@ -1723,6 +1724,7 @@ typedef struct SpecialJoinInfo JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */ bool lhs_strict; /* joinclause is strict for some LHS rel */ bool delay_upper_joins; /* can't commute with upper RHS */ + bool is_unique_join; /* matches a max of 1 row per outer join row */ /* Remaining fields are set only for JOIN_SEMI jointype: */ bool semi_can_btree; /* true if semi_operators are all btree */ bool semi_can_hash; /* true if semi_operators are all hash */ @@ -1955,6 +1957,8 @@ typedef struct SemiAntiJoinFactors * sjinfo is extra info about special joins for selectivity estimation * semifactors is as shown above (only valid for SEMI or ANTI joins) * param_source_rels are OK targets for parameterization of result paths + * unique_inner is set to True if the planner has determined that the inner + * side of the join can at most produce one tuple for each outer tuple */ typedef struct JoinPathExtraData { @@ -1963,6 +1967,7 @@ typedef struct JoinPathExtraData SpecialJoinInfo *sjinfo; SemiAntiJoinFactors semifactors; Relids param_source_rels; + bool unique_inner; } JoinPathExtraData; /* diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index fea2bb7..3dc06ed 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -120,7 +120,7 @@ extern void cost_group(Path *path, PlannerInfo *root, double input_tuples); extern void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, - JoinType jointype, + JoinType jointype, bool unique_inner, Path *outer_path, Path *inner_path, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index df4be93..0a07cb2 100644 --- a/src/include/optimizer/pathnode.h +++ 
b/src/include/optimizer/pathnode.h @@ -99,6 +99,7 @@ extern Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_pat extern NestPath *create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, @@ -111,6 +112,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root, extern MergePath *create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, Path *outer_path, @@ -125,6 +127,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root, extern HashPath *create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, + bool unique_inner, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index cd7338a..e79c037 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -90,7 +90,11 @@ extern RestrictInfo *build_implied_join_equality(Oid opno, /* * prototypes for plan/analyzejoins.c */ +extern void mark_unique_joins(PlannerInfo *root, List *joinlist); extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); +extern bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, + List *clause_list); +extern bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 601bdb4..f2dbb4a 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -884,29 +884,31 @@ explain (costs off) select a,c from t1 group by a,c,d; explain (costs off) select * from t1 inner join t2 on t1.a 
= t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; - QUERY PLAN -------------------------------------------------------- - Group + QUERY PLAN +------------------------------------------------------ + HashAggregate Group Key: t1.a, t1.b, t2.x, t2.y - -> Merge Join - Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) - -> Index Scan using t1_pkey on t1 - -> Index Scan using t2_pkey on t2 -(6 rows) + -> Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(7 rows) -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ HashAggregate Group Key: t1.a, t1.b, t2.x, t2.z - -> Merge Join - Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) - -> Index Scan using t1_pkey on t1 - -> Index Scan using t2_pkey on t2 -(6 rows) + -> Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(7 rows) -- Cannot optimize when PK is deferrable explain (costs off) select * from t3 group by a,b,c; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index cafbc5e..4c488bc 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3330,6 +3330,7 @@ using (join_key); -------------------------------------------------------------------------- Nested Loop Left Join Output: "*VALUES*".column1, i1.f1, (666) + Unique Inner: No Join Filter: ("*VALUES*".column1 = i1.f1) -> Values Scan on "*VALUES*" Output: "*VALUES*".column1 @@ -3337,12 +3338,13 @@ using (join_key); Output: i1.f1, (666) -> Nested Loop Left Join Output: i1.f1, 666 + Unique Inner: No -> Seq Scan on public.int4_tbl i1 Output: i1.f1 -> Index Only Scan using tenk1_unique2 on public.tenk1 i2 Output: 
i2.unique2 Index Cond: (i2.unique2 = i1.f1) -(14 rows) +(16 rows) select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from (values (0),(1)) foo1(join_key) @@ -3380,9 +3382,11 @@ select t1.* from ---------------------------------------------------------------------- Hash Left Join Output: t1.f1 + Unique Inner: No Hash Cond: (i8.q2 = i4.f1) -> Nested Loop Left Join Output: t1.f1, i8.q2 + Unique Inner: No Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 @@ -3390,9 +3394,11 @@ select t1.* from Output: i8.q2 -> Hash Right Join Output: i8.q2 + Unique Inner: No Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Left Join Output: i8.q2, (NULL::integer) + Unique Inner: No Hash Cond: (i8.q1 = i8b2.q1) -> Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 @@ -3408,7 +3414,7 @@ select t1.* from Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(30 rows) +(34 rows) select t1.* from text_tbl t1 @@ -3441,9 +3447,11 @@ select t1.* from ---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 + Unique Inner: No Hash Cond: (i8.q2 = i4.f1) -> Nested Loop Left Join Output: t1.f1, i8.q2 + Unique Inner: No Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 @@ -3451,12 +3459,15 @@ select t1.* from Output: i8.q2 -> Hash Right Join Output: i8.q2 + Unique Inner: No Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Right Join Output: i8.q2, (NULL::integer) + Unique Inner: No Hash Cond: (i8b2.q1 = i8.q1) -> Nested Loop Output: i8b2.q1, NULL::integer + Unique Inner: No -> Seq Scan on public.int8_tbl i8b2 Output: i8b2.q1, i8b2.q2 -> Materialize @@ -3473,7 +3484,7 @@ select t1.* from Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(34 rows) +(39 rows) select t1.* from text_tbl t1 @@ -3507,9 +3518,11 @@ select t1.* from ---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 + Unique Inner: No 
Hash Cond: (i8.q2 = i4.f1) -> Nested Loop Left Join Output: t1.f1, i8.q2 + Unique Inner: No Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 @@ -3517,12 +3530,15 @@ select t1.* from Output: i8.q2 -> Hash Right Join Output: i8.q2 + Unique Inner: No Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Right Join Output: i8.q2, (NULL::integer) + Unique Inner: No Hash Cond: (i8b2.q1 = i8.q1) -> Hash Join Output: i8b2.q1, NULL::integer + Unique Inner: No Hash Cond: (i8b2.q1 = i4b2.f1) -> Seq Scan on public.int8_tbl i8b2 Output: i8b2.q1, i8b2.q2 @@ -3542,7 +3558,7 @@ select t1.* from Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(37 rows) +(42 rows) select t1.* from text_tbl t1 @@ -3574,14 +3590,17 @@ select * from -------------------------------------------------------- Nested Loop Left Join Output: t1.f1, i8.q1, i8.q2, t2.f1, i4.f1 + Unique Inner: No -> Seq Scan on public.text_tbl t2 Output: t2.f1 -> Materialize Output: i8.q1, i8.q2, i4.f1, t1.f1 -> Nested Loop Output: i8.q1, i8.q2, i4.f1, t1.f1 + Unique Inner: No -> Nested Loop Left Join Output: i8.q1, i8.q2, i4.f1 + Unique Inner: No Join Filter: (i8.q1 = i4.f1) -> Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 @@ -3591,7 +3610,7 @@ select * from -> Seq Scan on public.text_tbl t1 Output: t1.f1 Filter: (t1.f1 = 'doh!'::text) -(19 rows) +(22 rows) select * from text_tbl t1 @@ -3621,9 +3640,11 @@ where t1.f1 = ss.f1; -------------------------------------------------- Nested Loop Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1 + Unique Inner: No Join Filter: (t1.f1 = t2.f1) -> Nested Loop Left Join Output: t1.f1, i8.q1, i8.q2 + Unique Inner: No -> Seq Scan on public.text_tbl t1 Output: t1.f1 -> Materialize @@ -3635,7 +3656,7 @@ where t1.f1 = ss.f1; Output: (i8.q1), t2.f1 -> Seq Scan on public.text_tbl t2 Output: i8.q1, t2.f1 -(16 rows) +(18 rows) select * from text_tbl t1 @@ -3660,11 +3681,14 @@ where t1.f1 = ss2.f1; 
------------------------------------------------------------------- Nested Loop Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1, ((i8.q1)), (t2.f1) + Unique Inner: No Join Filter: (t1.f1 = (t2.f1)) -> Nested Loop Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1 + Unique Inner: No -> Nested Loop Left Join Output: t1.f1, i8.q1, i8.q2 + Unique Inner: No -> Seq Scan on public.text_tbl t1 Output: t1.f1 -> Materialize @@ -3680,7 +3704,7 @@ where t1.f1 = ss2.f1; Output: ((i8.q1)), (t2.f1) -> Seq Scan on public.text_tbl t3 Output: (i8.q1), t2.f1 -(22 rows) +(25 rows) select * from text_tbl t1 @@ -3706,10 +3730,13 @@ where tt1.f1 = ss1.c0; ---------------------------------------------------------- Nested Loop Output: 1 + Unique Inner: No -> Nested Loop Left Join Output: tt1.f1, tt4.f1 + Unique Inner: No -> Nested Loop Output: tt1.f1 + Unique Inner: No -> Seq Scan on public.text_tbl tt1 Output: tt1.f1 Filter: (tt1.f1 = 'foo'::text) @@ -3719,6 +3746,7 @@ where tt1.f1 = ss1.c0; Output: tt4.f1 -> Nested Loop Left Join Output: tt4.f1 + Unique Inner: No Join Filter: (tt3.f1 = tt4.f1) -> Seq Scan on public.text_tbl tt3 Output: tt3.f1 @@ -3733,7 +3761,7 @@ where tt1.f1 = ss1.c0; Output: (tt4.f1) -> Seq Scan on public.text_tbl tt5 Output: tt4.f1 -(29 rows) +(33 rows) select 1 from text_tbl as tt1 @@ -3763,13 +3791,17 @@ where ss1.c2 = 0; ------------------------------------------------------------------------ Nested Loop Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42)) + Unique Inner: No -> Hash Join Output: i41.f1, i42.f1, i8.q1, i8.q2, i43.f1, 42 + Unique Inner: No Hash Cond: (i41.f1 = i42.f1) -> Nested Loop Output: i8.q1, i8.q2, i43.f1, i41.f1 + Unique Inner: No -> Nested Loop Output: i8.q1, i8.q2, i43.f1 + Unique Inner: No -> Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 Filter: (i8.q1 = 0) @@ -3786,7 +3818,7 @@ where ss1.c2 = 0; Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42)) -> Seq Scan on public.text_tbl Output: i41.f1, i8.q1, i8.q2, i42.f1, i43.f1, 
(42) -(25 rows) +(29 rows) select ss2.* from int4_tbl i41 @@ -3843,6 +3875,7 @@ explain (verbose, costs off) --------------------------------------------------------- Merge Left Join Output: a.q2, b.q1 + Unique Inner: No Merge Cond: (a.q2 = (COALESCE(b.q1, '1'::bigint))) Filter: (COALESCE(b.q1, '1'::bigint) > 0) -> Sort @@ -3855,7 +3888,7 @@ explain (verbose, costs off) Sort Key: (COALESCE(b.q1, '1'::bigint)) -> Seq Scan on public.int8_tbl b Output: b.q1, COALESCE(b.q1, '1'::bigint) -(14 rows) +(15 rows) select a.q2, b.q1 from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1) @@ -4738,12 +4771,13 @@ select * from ------------------------------------------ Nested Loop Left Join Output: a.q1, a.q2, b.q1, b.q2, (a.q2) + Unique Inner: No -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2, a.q2 Filter: (a.q2 = b.q1) -(7 rows) +(8 rows) select * from int8_tbl a left join @@ -4770,12 +4804,13 @@ select * from ------------------------------------------------------------------ Nested Loop Left Join Output: a.q1, a.q2, b.q1, b.q2, (COALESCE(a.q2, '42'::bigint)) + Unique Inner: No -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2, COALESCE(a.q2, '42'::bigint) Filter: (a.q2 = b.q1) -(7 rows) +(8 rows) select * from int8_tbl a left join @@ -4803,6 +4838,7 @@ select * from int4_tbl i left join ------------------------------------------- Hash Left Join Output: i.f1, j.f1 + Unique Inner: No Hash Cond: (i.f1 = j.f1) -> Seq Scan on public.int4_tbl i Output: i.f1 @@ -4810,7 +4846,7 @@ select * from int4_tbl i left join Output: j.f1 -> Seq Scan on public.int2_tbl j Output: j.f1 -(9 rows) +(10 rows) select * from int4_tbl i left join lateral (select * from int2_tbl j where i.f1 = j.f1) k on true; @@ -4830,12 +4866,13 @@ select * from int4_tbl i left join ------------------------------------- Nested Loop Left Join Output: i.f1, (COALESCE(i.*)) + Unique Inner: No -> Seq 
Scan on public.int4_tbl i Output: i.f1, i.* -> Seq Scan on public.int2_tbl j Output: j.f1, COALESCE(i.*) Filter: (i.f1 = j.f1) -(7 rows) +(8 rows) select * from int4_tbl i left join lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true; @@ -4857,10 +4894,12 @@ select * from int4_tbl a, ------------------------------------------------- Nested Loop Output: a.f1, b.f1, c.q1, c.q2 + Unique Inner: No -> Seq Scan on public.int4_tbl a Output: a.f1 -> Hash Left Join Output: b.f1, c.q1, c.q2 + Unique Inner: No Hash Cond: (b.f1 = c.q1) -> Seq Scan on public.int4_tbl b Output: b.f1 @@ -4869,7 +4908,7 @@ select * from int4_tbl a, -> Seq Scan on public.int8_tbl c Output: c.q1, c.q2 Filter: (a.f1 = c.q2) -(14 rows) +(16 rows) select * from int4_tbl a, lateral ( @@ -4915,16 +4954,18 @@ select * from ------------------------------------------------------------- Nested Loop Left Join Output: a.q1, a.q2, b.q1, c.q1, (LEAST(a.q1, b.q1, c.q1)) + Unique Inner: No -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 -> Nested Loop Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1) + Unique Inner: No -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2 Filter: (a.q2 = b.q1) -> Seq Scan on public.int8_tbl c Output: c.q1, c.q2 -(11 rows) +(13 rows) select * from int8_tbl a left join lateral @@ -4991,13 +5032,17 @@ select * from ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Nested Loop Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)), ((COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))) + Unique Inner: No -> Hash Right Join Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) + Unique Inner: No Hash Cond: (d.q1 = c.q2) -> Nested Loop Output: a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), 
(COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) + Unique Inner: No -> Hash Left Join Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)) + Unique Inner: No Hash Cond: (a.q2 = b.q1) -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 @@ -5013,7 +5058,7 @@ select * from Output: c.q1, c.q2 -> Result Output: (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) -(24 rows) +(28 rows) -- case that breaks the old ph_may_need optimization explain (verbose, costs off) @@ -5031,17 +5076,22 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from --------------------------------------------------------------------------------------------------------- Nested Loop Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, i.f1 + Unique Inner: No Join Filter: ((COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) > i.f1) -> Hash Right Join Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) + Unique Inner: No Hash Cond: (d.q1 = c.q2) -> Nested Loop Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) + Unique Inner: No -> Hash Right Join Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint)) + Unique Inner: No Hash Cond: (b.q1 = a.q2) -> Nested Loop Output: b.q1, COALESCE(b.q2, (b2.f1)::bigint) + Unique Inner: No Join Filter: (b.q1 < b2.f1) -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2 @@ -5063,7 +5113,7 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from Output: i.f1 -> Seq Scan on public.int4_tbl i Output: i.f1 -(34 rows) +(39 rows) -- check processing of postponed quals (bug #9041) explain (verbose, costs off) @@ -5076,16 +5126,18 @@ select * from ---------------------------------------------- Nested Loop Left Join Output: (1), (2), (3) + Unique Inner: No Join Filter: (((3) = (1)) AND ((3) = (2))) -> Nested Loop Output: (1), (2) + Unique Inner: No -> Result Output: 1 -> Result Output: 2 -> Result Output: 3 -(11 rows) +(13 rows) -- check we don't try to do a unique-ified semijoin with LATERAL explain (verbose, costs off) @@ 
-5098,10 +5150,12 @@ select * from ---------------------------------------------------------------------- Nested Loop Output: "*VALUES*".column1, "*VALUES*".column2, int4_tbl.f1 + Unique Inner: No -> Values Scan on "*VALUES*" Output: "*VALUES*".column1, "*VALUES*".column2 -> Nested Loop Semi Join Output: int4_tbl.f1 + Unique Inner: No Join Filter: (int4_tbl.f1 = tenk1.unique1) -> Seq Scan on public.int4_tbl Output: int4_tbl.f1 @@ -5110,7 +5164,7 @@ select * from -> Index Scan using tenk1_unique2 on public.tenk1 Output: tenk1.unique1 Index Cond: (tenk1.unique2 = "*VALUES*".column2) -(14 rows) +(16 rows) select * from (values (0,9998), (1,1000)) v(id,x), @@ -5137,10 +5191,12 @@ lateral (select * from int8_tbl t1, ----------------------------------------------------------------- Nested Loop Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2 + Unique Inner: No -> Seq Scan on public.int8_tbl t1 Output: t1.q1, t1.q2 -> Nested Loop Output: "*VALUES*".column1, ss2.q1, ss2.q2 + Unique Inner: No -> Values Scan on "*VALUES*" Output: "*VALUES*".column1 -> Subquery Scan on ss2 @@ -5162,7 +5218,7 @@ lateral (select * from int8_tbl t1, -> Seq Scan on public.int8_tbl t3 Output: t3.q1, t3.q2 Filter: (t3.q2 = $2) -(27 rows) +(29 rows) select * from (values (0), (1)) v(id), lateral (select * from int8_tbl t1, @@ -5260,3 +5316,247 @@ ERROR: invalid reference to FROM-clause entry for table "xx1" LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss; ^ HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. +-- +-- test planner's ability to mark joins as unique. 
+-- +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); +analyze j1; +analyze j2; +analyze j3; +-- Ensure join is marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id, j2.id + Unique Inner: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- Ensure join not marked as unique when not using = +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + Unique Inner: No + Join Filter: (j1.id > j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id, j3.id + Unique Inner: No + Hash Cond: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(10 rows) + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join + Output: j1.id, j2.id + Unique Inner: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join + Output: j1.id, j2.id 
+ Unique Inner: Yes + Hash Cond: (j2.id = j1.id) + -> Seq Scan on public.j2 + Output: j2.id + -> Hash + Output: j1.id + -> Seq Scan on public.j1 + Output: j1.id +(10 rows) + +-- clauseless cross joins can't be proved unique +explain (verbose, costs off) +select * from j1 cross join j2; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + Unique Inner: No + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- ensure natural join is marked as unique +explain (verbose, costs off) +select * from j1 natural join j2; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id + Unique Inner: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- ensure distinct clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------------- + Nested Loop + Output: j1.id, j3.id + Unique Inner: Yes + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> Unique + Output: j3.id + -> Sort + Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(15 rows) + +-- ensure group by clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------------- + Nested Loop + Output: j1.id, j3.id + Unique Inner: Yes + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> Group + Output: j3.id + Group Key: j3.id + -> Sort + Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(16 rows) + +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = 
j2.id; + QUERY PLAN +----------------------------------- + Hash Full Join + Output: j1.id, j2.id + Unique Inner: No + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +drop table j1; +drop table j2; +drop table j3; +-- test a more complex permutations of unique joins +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); +analyze j1; +analyze j2; +analyze j3; +-- ensure no unique joins when not all columns which are part of +-- the unique index are part of the join clause. +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + QUERY PLAN +------------------------------------------ + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Unique Inner: No + Join Filter: (j1.id1 = j2.id1) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 +(8 rows) + +-- ensure unique joins work with multiple columns +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + QUERY PLAN +---------------------------------------------------------- + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Unique Inner: Yes + Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2)) + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 + -> Materialize + Output: j2.id1, j2.id2 + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 +(10 rows) + +drop table j1; +drop table j2; +drop table j3; diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index 00ef421..83f0b7a 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -2012,12 +2012,13 @@ select x from int8_tbl, 
extractq2(int8_tbl) f(x); ------------------------------------------ Nested Loop Output: f.x + Unique Inner: No -> Seq Scan on public.int8_tbl Output: int8_tbl.q1, int8_tbl.q2 -> Function Scan on f Output: f.x Function Call: int8_tbl.q2 -(7 rows) +(8 rows) select x from int8_tbl, extractq2(int8_tbl) f(x); x @@ -2038,11 +2039,12 @@ select x from int8_tbl, extractq2_2(int8_tbl) f(x); ----------------------------------- Nested Loop Output: ((int8_tbl.*).q2) + Unique Inner: No -> Seq Scan on public.int8_tbl Output: int8_tbl.* -> Result Output: (int8_tbl.*).q2 -(6 rows) +(7 rows) select x from int8_tbl, extractq2_2(int8_tbl) f(x); x diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index de64ca7..b993eb2 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -783,6 +783,7 @@ select * from int4_tbl where --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Nested Loop Semi Join Output: int4_tbl.f1 + Unique Inner: No Join Filter: (CASE WHEN (hashed SubPlan 1) THEN int4_tbl.f1 ELSE NULL::integer END = b.ten) -> Seq Scan on public.int4_tbl Output: int4_tbl.f1 @@ -791,7 +792,7 @@ select * from int4_tbl where SubPlan 1 -> Index Only Scan using tenk1_unique1 on public.tenk1 a Output: a.unique1 -(10 rows) +(11 rows) select * from int4_tbl where (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in @@ -811,6 +812,7 @@ select * from int4_tbl o where (f1, f1) in ---------------------------------------------------------------------- Hash Join Output: o.f1 + Unique Inner: No Hash Cond: (o.f1 = "ANY_subquery".f1) -> Seq Scan on public.int4_tbl o Output: o.f1 @@ -827,7 +829,7 @@ select * from int4_tbl o where (f1, f1) in Group Key: i.f1 -> Seq Scan on public.int4_tbl i Output: i.f1 -(18 rows) +(19 rows) select * from int4_tbl o where (f1, f1) in 
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out index c5dfbb5..40c9c26 100644 --- a/src/test/regress/expected/updatable_views.out +++ b/src/test/regress/expected/updatable_views.out @@ -2136,6 +2136,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; Output: t1_5.ctid, t1_5.a, t1_5.b, t1_5.c, t1_5.ctid, t12.ctid, t12.tableoid -> Nested Loop Semi Join Output: t1_5.ctid, t1_5.a, t1_5.b, t1_5.c, t1_5.ctid, t12.ctid, t12.tableoid + Unique Inner: No -> Seq Scan on public.t1 t1_5 Output: t1_5.ctid, t1_5.a, t1_5.b, t1_5.c Filter: ((t1_5.a > 5) AND (t1_5.a = 3) AND leakproof(t1_5.a)) @@ -2153,6 +2154,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; Output: t11.ctid, t11.a, t11.b, t11.c, t11.d, t11.ctid, t12_1.ctid, t12_1.tableoid -> Nested Loop Semi Join Output: t11.ctid, t11.a, t11.b, t11.c, t11.d, t11.ctid, t12_1.ctid, t12_1.tableoid + Unique Inner: No -> Seq Scan on public.t11 Output: t11.ctid, t11.a, t11.b, t11.c, t11.d Filter: ((t11.a > 5) AND (t11.a = 3) AND leakproof(t11.a)) @@ -2170,6 +2172,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; Output: t12_2.ctid, t12_2.a, t12_2.b, t12_2.c, t12_2.e, t12_2.ctid, t12_3.ctid, t12_3.tableoid -> Nested Loop Semi Join Output: t12_2.ctid, t12_2.a, t12_2.b, t12_2.c, t12_2.e, t12_2.ctid, t12_3.ctid, t12_3.tableoid + Unique Inner: No -> Seq Scan on public.t12 t12_2 Output: t12_2.ctid, t12_2.a, t12_2.b, t12_2.c, t12_2.e Filter: ((t12_2.a > 5) AND (t12_2.a = 3) AND leakproof(t12_2.a)) @@ -2187,6 +2190,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; Output: t111_3.ctid, t111_3.a, t111_3.b, t111_3.c, t111_3.d, t111_3.e, t111_3.ctid, t12_4.ctid, t12_4.tableoid -> Nested Loop Semi Join Output: t111_3.ctid, t111_3.a, t111_3.b, t111_3.c, t111_3.d, t111_3.e, t111_3.ctid, t12_4.ctid, t12_4.tableoid + Unique Inner: No -> Seq Scan on 
public.t111 t111_3 Output: t111_3.ctid, t111_3.a, t111_3.b, t111_3.c, t111_3.d, t111_3.e Filter: ((t111_3.a > 5) AND (t111_3.a = 3) AND leakproof(t111_3.a)) @@ -2197,7 +2201,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; -> Seq Scan on public.t111 t111_4 Output: t111_4.ctid, t111_4.tableoid, t111_4.a Filter: (t111_4.a = 3) -(73 rows) +(77 rows) UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; SELECT * FROM v1 WHERE a=100; -- Nothing should have been changed to 100 @@ -2226,6 +2230,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; Output: t1_5.a, t1_5.ctid, t1_5.b, t1_5.c, t1_5.ctid, t12.ctid, t12.tableoid -> Nested Loop Semi Join Output: t1_5.a, t1_5.ctid, t1_5.b, t1_5.c, t1_5.ctid, t12.ctid, t12.tableoid + Unique Inner: No -> Seq Scan on public.t1 t1_5 Output: t1_5.a, t1_5.ctid, t1_5.b, t1_5.c Filter: ((t1_5.a > 5) AND (t1_5.a = 8) AND leakproof(t1_5.a)) @@ -2243,6 +2248,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; Output: t11.a, t11.ctid, t11.b, t11.c, t11.d, t11.ctid, t12_1.ctid, t12_1.tableoid -> Nested Loop Semi Join Output: t11.a, t11.ctid, t11.b, t11.c, t11.d, t11.ctid, t12_1.ctid, t12_1.tableoid + Unique Inner: No -> Seq Scan on public.t11 Output: t11.a, t11.ctid, t11.b, t11.c, t11.d Filter: ((t11.a > 5) AND (t11.a = 8) AND leakproof(t11.a)) @@ -2260,6 +2266,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; Output: t12_2.a, t12_2.ctid, t12_2.b, t12_2.c, t12_2.e, t12_2.ctid, t12_3.ctid, t12_3.tableoid -> Nested Loop Semi Join Output: t12_2.a, t12_2.ctid, t12_2.b, t12_2.c, t12_2.e, t12_2.ctid, t12_3.ctid, t12_3.tableoid + Unique Inner: No -> Seq Scan on public.t12 t12_2 Output: t12_2.a, t12_2.ctid, t12_2.b, t12_2.c, t12_2.e Filter: ((t12_2.a > 5) AND (t12_2.a = 8) AND leakproof(t12_2.a)) @@ -2277,6 +2284,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; Output: t111_3.a, t111_3.ctid, t111_3.b, t111_3.c, t111_3.d, t111_3.e, t111_3.ctid, t12_4.ctid, 
t12_4.tableoid -> Nested Loop Semi Join Output: t111_3.a, t111_3.ctid, t111_3.b, t111_3.c, t111_3.d, t111_3.e, t111_3.ctid, t12_4.ctid, t12_4.tableoid + Unique Inner: No -> Seq Scan on public.t111 t111_3 Output: t111_3.a, t111_3.ctid, t111_3.b, t111_3.c, t111_3.d, t111_3.e Filter: ((t111_3.a > 5) AND (t111_3.a = 8) AND leakproof(t111_3.a)) @@ -2287,7 +2295,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; -> Seq Scan on public.t111 t111_4 Output: t111_4.ctid, t111_4.tableoid, t111_4.a Filter: (t111_4.a = 8) -(73 rows) +(77 rows) UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; NOTICE: snooped value: 8 diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out index 137420d..c64d433 100644 --- a/src/test/regress/expected/with.out +++ b/src/test/regress/expected/with.out @@ -2183,6 +2183,7 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: '42'::bigint, '47'::bigint -> Nested Loop Output: a.ctid, wcte.* + Unique Inner: No Join Filter: (a.aa = wcte.q2) -> Seq Scan on public.a Output: a.ctid, a.aa @@ -2190,6 +2191,7 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: wcte.*, wcte.q2 -> Nested Loop Output: b.ctid, wcte.* + Unique Inner: No Join Filter: (b.aa = wcte.q2) -> Seq Scan on public.b Output: b.ctid, b.aa @@ -2197,6 +2199,7 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: wcte.*, wcte.q2 -> Nested Loop Output: c.ctid, wcte.* + Unique Inner: No Join Filter: (c.aa = wcte.q2) -> Seq Scan on public.c Output: c.ctid, c.aa @@ -2204,12 +2207,13 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: wcte.*, wcte.q2 -> Nested Loop Output: d.ctid, wcte.* + Unique Inner: No Join Filter: (d.aa = wcte.q2) -> Seq Scan on public.d Output: d.ctid, d.aa -> CTE Scan on wcte Output: wcte.*, wcte.q2 -(38 rows) +(42 rows) -- error cases -- data-modifying WITH tries to use its own output diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 3430f91..bb86c10 100644 --- a/src/test/regress/sql/join.sql +++ 
b/src/test/regress/sql/join.sql @@ -1696,3 +1696,93 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1) delete from xx1 using (select * from int4_tbl where f1 = x1) ss; delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss; delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss; + +-- +-- test planner's ability to mark joins as unique. +-- + +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); + +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); + +analyze j1; +analyze j2; +analyze j3; + +-- Ensure join is marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + +-- Ensure join not marked as unique when not using = +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + +-- j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + +-- clauseless cross joins can't be proved unique +explain (verbose, costs off) +select * from j1 cross join j2; + +-- ensure natural join is marked as unique +explain (verbose, costs off) +select * from j1 natural join j2; + +-- ensure distinct clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + +-- ensure group by clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + +drop table j1; +drop table j2; +drop table j3; + +-- test more complex permutations of
unique joins + +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); + +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); + +analyze j1; +analyze j2; +analyze j3; + +-- ensure no unique joins when not all columns which are part of +-- the unique index are part of the join clause. +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + +-- ensure unique joins work with multiple columns +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + +drop table j1; +drop table j2; +drop table j3;