diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index a951c55..55a9505 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1268,10 +1268,27 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->format == EXPLAIN_FORMAT_TEXT) appendStringInfoChar(es->str, '\n'); - /* target list */ if (es->verbose) + { + /* target list */ show_plan_tlist(planstate, ancestors, es); + /* unique join */ + switch (nodeTag(plan)) + { + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + { + const char *value = ((Join *)plan)->unique_inner ? "Yes" : "No"; + ExplainPropertyText("Unique Join", value, es); + break; + } + default: + break; + } + } + /* quals, sort keys, etc */ switch (nodeTag(plan)) { diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 1d78cdf..28fc618 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -306,10 +306,12 @@ ExecHashJoin(HashJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * In a semijoin or if the planner found that no more than + * 1 inner row will match a single outer row, we'll + * consider returning the first match, but after that we're + * done with this outer tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || node->js.unique_inner) node->hj_JoinState = HJ_NEED_NEW_OUTER; if (otherqual == NIL || @@ -469,6 +471,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) ExecInitExpr((Expr *) node->join.plan.qual, (PlanState *) hjstate); hjstate->js.jointype = node->join.jointype; + hjstate->js.unique_inner = node->join.unique_inner; hjstate->js.joinqual = (List *) ExecInitExpr((Expr *) node->join.joinqual, (PlanState *) hjstate); diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 15742c5..5ee0970 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -840,10 +840,12 @@ ExecMergeJoin(MergeJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * In a semijoin or if the planner found that no more than + * 1 inner row will match a single outer row, we'll + * consider returning the first match, but after that we're + * done with this outer tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || node->js.unique_inner) node->mj_JoinState = EXEC_MJ_NEXTOUTER; qualResult = (otherqual == NIL || @@ -1511,6 +1513,7 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) ExecInitExpr((Expr *) node->join.plan.qual, (PlanState *) mergestate); mergestate->js.jointype = node->join.jointype; + mergestate->js.unique_inner = node->join.unique_inner; mergestate->js.joinqual = (List *) ExecInitExpr((Expr *) node->join.joinqual, (PlanState *) mergestate); diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index e66bcda..a31e1d5 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -247,10 +247,11 @@ ExecNestLoop(NestLoopState *node) } /* - * In a semijoin, we'll consider returning the first match, but - * after that we're done with this outer tuple. + * In a semijoin or if the planner found that no more than 1 inner + * row will match a single outer row, we'll consider returning the + * first match, but after that we're done with this outer tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || node->js.unique_inner) node->nl_NeedNewOuter = true; if (otherqual == NIL || ExecQual(otherqual, econtext, false)) @@ -327,6 +328,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) ExecInitExpr((Expr *) node->join.plan.qual, (PlanState *) nlstate); nlstate->js.jointype = node->join.jointype; + nlstate->js.unique_inner = node->join.unique_inner; nlstate->js.joinqual = (List *) ExecInitExpr((Expr *) node->join.joinqual, (PlanState *) nlstate); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 291e6a7..9e0e9b6 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -641,6 +641,7 @@ CopyJoinFields(const Join *from, Join *newnode) CopyPlanFields((const Plan *) from, (Plan *) newnode); COPY_SCALAR_FIELD(jointype); + COPY_SCALAR_FIELD(unique_inner); COPY_NODE_FIELD(joinqual); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1a0d358..8740296 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1847,13 +1847,15 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* cost of source data */ - if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI) + if (path->jointype == JOIN_SEMI || + path->jointype == JOIN_ANTI || + path->unique_inner) { double outer_matched_rows = workspace->outer_matched_rows; Selectivity inner_scan_frac = workspace->inner_scan_frac; /* - * SEMI or ANTI join: executor will stop after first match. + * SEMI, ANTI join and unique joins: executor will stop after first match. */ /* Compute number of tuples processed (not number emitted!) */ @@ -2351,7 +2353,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, * clauses that are to be applied at the join. (This is pessimistic since * not all of the quals may get evaluated at each tuple.) * - * Note: we could adjust for SEMI/ANTI joins skipping some qual + * Note: we could adjust for SEMI/ANTI and unique joins skipping some qual * evaluations here, but it's probably not worth the trouble. */ startup_cost += qp_qual_cost.startup; @@ -2658,13 +2660,16 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ - if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) + if (path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + path->jpath.unique_inner) { double outer_matched_rows; Selectivity inner_scan_frac; /* - * SEMI or ANTI join: executor will stop after first match. + * SEMI, ANTI joins and joins that the planner found to have a unique + * inner side will stop after first match. * * For an outer-rel row that has at least one match, we can expect the * bucket scan to stop after a fraction 1/(match_count+1) of the diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index eb65c97..f3cf3cb 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -49,7 +49,7 @@ static List *generate_join_implied_equalities_broken(PlannerInfo *root, Relids outer_relids, Relids nominal_inner_relids, RelOptInfo *inner_rel); -static Oid select_equality_operator(EquivalenceClass *ec, +Oid select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype); static RestrictInfo *create_join_clause(PlannerInfo *root, EquivalenceClass *ec, Oid opno, @@ -1282,7 +1282,7 @@ generate_join_implied_equalities_broken(PlannerInfo *root, * * Returns InvalidOid if no operator can be found for this datatype combination */ -static Oid +Oid select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype) { ListCell *lc; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 49ab366..d213970 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -2814,6 +2814,8 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, * The caller need only supply equality conditions arising from joins; * this routine automatically adds in any usable baserestrictinfo clauses. * (Note that the passed-in restrictlist will be destructively modified!) + * + * XXX: this function probably belongs in analyzejoin.c */ bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 11d3933..dfc267a 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -32,12 +32,50 @@ #include "utils/lsyscache.h" /* local functions */ +static bool specialjoin_is_unique_join(PlannerInfo *root, + SpecialJoinInfo *sjinfo, RelOptInfo **uniquerel); +static bool eclassjoin_is_unique_join(PlannerInfo *root, List *joinlist, + RangeTblRef *targetrtr); static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); +static bool query_supports_distinctness(Query *query); +static bool query_is_distinct_for(Query *query, List *colnos, List *opids); static Oid distinct_col_search(int colno, List *colnos, List *opids); +/* + * mark_unique_joins + * Analyze join quals to determine if at most 1 row can be produced from + * each relation for all relations that the relation joins to. This + * function uses both unique indexes and DISTINCT/GROUP BY clauses in + * sub-queries in order to determine the uniqueness of the relation. + */ +void +mark_unique_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + + foreach(lc, joinlist) + { + RangeTblRef *rtr = (RangeTblRef *) lfirst(lc); + + if (!IsA(rtr, RangeTblRef)) + continue; + + if (eclassjoin_is_unique_join(root, joinlist, rtr)) + root->simple_rel_array[rtr->rtindex]->is_unique_join = true; + } + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + RelOptInfo *uniquerel; + + if (specialjoin_is_unique_join(root, sjinfo, &uniquerel)) + uniquerel->is_unique_join = true; + } +} /* * remove_useless_joins @@ -91,6 +129,15 @@ restart: root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); /* + * The removal of the join could have changed a condition that + * previously caused us not to be able to mark the join as unique. + * We'd better make another pass over each join to make sure otherwise + * we may fail to remove other joins which had a join condition on the + * join that we just removed. + */ + mark_unique_joins(root, joinlist); + + /* * Restart the scan. This is necessary to ensure we find all * removable joins independently of ordering of the join_info_list * (note that removal of attr_needed bits may make a join appear @@ -151,11 +198,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; - Query *subquery = NULL; Relids joinrelids; - List *clause_list = NIL; - ListCell *l; int attroff; + ListCell *l; /* * Must be a non-delaying left join to a single baserel, else we aren't @@ -170,38 +215,11 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) innerrel = find_base_rel(root, innerrelid); - if (innerrel->reloptkind != RELOPT_BASEREL) + /* only joins that don't duplicate their outer side can be removed */ + if (!innerrel->is_unique_join) return false; - /* - * Before we go to the effort of checking whether any innerrel variables - * are needed above the join, make a quick check to eliminate cases in - * which we will surely be unable to prove uniqueness of the innerrel. - */ - if (innerrel->rtekind == RTE_RELATION) - { - /* - * For a plain-relation innerrel, we only know how to prove uniqueness - * by reference to unique indexes. If there are no indexes then - * there's certainly no unique indexes so there's no point in going - * further. - */ - if (innerrel->indexlist == NIL) - return false; - } - else if (innerrel->rtekind == RTE_SUBQUERY) - { - subquery = root->simple_rte_array[innerrelid]->subquery; - - /* - * If the subquery has no qualities that support distinctness proofs - * then there's no point in going further. - */ - if (!query_supports_distinctness(subquery)) - return false; - } - else - return false; /* unsupported rtekind */ + Assert(innerrel->reloptkind == RELOPT_BASEREL); /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -212,7 +230,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * * Note that this test only detects use of inner-rel attributes in higher * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. + * pushed-down conditions at this join, too, but in this case the join + * would not have been marked as unique. * * As a micro-optimization, it seems better to start with max_attr and * count down rather than starting with min_attr and counting up, on the @@ -253,6 +272,302 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; /* it does reference innerrel */ } + return true; +} + +/* + * eclassjoin_is_unique_join + * Returns True if targetrtr can be proved to never produce more than 1 + * row for each single row of the relations which is joins to. + * Proofs of uniqueness include unique indexes and GROUP BY/DISTINCT + * clauses for sub queries. + */ +static bool +eclassjoin_is_unique_join(PlannerInfo *root, List *joinlist, RangeTblRef *targetrtr) +{ + ListCell *lc; + Query *subquery; + RelOptInfo *targetrel; + + /* if there's only 1 relation then there's no joins to prove unique */ + if (list_length(joinlist) <= 1) + return false; + + targetrel = find_base_rel(root, targetrtr->rtindex); + + /* check if we've already marked this join as unique on a previous call */ + if (targetrel->is_unique_join) + return true; + + /* + * The only relation type that can help us is a base rel. If the relation + * does not have any eclass joins then we'll assume that it's not an + * equality type join, therefore won't be provably unique. + */ + if (targetrel->reloptkind != RELOPT_BASEREL || + targetrel->has_eclass_joins == false) + return false; + + /* + * Before we go to the effort of pulling out the join condition's columns, + * make a quick check to eliminate cases in which we will surely be unable + * to prove uniqueness of the rel. + */ + if (targetrel->rtekind == RTE_RELATION) + { + /* + * For a plain-relation rel, we only know how to prove uniqueness + * by reference to unique indexes. If there are no indexes then + * there's certainly no unique indexes so there's no point in going + * further. + */ + if (targetrel->indexlist == NIL) + return false; + } + else if (targetrel->rtekind == RTE_SUBQUERY) + { + subquery = root->simple_rte_array[targetrtr->rtindex]->subquery; + + /* + * If the subquery has no qualities that support distinctness proofs + * then there's no point in going further. + */ + if (!query_supports_distinctness(subquery)) + return false; + } + else + return false; /* unsupported rtekind */ + + /* + * We now search through each relation in the join list searching for every + * relation which joins to idxrel. Our goal here is to determine if idxrel + * will match at most 1 row for each of the relations it joins to. In order + * to prove this we'll analyze unique indexes for plain relations, or if + * it's a subquery we'll look at GROUP BY and DISTINCT clauses. + */ + + foreach (lc, joinlist) + { + RangeTblRef *rtr = (RangeTblRef *) lfirst(lc); + RelOptInfo *rel; + ListCell *lc2; + List *index_exprs; + List *operator_list; + bool foundjoin = false; + + /* Skip ourself, and anything that's not a RangeTblRef */ + if (rtr == targetrtr || !IsA(rtr, RangeTblRef)) + continue; + + rel = find_base_rel(root, rtr->rtindex); + + /* No point in checking if this rel has no eclass joins */ + if (rel->has_eclass_joins == false) + continue; + + index_exprs = NIL; + operator_list = NIL; + + /* + * We now analyze each eclass and extract a list of columns which the + * targetrel relation must be unique over. There are cases here that + * we don't properly handle, such as if more than 1 Var from either the + * targetrel, or rel exist in the same eclass. In this case it's not + * clear which equality operators we'd demand that the index used. We + * would also need to handle passing alternative lists of columns to + * check the index against. For now this seems more hassle than it's + * worth, and these are likely quite rare in the real world. + */ + foreach(lc2, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc2); + + /* + * Any eclasses that don't have at least 2 members or have + * volatile expressions are useless to us. Skip these. + */ + if (list_length(ec->ec_members) < 2 || + ec->ec_has_volatile) + continue; + + /* Does this eclass have members for both these relations? */ + if (bms_overlap(targetrel->relids, ec->ec_relids) && + bms_overlap(rel->relids, ec->ec_relids)) + { + ListCell *lc3; + EquivalenceMember *relecm = NULL; + EquivalenceMember *targetrelecm = NULL; + + foundjoin = true; + + foreach (lc3, ec->ec_members) + { + EquivalenceMember *ecm = (EquivalenceMember *) lfirst(lc3); + int ecmrel; + + /* + * We're only interested in ec members for a single rel. + * Expressions such as t1.col + t2.col or function calls + * where the parameters come from more than 1 rel are out. + */ + if (!bms_get_singleton_member(ecm->em_relids, &ecmrel)) + continue; + + if (ecmrel == rel->relid) + { + if (relecm != NULL) + return false; + relecm = ecm; + } + + else if (ecmrel == targetrel->relid) + { + if (targetrelecm != NULL) + return false; + targetrelecm = ecm; + } + } + + /* + * We did make a quick check above to ensure that this eclass + * contained members for both relations, but we could have + * skipped some members which contained some expression which + * referenced both relations. So despite our pre-check, the if + * test below could still be false. + */ + if (relecm != NULL && targetrelecm != NULL) + { + Oid opno = select_equality_operator(ec, + relecm->em_datatype, targetrelecm->em_datatype); + + if (!OidIsValid(opno)) + return false; + + index_exprs = lappend(index_exprs, targetrelecm->em_expr); + operator_list = lappend_oid(operator_list, opno); + } + } + } + + /* + * If we found no suitable eclass members then these 2 relations either + * have no join condition between them or we found a join condition but + * there was something about it that's not suitable for testing for + * unique properties, in the former case we're safe to skip to the next + * relation in the latter case we'd better bail. + */ + if (index_exprs == NIL) + { + if (foundjoin) + return false; /* join condition not suitable */ + + continue; + } + + if (targetrel->rtekind == RTE_RELATION) + { + if (!relation_has_unique_index_for(root, targetrel, NIL, index_exprs, operator_list)) + return false; + + /* if it's unique then we'll also need to check other relations */ + } + else /* RTE_SUBQUERY */ + { + List *colnos = NIL; + ListCell *l; + + /* + * Build the argument lists for query_is_distinct_for: a list of + * output column numbers that the query needs to be distinct over. + * Possibly we could do something with expressions in the subquery + * outputs, too, but for now keep it simple. + */ + foreach(l, index_exprs) + { + Var *var = (Var *) lfirst(l); + + if (!IsA(var, Var)) + return false; /* punt if we have a non-Var */ + + colnos = lappend_int(colnos, var->varattno); + } + + if (!query_is_distinct_for(subquery, colnos, operator_list)) + return false; + + /* if it's unique then we'll also need to check other relations */ + } + } + + return true; /* all joins proved to be unique */ +} + +/* + * Returns True if it can be proved that this special join will never produce + * more than 1 row per outer row, otherwise returns false if there is + * insufficient evidence to prove the join is unique. uniquerel is always + * set to the RelOptInfo of the rel that was proved to be unique. + */ +static bool +specialjoin_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo, + RelOptInfo **uniquerel) +{ + int innerrelid; + RelOptInfo *innerrel; + Query *subquery = NULL; + Relids joinrelids; + ListCell *l; + List *clause_list = NIL; + + /* if there's more than 1 relation involved then punt */ + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + return false; + + innerrel = find_base_rel(root, innerrelid); + + /* check if we've already marked this join as unique on a previous call */ + if (innerrel->is_unique_join) + { + *uniquerel = innerrel; + return true; + } + + if (innerrel->reloptkind != RELOPT_BASEREL) + return false; + + /* + * Before we go to the effort of pulling out the join condition's columns, + * make a quick check to eliminate cases in which we will surely be unable + * to prove uniqueness of the innerrel. + */ + if (innerrel->rtekind == RTE_RELATION) + { + /* + * For a plain-relation innerrel, we only know how to prove uniqueness + * by reference to unique indexes. If there are no indexes then + * there's certainly no unique indexes so there's no point in going + * further. + */ + if (innerrel->indexlist == NIL) + return false; + } + else if (innerrel->rtekind == RTE_SUBQUERY) + { + subquery = root->simple_rte_array[innerrelid]->subquery; + + /* + * If the subquery has no qualities that support distinctness proofs + * then there's no point in going further. + */ + if (!query_supports_distinctness(subquery)) + return false; + } + else + return false; /* unsupported rtekind */ + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + /* * Search for mergejoinable clauses that constrain the inner rel against * either the outer rel or a pseudoconstant. If an operator is @@ -274,10 +589,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) !bms_equal(restrictinfo->required_relids, joinrelids)) { /* - * If such a clause actually references the inner rel then join - * removal has to be disallowed. We have to check this despite - * the previous attr_needed checks because of the possibility of - * pushed-down clauses referencing the rel. + * If such a clause actually references the inner rel then we can't + * mark the join as unique. */ if (bms_is_member(innerrelid, restrictinfo->clause_relids)) return false; @@ -310,7 +623,10 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { /* Now examine the indexes to see if we have a matching unique index */ if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL)) + { + *uniquerel = innerrel; return true; + } } else /* innerrel->rtekind == RTE_SUBQUERY */ { @@ -358,7 +674,10 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) } if (query_is_distinct_for(subquery, colnos, opids)) + { + *uniquerel = innerrel; return true; + } } /* @@ -368,7 +687,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; } - /* * Remove the target relid from the planner's data structures, having * determined that there is no need to include it in the query. @@ -576,14 +894,15 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) * query_is_distinct_for()'s argument lists if the call could not possibly * succeed. */ -bool +static bool query_supports_distinctness(Query *query) { if (query->distinctClause != NIL || query->groupClause != NIL || query->hasAggs || query->havingQual || - query->setOperations) + query->setOperations || + query->jointree->fromlist == NIL) return true; return false; @@ -607,7 +926,7 @@ query_supports_distinctness(Query *query) * should give trustworthy answers for all operators that we might need * to deal with here.) */ -bool +static bool query_is_distinct_for(Query *query, List *colnos, List *opids) { ListCell *l; @@ -676,6 +995,10 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) */ if (query->hasAggs || query->havingQual) return true; + + /* tables with an empty from clause should return at most 1 row */ + if (query->jointree->fromlist == NIL) + return true; } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index cb69c03..94bf23f 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -132,12 +132,12 @@ static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, List *joinclauses, List *otherclauses, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinType jointype, bool unique_inner); static HashJoin *make_hashjoin(List *tlist, List *joinclauses, List *otherclauses, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinType jointype, bool unique_inner); static Hash *make_hash(Plan *lefttree, Oid skewTable, AttrNumber skewColumn, @@ -152,7 +152,7 @@ static MergeJoin *make_mergejoin(List *tlist, int *mergestrategies, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinType jointype, bool unique_inner); static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, Oid *collations, bool *nullsFirst, @@ -2192,7 +2192,8 @@ create_nestloop_plan(PlannerInfo *root, nestParams, outer_plan, inner_plan, - best_path->jointype); + best_path->jointype, + best_path->unique_inner); copy_path_costsize(&join_plan->join.plan, &best_path->path); @@ -2486,7 +2487,8 @@ create_mergejoin_plan(PlannerInfo *root, mergenullsfirst, outer_plan, inner_plan, - best_path->jpath.jointype); + best_path->jpath.jointype, + best_path->jpath.unique_inner); /* Costs of sort and material steps are included in path cost already */ copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); @@ -2612,7 +2614,8 @@ create_hashjoin_plan(PlannerInfo *root, hashclauses, outer_plan, (Plan *) hash_plan, - best_path->jpath.jointype); + best_path->jpath.jointype, + best_path->jpath.unique_inner); copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); @@ -3717,7 +3720,8 @@ make_nestloop(List *tlist, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinType jointype, + bool unique_inner) { NestLoop *node = makeNode(NestLoop); Plan *plan = &node->join.plan; @@ -3728,6 +3732,7 @@ make_nestloop(List *tlist, plan->lefttree = lefttree; plan->righttree = righttree; node->join.jointype = jointype; + node->join.unique_inner = unique_inner; node->join.joinqual = joinclauses; node->nestParams = nestParams; @@ -3741,7 +3746,8 @@ make_hashjoin(List *tlist, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinType jointype, + bool unique_inner) { HashJoin *node = makeNode(HashJoin); Plan *plan = &node->join.plan; @@ -3753,6 +3759,7 @@ make_hashjoin(List *tlist, plan->righttree = righttree; node->hashclauses = hashclauses; node->join.jointype = jointype; + node->join.unique_inner = unique_inner; node->join.joinqual = joinclauses; return node; @@ -3801,7 +3808,8 @@ make_mergejoin(List *tlist, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinType jointype, + bool unique_inner) { MergeJoin *node = makeNode(MergeJoin); Plan *plan = &node->join.plan; @@ -3817,6 +3825,7 @@ make_mergejoin(List *tlist, node->mergeStrategies = mergestrategies; node->mergeNullsFirst = mergenullsfirst; node->join.jointype = jointype; + node->join.unique_inner = unique_inner; node->join.joinqual = joinclauses; return node; diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 848df97..9fcd047 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -175,6 +175,12 @@ query_planner(PlannerInfo *root, List *tlist, fix_placeholder_input_needed_levels(root); /* + * Analyze all joins and mark which ones can produce at most 1 row + * for each outer row. + */ + mark_unique_joins(root, joinlist); + + /* * Remove any useless outer joins. Ideally this would be done during * jointree preprocessing, but the necessary information isn't available * until we've built baserel data structures and classified qual clauses. diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index faca30b..f87e25f 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1128,15 +1128,10 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->uniq_exprs = sjinfo->semi_rhs_exprs; /* - * If the input is a relation and it has a unique index that proves the - * semi_rhs_exprs are unique, then we don't need to do anything. Note - * that relation_has_unique_index_for automatically considers restriction - * clauses for the rel, as well. + * If the input is a relation or subquery was discovered to have unique + * properties then the path is unique already. */ - if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree && - relation_has_unique_index_for(root, rel, NIL, - sjinfo->semi_rhs_exprs, - sjinfo->semi_operators)) + if (rel->is_unique_join) { pathnode->umethod = UNIQUE_PATH_NOOP; pathnode->path.rows = rel->rows; @@ -1151,46 +1146,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, return pathnode; } - /* - * If the input is a subquery whose output must be unique already, then we - * don't need to do anything. The test for uniqueness has to consider - * exactly which columns we are extracting; for example "SELECT DISTINCT - * x,y" doesn't guarantee that x alone is distinct. So we cannot check for - * this optimization unless semi_rhs_exprs consists only of simple Vars - * referencing subquery outputs. (Possibly we could do something with - * expressions in the subquery outputs, too, but for now keep it simple.) - */ - if (rel->rtekind == RTE_SUBQUERY) - { - RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); - - if (query_supports_distinctness(rte->subquery)) - { - List *sub_tlist_colnos; - - sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs, - rel->relid); - - if (sub_tlist_colnos && - query_is_distinct_for(rte->subquery, - sub_tlist_colnos, - sjinfo->semi_operators)) - { - pathnode->umethod = UNIQUE_PATH_NOOP; - pathnode->path.rows = rel->rows; - pathnode->path.startup_cost = subpath->startup_cost; - pathnode->path.total_cost = subpath->total_cost; - pathnode->path.pathkeys = subpath->pathkeys; - - rel->cheapest_unique_path = (Path *) pathnode; - - MemoryContextSwitchTo(oldcontext); - - return pathnode; - } - } - } - /* Estimate number of output rows */ pathnode->path.rows = estimate_num_groups(root, sjinfo->semi_rhs_exprs, @@ -1579,6 +1534,7 @@ create_nestloop_path(PlannerInfo *root, &restrict_clauses); pathnode->path.pathkeys = pathkeys; pathnode->jointype = jointype; + pathnode->unique_inner = inner_path->parent->is_unique_join; pathnode->outerjoinpath = outer_path; pathnode->innerjoinpath = inner_path; pathnode->joinrestrictinfo = restrict_clauses; @@ -1636,6 +1592,7 @@ create_mergejoin_path(PlannerInfo *root, &restrict_clauses); pathnode->jpath.path.pathkeys = pathkeys; pathnode->jpath.jointype = jointype; + pathnode->jpath.unique_inner = inner_path->parent->is_unique_join; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; @@ -1704,6 +1661,7 @@ create_hashjoin_path(PlannerInfo *root, */ pathnode->jpath.path.pathkeys = NIL; pathnode->jpath.jointype = jointype; + pathnode->jpath.unique_inner = inner_path->parent->is_unique_join; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 8cfbea0..c20b62b 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -128,6 +128,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->baserestrictcost.per_tuple = 0; rel->joininfo = NIL; rel->has_eclass_joins = false; + rel->is_unique_join = false; /* Check type of rtable entry */ switch (rte->rtekind) @@ -390,6 +391,7 @@ build_join_rel(PlannerInfo *root, joinrel->baserestrictcost.per_tuple = 0; joinrel->joininfo = NIL; joinrel->has_eclass_joins = false; + joinrel->is_unique_join = false; /* * Create a new tlist containing just the vars that need to be output from @@ -701,6 +703,7 @@ build_empty_join_rel(PlannerInfo *root) joinrel->rows = 1; /* we produce one row for such cases */ joinrel->width = 0; /* it contains no Vars */ joinrel->rtekind = RTE_JOIN; + joinrel->is_unique_join = false; root->join_rel_list = lappend(root->join_rel_list, joinrel); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 59b17f3..805653d 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1561,6 +1561,7 @@ typedef struct JoinState { PlanState ps; JoinType jointype; + bool unique_inner; /* Inner side will match 0 or 1 outer rows */ List *joinqual; /* JOIN quals (in addition to ps.qual) */ } JoinState; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index f6683f0..0b40f41 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -525,9 +525,12 @@ typedef struct CustomScan /* ---------------- * Join node * - * jointype: rule for joining tuples from left and right subtrees - * joinqual: qual conditions that came from JOIN/ON or JOIN/USING - * (plan.qual contains conditions that came from WHERE) + * jointype: rule for joining tuples from left and right subtrees + * unique_inner: true if the planner discovered that each inner tuple + * can only match at most 1 outer tuple. Proofs include + * UNIQUE indexes and GROUP BY/DISTINCT clauses etc. + * joinqual: qual conditions that came from JOIN/ON or JOIN/USING + * (plan.qual contains conditions that came from WHERE) * * When jointype is INNER, joinqual and plan.qual are semantically * interchangeable. For OUTER jointypes, the two are *not* interchangeable; @@ -542,6 +545,7 @@ typedef struct Join { Plan plan; JoinType jointype; + bool unique_inner; /* Inner side will match 0 or 1 outer rows */ List *joinqual; /* JOIN quals (in addition to plan.qual) */ } Join; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 334cf51..1302a88 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -470,6 +470,8 @@ typedef struct RelOptInfo List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ bool has_eclass_joins; /* T means joininfo is incomplete */ + bool is_unique_join; /* True if this rel won't contain more + * than 1 row for the join condition */ } RelOptInfo; /* @@ -1028,6 +1030,8 @@ typedef struct JoinPath JoinType jointype; + bool unique_inner; /* inner side of join is unique */ + Path *outerjoinpath; /* path for the outer side of the join */ Path *innerjoinpath; /* path for the inner side of the join */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 6cad92e..74f3a1f 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -119,6 +119,8 @@ extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, RelOptInfo *inner_rel); +extern Oid select_equality_operator(EquivalenceClass *ec, Oid lefttype, + Oid righttype); extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2); extern void add_child_rel_equivalences(PlannerInfo *root, AppendRelInfo *appinfo, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index fa72918..54edb9f 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -122,9 +122,8 @@ extern RestrictInfo *build_implied_join_equality(Oid opno, /* * prototypes for plan/analyzejoins.c */ +extern void mark_unique_joins(PlannerInfo *root, List *joinlist); extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); -extern bool query_supports_distinctness(Query *query); -extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); /* * prototypes for plan/setrefs.c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 57fc910..cb3db4a 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3117,6 +3117,7 @@ using (join_key); -------------------------------------------------------------------------- Nested Loop Left Join Output: "*VALUES*".column1, i1.f1, (666) + Unique Join: No Join Filter: ("*VALUES*".column1 = i1.f1) -> Values Scan on "*VALUES*" Output: "*VALUES*".column1 @@ -3124,12 +3125,13 @@ using (join_key); Output: i1.f1, (666) -> Nested Loop Left Join Output: i1.f1, 666 + Unique Join: No -> Seq Scan on public.int4_tbl i1 Output: i1.f1 -> Index Only Scan using tenk1_unique2 on public.tenk1 i2 Output: i2.unique2 Index Cond: (i2.unique2 = i1.f1) -(14 rows) +(16 rows) select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from (values (0),(1)) foo1(join_key) @@ -3216,7 +3218,7 @@ select id from a where id in ( ); QUERY PLAN ---------------------------- - Hash Semi Join + Hash Join Hash Cond: (a.id = b.id) -> Seq Scan on a -> Hash @@ -3413,10 +3415,10 @@ SELECT * FROM ON true; x | q1 | q2 | y ---+------------------+-------------------+------------------ - 1 | 123 | 456 | 123 - 1 | 123 | 4567890123456789 | 123 1 | 4567890123456789 | 123 | 42 1 | 4567890123456789 | 4567890123456789 | 4567890123456789 + 1 | 123 | 4567890123456789 | 123 + 1 | 123 | 456 | 123 1 | 4567890123456789 | -4567890123456789 | 4567890123456789 (5 rows) @@ -3984,12 +3986,13 @@ select * from ------------------------------------------ Nested Loop Left Join Output: a.q1, a.q2, b.q1, b.q2, (a.q2) + Unique Join: No -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2, a.q2 Filter: (a.q2 = b.q1) -(7 rows) +(8 rows) select * from int8_tbl a left join @@ -4016,12 +4019,13 @@ select * from ---------------------------------------------------------------- Nested Loop Left Join Output: a.q1, a.q2, b.q1, b.q2, (COALESCE(a.q2, 42::bigint)) + Unique Join: No -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2, COALESCE(a.q2, 42::bigint) Filter: (a.q2 = b.q1) -(7 rows) +(8 rows) select * from int8_tbl a left join @@ -4049,6 +4053,7 @@ select * from int4_tbl i left join ------------------------------------------- Hash Left Join Output: i.f1, j.f1 + Unique Join: No Hash Cond: (i.f1 = j.f1) -> Seq Scan on public.int4_tbl i Output: i.f1 @@ -4056,7 +4061,7 @@ select * from int4_tbl i left join Output: j.f1 -> Seq Scan on public.int2_tbl j Output: j.f1 -(9 rows) +(10 rows) select * from int4_tbl i left join lateral (select * from int2_tbl j where i.f1 = j.f1) k on true; @@ -4076,12 +4081,13 @@ select * from int4_tbl i left join ------------------------------------- Nested Loop Left Join Output: i.f1, (COALESCE(i.*)) + Unique Join: No -> Seq Scan on public.int4_tbl i Output: i.f1, i.* -> Seq Scan on public.int2_tbl j Output: j.f1, COALESCE(i.*) Filter: (i.f1 = j.f1) -(7 rows) +(8 rows) select * from int4_tbl i left join lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true; @@ -4103,10 +4109,12 @@ select * from int4_tbl a, ------------------------------------------------- Nested Loop Output: a.f1, b.f1, c.q1, c.q2 + Unique Join: No -> Seq Scan on public.int4_tbl a Output: a.f1 -> Hash Left Join Output: b.f1, c.q1, c.q2 + Unique Join: No Hash Cond: (b.f1 = c.q1) -> Seq Scan on public.int4_tbl b Output: b.f1 @@ -4115,7 +4123,7 @@ select * from int4_tbl a, -> Seq Scan on public.int8_tbl c Output: c.q1, c.q2 Filter: (a.f1 = c.q2) -(14 rows) +(16 rows) select * from int4_tbl a, lateral ( @@ -4161,10 +4169,12 @@ select * from ------------------------------------------------------------- Nested Loop Left Join Output: a.q1, a.q2, b.q1, c.q1, (LEAST(a.q1, b.q1, c.q1)) + Unique Join: No -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 -> Nested Loop Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1) + Unique Join: No Join Filter: (a.q2 = b.q1) -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2 @@ -4172,7 +4182,7 @@ select * from Output: c.q1 -> Seq Scan on public.int8_tbl c Output: c.q1 -(13 rows) +(15 rows) select * from int8_tbl a left join lateral @@ -4239,13 +4249,17 @@ select * from ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Nested Loop Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, 42::bigint)), d.q1, (COALESCE((COALESCE(b.q2, 42::bigint)), d.q2)), ((COALESCE((COALESCE(b.q2, 42::bigint)), d.q2))) + Unique Join: No -> Hash Right Join Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, 42::bigint)), (COALESCE((COALESCE(b.q2, 42::bigint)), d.q2)) + Unique Join: No Hash Cond: (d.q1 = c.q2) -> Nested Loop Output: a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, 42::bigint)), (COALESCE((COALESCE(b.q2, 42::bigint)), d.q2)) + Unique Join: No -> Hash Left Join Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, 42::bigint)) + Unique Join: No Hash Cond: (a.q2 = b.q1) -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 @@ -4261,7 +4275,7 @@ select * from Output: c.q1, c.q2 -> Result Output: (COALESCE((COALESCE(b.q2, 42::bigint)), d.q2)) -(24 rows) +(28 rows) -- case that breaks the old ph_may_need optimization explain (verbose, costs off) @@ -4279,17 +4293,22 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from --------------------------------------------------------------------------------------------------------- Nested Loop Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, i.f1 + Unique Join: No Join Filter: ((COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) > i.f1) -> Hash Right Join Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) + Unique Join: No Hash Cond: (d.q1 = c.q2) -> Nested Loop Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) + Unique Join: No -> Hash Right Join Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint)) + Unique Join: No Hash Cond: (b.q1 = a.q2) -> Nested Loop Output: b.q1, COALESCE(b.q2, (b2.f1)::bigint) + Unique Join: No Join Filter: (b.q1 < b2.f1) -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2 @@ -4311,7 +4330,7 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from Output: i.f1 -> Seq Scan on public.int4_tbl i Output: i.f1 -(34 rows) +(39 rows) -- check processing of postponed quals (bug #9041) explain (verbose, costs off) @@ -4324,16 +4343,20 @@ select * from ---------------------------------------------- Nested Loop Left Join Output: (1), (2), (3) + Unique Join: Yes Join Filter: (((3) = (1)) AND ((3) = (2))) -> Nested Loop Output: (1), (2) + Unique Join: No -> Result Output: 1 -> Result Output: 2 - -> Result - Output: 3 -(11 rows) + -> Materialize + Output: (3) + -> Result + Output: 3 +(15 rows) -- test some error cases where LATERAL should have been used but wasn't select f1,g from int4_tbl a, (select f1 as g) ss; @@ -4416,3 +4439,285 @@ ERROR: invalid reference to FROM-clause entry for table "xx1" LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss; ^ HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. +-- +-- test planner's ability to mark joins as unique. +-- +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); +analyze j1; +analyze j2; +analyze j3; +-- Ensure join is marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id, j2.id + Unique Join: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- Ensure join not marked as unique when not using = +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + Unique Join: No + Join Filter: (j1.id > j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id, j3.id + Unique Join: No + Hash Cond: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(10 rows) + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join + Output: j1.id, j2.id + Unique Join: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join + Output: j1.id, j2.id + Unique Join: Yes + Hash Cond: (j2.id = j1.id) + -> Seq Scan on public.j2 + Output: j2.id + -> Hash + Output: j1.id + -> Seq Scan on public.j1 + Output: j1.id +(10 rows) + +-- cross joins can't be proved unique +explain (verbose, costs off) +select * from j1 cross join j2; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + Unique Join: No + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- ensure natural join is marked as unique +explain (verbose, costs off) +select * from j1 natural join j2; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id + Unique Join: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- ensure distinct clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------- + Nested Loop + Output: j1.id, j3.id + Unique Join: Yes + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> HashAggregate + Output: j3.id + Group Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(13 rows) + +-- ensure group by clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------- + Nested Loop + Output: j1.id, j3.id + Unique Join: Yes + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> HashAggregate + Output: j3.id + Group Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(13 rows) + +-- a subquery with an empty FROM clause should be marked as unique. +explain (verbose, costs off) +select * from j1 +inner join (select 1 id offset 0) j3 on j1.id = j3.id; + QUERY PLAN +------------------------------ + Nested Loop + Output: j1.id, (1) + Unique Join: Yes + Join Filter: (j1.id = (1)) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: (1) + -> Result + Output: 1 +(10 rows) + +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Full Join + Output: j1.id, j2.id + Unique Join: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +drop table j1; +drop table j2; +drop table j3; +-- test a more complex permutations of unique joins +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); +analyze j1; +analyze j2; +analyze j3; +-- ensure no unique joins when not all columns which are part of +-- the unique index are part of the join clause. +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + QUERY PLAN +------------------------------------------ + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Unique Join: No + Join Filter: (j1.id1 = j2.id1) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 +(8 rows) + +-- ensure unique joins work with multiple columns +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + QUERY PLAN +---------------------------------------------------------- + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Unique Join: Yes + Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2)) + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 + -> Materialize + Output: j2.id1, j2.id2 + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 +(10 rows) + +-- ensure no unique joins when the tables are joined in 3 ways. +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id2 and j1.id2 = j2.id2 +inner join j3 on j1.id1 = j3.id1 and j2.id2 = j3.id2; + QUERY PLAN +---------------------------------------------------------- + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2, j3.id1, j3.id2 + Unique Join: No + Join Filter: (j1.id1 = j3.id1) + -> Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Unique Join: No + Join Filter: (j1.id1 = j2.id2) + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 + Filter: (j1.id1 = j1.id2) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 + -> Seq Scan on public.j3 + Output: j3.id1, j3.id2 + Filter: (j3.id1 = j3.id2) +(16 rows) + +drop table j1; +drop table j2; +drop table j3; diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index 6dabe50..4378e9d 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -2016,12 +2016,13 @@ select x from int8_tbl, extractq2(int8_tbl) f(x); ------------------------------------------ Nested Loop Output: f.x + Unique Join: No -> Seq Scan on public.int8_tbl Output: int8_tbl.q1, int8_tbl.q2 -> Function Scan on f Output: f.x Function Call: int8_tbl.q2 -(7 rows) +(8 rows) select x from int8_tbl, extractq2(int8_tbl) f(x); x @@ -2042,11 +2043,12 @@ select x from int8_tbl, extractq2_2(int8_tbl) f(x); ----------------------------------- Nested Loop Output: ((int8_tbl.*).q2) + Unique Join: No -> Seq Scan on public.int8_tbl Output: int8_tbl.* -> Result Output: (int8_tbl.*).q2 -(6 rows) +(7 rows) select x from int8_tbl, extractq2_2(int8_tbl) f(x); x diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index b14410f..7f43784 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -761,6 +761,7 @@ select * from int4_tbl where --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Nested Loop Semi Join Output: int4_tbl.f1 + Unique Join: No Join Filter: (CASE WHEN (hashed SubPlan 1) THEN int4_tbl.f1 ELSE NULL::integer END = b.ten) -> Seq Scan on public.int4_tbl Output: int4_tbl.f1 @@ -769,7 +770,7 @@ select * from int4_tbl where SubPlan 1 -> Index Only Scan using tenk1_unique1 on public.tenk1 a Output: a.unique1 -(10 rows) +(11 rows) select * from int4_tbl where (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in @@ -789,6 +790,7 @@ select * from int4_tbl o where (f1, f1) in ---------------------------------------------------------------------- Hash Join Output: o.f1 + Unique Join: No Hash Cond: (o.f1 = "ANY_subquery".f1) -> Seq Scan on public.int4_tbl o Output: o.f1 @@ -805,7 +807,7 @@ select * from int4_tbl o where (f1, f1) in Group Key: i.f1 -> Seq Scan on public.int4_tbl i Output: i.f1 -(18 rows) +(19 rows) select * from int4_tbl o where (f1, f1) in (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out index c49e769..b481cda 100644 --- a/src/test/regress/expected/updatable_views.out +++ b/src/test/regress/expected/updatable_views.out @@ -2071,6 +2071,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; Output: t1_5.ctid, t1_5.a, t1_5.b, t1_5.c, t1_5.ctid, t12.ctid, t12.tableoid -> Nested Loop Semi Join Output: t1_5.ctid, t1_5.a, t1_5.b, t1_5.c, t1_5.ctid, t12.ctid, t12.tableoid + Unique Join: No -> Seq Scan on public.t1 t1_5 Output: t1_5.ctid, t1_5.a, t1_5.b, t1_5.c Filter: ((t1_5.a > 5) AND (t1_5.a = 3) AND leakproof(t1_5.a)) @@ -2088,6 +2089,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; Output: t11.ctid, t11.a, t11.b, t11.c, t11.d, t11.ctid, t12_1.ctid, t12_1.tableoid -> Nested Loop Semi Join Output: t11.ctid, t11.a, t11.b, t11.c, t11.d, t11.ctid, t12_1.ctid, t12_1.tableoid + Unique Join: No -> Seq Scan on public.t11 Output: t11.ctid, t11.a, t11.b, t11.c, t11.d Filter: ((t11.a > 5) AND (t11.a = 3) AND leakproof(t11.a)) @@ -2105,6 +2107,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; Output: t12_2.ctid, t12_2.a, t12_2.b, t12_2.c, t12_2.e, t12_2.ctid, t12_3.ctid, t12_3.tableoid -> Nested Loop Semi Join Output: t12_2.ctid, t12_2.a, t12_2.b, t12_2.c, t12_2.e, t12_2.ctid, t12_3.ctid, t12_3.tableoid + Unique Join: No -> Seq Scan on public.t12 t12_2 Output: t12_2.ctid, t12_2.a, t12_2.b, t12_2.c, t12_2.e Filter: ((t12_2.a > 5) AND (t12_2.a = 3) AND leakproof(t12_2.a)) @@ -2122,6 +2125,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; Output: t111_3.ctid, t111_3.a, t111_3.b, t111_3.c, t111_3.d, t111_3.e, t111_3.ctid, t12_4.ctid, t12_4.tableoid -> Nested Loop Semi Join Output: t111_3.ctid, t111_3.a, t111_3.b, t111_3.c, t111_3.d, t111_3.e, t111_3.ctid, t12_4.ctid, t12_4.tableoid + Unique Join: No -> Seq Scan on public.t111 t111_3 Output: t111_3.ctid, t111_3.a, t111_3.b, t111_3.c, t111_3.d, t111_3.e Filter: ((t111_3.a > 5) AND (t111_3.a = 3) AND leakproof(t111_3.a)) @@ -2132,7 +2136,7 @@ UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; -> Seq Scan on public.t111 t111_4 Output: t111_4.ctid, t111_4.tableoid, t111_4.a Filter: (t111_4.a = 3) -(69 rows) +(73 rows) UPDATE v1 SET a=100 WHERE snoop(a) AND leakproof(a) AND a = 3; SELECT * FROM v1 WHERE a=100; -- Nothing should have been changed to 100 @@ -2157,6 +2161,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; Output: t1_5.a, t1_5.ctid, t1_5.b, t1_5.c, t1_5.ctid, t12.ctid, t12.tableoid -> Nested Loop Semi Join Output: t1_5.a, t1_5.ctid, t1_5.b, t1_5.c, t1_5.ctid, t12.ctid, t12.tableoid + Unique Join: No -> Seq Scan on public.t1 t1_5 Output: t1_5.a, t1_5.ctid, t1_5.b, t1_5.c Filter: ((t1_5.a > 5) AND (t1_5.a = 8) AND leakproof(t1_5.a)) @@ -2174,6 +2179,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; Output: t11.a, t11.ctid, t11.b, t11.c, t11.d, t11.ctid, t12_1.ctid, t12_1.tableoid -> Nested Loop Semi Join Output: t11.a, t11.ctid, t11.b, t11.c, t11.d, t11.ctid, t12_1.ctid, t12_1.tableoid + Unique Join: No -> Seq Scan on public.t11 Output: t11.a, t11.ctid, t11.b, t11.c, t11.d Filter: ((t11.a > 5) AND (t11.a = 8) AND leakproof(t11.a)) @@ -2191,6 +2197,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; Output: t12_2.a, t12_2.ctid, t12_2.b, t12_2.c, t12_2.e, t12_2.ctid, t12_3.ctid, t12_3.tableoid -> Nested Loop Semi Join Output: t12_2.a, t12_2.ctid, t12_2.b, t12_2.c, t12_2.e, t12_2.ctid, t12_3.ctid, t12_3.tableoid + Unique Join: No -> Seq Scan on public.t12 t12_2 Output: t12_2.a, t12_2.ctid, t12_2.b, t12_2.c, t12_2.e Filter: ((t12_2.a > 5) AND (t12_2.a = 8) AND leakproof(t12_2.a)) @@ -2208,6 +2215,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; Output: t111_3.a, t111_3.ctid, t111_3.b, t111_3.c, t111_3.d, t111_3.e, t111_3.ctid, t12_4.ctid, t12_4.tableoid -> Nested Loop Semi Join Output: t111_3.a, t111_3.ctid, t111_3.b, t111_3.c, t111_3.d, t111_3.e, t111_3.ctid, t12_4.ctid, t12_4.tableoid + Unique Join: No -> Seq Scan on public.t111 t111_3 Output: t111_3.a, t111_3.ctid, t111_3.b, t111_3.c, t111_3.d, t111_3.e Filter: ((t111_3.a > 5) AND (t111_3.a = 8) AND leakproof(t111_3.a)) @@ -2218,7 +2226,7 @@ UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; -> Seq Scan on public.t111 t111_4 Output: t111_4.ctid, t111_4.tableoid, t111_4.a Filter: (t111_4.a = 8) -(69 rows) +(73 rows) UPDATE v1 SET a=a+1 WHERE snoop(a) AND leakproof(a) AND a = 8; NOTICE: snooped value: 8 diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out index 524e0ef..1eb2da4 100644 --- a/src/test/regress/expected/with.out +++ b/src/test/regress/expected/with.out @@ -2093,6 +2093,7 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: 42::bigint, 47::bigint -> Nested Loop Output: a.ctid, wcte.* + Unique Join: No Join Filter: (a.aa = wcte.q2) -> Seq Scan on public.a Output: a.ctid, a.aa @@ -2100,6 +2101,7 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: wcte.*, wcte.q2 -> Nested Loop Output: b.ctid, wcte.* + Unique Join: No Join Filter: (b.aa = wcte.q2) -> Seq Scan on public.b Output: b.ctid, b.aa @@ -2107,6 +2109,7 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: wcte.*, wcte.q2 -> Nested Loop Output: c.ctid, wcte.* + Unique Join: No Join Filter: (c.aa = wcte.q2) -> Seq Scan on public.c Output: c.ctid, c.aa @@ -2114,12 +2117,13 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: wcte.*, wcte.q2 -> Nested Loop Output: d.ctid, wcte.* + Unique Join: No Join Filter: (d.aa = wcte.q2) -> Seq Scan on public.d Output: d.ctid, d.aa -> CTE Scan on wcte Output: wcte.*, wcte.q2 -(34 rows) +(38 rows) -- error cases -- data-modifying WITH tries to use its own output diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 06a27ea..34f1dc4 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1307,3 +1307,104 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1) delete from xx1 using (select * from int4_tbl where f1 = x1) ss; delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss; delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss; + +-- +-- test planner's ability to mark joins as unique. +-- + +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); + +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); + +analyze j1; +analyze j2; +analyze j3; + +-- Ensure join is marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + +-- Ensure join not marked as unique when not using = +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + +-- j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + +-- cross joins can't be proved unique +explain (verbose, costs off) +select * from j1 cross join j2; + +-- ensure natural join is marked as unique +explain (verbose, costs off) +select * from j1 natural join j2; + +-- ensure distinct clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + +-- ensure group by clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + +-- a subquery with an empty FROM clause should be marked as unique. +explain (verbose, costs off) +select * from j1 +inner join (select 1 id offset 0) j3 on j1.id = j3.id; + +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + +drop table j1; +drop table j2; +drop table j3; + +-- test a more complex permutations of unique joins + +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); + +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); + +analyze j1; +analyze j2; +analyze j3; + +-- ensure no unique joins when not all columns which are part of +-- the unique index are part of the join clause. +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + +-- ensure unique joins work with multiple columns +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + +-- ensure no unique joins when the tables are joined in 3 ways. +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id2 and j1.id2 = j2.id2 +inner join j3 on j1.id1 = j3.id1 and j2.id2 = j3.id2; + +drop table j1; +drop table j2; +drop table j3;