diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index c9b55ea..c91f752 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1329,6 +1329,24 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->verbose) show_plan_tlist(planstate, ancestors, es); + /* unique join */ + if (es->verbose || es->format != EXPLAIN_FORMAT_TEXT) + { + switch (nodeTag(plan)) + { + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + { + const char *value = ((Join *) plan)->inner_unique ? "Yes" : "No"; + ExplainPropertyText("Inner Unique", value, es); + break; + } + default: + break; + } + } + /* quals, sort keys, etc */ switch (nodeTag(plan)) { diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index c50d93f..dd35604 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -288,10 +288,12 @@ ExecHashJoin(HashJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * Skip to the next outer tuple if we to only match one + * inner tuple, or if there's only one possible inner + * matching tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || + node->js.inner_unique) node->hj_JoinState = HJ_NEED_NEW_OUTER; if (otherqual == NIL || @@ -402,6 +404,14 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) hjstate->js.ps.state = estate; /* + * When the planner was able to determine that the inner side of the join + * will at most contain a single tuple for each outer tuple, then we can + * optimize the join by skipping to the next outer tuple after we find the + * first matching inner tuple. + */ + hjstate->js.inner_unique = node->join.inner_unique; + + /* * Miscellaneous initialization * * create expression context for node diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 105e2dc..8f2ab14 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -802,10 +802,11 @@ ExecMergeJoin(MergeJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * Skip to the next outer tuple if we to only match one + * inner tuple, or if there's only one possible inner + * matching tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || node->js.inner_unique) node->mj_JoinState = EXEC_MJ_NEXTOUTER; qualResult = (otherqual == NIL || @@ -826,7 +827,27 @@ ExecMergeJoin(MergeJoinState *node) InstrCountFiltered2(node, 1); } else + { + /* + * If the inner side of the join is unique, then it's not + * possible that we'll find another row matching this + * outer row, so we can now move along to the next outer + * row. + * The correctness of this relies on none of the proof + * quals used to prove the uniqueness being in the + * joinqual. Since analyzejoins.c only uses quals with + * mergeopfamilies, then this is safe. If that were to + * ever change, then this would need to be revisitied. + * + * We're unable to perform the same optimization for + * SEMI_JOINs, as there might be another row matching the + * join condition. + */ + if (node->js.inner_unique) + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + InstrCountFiltered1(node, 1); + } break; /* @@ -1048,7 +1069,11 @@ ExecMergeJoin(MergeJoinState *node) /* * the merge clause matched so now we restore the inner * scan position to the first mark, and go join that tuple - * (and any following ones) to the new outer. + * (and any following ones) to the new outer. In the case + * where we've only joined to the first inner tuple, we've + * not advanced the inner side any away from where the + * restore point would normally be, so have no need to + * perform the restore. * * NOTE: we do not need to worry about the MatchedInner * state for the rescanned inner tuples. We know all of @@ -1062,17 +1087,21 @@ ExecMergeJoin(MergeJoinState *node) * forcing the merge clause to never match, so we never * get here. */ - ExecRestrPos(innerPlan); + if (node->js.jointype != JOIN_SEMI && + !node->js.inner_unique) + { + ExecRestrPos(innerPlan); - /* - * ExecRestrPos probably should give us back a new Slot, - * but since it doesn't, use the marked slot. (The - * previously returned mj_InnerTupleSlot cannot be assumed - * to hold the required tuple.) - */ - node->mj_InnerTupleSlot = innerTupleSlot; - /* we need not do MJEvalInnerValues again */ + /* + * ExecRestrPos probably should give us back a new + * Slot, but since it doesn't, use the marked slot. + * (The previously returned mj_InnerTupleSlot cannot + * be assumed to hold the required tuple.) + */ + node->mj_InnerTupleSlot = innerTupleSlot; + } + /* we need not do MJEvalInnerValues again */ node->mj_JoinState = EXEC_MJ_JOINTUPLES; } else @@ -1172,9 +1201,18 @@ ExecMergeJoin(MergeJoinState *node) if (compareResult == 0) { - ExecMarkPos(innerPlan); + /* + * When we're only going join to a single inner tuple then + * we'll not advance the inner side further on the current + * outer tuple, so have no need to perform a Mark. + */ + if (node->js.jointype != JOIN_SEMI && + !node->js.inner_unique) + { + ExecMarkPos(innerPlan); - MarkInnerTuple(node->mj_InnerTupleSlot, node); + MarkInnerTuple(node->mj_InnerTupleSlot, node); + } node->mj_JoinState = EXEC_MJ_JOINTUPLES; } @@ -1438,6 +1476,14 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) mergestate->js.ps.state = estate; /* + * When the planner was able to determine that the inner side of the join + * will at most contain a single tuple for each outer tuple, then we can + * optimize the join by skipping to the next outer tuple after we find the + * first matching inner tuple. + */ + mergestate->js.inner_unique = node->join.inner_unique; + + /* * Miscellaneous initialization * * create expression context for node diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index cac7ba1..dd1c4ed 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -219,10 +219,10 @@ ExecNestLoop(NestLoopState *node) } /* - * In a semijoin, we'll consider returning the first match, but - * after that we're done with this outer tuple. + * Skip to the next outer tuple if we to only match one inner + * tuple, or if there's only one possible inner matching tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || node->js.inner_unique) node->nl_NeedNewOuter = true; if (otherqual == NIL || ExecQual(otherqual, econtext, false)) @@ -273,6 +273,14 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) nlstate->js.ps.state = estate; /* + * When the planner was able to determine that the inner side of the join + * will at most contain a single tuple for each outer tuple, then we can + * optimize the join by skipping to the next outer tuple after we find the + * first matching inner tuple. + */ + nlstate->js.inner_unique = node->join.inner_unique; + + /* * Miscellaneous initialization * * create expression context for node diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e78f3a8..849e295 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2032,15 +2032,13 @@ cost_group(Path *path, PlannerInfo *root, * 'jointype' is the type of join to be performed * 'outer_path' is the outer input to the join * 'inner_path' is the inner input to the join - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Cost startup_cost = 0; Cost run_cost = 0; @@ -2071,10 +2069,13 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, inner_run_cost = inner_path->total_cost - inner_path->startup_cost; inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost; - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + if (extra->inner_unique || + jointype == JOIN_SEMI || + jointype == JOIN_ANTI) { /* - * SEMI or ANTI join: executor will stop after first match. + * With inner unique, SEMI or ANTI join, the executor will stop after + * the first match. * * Getting decent estimates requires inspection of the join quals, * which we choose to postpone to final_cost_nestloop. @@ -2107,17 +2108,16 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, * * 'path' is already filled in except for the rows and cost fields * 'workspace' is the result from initial_cost_nestloop - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if path->jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void final_cost_nestloop(PlannerInfo *root, NestPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Path *outer_path = path->outerjoinpath; Path *inner_path = path->innerjoinpath; + SemiAntiJoinFactors *semifactors = &extra->semifactors; double outer_path_rows = outer_path->rows; double inner_path_rows = inner_path->rows; Cost startup_cost = workspace->startup_cost; @@ -2152,10 +2152,13 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* cost of inner-relation source data (we already dealt with outer rel) */ - if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI) + if (extra->inner_unique || + path->jointype == JOIN_SEMI || + path->jointype == JOIN_ANTI) { /* - * SEMI or ANTI join: executor will stop after first match. + * With inner unique, SEMI or ANTI join, the executor will stop after + * the first match. */ Cost inner_run_cost = workspace->inner_run_cost; Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost; @@ -2296,7 +2299,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, * 'inner_path' is the inner input to the join * 'outersortkeys' is the list of sort keys for the outer path * 'innersortkeys' is the list of sort keys for the inner path - * 'sjinfo' is extra info about the join for selectivity estimation + * 'extra' contains miscellaneous information about the join * * Note: outersortkeys and innersortkeys should be NIL if no explicit * sort is needed because the respective source path is already ordered. @@ -2307,7 +2310,7 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, List *mergeclauses, Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys, - SpecialJoinInfo *sjinfo) + JoinPathExtraData *extra) { Cost startup_cost = 0; Cost run_cost = 0; @@ -2384,7 +2387,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, innerstartsel = cache->leftstartsel; innerendsel = cache->leftendsel; } - if (jointype == JOIN_LEFT || + if (extra->inner_unique || + jointype == JOIN_LEFT || jointype == JOIN_ANTI) { outerstartsel = 0.0; @@ -2522,12 +2526,12 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, * 'path' is already filled in except for the rows and cost fields and * materialize_inner * 'workspace' is the result from initial_cost_mergejoin - * 'sjinfo' is extra info about the join for selectivity estimation + * 'extra' contains miscellaneous information about the join */ void final_cost_mergejoin(PlannerInfo *root, MergePath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo) + JoinPathExtraData *extra) { Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; @@ -2611,9 +2615,14 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, * computations? * * The whole issue is moot if we are working from a unique-ified outer - * input. + * input. We'll also never rescan during SEMI/ANTI/inner_unique as there's + * no need to advance the inner side to find subsequent matches, therefore + * no need to mark/restore. */ - if (IsA(outer_path, UniquePath)) + if (extra->inner_unique || + path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + IsA(outer_path, UniquePath)) rescannedtuples = 0; else { @@ -2662,7 +2671,10 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, /* * Even if materializing doesn't look cheaper, we *must* do it if the * inner path is to be used directly (without sorting) and it doesn't - * support mark/restore. + * support mark/restore. However, if the inner side of the join is + * guaranteed not to duplicate outer rows, as in the case with SEMI/ANTI + * JOINs and joins which have an inner unique side, then we'll never need + * to perform mark/restore. * * Since the inner side must be ordered, and only Sorts and IndexScans can * create order to begin with, and they both support mark/restore, you @@ -2675,7 +2687,9 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, * materialization is required for correctness in this case, and turning * it off does not entitle us to deliver an invalid plan. */ - else if (innersortkeys == NIL && + else if (innersortkeys == NIL && !extra->inner_unique && + path->jpath.jointype != JOIN_SEMI && + path->jpath.jointype != JOIN_ANTI && !ExecSupportsMarkRestore(inner_path)) path->materialize_inner = true; @@ -2817,16 +2831,14 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey) * 'hashclauses' is the list of joinclauses to be used as hash clauses * 'outer_path' is the outer input to the join * 'inner_path' is the inner input to the join - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *hashclauses, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Cost startup_cost = 0; Cost run_cost = 0; @@ -2911,17 +2923,16 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * 'path' is already filled in except for the rows and cost fields and * num_batches * 'workspace' is the result from initial_cost_hashjoin - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if path->jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void final_cost_hashjoin(PlannerInfo *root, HashPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; + SemiAntiJoinFactors *semifactors = &extra->semifactors; double outer_path_rows = outer_path->rows; double inner_path_rows = inner_path->rows; List *hashclauses = path->path_hashclauses; @@ -3037,13 +3048,16 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ - if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) + if (extra->inner_unique || + path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI) { double outer_matched_rows; Selectivity inner_scan_frac; /* - * SEMI or ANTI join: executor will stop after first match. + * With inner unique, SEMI or ANTI join, the executor will stop after + * the first match. * * For an outer-rel row that has at least one match, we can expect the * bucket scan to stop after a fraction 1/(match_count+1) of the diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index de7044d..ae89e89 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -21,6 +21,7 @@ #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/planmain.h" /* Hook for plugins to get control in add_paths_to_joinrel() */ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; @@ -121,6 +122,13 @@ add_paths_to_joinrel(PlannerInfo *root, extra.param_source_rels = NULL; /* + * Check for proofs which prove that this inner relation cannot cause + * duplicate of outer side tuples. + */ + extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel, + jointype, sjinfo, restrictlist); + + /* * Find potential mergejoin clauses. We can skip this if we are not * interested in doing a mergejoin. However, mergejoin may be our only * way of implementing a full outer join, so override enable_mergejoin if @@ -336,8 +344,7 @@ try_nestloop_path(PlannerInfo *root, * methodology worthwhile. */ initial_cost_nestloop(root, &workspace, jointype, - outer_path, inner_path, - extra->sjinfo, &extra->semifactors); + outer_path, inner_path, extra); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, @@ -348,11 +355,9 @@ try_nestloop_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, - &extra->semifactors, + extra, outer_path, inner_path, - extra->restrictlist, pathkeys, required_outer)); } @@ -399,8 +404,7 @@ try_partial_nestloop_path(PlannerInfo *root, * cost. Bail out right away if it looks terrible. */ initial_cost_nestloop(root, &workspace, jointype, - outer_path, inner_path, - extra->sjinfo, &extra->semifactors); + outer_path, inner_path, extra); if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) return; @@ -410,11 +414,9 @@ try_partial_nestloop_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, - &extra->semifactors, + extra, outer_path, inner_path, - extra->restrictlist, pathkeys, NULL)); } @@ -486,7 +488,7 @@ try_mergejoin_path(PlannerInfo *root, initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, outer_path, inner_path, outersortkeys, innersortkeys, - extra->sjinfo); + extra); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, @@ -497,10 +499,9 @@ try_mergejoin_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, + extra, outer_path, inner_path, - extra->restrictlist, pathkeys, required_outer, mergeclauses, @@ -562,7 +563,7 @@ try_partial_mergejoin_path(PlannerInfo *root, initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, outer_path, inner_path, outersortkeys, innersortkeys, - extra->sjinfo); + extra); if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) return; @@ -573,10 +574,9 @@ try_partial_mergejoin_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, + extra, outer_path, inner_path, - extra->restrictlist, pathkeys, NULL, mergeclauses, @@ -620,8 +620,7 @@ try_hashjoin_path(PlannerInfo *root, * never have any output pathkeys, per comments in create_hashjoin_path. */ initial_cost_hashjoin(root, &workspace, jointype, hashclauses, - outer_path, inner_path, - extra->sjinfo, &extra->semifactors); + outer_path, inner_path, extra); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, @@ -632,11 +631,9 @@ try_hashjoin_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, - &extra->semifactors, + extra, outer_path, inner_path, - extra->restrictlist, required_outer, hashclauses)); } @@ -683,8 +680,7 @@ try_partial_hashjoin_path(PlannerInfo *root, * cost. Bail out right away if it looks terrible. */ initial_cost_hashjoin(root, &workspace, jointype, hashclauses, - outer_path, inner_path, - extra->sjinfo, &extra->semifactors); + outer_path, inner_path, extra); if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL)) return; @@ -694,11 +690,9 @@ try_partial_hashjoin_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, - &extra->semifactors, + extra, outer_path, inner_path, - extra->restrictlist, NULL, hashclauses)); } diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index ac63f75..b48167e 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -41,6 +41,10 @@ static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list); static Oid distinct_col_search(int colno, List *colnos, List *opids); +static bool is_innerrel_unique_for(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist); /* @@ -845,3 +849,165 @@ distinct_col_search(int colno, List *colnos, List *opids) } return InvalidOid; } + + +/* + * is_innerrel_unique_for + * Determine if this innerrel can, at most, return a single tuple for each + * outer tuple, based on the 'restrictlist'. + */ +static bool +is_innerrel_unique_for(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist) +{ + List *clause_list = NIL; + ListCell *lc; + + /* Fall out quickly if we certainly can't prove anything */ + if (restrictlist == NIL || + !rel_supports_distinctness(root, innerrel)) + return false; + + /* + * Search for mergejoinable clauses that constrain the inner rel against + * the outer rel. If an operator is mergejoinable then it behaves like + * equality for some btree opclass, so it's what we want. The + * mergejoinability test also eliminates clauses containing volatile + * functions, which we couldn't depend on. + */ + foreach(lc, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + /* + * XXX Beware of making changes here. Merge Join's unique inner + * optimization relies on all quals which make the join unique being + * part of the initial join condition. + */ + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. + */ + if (!clause_sides_match_join(restrictinfo, outerrel->relids, + innerrel->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Let rel_is_distinct_for() do the hard work */ + return rel_is_distinct_for(root, innerrel, clause_list); +} + +/* + * innerrel_is_unique + * Check for proofs which prove that 'innerrel' is unique based on the join + * condition in 'restrictlist'. + */ +bool +innerrel_is_unique(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + SpecialJoinInfo *sjinfo, + List *restrictlist) +{ + MemoryContext old_context; + ListCell *lc; + int innerrelid; + bool unique = false; + + /* can't prove uniqueness for joins with an empty restrictlist */ + if (restrictlist == NIL) + return false; + + /* + * Currently we're unable to prove uniqueness when there's more than one + * relation on the inner side of the join. + */ + if (!bms_get_singleton_member(innerrel->relids, &innerrelid)) + return false; + + /* + * First let's query the unique and non-unique caches to see if we've + * managed to prove that innerrel is unique for some subset of this + * outerrel. We don't need an exact match, as if we have any extra + * outerrels than were previously cached, then they can't make the + * innerrel any less unique. + */ + foreach(lc, root->unique_rels[innerrelid]) + { + Bitmapset *unique_rels = (Bitmapset *) lfirst(lc); + + if (bms_is_subset(unique_rels, outerrel->relids)) + return true; /* Success! */ + } + + /* + * We may have previously determined that this outerrel, or some superset + * thereof, cannot prove this innerrel to be unique. + */ + foreach(lc, root->non_unique_rels[innerrelid]) + { + Bitmapset *unique_rels = (Bitmapset *) lfirst(lc); + + if (bms_is_subset(outerrel->relids, unique_rels)) + return false; + } + + /* No cached information, so try to make the proof. */ + if (is_innerrel_unique_for(root, outerrel, innerrel, restrictlist)) + { + unique = true; /* Success! */ + + /* + * Cache the positive result for future probes, being sure to keep it + * in the planner_cxt even if we are working in GEQO. + * + * Note: one might consider trying to isolate the minimal subset of + * the outerrels that proved the innerrel unique. But it's not worth + * the trouble, because the planner builds up joinrels incrementally + * and so we'll see the minimally sufficient outerrels before any + * supersets of them anyway. + */ + old_context = MemoryContextSwitchTo(root->planner_cxt); + root->unique_rels[innerrelid] = + lappend(root->unique_rels[innerrelid], + bms_copy(outerrel->relids)); + MemoryContextSwitchTo(old_context); + } + else + { + /* + * None of the join conditions for outerrel proved innerrel unique, so + * we can safely reject this outerrel or any subset of it in future + * checks. + * + * However, in normal planning mode, caching this knowledge is totally + * pointless; it won't be queried again, because we build up joinrels + * from smaller to larger. It is useful in GEQO mode, where the + * knowledge can be carried across successive planning attempts; and + * it's likely to be useful when using join-search plugins, too. + * Hence cache only when join_search_private is non-NULL. (Yeah, + * that's a hack, but it seems reasonable.) + */ + if (root->join_search_private) + { + old_context = MemoryContextSwitchTo(root->planner_cxt); + root->non_unique_rels[innerrelid] = + lappend(root->non_unique_rels[innerrelid], + bms_copy(outerrel->relids)); + MemoryContextSwitchTo(old_context); + } + } + return unique; +} diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index d002e6d..fd9993e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -211,12 +211,12 @@ static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, List *joinclauses, List *otherclauses, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinPath *jpath); static HashJoin *make_hashjoin(List *tlist, List *joinclauses, List *otherclauses, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinPath *jpath); static Hash *make_hash(Plan *lefttree, Oid skewTable, AttrNumber skewColumn, @@ -231,7 +231,7 @@ static MergeJoin *make_mergejoin(List *tlist, int *mergestrategies, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinPath *jpath); static Sort *make_sort(Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, Oid *collations, bool *nullsFirst); @@ -3676,7 +3676,7 @@ create_nestloop_plan(PlannerInfo *root, nestParams, outer_plan, inner_plan, - best_path->jointype); + best_path); copy_generic_path_info(&join_plan->join.plan, &best_path->path); @@ -3978,7 +3978,7 @@ create_mergejoin_plan(PlannerInfo *root, mergenullsfirst, outer_plan, inner_plan, - best_path->jpath.jointype); + &best_path->jpath); /* Costs of sort and material steps are included in path cost already */ copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); @@ -4118,7 +4118,7 @@ create_hashjoin_plan(PlannerInfo *root, hashclauses, outer_plan, (Plan *) hash_plan, - best_path->jpath.jointype); + &best_path->jpath); copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); @@ -5290,7 +5290,7 @@ make_nestloop(List *tlist, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { NestLoop *node = makeNode(NestLoop); Plan *plan = &node->join.plan; @@ -5299,9 +5299,10 @@ make_nestloop(List *tlist, plan->qual = otherclauses; plan->lefttree = lefttree; plan->righttree = righttree; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; node->nestParams = nestParams; + node->join.inner_unique = jpath->inner_unique; return node; } @@ -5313,7 +5314,7 @@ make_hashjoin(List *tlist, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { HashJoin *node = makeNode(HashJoin); Plan *plan = &node->join.plan; @@ -5323,8 +5324,9 @@ make_hashjoin(List *tlist, plan->lefttree = lefttree; plan->righttree = righttree; node->hashclauses = hashclauses; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.inner_unique = jpath->inner_unique; return node; } @@ -5365,7 +5367,7 @@ make_mergejoin(List *tlist, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { MergeJoin *node = makeNode(MergeJoin); Plan *plan = &node->join.plan; @@ -5379,8 +5381,9 @@ make_mergejoin(List *tlist, node->mergeCollations = mergecollations; node->mergeStrategies = mergestrategies; node->mergeNullsFirst = mergenullsfirst; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.inner_unique = jpath->inner_unique; return node; } diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 3c58d05..415acfc 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -124,6 +124,9 @@ query_planner(PlannerInfo *root, List *tlist, */ setup_simple_rel_arrays(root); + /* Allocate memory for caching which joins are unique. */ + setup_unique_join_caches(root); + /* * Construct RelOptInfo nodes for all base relations in query, and * indirectly for all appendrel member relations ("other rels"). This diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 8ce772d..c82ff39 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2019,11 +2019,9 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path) * 'joinrel' is the join relation. * 'jointype' is the type of join required * 'workspace' is the result from initial_cost_nestloop - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains various information about the join * 'outer_path' is the outer path * 'inner_path' is the inner path - * 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'pathkeys' are the path keys of the new join path * 'required_outer' is the set of required outer rels * @@ -2034,16 +2032,15 @@ create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, - List *restrict_clauses, List *pathkeys, Relids required_outer) { NestPath *pathnode = makeNode(NestPath); Relids inner_req_outer = PATH_REQ_OUTER(inner_path); + List *restrict_clauses = extra->restrictlist; /* * If the inner path is parameterized by the outer, we must drop any @@ -2079,7 +2076,7 @@ create_nestloop_path(PlannerInfo *root, joinrel, outer_path, inner_path, - sjinfo, + extra->sjinfo, required_outer, &restrict_clauses); pathnode->path.parallel_aware = false; @@ -2092,8 +2089,9 @@ create_nestloop_path(PlannerInfo *root, pathnode->outerjoinpath = outer_path; pathnode->innerjoinpath = inner_path; pathnode->joinrestrictinfo = restrict_clauses; + pathnode->inner_unique = extra->inner_unique; - final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors); + final_cost_nestloop(root, pathnode, workspace, extra); return pathnode; } @@ -2106,10 +2104,9 @@ create_nestloop_path(PlannerInfo *root, * 'joinrel' is the join relation * 'jointype' is the type of join required * 'workspace' is the result from initial_cost_mergejoin - * 'sjinfo' is extra info about the join for selectivity estimation + * 'extra' contains various information about the join * 'outer_path' is the outer path * 'inner_path' is the inner path - * 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'pathkeys' are the path keys of the new join path * 'required_outer' is the set of required outer rels * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses @@ -2122,10 +2119,9 @@ create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, - List *restrict_clauses, List *pathkeys, Relids required_outer, List *mergeclauses, @@ -2133,6 +2129,7 @@ create_mergejoin_path(PlannerInfo *root, List *innersortkeys) { MergePath *pathnode = makeNode(MergePath); + List *restrict_clauses = extra->restrictlist; pathnode->jpath.path.pathtype = T_MergeJoin; pathnode->jpath.path.parent = joinrel; @@ -2142,7 +2139,7 @@ create_mergejoin_path(PlannerInfo *root, joinrel, outer_path, inner_path, - sjinfo, + extra->sjinfo, required_outer, &restrict_clauses); pathnode->jpath.path.parallel_aware = false; @@ -2155,12 +2152,13 @@ create_mergejoin_path(PlannerInfo *root, pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; + pathnode->jpath.inner_unique = extra->inner_unique; pathnode->path_mergeclauses = mergeclauses; pathnode->outersortkeys = outersortkeys; pathnode->innersortkeys = innersortkeys; /* pathnode->materialize_inner will be set by final_cost_mergejoin */ - final_cost_mergejoin(root, pathnode, workspace, sjinfo); + final_cost_mergejoin(root, pathnode, workspace, extra); return pathnode; } @@ -2172,11 +2170,9 @@ create_mergejoin_path(PlannerInfo *root, * 'joinrel' is the join relation * 'jointype' is the type of join required * 'workspace' is the result from initial_cost_hashjoin - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains various information about the join * 'outer_path' is the cheapest outer path * 'inner_path' is the cheapest inner path - * 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'required_outer' is the set of required outer rels * 'hashclauses' are the RestrictInfo nodes to use as hash clauses * (this should be a subset of the restrict_clauses list) @@ -2186,15 +2182,14 @@ create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, - List *restrict_clauses, Relids required_outer, List *hashclauses) { HashPath *pathnode = makeNode(HashPath); + List *restrict_clauses = extra->restrictlist; pathnode->jpath.path.pathtype = T_HashJoin; pathnode->jpath.path.parent = joinrel; @@ -2204,7 +2199,7 @@ create_hashjoin_path(PlannerInfo *root, joinrel, outer_path, inner_path, - sjinfo, + extra->sjinfo, required_outer, &restrict_clauses); pathnode->jpath.path.parallel_aware = false; @@ -2229,10 +2224,11 @@ create_hashjoin_path(PlannerInfo *root, pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; + pathnode->jpath.inner_unique = extra->inner_unique; pathnode->path_hashclauses = hashclauses; /* final_cost_hashjoin will fill in pathnode->num_batches */ - final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors); + final_cost_hashjoin(root, pathnode, workspace, extra); return pathnode; } diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index caf8291..65c92bf 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -81,6 +81,22 @@ setup_simple_rel_arrays(PlannerInfo *root) } /* + * setup_unique_join_caches + * Prepare the arrays we use for caching which joins are proved to be + * unique and non-unique. + */ +void +setup_unique_join_caches(PlannerInfo *root) +{ + int size = list_length(root->parse->rtable) + 1; + + /* initialize the unique relation cache to all NULLs */ + root->unique_rels = (List **) palloc0(size * sizeof(List *)); + + root->non_unique_rels = (List **) palloc0(size * sizeof(List *)); +} + +/* * build_simple_rel * Construct a new RelOptInfo for a base relation or 'other' relation. */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index f856f60..85f931c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1760,6 +1760,8 @@ typedef struct JoinState PlanState ps; JoinType jointype; List *joinqual; /* JOIN quals (in addition to ps.qual) */ + bool inner_unique; /* True if each outer tuple can match to no + * more than one inner tuple.*/ } JoinState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index b880dc1..28638a2 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -615,6 +615,8 @@ typedef struct CustomScan * jointype: rule for joining tuples from left and right subtrees * joinqual: qual conditions that came from JOIN/ON or JOIN/USING * (plan.qual contains conditions that came from WHERE) + * inner_unique each outer tuple can match to no more than one inner + * tuple * * When jointype is INNER, joinqual and plan.qual are semantically * interchangeable. For OUTER jointypes, the two are *not* interchangeable; @@ -630,6 +632,7 @@ typedef struct Join Plan plan; JoinType jointype; List *joinqual; /* JOIN quals (in addition to plan.qual) */ + bool inner_unique; } Join; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 05d6f07..59ceee1 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -222,6 +222,18 @@ typedef struct PlannerInfo List **join_rel_level; /* lists of join-relation RelOptInfos */ int join_cur_level; /* index of list being extended */ + /* + * During the join search we attempt to determine which joins can be + * proven to be unique on their inner side based on the join condition. + * This is a rather expensive test to perform, and requires looking for + * proofs such as a relation's unique indexes. We use this cache during + * the join search to record lists of the sets of relations which both + * prove, and disprove the uniqueness properties for the relid indexed by + * these arrays. + */ + List **unique_rels; /* cache for proven unique rels */ + List **non_unique_rels; /* cache for proven non-unique rels */ + List *init_plans; /* init SubPlans for query */ List *cte_plan_ids; /* per-CTE-item list of subplan IDs */ @@ -1231,6 +1243,9 @@ typedef struct JoinPath List *joinrestrictinfo; /* RestrictInfos to apply to join */ + bool inner_unique; /* each outer tuple can match to no more than + * one inner tuple */ + /* * See the notes for RelOptInfo and ParamPathInfo to understand why * joinrestrictinfo is needed in JoinPath, and can't be merged into the @@ -2056,6 +2071,8 @@ typedef struct SemiAntiJoinFactors * sjinfo is extra info about special joins for selectivity estimation * semifactors is as shown above (only valid for SEMI or ANTI joins) * param_source_rels are OK targets for parameterization of result paths + * inner_unique true if proofs exist which prove that outer side tuples match + * no more than one inner side tuple */ typedef struct JoinPathExtraData { @@ -2064,6 +2081,7 @@ typedef struct JoinPathExtraData SpecialJoinInfo *sjinfo; SemiAntiJoinFactors semifactors; Relids param_source_rels; + bool inner_unique; } JoinPathExtraData; /* diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index d9a9b12..ca33494 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -127,33 +127,29 @@ extern void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors); + JoinPathExtraData *extra); extern void final_cost_nestloop(PlannerInfo *root, NestPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors); + JoinPathExtraData *extra); extern void initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *mergeclauses, Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys, - SpecialJoinInfo *sjinfo); + JoinPathExtraData *extra); extern void final_cost_mergejoin(PlannerInfo *root, MergePath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo); + JoinPathExtraData *extra); extern void initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *hashclauses, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors); + JoinPathExtraData *extra); extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors); + JoinPathExtraData *extra); extern void cost_gather(GatherPath *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info, double *rows); extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 373c722..c5c4ac0 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -115,11 +115,9 @@ extern NestPath *create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, - List *restrict_clauses, List *pathkeys, Relids required_outer); @@ -127,10 +125,9 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, - List *restrict_clauses, List *pathkeys, Relids required_outer, List *mergeclauses, @@ -141,11 +138,9 @@ extern HashPath *create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, - List *restrict_clauses, Relids required_outer, List *hashclauses); @@ -251,6 +246,7 @@ extern Path *reparameterize_path(PlannerInfo *root, Path *path, * prototypes for relnode.c */ extern void setup_simple_rel_arrays(PlannerInfo *root); +extern void setup_unique_join_caches(PlannerInfo *root); extern RelOptInfo *build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind); extern RelOptInfo *find_base_rel(PlannerInfo *root, int relid); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 94ef84b..e37c647 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -105,7 +105,11 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root); extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); - +extern bool innerrel_is_unique(PlannerInfo *root, RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + SpecialJoinInfo *sjinfo, + List *restrictlist); /* * prototypes for plan/setrefs.c */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 0ff8062..9784a87 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -381,6 +381,7 @@ order by 1, 2; Sort Key: s1.s1, s2.s2 -> Nested Loop Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2))) + Inner Unique: No -> Function Scan on pg_catalog.generate_series s1 Output: s1.s1 Function Call: generate_series(1, 3) @@ -390,7 +391,7 @@ order by 1, 2; -> Function Scan on pg_catalog.generate_series s2 Output: s2.s2 Function Call: generate_series(1, 3) -(14 rows) +(15 rows) select s1, s2, sm from generate_series(1, 3) s1, @@ -982,29 +983,31 @@ explain (costs off) select a,c from t1 group by a,c,d; explain (costs off) select * from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; - QUERY PLAN -------------------------------------------------------- - Group + QUERY PLAN +------------------------------------------------------ + HashAggregate Group Key: t1.a, t1.b, t2.x, t2.y - -> Merge Join - Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) - -> Index Scan using t1_pkey on t1 - -> Index Scan using t2_pkey on t2 -(6 rows) + -> Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(7 rows) -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ HashAggregate Group Key: t1.a, t1.b, t2.x, t2.z - -> Merge Join - Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) - -> Index Scan using t1_pkey on t1 - -> Index Scan using t2_pkey on t2 -(6 rows) + -> Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(7 rows) -- Cannot optimize when PK is deferrable explain (costs off) select * from t3 group by a,b,c; diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out index 564218b..a96b2a1 100644 --- a/src/test/regress/expected/equivclass.out +++ b/src/test/regress/expected/equivclass.out @@ -317,12 +317,11 @@ explain (costs off) -> Index Scan using ec1_expr2 on ec1 ec1_1 -> Index Scan using ec1_expr3 on ec1 ec1_2 -> Index Scan using ec1_expr4 on ec1 ec1_3 - -> Materialize - -> Sort - Sort Key: ec1.f1 USING < - -> Index Scan using ec1_pkey on ec1 - Index Cond: (ff = '42'::bigint) -(20 rows) + -> Sort + Sort Key: ec1.f1 USING < + -> Index Scan using ec1_pkey on ec1 + Index Cond: (ff = '42'::bigint) +(19 rows) -- check partially indexed scan set enable_nestloop = on; @@ -374,12 +373,11 @@ explain (costs off) Sort Key: (((ec1_2.ff + 3) + 1)) -> Seq Scan on ec1 ec1_2 -> Index Scan using ec1_expr4 on ec1 ec1_3 - -> Materialize - -> Sort - Sort Key: ec1.f1 USING < - -> Index Scan using ec1_pkey on ec1 - Index Cond: (ff = '42'::bigint) -(14 rows) + -> Sort + Sort Key: ec1.f1 USING < + -> Index Scan using ec1_pkey on ec1 + Index Cond: (ff = '42'::bigint) +(13 rows) -- check effects of row-level security set enable_nestloop = on; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 4992048..bc3c597 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3362,6 +3362,7 @@ using (join_key); -------------------------------------------------------------------------- Nested Loop Left Join Output: "*VALUES*".column1, i1.f1, (666) + Inner Unique: No Join Filter: ("*VALUES*".column1 = i1.f1) -> Values Scan on "*VALUES*" Output: "*VALUES*".column1 @@ -3369,12 +3370,13 @@ using (join_key); Output: i1.f1, (666) -> Nested Loop Left Join Output: i1.f1, 666 + Inner Unique: No -> Seq Scan on public.int4_tbl i1 Output: i1.f1 -> Index Only Scan using tenk1_unique2 on public.tenk1 i2 Output: i2.unique2 Index Cond: (i2.unique2 = i1.f1) -(14 rows) +(16 rows) select foo1.join_key as foo1_id, foo3.join_key AS foo3_id, bug_field from (values (0),(1)) foo1(join_key) @@ -3412,9 +3414,11 @@ select t1.* from ---------------------------------------------------------------------- Hash Left Join Output: t1.f1 + Inner Unique: No Hash Cond: (i8.q2 = i4.f1) -> Nested Loop Left Join Output: t1.f1, i8.q2 + Inner Unique: No Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 @@ -3422,9 +3426,11 @@ select t1.* from Output: i8.q2 -> Hash Right Join Output: i8.q2 + Inner Unique: No Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Left Join Output: i8.q2, (NULL::integer) + Inner Unique: No Hash Cond: (i8.q1 = i8b2.q1) -> Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 @@ -3440,7 +3446,7 @@ select t1.* from Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(30 rows) +(34 rows) select t1.* from text_tbl t1 @@ -3473,9 +3479,11 @@ select t1.* from ---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 + Inner Unique: No Hash Cond: (i8.q2 = i4.f1) -> Nested Loop Left Join Output: t1.f1, i8.q2 + Inner Unique: No Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 @@ -3483,12 +3491,15 @@ select t1.* from Output: i8.q2 -> Hash Right Join Output: i8.q2 + Inner Unique: No Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Right Join Output: i8.q2, (NULL::integer) + Inner Unique: No Hash Cond: (i8b2.q1 = i8.q1) -> Nested Loop Output: i8b2.q1, NULL::integer + Inner Unique: No -> Seq Scan on public.int8_tbl i8b2 Output: i8b2.q1, i8b2.q2 -> Materialize @@ -3505,7 +3516,7 @@ select t1.* from Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(34 rows) +(39 rows) select t1.* from text_tbl t1 @@ -3539,9 +3550,11 @@ select t1.* from ---------------------------------------------------------------------------- Hash Left Join Output: t1.f1 + Inner Unique: No Hash Cond: (i8.q2 = i4.f1) -> Nested Loop Left Join Output: t1.f1, i8.q2 + Inner Unique: No Join Filter: (t1.f1 = '***'::text) -> Seq Scan on public.text_tbl t1 Output: t1.f1 @@ -3549,12 +3562,15 @@ select t1.* from Output: i8.q2 -> Hash Right Join Output: i8.q2 + Inner Unique: No Hash Cond: ((NULL::integer) = i8b1.q2) -> Hash Right Join Output: i8.q2, (NULL::integer) + Inner Unique: No Hash Cond: (i8b2.q1 = i8.q1) -> Hash Join Output: i8b2.q1, NULL::integer + Inner Unique: No Hash Cond: (i8b2.q1 = i4b2.f1) -> Seq Scan on public.int8_tbl i8b2 Output: i8b2.q1, i8b2.q2 @@ -3574,7 +3590,7 @@ select t1.* from Output: i4.f1 -> Seq Scan on public.int4_tbl i4 Output: i4.f1 -(37 rows) +(42 rows) select t1.* from text_tbl t1 @@ -3606,14 +3622,17 @@ select * from -------------------------------------------------------- Nested Loop Left Join Output: t1.f1, i8.q1, i8.q2, t2.f1, i4.f1 + Inner Unique: No -> Seq Scan on public.text_tbl t2 Output: t2.f1 -> Materialize Output: i8.q1, i8.q2, i4.f1, t1.f1 -> Nested Loop Output: i8.q1, i8.q2, i4.f1, t1.f1 + Inner Unique: No -> Nested Loop Left Join Output: i8.q1, i8.q2, i4.f1 + Inner Unique: No Join Filter: (i8.q1 = i4.f1) -> Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 @@ -3623,7 +3642,7 @@ select * from -> Seq Scan on public.text_tbl t1 Output: t1.f1 Filter: (t1.f1 = 'doh!'::text) -(19 rows) +(22 rows) select * from text_tbl t1 @@ -3653,9 +3672,11 @@ where t1.f1 = ss.f1; -------------------------------------------------- Nested Loop Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1 + Inner Unique: No Join Filter: (t1.f1 = t2.f1) -> Nested Loop Left Join Output: t1.f1, i8.q1, i8.q2 + Inner Unique: No -> Seq Scan on public.text_tbl t1 Output: t1.f1 -> Materialize @@ -3667,7 +3688,7 @@ where t1.f1 = ss.f1; Output: (i8.q1), t2.f1 -> Seq Scan on public.text_tbl t2 Output: i8.q1, t2.f1 -(16 rows) +(18 rows) select * from text_tbl t1 @@ -3692,11 +3713,14 @@ where t1.f1 = ss2.f1; ------------------------------------------------------------------- Nested Loop Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1, ((i8.q1)), (t2.f1) + Inner Unique: No Join Filter: (t1.f1 = (t2.f1)) -> Nested Loop Output: t1.f1, i8.q1, i8.q2, (i8.q1), t2.f1 + Inner Unique: No -> Nested Loop Left Join Output: t1.f1, i8.q1, i8.q2 + Inner Unique: No -> Seq Scan on public.text_tbl t1 Output: t1.f1 -> Materialize @@ -3712,7 +3736,7 @@ where t1.f1 = ss2.f1; Output: ((i8.q1)), (t2.f1) -> Seq Scan on public.text_tbl t3 Output: (i8.q1), t2.f1 -(22 rows) +(25 rows) select * from text_tbl t1 @@ -3738,10 +3762,13 @@ where tt1.f1 = ss1.c0; ---------------------------------------------------------- Nested Loop Output: 1 + Inner Unique: No -> Nested Loop Left Join Output: tt1.f1, tt4.f1 + Inner Unique: No -> Nested Loop Output: tt1.f1 + Inner Unique: No -> Seq Scan on public.text_tbl tt1 Output: tt1.f1 Filter: (tt1.f1 = 'foo'::text) @@ -3751,6 +3778,7 @@ where tt1.f1 = ss1.c0; Output: tt4.f1 -> Nested Loop Left Join Output: tt4.f1 + Inner Unique: No Join Filter: (tt3.f1 = tt4.f1) -> Seq Scan on public.text_tbl tt3 Output: tt3.f1 @@ -3765,7 +3793,7 @@ where tt1.f1 = ss1.c0; Output: (tt4.f1) -> Seq Scan on public.text_tbl tt5 Output: tt4.f1 -(29 rows) +(33 rows) select 1 from text_tbl as tt1 @@ -3795,13 +3823,17 @@ where ss1.c2 = 0; ------------------------------------------------------------------------ Nested Loop Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42)) + Inner Unique: No -> Hash Join Output: i41.f1, i42.f1, i8.q1, i8.q2, i43.f1, 42 + Inner Unique: No Hash Cond: (i41.f1 = i42.f1) -> Nested Loop Output: i8.q1, i8.q2, i43.f1, i41.f1 + Inner Unique: No -> Nested Loop Output: i8.q1, i8.q2, i43.f1 + Inner Unique: No -> Seq Scan on public.int8_tbl i8 Output: i8.q1, i8.q2 Filter: (i8.q1 = 0) @@ -3818,7 +3850,7 @@ where ss1.c2 = 0; Output: (i41.f1), (i8.q1), (i8.q2), (i42.f1), (i43.f1), ((42)) -> Seq Scan on public.text_tbl Output: i41.f1, i8.q1, i8.q2, i42.f1, i43.f1, (42) -(25 rows) +(29 rows) select ss2.* from int4_tbl i41 @@ -3906,6 +3938,7 @@ explain (verbose, costs off) --------------------------------------------------------- Merge Left Join Output: a.q2, b.q1 + Inner Unique: No Merge Cond: (a.q2 = (COALESCE(b.q1, '1'::bigint))) Filter: (COALESCE(b.q1, '1'::bigint) > 0) -> Sort @@ -3918,7 +3951,7 @@ explain (verbose, costs off) Sort Key: (COALESCE(b.q1, '1'::bigint)) -> Seq Scan on public.int8_tbl b Output: b.q1, COALESCE(b.q1, '1'::bigint) -(14 rows) +(15 rows) select a.q2, b.q1 from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1) @@ -3979,7 +4012,7 @@ select id from a where id in ( ); QUERY PLAN ---------------------------- - Hash Semi Join + Hash Join Hash Cond: (a.id = b.id) -> Seq Scan on a -> Hash @@ -4805,12 +4838,13 @@ select * from ------------------------------------------ Nested Loop Left Join Output: a.q1, a.q2, b.q1, b.q2, (a.q2) + Inner Unique: No -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2, a.q2 Filter: (a.q2 = b.q1) -(7 rows) +(8 rows) select * from int8_tbl a left join @@ -4837,12 +4871,13 @@ select * from ------------------------------------------------------------------ Nested Loop Left Join Output: a.q1, a.q2, b.q1, b.q2, (COALESCE(a.q2, '42'::bigint)) + Inner Unique: No -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2, COALESCE(a.q2, '42'::bigint) Filter: (a.q2 = b.q1) -(7 rows) +(8 rows) select * from int8_tbl a left join @@ -4870,6 +4905,7 @@ select * from int4_tbl i left join ------------------------------------------- Hash Left Join Output: i.f1, j.f1 + Inner Unique: No Hash Cond: (i.f1 = j.f1) -> Seq Scan on public.int4_tbl i Output: i.f1 @@ -4877,7 +4913,7 @@ select * from int4_tbl i left join Output: j.f1 -> Seq Scan on public.int2_tbl j Output: j.f1 -(9 rows) +(10 rows) select * from int4_tbl i left join lateral (select * from int2_tbl j where i.f1 = j.f1) k on true; @@ -4897,12 +4933,13 @@ select * from int4_tbl i left join ------------------------------------- Nested Loop Left Join Output: i.f1, (COALESCE(i.*)) + Inner Unique: No -> Seq Scan on public.int4_tbl i Output: i.f1, i.* -> Seq Scan on public.int2_tbl j Output: j.f1, COALESCE(i.*) Filter: (i.f1 = j.f1) -(7 rows) +(8 rows) select * from int4_tbl i left join lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true; @@ -4924,10 +4961,12 @@ select * from int4_tbl a, ------------------------------------------------- Nested Loop Output: a.f1, b.f1, c.q1, c.q2 + Inner Unique: No -> Seq Scan on public.int4_tbl a Output: a.f1 -> Hash Left Join Output: b.f1, c.q1, c.q2 + Inner Unique: No Hash Cond: (b.f1 = c.q1) -> Seq Scan on public.int4_tbl b Output: b.f1 @@ -4936,7 +4975,7 @@ select * from int4_tbl a, -> Seq Scan on public.int8_tbl c Output: c.q1, c.q2 Filter: (a.f1 = c.q2) -(14 rows) +(16 rows) select * from int4_tbl a, lateral ( @@ -4982,16 +5021,18 @@ select * from ------------------------------------------------------------- Nested Loop Left Join Output: a.q1, a.q2, b.q1, c.q1, (LEAST(a.q1, b.q1, c.q1)) + Inner Unique: No -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 -> Nested Loop Output: b.q1, c.q1, LEAST(a.q1, b.q1, c.q1) + Inner Unique: No -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2 Filter: (a.q2 = b.q1) -> Seq Scan on public.int8_tbl c Output: c.q1, c.q2 -(11 rows) +(13 rows) select * from int8_tbl a left join lateral @@ -5058,13 +5099,17 @@ select * from ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Nested Loop Output: c.q1, c.q2, a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)), ((COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))) + Inner Unique: No -> Hash Right Join Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) + Inner Unique: No Hash Cond: (d.q1 = c.q2) -> Nested Loop Output: a.q1, a.q2, b.q1, d.q1, (COALESCE(b.q2, '42'::bigint)), (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) + Inner Unique: No -> Hash Left Join Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)) + Inner Unique: No Hash Cond: (a.q2 = b.q1) -> Seq Scan on public.int8_tbl a Output: a.q1, a.q2 @@ -5080,7 +5125,7 @@ select * from Output: c.q1, c.q2 -> Result Output: (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)) -(24 rows) +(28 rows) -- case that breaks the old ph_may_need optimization explain (verbose, costs off) @@ -5098,17 +5143,22 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from --------------------------------------------------------------------------------------------------------- Nested Loop Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, i.f1 + Inner Unique: No Join Filter: ((COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) > i.f1) -> Hash Right Join Output: c.q1, c.q2, a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) + Inner Unique: No Hash Cond: (d.q1 = c.q2) -> Nested Loop Output: a.q1, a.q2, b.q1, d.q1, (COALESCE((COALESCE(b.q2, (b2.f1)::bigint)), d.q2)) + Inner Unique: No -> Hash Right Join Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, (b2.f1)::bigint)) + Inner Unique: No Hash Cond: (b.q1 = a.q2) -> Nested Loop Output: b.q1, COALESCE(b.q2, (b2.f1)::bigint) + Inner Unique: No Join Filter: (b.q1 < b2.f1) -> Seq Scan on public.int8_tbl b Output: b.q1, b.q2 @@ -5130,7 +5180,7 @@ select c.*,a.*,ss1.q1,ss2.q1,ss3.* from Output: i.f1 -> Seq Scan on public.int4_tbl i Output: i.f1 -(34 rows) +(39 rows) -- check processing of postponed quals (bug #9041) explain (verbose, costs off) @@ -5143,16 +5193,18 @@ select * from ---------------------------------------------- Nested Loop Left Join Output: (1), (2), (3) + Inner Unique: No Join Filter: (((3) = (1)) AND ((3) = (2))) -> Nested Loop Output: (1), (2) + Inner Unique: No -> Result Output: 1 -> Result Output: 2 -> Result Output: 3 -(11 rows) +(13 rows) -- check we don't try to do a unique-ified semijoin with LATERAL explain (verbose, costs off) @@ -5165,10 +5217,12 @@ select * from ---------------------------------------------------------------------- Nested Loop Output: "*VALUES*".column1, "*VALUES*".column2, int4_tbl.f1 + Inner Unique: No -> Values Scan on "*VALUES*" Output: "*VALUES*".column1, "*VALUES*".column2 -> Nested Loop Semi Join Output: int4_tbl.f1 + Inner Unique: No Join Filter: (int4_tbl.f1 = tenk1.unique1) -> Seq Scan on public.int4_tbl Output: int4_tbl.f1 @@ -5177,7 +5231,7 @@ select * from -> Index Scan using tenk1_unique2 on public.tenk1 Output: tenk1.unique1 Index Cond: (tenk1.unique2 = "*VALUES*".column2) -(14 rows) +(16 rows) select * from (values (0,9998), (1,1000)) v(id,x), @@ -5204,10 +5258,12 @@ lateral (select * from int8_tbl t1, ----------------------------------------------------------------- Nested Loop Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2 + Inner Unique: No -> Seq Scan on public.int8_tbl t1 Output: t1.q1, t1.q2 -> Nested Loop Output: "*VALUES*".column1, ss2.q1, ss2.q2 + Inner Unique: No -> Values Scan on "*VALUES*" Output: "*VALUES*".column1 -> Subquery Scan on ss2 @@ -5229,7 +5285,7 @@ lateral (select * from int8_tbl t1, -> Seq Scan on public.int8_tbl t3 Output: t3.q1, t3.q2 Filter: (t3.q2 = $2) -(27 rows) +(29 rows) select * from (values (0), (1)) v(id), lateral (select * from int8_tbl t1, @@ -5327,3 +5383,283 @@ ERROR: invalid reference to FROM-clause entry for table "xx1" LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss; ^ HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. +-- +-- test planner's ability to mark joins as unique +-- +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); +analyze j1; +analyze j2; +analyze j3; +-- ensure join is properly marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id, j2.id + Inner Unique: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- ensure join is not unique when not an equi-join +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + Inner Unique: No + Join Filter: (j1.id > j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- ensure join is not unique, as j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id, j3.id + Inner Unique: No + Hash Cond: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(10 rows) + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join + Output: j1.id, j2.id + Inner Unique: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join + Output: j1.id, j2.id + Inner Unique: Yes + Hash Cond: (j2.id = j1.id) + -> Seq Scan on public.j2 + Output: j2.id + -> Hash + Output: j1.id + -> Seq Scan on public.j1 + Output: j1.id +(10 rows) + +-- ensure full join is marked as unique +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Full Join + Output: j1.id, j2.id + Inner Unique: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- a clauseless (cross) join can't be unique +explain (verbose, costs off) +select * from j1 cross join j2; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + Inner Unique: No + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- ensure a natural join is marked as unique +explain (verbose, costs off) +select * from j1 natural join j2; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id + Inner Unique: Yes + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(10 rows) + +-- ensure a distinct clause allows the inner to become unique +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------------- + Nested Loop + Output: j1.id, j3.id + Inner Unique: Yes + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> Unique + Output: j3.id + -> Sort + Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(15 rows) + +-- ensure group by clause allows the inner to become unique +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------------- + Nested Loop + Output: j1.id, j3.id + Inner Unique: Yes + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> Group + Output: j3.id + Group Key: j3.id + -> Sort + Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(16 rows) + +drop table j1; +drop table j2; +drop table j3; +-- test a more complex permutations of unique joins +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); +analyze j1; +analyze j2; +analyze j3; +-- ensure there's no unique join when not all columns which are part of the +-- unique index are seen in the join clause +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + QUERY PLAN +------------------------------------------ + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Inner Unique: No + Join Filter: (j1.id1 = j2.id1) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 +(8 rows) + +-- ensure proper unique detection with multiple join quals +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + QUERY PLAN +---------------------------------------------------------- + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Inner Unique: Yes + Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2)) + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 + -> Materialize + Output: j2.id1, j2.id2 + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 +(10 rows) + +-- ensure we don't detect the join to be unique when quals are not part of the +-- join condition +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 where j1.id2 = 1; + QUERY PLAN +------------------------------------------ + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Inner Unique: No + Join Filter: (j1.id1 = j2.id1) + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 + Filter: (j1.id2 = 1) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 +(9 rows) + +-- as above, but for left joins. +explain (verbose, costs off) +select * from j1 +left join j2 on j1.id1 = j2.id1 where j1.id2 = 1; + QUERY PLAN +------------------------------------------ + Nested Loop Left Join + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Inner Unique: No + Join Filter: (j1.id1 = j2.id1) + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 + Filter: (j1.id2 = 1) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 +(9 rows) + +drop table j1; +drop table j2; +drop table j3; diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out index 04848c1..e40bdf3 100644 --- a/src/test/regress/expected/plpgsql.out +++ b/src/test/regress/expected/plpgsql.out @@ -5391,12 +5391,13 @@ select i, a from ----------------------------------------------------------------- Nested Loop Output: i.i, (returns_rw_array(1)) + Inner Unique: No -> Result Output: returns_rw_array(1) -> Function Scan on public.consumes_rw_array i Output: i.i Function Call: consumes_rw_array((returns_rw_array(1))) -(7 rows) +(8 rows) select i, a from (select returns_rw_array(1) as a offset 0) ss, diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index 526a4ae..8df33be 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -1994,12 +1994,13 @@ select x from int8_tbl, extractq2(int8_tbl) f(x); ------------------------------------------ Nested Loop Output: f.x + Inner Unique: No -> Seq Scan on public.int8_tbl Output: int8_tbl.q1, int8_tbl.q2 -> Function Scan on f Output: f.x Function Call: int8_tbl.q2 -(7 rows) +(8 rows) select x from int8_tbl, extractq2(int8_tbl) f(x); x @@ -2020,11 +2021,12 @@ select x from int8_tbl, extractq2_2(int8_tbl) f(x); ----------------------------------- Nested Loop Output: ((int8_tbl.*).q2) + Inner Unique: No -> Seq Scan on public.int8_tbl Output: int8_tbl.* -> Result Output: (int8_tbl.*).q2 -(6 rows) +(7 rows) select x from int8_tbl, extractq2_2(int8_tbl) f(x); x diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index ed7d6d8..063cb13 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -837,6 +837,7 @@ select * from int4_tbl where --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Nested Loop Semi Join Output: int4_tbl.f1 + Inner Unique: No Join Filter: (CASE WHEN (hashed SubPlan 1) THEN int4_tbl.f1 ELSE NULL::integer END = b.ten) -> Seq Scan on public.int4_tbl Output: int4_tbl.f1 @@ -845,7 +846,7 @@ select * from int4_tbl where SubPlan 1 -> Index Only Scan using tenk1_unique1 on public.tenk1 a Output: a.unique1 -(10 rows) +(11 rows) select * from int4_tbl where (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in @@ -865,6 +866,7 @@ select * from int4_tbl o where (f1, f1) in ------------------------------------------------------------------- Nested Loop Semi Join Output: o.f1 + Inner Unique: No Join Filter: (o.f1 = "ANY_subquery".f1) -> Seq Scan on public.int4_tbl o Output: o.f1 @@ -882,7 +884,7 @@ select * from int4_tbl o where (f1, f1) in Group Key: i.f1 -> Seq Scan on public.int4_tbl i Output: i.f1 -(19 rows) +(20 rows) select * from int4_tbl o where (f1, f1) in (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out index 3b7f689..3e4c3fb 100644 --- a/src/test/regress/expected/with.out +++ b/src/test/regress/expected/with.out @@ -2195,6 +2195,7 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: '42'::bigint, '47'::bigint -> Nested Loop Output: a.ctid, wcte.* + Inner Unique: No Join Filter: (a.aa = wcte.q2) -> Seq Scan on public.a Output: a.ctid, a.aa @@ -2202,6 +2203,7 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: wcte.*, wcte.q2 -> Nested Loop Output: b.ctid, wcte.* + Inner Unique: No Join Filter: (b.aa = wcte.q2) -> Seq Scan on public.b Output: b.ctid, b.aa @@ -2209,6 +2211,7 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: wcte.*, wcte.q2 -> Nested Loop Output: c.ctid, wcte.* + Inner Unique: No Join Filter: (c.aa = wcte.q2) -> Seq Scan on public.c Output: c.ctid, c.aa @@ -2216,12 +2219,13 @@ DELETE FROM a USING wcte WHERE aa = q2; Output: wcte.*, wcte.q2 -> Nested Loop Output: d.ctid, wcte.* + Inner Unique: No Join Filter: (d.aa = wcte.q2) -> Seq Scan on public.d Output: d.ctid, d.aa -> CTE Scan on wcte Output: wcte.*, wcte.q2 -(38 rows) +(42 rows) -- error cases -- data-modifying WITH tries to use its own output diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index d3bd8c9..7e0f3cf 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -926,12 +926,13 @@ EXPLAIN (COSTS OFF, VERBOSE) SELECT * FROM xmltableview1; ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Nested Loop Output: "xmltable".id, "xmltable"._id, "xmltable".country_name, "xmltable".country_id, "xmltable".region_id, "xmltable".size, "xmltable".unit, "xmltable".premier_name + Inner Unique: No -> Seq Scan on public.xmldata Output: xmldata.data -> Table Function Scan on "xmltable" Output: "xmltable".id, "xmltable"._id, "xmltable".country_name, "xmltable".country_id, "xmltable".region_id, "xmltable".size, "xmltable".unit, "xmltable".premier_name Table Function Call: XMLTABLE(('/ROWS/ROW'::text) PASSING (xmldata.data) COLUMNS id integer PATH ('@id'::text), _id FOR ORDINALITY, country_name text PATH ('COUNTRY_NAME'::text) NOT NULL, country_id text PATH ('COUNTRY_ID'::text), region_id integer PATH ('REGION_ID'::text), size double precision PATH ('SIZE'::text), unit text PATH ('SIZE/@unit'::text), premier_name text DEFAULT ('not specified'::text) PATH ('PREMIER_NAME'::text)) -(7 rows) +(8 rows) -- XMLNAMESPACES tests SELECT * FROM XMLTABLE(XMLNAMESPACES('http://x.y' AS zz), @@ -1068,12 +1069,13 @@ SELECT xmltable.* ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Nested Loop Output: "xmltable".id, "xmltable"._id, "xmltable".country_name, "xmltable".country_id, "xmltable".region_id, "xmltable".size, "xmltable".unit, "xmltable".premier_name + Inner Unique: No -> Seq Scan on public.xmldata Output: xmldata.data -> Table Function Scan on "xmltable" Output: "xmltable".id, "xmltable"._id, "xmltable".country_name, "xmltable".country_id, "xmltable".region_id, "xmltable".size, "xmltable".unit, "xmltable".premier_name Table Function Call: XMLTABLE(('/ROWS/ROW'::text) PASSING (xmldata.data) COLUMNS id integer PATH ('@id'::text), _id FOR ORDINALITY, country_name text PATH ('COUNTRY_NAME'::text) NOT NULL, country_id text PATH ('COUNTRY_ID'::text), region_id integer PATH ('REGION_ID'::text), size double precision PATH ('SIZE'::text), unit text PATH ('SIZE/@unit'::text), premier_name text DEFAULT ('not specified'::text) PATH ('PREMIER_NAME'::text)) -(7 rows) +(8 rows) -- test qual SELECT xmltable.* FROM xmldata, LATERAL xmltable('/ROWS/ROW[COUNTRY_NAME="Japan" or COUNTRY_NAME="India"]' PASSING data COLUMNS "COUNTRY_NAME" text, "REGION_ID" int) WHERE "COUNTRY_NAME" = 'Japan'; @@ -1087,13 +1089,14 @@ SELECT xmltable.* FROM xmldata, LATERAL xmltable('/ROWS/ROW[COUNTRY_NAME="Japan" ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Nested Loop Output: "xmltable"."COUNTRY_NAME", "xmltable"."REGION_ID" + Inner Unique: No -> Seq Scan on public.xmldata Output: xmldata.data -> Table Function Scan on "xmltable" Output: "xmltable"."COUNTRY_NAME", "xmltable"."REGION_ID" Table Function Call: XMLTABLE(('/ROWS/ROW[COUNTRY_NAME="Japan" or COUNTRY_NAME="India"]'::text) PASSING (xmldata.data) COLUMNS "COUNTRY_NAME" text, "REGION_ID" integer) Filter: ("xmltable"."COUNTRY_NAME" = 'Japan'::text) -(8 rows) +(9 rows) -- should to work with more data INSERT INTO xmldata VALUES(' @@ -1186,13 +1189,14 @@ SELECT xmltable.* ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Nested Loop Output: "xmltable".id, "xmltable"._id, "xmltable".country_name, "xmltable".country_id, "xmltable".region_id, "xmltable".size, "xmltable".unit, "xmltable".premier_name + Inner Unique: No -> Seq Scan on public.xmldata Output: xmldata.data -> Table Function Scan on "xmltable" Output: "xmltable".id, "xmltable"._id, "xmltable".country_name, "xmltable".country_id, "xmltable".region_id, "xmltable".size, "xmltable".unit, "xmltable".premier_name Table Function Call: XMLTABLE(('/ROWS/ROW'::text) PASSING (xmldata.data) COLUMNS id integer PATH ('@id'::text), _id FOR ORDINALITY, country_name text PATH ('COUNTRY_NAME'::text) NOT NULL, country_id text PATH ('COUNTRY_ID'::text), region_id integer PATH ('REGION_ID'::text), size double precision PATH ('SIZE'::text), unit text PATH ('SIZE/@unit'::text), premier_name text DEFAULT ('not specified'::text) PATH ('PREMIER_NAME'::text)) Filter: ("xmltable".region_id = 2) -(8 rows) +(9 rows) -- should fail, NULL value SELECT xmltable.* diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index cca1a53..1692720 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1732,3 +1732,106 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1) delete from xx1 using (select * from int4_tbl where f1 = x1) ss; delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss; delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss; + +-- +-- test planner's ability to mark joins as unique +-- + +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); + +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); + +analyze j1; +analyze j2; +analyze j3; + +-- ensure join is properly marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + +-- ensure join is not unique when not an equi-join +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + +-- ensure join is not unique, as j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + +-- ensure full join is marked as unique +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + +-- a clauseless (cross) join can't be unique +explain (verbose, costs off) +select * from j1 cross join j2; + +-- ensure a natural join is marked as unique +explain (verbose, costs off) +select * from j1 natural join j2; + +-- ensure a distinct clause allows the inner to become unique +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + +-- ensure group by clause allows the inner to become unique +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + +drop table j1; +drop table j2; +drop table j3; + +-- test a more complex permutations of unique joins + +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); + +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); + +analyze j1; +analyze j2; +analyze j3; + +-- ensure there's no unique join when not all columns which are part of the +-- unique index are seen in the join clause +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + +-- ensure proper unique detection with multiple join quals +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + +-- ensure we don't detect the join to be unique when quals are not part of the +-- join condition +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 where j1.id2 = 1; + +-- as above, but for left joins. +explain (verbose, costs off) +select * from j1 +left join j2 on j1.id1 = j2.id1 where j1.id2 = 1; + + +drop table j1; +drop table j2; +drop table j3;