diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 4899a27..43bc425 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -3889,6 +3889,17 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, return all_fired; } +/* ---------- + * AfterTriggerQueueIsEmpty() + * + * True if there are no pending triggers in the queue. + * ---------- + */ +bool +AfterTriggerQueueIsEmpty(void) +{ + return (afterTriggers.query_depth == -1 && afterTriggers.events.head == NULL); +} /* ---------- * AfterTriggerBeginXact() diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index af707b0..bfbd5b3 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -14,8 +14,8 @@ include $(top_builddir)/src/Makefile.global OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ execProcnode.o execQual.o execScan.o execTuples.o \ - execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \ - nodeBitmapAnd.o nodeBitmapOr.o \ + execUtils.o functions.o instrument.o nodeAlternativePlan.o nodeAppend.o \ + nodeAgg.o nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeCustom.o nodeHash.o \ nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ nodeLimit.o nodeLockRows.o \ diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 9892499..523e187 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -79,6 +79,7 @@ #include "executor/executor.h" #include "executor/nodeAgg.h" +#include "executor/nodeAlternativePlan.h" #include "executor/nodeAppend.h" #include "executor/nodeBitmapAnd.h" #include "executor/nodeBitmapHeapscan.h" @@ -147,6 +148,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) /* * control nodes */ + case T_AlternativePlan: + result = (PlanState *) ExecInitAlternativePlan((AlternativePlan *)node, + estate, eflags); + break; + case T_Result: result = (PlanState *) 
ExecInitResult((Result *) node, estate, eflags); diff --git a/src/backend/executor/nodeAlternativePlan.c b/src/backend/executor/nodeAlternativePlan.c new file mode 100644 index 0000000..cafe33a --- /dev/null +++ b/src/backend/executor/nodeAlternativePlan.c @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------- + * + * nodeAlternativePlan.c + * Node to support storage of alternative plans. + * + * Note that this node is rather special as it only exists while the plan + * is being initialised. + * + * When the initialization method is called for this node, a decision is + * made as to which plan should be initialized; the code here then calls + * the initialize method on the selected plan and returns the state value + * from the root node of that plan. + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeAlternativePlan.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "commands/trigger.h" + +#include "executor/executor.h" +#include "executor/nodeAlternativePlan.h" + +PlanState * +ExecInitAlternativePlan(AlternativePlan *node, EState *estate, int eflags) +{ + /* + * If we have items in the fk trigger queue, then we'd better use the + * all purpose plan. Since an AlternativePlan node has no state, we simply + * initialize the root node of the selected plan. This means that the + * AlternativePlan node is *never* seen in EXPLAIN or EXPLAIN ANALYZE. + */ + if (!AfterTriggerQueueIsEmpty()) + return (PlanState *) ExecInitNode((Plan *) list_nth(node->planList, 1), + estate, eflags); + + /* + * Otherwise we initialize the root node of the optimized plan and return + * that. 
+ */ + else + return (PlanState *) ExecInitNode((Plan *) linitial(node->planList), + estate, eflags); +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index f1a24f5..3cd1a4e 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -93,6 +93,7 @@ _copyPlannedStmt(const PlannedStmt *from) COPY_NODE_FIELD(relationOids); COPY_NODE_FIELD(invalItems); COPY_SCALAR_FIELD(nParamExec); + COPY_SCALAR_FIELD(suitableFor); return newnode; } @@ -963,6 +964,16 @@ _copyLimit(const Limit *from) return newnode; } +static AlternativePlan * +_copyAlternativePlan(const AlternativePlan *from) +{ + AlternativePlan *newnode = makeNode(AlternativePlan); + + COPY_NODE_FIELD(planList); + + return newnode; +} + /* * _copyNestLoopParam */ @@ -4117,6 +4128,9 @@ copyObject(const void *from) case T_Limit: retval = _copyLimit(from); break; + case T_AlternativePlan: + retval = _copyAlternativePlan(from); + break; case T_NestLoopParam: retval = _copyNestLoopParam(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index dd1278b..9824b3d 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -255,6 +255,7 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node) WRITE_NODE_FIELD(relationOids); WRITE_NODE_FIELD(invalItems); WRITE_INT_FIELD(nParamExec); + WRITE_INT_FIELD(suitableFor); } /* @@ -1716,6 +1717,7 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node) WRITE_UINT_FIELD(lastPHId); WRITE_UINT_FIELD(lastRowMarkId); WRITE_BOOL_FIELD(transientPlan); + WRITE_INT_FIELD(suitableFor); } static void @@ -1801,6 +1803,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) /* we don't try to print fdwroutine or fdw_private */ WRITE_NODE_FIELD(baserestrictinfo); WRITE_NODE_FIELD(joininfo); + WRITE_INT_FIELD(removal_flags); WRITE_BOOL_FIELD(has_eclass_joins); } diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 58d78e6..69990a2 100644 
--- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -97,7 +97,8 @@ static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); -static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); +static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist, + int removal_flags); static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, pushdown_safety_info *safetyInfo); static bool recurse_pushdown_safe(Node *setOp, Query *topquery, @@ -122,7 +123,7 @@ static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); * single rel that represents the join of all base rels in the query. */ RelOptInfo * -make_one_rel(PlannerInfo *root, List *joinlist) +make_one_rel(PlannerInfo *root, List *joinlist, int removal_flags) { RelOptInfo *rel; Index rti; @@ -142,7 +143,8 @@ make_one_rel(PlannerInfo *root, List *joinlist) Assert(brel->relid == rti); /* sanity check on array */ /* ignore RTEs that are "other rels" */ - if (brel->reloptkind != RELOPT_BASEREL) + if (brel->reloptkind != RELOPT_BASEREL || + brel->removal_flags & removal_flags) continue; root->all_baserels = bms_add_member(root->all_baserels, brel->relid); @@ -157,12 +159,13 @@ make_one_rel(PlannerInfo *root, List *joinlist) /* * Generate access paths for the entire join tree. */ - rel = make_rel_from_joinlist(root, joinlist); + rel = make_rel_from_joinlist(root, joinlist, removal_flags); + /* * The result should join all and only the query's base rels. */ - Assert(bms_equal(rel->relids, root->all_baserels)); + Assert(bms_is_subset(root->all_baserels, rel->relids)); return rel; } @@ -1496,7 +1499,7 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * data structure. 
*/ static RelOptInfo * -make_rel_from_joinlist(PlannerInfo *root, List *joinlist) +make_rel_from_joinlist(PlannerInfo *root, List *joinlist, int removal_flags) { int levels_needed; List *initial_rels; @@ -1528,11 +1531,23 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist) int varno = ((RangeTblRef *) jlnode)->rtindex; thisrel = find_base_rel(root, varno); + + /* + * If this relation can be removed for these removal_flags, then + * we'll not bother including this in the list of relations to join + * to + */ + if ((thisrel->removal_flags & removal_flags)) + { + /* one less level needed too */ + levels_needed--; + continue; + } } else if (IsA(jlnode, List)) { /* Recurse to handle subproblem */ - thisrel = make_rel_from_joinlist(root, (List *) jlnode); + thisrel = make_rel_from_joinlist(root, (List *) jlnode, removal_flags); } else { diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index eb65c97..8ddc9db 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -49,8 +49,6 @@ static List *generate_join_implied_equalities_broken(PlannerInfo *root, Relids outer_relids, Relids nominal_inner_relids, RelOptInfo *inner_rel); -static Oid select_equality_operator(EquivalenceClass *ec, - Oid lefttype, Oid righttype); static RestrictInfo *create_join_clause(PlannerInfo *root, EquivalenceClass *ec, Oid opno, EquivalenceMember *leftem, @@ -1282,7 +1280,7 @@ generate_join_implied_equalities_broken(PlannerInfo *root, * * Returns InvalidOid if no operator can be found for this datatype combination */ -static Oid +Oid select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype) { ListCell *lc; diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 11d3933..e6bfe37 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -32,13 +32,21 @@ #include "utils/lsyscache.h" /* local 
functions */ -static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool innerjoin_is_removable(PlannerInfo *root, List *joinlist, + RangeTblRef *removalrtr, Relids ignoredrels); +static bool leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool relation_is_needed(PlannerInfo *root, Relids joinrelids, + RelOptInfo *rel, Relids ignoredrels); +static bool relation_has_foreign_key_for(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *referencedrel, List *referencing_vars, + List *index_vars, List *operator_list); +static bool expressions_match_foreign_key(ForeignKeyInfo *fk, List *fkvars, + List *indexvars, List *operators); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static Oid distinct_col_search(int colno, List *colnos, List *opids); - /* * remove_useless_joins * Check for relations that don't actually need to be joined at all, @@ -46,26 +54,104 @@ static Oid distinct_col_search(int colno, List *colnos, List *opids); * * We are passed the current joinlist and return the updated list. Other * data structures that have to be updated are accessible via "root". + * + * There are 2 methods here for removing joins. Joins such as LEFT JOINs + * which can be proved to be needless due to lack of use of any of the joining + * relation's columns and the existence of a unique index on a subset of the + * join clause, can simply be removed from the query plan at plan time. For + * certain other join types we make use of foreign keys to attempt to prove the + * join is needless, though, for these we're unable to be certain that the join + * is not required at plan time, as if the plan is executed when pending + * foreign key triggers have not yet been fired, then the foreign key is + * effectively violated until these triggers have fired. 
Removing a join in + * such a case could cause a query to produce incorrect results. + * + * Instead we handle this case by marking the RelOptInfo for the relation + * with a special flag which records that it's possible that joining + * to this relation may not be required. The executor may then + * choose to skip the join based on whether there are foreign key triggers + * pending or not. */ List * remove_useless_joins(PlannerInfo *root, List *joinlist) { ListCell *lc; + Relids removedrels = NULL; /* - * We are only interested in relations that are left-joined to, so we can - * scan the join_info_list to find them easily. + * Start by analyzing INNER JOINed relations in order to determine if any + * of the relations can be ignored. */ restart: + foreach(lc, joinlist) + { + RangeTblRef *rtr = (RangeTblRef *) lfirst(lc); + RelOptInfo *rel; + + if (!IsA(rtr, RangeTblRef)) + continue; + + rel = root->simple_rel_array[rtr->rtindex]; + + /* Don't try to remove this one again if we've already removed it */ + if ((rel->removal_flags & PLAN_SUITABILITY_FK_TRIGGER_EMPTY) != 0) + continue; + + /* skip if the join can't be removed */ + if (!innerjoin_is_removable(root, joinlist, rtr, removedrels)) + continue; + + /* + * Since we're not actually removing the join here, we need to maintain + * a list of relations that we've "removed" so when we're checking if + * other relations can be removed we'll know that if the to be removed + * relation is only referenced by a relation that we've already removed + * that it can be safely assumed that the relation is not referenced by + * any useful relation. + */ + removedrels = bms_add_member(removedrels, rtr->rtindex); + + /* + * Mark that this relation is only required when the foreign key trigger + * queue is non-empty. 
+ */ + rel->removal_flags |= PLAN_SUITABILITY_FK_TRIGGER_EMPTY; + + /* + * Globally mark this plan to say that there are some relations which + * are only required when the foreign key trigger queue is non-empty. + * The planner will later generate 2 plans, 1 which is suitable only + * when all of these bitmask conditions are met, and another which is + * an all purpose plan, which will be used if *any* of the bitmask's + * conditions are not met. + */ + root->glob->suitableFor |= PLAN_SUITABILITY_FK_TRIGGER_EMPTY; + + /* + * Restart the scan. This is necessary to ensure we find all removable + * joins independently of their ordering. (note that since we've added + * this relation to the removedrels, we may now realize that other + * relations can also be removed as they're only referenced by the one + * that we've just marked as possibly removable). + */ + goto restart; + } + + /* now process special joins. Currently only left joins are supported */ foreach(lc, root->join_info_list) { SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); int innerrelid; int nremoved; - /* Skip if not removable */ - if (!join_is_removable(root, sjinfo)) - continue; + if (sjinfo->jointype == JOIN_LEFT) + { + /* Skip if not removable */ + if (!leftjoin_is_removable(root, sjinfo)) + continue; + } + else + continue; /* we don't support this join type */ /* * Currently, join_is_removable can only succeed when the sjinfo's @@ -91,12 +177,11 @@ restart: root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); /* - * Restart the scan. This is necessary to ensure we find all - * removable joins independently of ordering of the join_info_list - * (note that removal of attr_needed bits may make a join appear - * removable that did not before). Also, since we just deleted the - * current list cell, we'd have to have some kluge to continue the - * list scan anyway. + * Restart the scan. This is necessary to ensure we find all removable + * joins independently of their ordering. 
(note that removal of + * attr_needed bits may make a join, inner or outer, appear removable + * that did not before). Also, since we just deleted the current list + * cell, we'd have to have some kluge to continue the list scan anyway. */ goto restart; } @@ -136,8 +221,226 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, } /* - * join_is_removable - * Check whether we need not perform this special join at all, because + * innerjoin_is_removable + * True if the join to removalrtr can be removed. + * + * In order to prove a relation which is inner joined is not required we must + * be sure that the join would emit exactly 1 row on the join condition. This + * differs from the logic which is used for proving LEFT JOINs can be removed, + * where it's possible to just check that a unique index exists on the relation + * being removed which has a set of columns that is a subset of the columns + * seen in the join condition. If no matching row is found then left join would + * not remove the non-matched row from the result set. This is not the case + * with INNER JOINs, so here we must use foreign keys as proof that the 1 row + * exists before we can allow any joins to be removed. + */ +static bool +innerjoin_is_removable(PlannerInfo *root, List *joinlist, + RangeTblRef *removalrtr, Relids ignoredrels) +{ + ListCell *lc; + RelOptInfo *removalrel; + + removalrel = find_base_rel(root, removalrtr->rtindex); + + /* + * As foreign keys may only reference base rels which have unique indexes, + * we needn't go any further if we're not dealing with a base rel, or if + * the base rel has no unique indexes. We'd also better abort if the + * rtekind is anything but a relation, as things like sub-queries may have + * grouping or distinct clauses that would cause us not to be able to use + * the foreign key to prove the existence of a row matching the join + * condition. 
We also abort if the rel has no eclass joins as such a rel + * could well be joined using some operator which is not an equality + * operator, or the rel may not even be inner joined at all. + * + * Here we actually only check if the rel has any indexes, ideally we'd be + * checking for unique indexes, but we could only determine that by looping + * over the indexlist, and this is likely too expensive a check to be worth + * it here. + */ + if (removalrel->reloptkind != RELOPT_BASEREL || + removalrel->rtekind != RTE_RELATION || + removalrel->has_eclass_joins == false || + removalrel->indexlist == NIL) + return false; + + /* + * Currently we disallow the removal if we find any baserestrictinfo items + * on the relation being removed. The reason for this is that these would + * filter out rows and make it so the foreign key cannot prove that we'll + * match exactly 1 row on the join condition. However, this check is + * currently probably a bit overly strict as it should be possible to just + * check and ensure that each Var seen in the baserestrictinfo is also + * present in an eclass and if so, just translate and move the whole + * baserestrictinfo over to the relation which has the foreign key to prove + * that this join is not needed. e.g: + * SELECT a.* FROM a INNER JOIN b ON a.b_id = b.id WHERE b.id = 1; + * could become: SELECT a.* FROM a WHERE a.b_id = 1; + */ + if (removalrel->baserestrictinfo != NIL) + return false; + + /* + * Currently only eclass joins are supported, so if there are any non + * eclass join quals then we'll report the join is non-removable. + */ + if (removalrel->joininfo != NIL) + return false; + + /* + * Now we'll search through each relation in the joinlist to see if we can + * find a relation which has a foreign key which references removalrel on + * the join condition. 
If we find a rel with a foreign key which matches + * the join condition exactly, then we can be sure that exactly 1 row will + * be matched on the join, if we also see that no Vars from the relation + * are needed, then we can report the join as removable. + */ + foreach (lc, joinlist) + { + RangeTblRef *rtr = (RangeTblRef *) lfirst(lc); + RelOptInfo *rel; + ListCell *lc2; + List *referencing_vars; + List *index_vars; + List *operator_list; + Relids joinrelids; + + /* we can't remove ourself, or anything other than RangeTblRefs */ + if (rtr == removalrtr || !IsA(rtr, RangeTblRef)) + continue; + + rel = find_base_rel(root, rtr->rtindex); + + /* + * The only relation type that can help us is a base rel with at least + * one foreign key defined, if there's no eclass joins then this rel + * is not going to help us prove the removalrel is not needed. + */ + if (rel->reloptkind != RELOPT_BASEREL || + rel->rtekind != RTE_RELATION || + rel->has_eclass_joins == false || + rel->fklist == NIL) + continue; + + /* + * Both rels have eclass joins, but do they have eclass joins to each + * other? Skip this rel if it does not. 
+ */ + if (!have_relevant_eclass_joinclause(root, rel, removalrel)) + continue; + + joinrelids = bms_union(rel->relids, removalrel->relids); + + /* if any of the Vars from the relation are needed then abort */ + if (relation_is_needed(root, joinrelids, removalrel, ignoredrels)) + return false; + + referencing_vars = NIL; + index_vars = NIL; + operator_list = NIL; + + /* now populate the lists with the join condition Vars */ + foreach(lc2, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc2); + + if (list_length(ec->ec_members) <= 1) + continue; + + if (bms_overlap(removalrel->relids, ec->ec_relids) && + bms_overlap(rel->relids, ec->ec_relids)) + { + ListCell *lc3; + Var *refvar = NULL; + Var *idxvar = NULL; + + /* + * Look at each member of the eclass and try to find a Var from + * each side of the join that we can append to the list of + * columns that should be checked against each foreign key. + * + * The following logic does not allow for join removals to take + * place for foreign keys that have duplicate columns on the + * referencing side of the foreign key, such as: + * (a,a) references (x,y) + * The use case for such a foreign key is likely small enough + * that we needn't bother making this code any more complex to + * solve. If we find more than 1 Var from any of the rels then + * we'll bail out. + */ + foreach (lc3, ec->ec_members) + { + EquivalenceMember *ecm = (EquivalenceMember *) lfirst(lc3); + + Var *var = (Var *) ecm->em_expr; + + if (!IsA(var, Var)) + continue; /* Ignore non-Var members, e.g. Consts */ + + if (var->varno == rel->relid) + { + if (refvar != NULL) + return false; + refvar = var; + } + + else if (var->varno == removalrel->relid) + { + if (idxvar != NULL) + return false; + idxvar = var; + } + } + + if (refvar != NULL && idxvar != NULL) + { + Oid opno; + Oid reloid = root->simple_rte_array[refvar->varno]->relid; + + /* + * We cannot allow the removal to take place if any of the + * columns in the join condition are nullable. 
This is due + * to the fact that the join condition would end up + * filtering out NULL values for us, but if we remove the + * join, then there's nothing to stop the NULLs getting + * into the resultset. + */ + if (!get_attnotnull(reloid, refvar->varattno)) + return false; + + /* grab the correct equality operator for these two vars */ + opno = select_equality_operator(ec, refvar->vartype, idxvar->vartype); + + if (!OidIsValid(opno)) + return false; + + referencing_vars = lappend(referencing_vars, refvar); + index_vars = lappend(index_vars, idxvar); + operator_list = lappend_oid(operator_list, opno); + } + } + } + + /* + * Did we find any conditions? It's ok that we just check 1 of the 3 + * lists to see if it's empty here as these will always contain the + * same number of items + */ + if (referencing_vars != NIL) + { + if (relation_has_foreign_key_for(root, rel, removalrel, + referencing_vars, index_vars, operator_list)) + return true; /* removalrel can be removed */ + } + } + + return false; /* can't remove join */ +} + +/* + * leftjoin_is_removable + * Check whether we need not perform this left join at all, because * it will just duplicate its left input. * * This is true for a left join for which the join condition cannot match @@ -147,7 +450,7 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, * above the join. */ static bool -join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) +leftjoin_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; @@ -155,14 +458,14 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) Relids joinrelids; List *clause_list = NIL; ListCell *l; - int attroff; + + Assert(sjinfo->jointype == JOIN_LEFT); /* - * Must be a non-delaying left join to a single baserel, else we aren't + * Must be a non-delaying join to a single baserel, else we aren't * going to be able to do anything with it. 
*/ - if (sjinfo->jointype != JOIN_LEFT || - sjinfo->delay_upper_joins) + if (sjinfo->delay_upper_joins) return false; if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) @@ -206,52 +509,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); - /* - * We can't remove the join if any inner-rel attributes are used above the - * join. - * - * Note that this test only detects use of inner-rel attributes in higher - * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. - * - * As a micro-optimization, it seems better to start with max_attr and - * count down rather than starting with min_attr and counting up, on the - * theory that the system attributes are somewhat less likely to be wanted - * and should be tested last. - */ - for (attroff = innerrel->max_attr - innerrel->min_attr; - attroff >= 0; - attroff--) - { - if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids)) - return false; - } - - /* - * Similarly check that the inner rel isn't needed by any PlaceHolderVars - * that will be used above the join. We only need to fail if such a PHV - * actually references some inner-rel attributes; but the correct check - * for that is relatively expensive, so we first check against ph_eval_at, - * which must mention the inner rel if the PHV uses any inner-rel attrs as - * non-lateral references. Note that if the PHV's syntactic scope is just - * the inner rel, we can't drop the rel even if the PHV is variable-free. 
- */ - foreach(l, root->placeholder_list) - { - PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); - - if (bms_is_subset(phinfo->ph_needed, joinrelids)) - continue; /* PHV is not used above the join */ - if (bms_overlap(phinfo->ph_lateral, innerrel->relids)) - return false; /* it references innerrel laterally */ - if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids)) - continue; /* it definitely doesn't reference innerrel */ - if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids)) - return false; /* there isn't any other place to eval PHV */ - if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr), - innerrel->relids)) - return false; /* it does reference innerrel */ - } + /* if the relation is referenced in the query then it cannot be removed */ + if (relation_is_needed(root, joinrelids, innerrel, NULL)) + return false; /* * Search for mergejoinable clauses that constrain the inner rel against @@ -368,6 +628,218 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; } +/* + * relation_is_needed + * True if any of the Vars from this relation are required in the query + */ +static inline bool +relation_is_needed(PlannerInfo *root, Relids joinrelids, RelOptInfo *rel, Relids ignoredrels) +{ + int attroff; + ListCell *l; + + /* + * rel is referenced if any of its attributes are used above the join. + * + * Note that this test only detects use of rel's attributes in higher + * join conditions and the target list. There might be such attributes in + * pushed-down conditions at this join, too. That case is not checked here. + * + * As a micro-optimization, it seems better to start with max_attr and + * count down rather than starting with min_attr and counting up, on the + * theory that the system attributes are somewhat less likely to be wanted + * and should be tested last. 
+ */ + for (attroff = rel->max_attr - rel->min_attr; + attroff >= 0; + attroff--) + { + if (!bms_is_subset(bms_difference(rel->attr_needed[attroff], ignoredrels), joinrelids)) + return true; + } + + /* + * Similarly check that rel isn't needed by any PlaceHolderVars that will + * be used above the join. We only need to return true if such a PHV actually + * references some of rel's attributes; but the correct check for that is + * relatively expensive, so we first check against ph_eval_at, which must + * mention rel if the PHV uses any of rel's attrs as non-lateral + * references. Note that if the PHV's syntactic scope is just rel, we + * must return true even if the PHV is variable-free. + */ + foreach(l, root->placeholder_list) + { + PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); + + if (bms_is_subset(phinfo->ph_needed, joinrelids)) + continue; /* PHV is not used above the join */ + if (bms_overlap(phinfo->ph_lateral, rel->relids)) + return true; /* it references rel laterally */ + if (!bms_overlap(phinfo->ph_eval_at, rel->relids)) + continue; /* it definitely doesn't reference rel */ + if (bms_is_subset(phinfo->ph_eval_at, rel->relids)) + return true; /* there isn't any other place to eval PHV */ + if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr), + rel->relids)) + return true; /* it does reference rel */ + } + + return false; /* it does not reference rel */ +} + +/* + * relation_has_foreign_key_for + * Checks if rel has a foreign key which references referencedrel with the + * given list of expressions. + * + * For the match to succeed: + * referencing_vars must match the columns defined in the foreign key. + * index_vars must match the columns defined in the index for the foreign key. 
+ */ +static bool +relation_has_foreign_key_for(PlannerInfo *root, RelOptInfo *rel, + RelOptInfo *referencedrel, List *referencing_vars, + List *index_vars, List *operator_list) +{ + ListCell *lc; + Oid refreloid; + + /* + * Look up the Oid of the referenced relation. We only want to look at + * foreign keys on the referencing relation which reference this relation. + */ + refreloid = root->simple_rte_array[referencedrel->relid]->relid; + + Assert(list_length(referencing_vars) > 0); + Assert(list_length(referencing_vars) == list_length(index_vars)); + Assert(list_length(referencing_vars) == list_length(operator_list)); + + /* + * Search through each foreign key on the referencing relation and try + * to find one which references the relation in the join condition. If we + * find one then we'll send the join conditions off to + * expressions_match_foreign_key() to see if they match the foreign key. + */ + foreach(lc, rel->fklist) + { + ForeignKeyInfo *fk = (ForeignKeyInfo *) lfirst(lc); + + if (fk->confrelid == refreloid) + { + if (expressions_match_foreign_key(fk, referencing_vars, + index_vars, operator_list)) + return true; + } + } + + return false; +} + +/* + * expressions_match_foreign_key + * True if the given fkvars, indexvars and operators will match + * exactly 1 record in the referenced relation of the foreign key. + * + * Note: This function expects fkvars and indexvars to only contain Var types. + * Expression indexes are not supported by foreign keys. + */ +static bool +expressions_match_foreign_key(ForeignKeyInfo *fk, List *fkvars, + List *indexvars, List *operators) +{ + ListCell *lc; + ListCell *lc2; + ListCell *lc3; + Bitmapset *allitems; + Bitmapset *matcheditems; + int lstidx; + int col; + + Assert(list_length(fkvars) == list_length(indexvars)); + Assert(list_length(fkvars) == list_length(operators)); + + /* + * Fast path out if there's not enough conditions to match each column in + * the foreign key. 
Note that we cannot check that the number of + * expressions is equal here since it would cause any expressions which + * are duplicated not to match. + */ + if (list_length(fkvars) < fk->conncols) + return false; + + /* + * We need to ensure that each foreign key column can be matched to a list + * item, and we need to ensure that each list item can be matched to a + * foreign key column. We do this by looping over each foreign key column + * and checking that we can find an item in the list which matches the + * current column, however this method does not allow us to ensure that no + * additional items exist in the list. We could solve that by performing + * another loop over each list item and check that it matches a foreign key + * column, but that's a bit wasteful. Instead we'll use 2 bitmapsets, one + * to store the 0 based index of each list item, and with the other we'll + * store each list index that we've managed to match. After we're done + * matching we'll just make sure that both bitmapsets are equal. + */ + allitems = NULL; + matcheditems = NULL; + + /* + * Build a bitmapset which contains each 0 based list index. It seems more + * efficient to do this in reverse so that we allocate enough memory for + * the bitmapset on first loop rather than reallocating each time we find + * we need a bit more space. + */ + for (lstidx = list_length(fkvars) - 1; lstidx >= 0; lstidx--) + allitems = bms_add_member(allitems, lstidx); + + for (col = 0; col < fk->conncols; col++) + { + bool matched = false; + + lstidx = 0; + + forthree(lc, fkvars, lc2, indexvars, lc3, operators) + { + Var *expr = (Var *) lfirst(lc); + Var *idxexpr = (Var *) lfirst(lc2); + Oid opr = lfirst_oid(lc3); + + Assert(IsA(expr, Var)); + Assert(IsA(idxexpr, Var)); + + /* Does this join qual match up to the current fkey column? 
*/ + if (fk->conkey[col] == expr->varattno && + fk->confkey[col] == idxexpr->varattno && + equality_ops_are_compatible(opr, fk->conpfeqop[col])) + { + matched = true; + + /* mark this list item as matched */ + matcheditems = bms_add_member(matcheditems, lstidx); + + /* + * Don't break here as there may be duplicate expressions + * that we also need to match against. + */ + } + lstidx++; + } + + /* punt if there's no match. */ + if (!matched) + return false; + } + + /* + * Ensure that we managed to match every item in the list to a foreign key + * column. + */ + if (!bms_equal(allitems, matcheditems)) + return false; + + return true; /* matched */ +} + /* * Remove the target relid from the planner's data structures, having diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 655be81..7540a1d 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4676,6 +4676,15 @@ make_lockrows(Plan *lefttree, List *rowMarks, int epqParam) return node; } +AlternativePlan * +make_alternativeplan(List *planlist) +{ + AlternativePlan *node = makeNode(AlternativePlan); + node->planList = planlist; + + return node; +} + /* * Note: offset_est and count_est are passed in to save having to repeat * work already done to estimate the values of the limitOffset and limitCount diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index b90c2ef..5cd2ab5 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -409,6 +409,7 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, Path *sorted_path; Cost path_cost; double path_fraction; + List *final_rel_list; /*---------- * Generate modified query of the form @@ -478,8 +479,12 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, subroot->tuple_fraction = 1.0; subroot->limit_tuples = 1.0; - final_rel = query_planner(subroot, parse->targetList, - minmax_qp_callback, 
NULL); + final_rel_list = query_planner(subroot, parse->targetList, + minmax_qp_callback, NULL, true); + + Assert(list_length(final_rel_list) == 1); + + final_rel = (RelOptInfo *) linitial(final_rel_list); /* * Get the best presorted path, that being the one that's cheapest for diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 848df97..6cf5915 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -34,7 +34,7 @@ * * Since query_planner does not handle the toplevel processing (grouping, * sorting, etc) it cannot select the best path by itself. Instead, it - * returns the RelOptInfo for the top level of joining, and the caller + * returns a list of RelOptInfo for the top level of joining, and the caller * (grouping_planner) can choose one of the surviving paths for the rel. * Normally it would choose either the rel's cheapest path, or the cheapest * path for the desired sort order. @@ -50,14 +50,23 @@ * plan. This value is *not* available at call time, but is computed by * qp_callback once we have completed merging the query's equivalence classes. * (We cannot construct canonical pathkeys until that's done.) + * + * Note: during the planning process, the planner may discover optimization + * opportunities that may or may not be possible to utilize during query + * execution. In this case the planner will generate 2 plans: 1 for the fully + * optimized version, and 1 all purpose plan which will only be used if + * conditions are not found to be favourable for the optimized version of the + * plan during executor startup.
*/ -RelOptInfo * +List * query_planner(PlannerInfo *root, List *tlist, - query_pathkeys_callback qp_callback, void *qp_extra) + query_pathkeys_callback qp_callback, void *qp_extra, + bool all_purpose_plan_only) { Query *parse = root->parse; List *joinlist; RelOptInfo *final_rel; + List *final_rel_list = NIL; Index rti; double total_pages; @@ -84,7 +93,7 @@ query_planner(PlannerInfo *root, List *tlist, root->canon_pathkeys = NIL; (*qp_callback) (root, qp_extra); - return final_rel; + return lappend(NIL, final_rel); } /* @@ -231,14 +240,37 @@ query_planner(PlannerInfo *root, List *tlist, root->total_table_pages = total_pages; /* - * Ready to do the primary planning. + * If the planner found any optimizations that caused the plan not to be + * suitable in all situations, then we must create 2 plans. One will be + * the fully optimized version and the other will be a general purpose + * plan that will only be used by the executor if any of the required + * conditions for the optimization were not met. Note that we'll only + * generate an optimized plan if the caller didn't specifically request an + * all purpose plan.
*/ - final_rel = make_one_rel(root, joinlist); + if (root->glob->suitableFor != PLAN_SUITABILITY_ALL_PURPOSE + && all_purpose_plan_only == false) + { + /* Generate fully optimized plan, with all removable joins removed */ + final_rel = make_one_rel(root, joinlist, root->glob->suitableFor); + + /* Check that we got at least one usable path */ + if (!final_rel || !final_rel->cheapest_total_path || + final_rel->cheapest_total_path->param_info != NULL) + elog(ERROR, "failed to construct the join relation"); + + final_rel_list = lappend(final_rel_list, final_rel); + } + + /* generate an all purpose plan */ + final_rel = make_one_rel(root, joinlist, PLAN_SUITABILITY_ALL_PURPOSE); /* Check that we got at least one usable path */ if (!final_rel || !final_rel->cheapest_total_path || final_rel->cheapest_total_path->param_info != NULL) elog(ERROR, "failed to construct the join relation"); - return final_rel; + final_rel_list = lappend(final_rel_list, final_rel); + + return final_rel_list; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9cbbcfb..7ca31e3 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -178,6 +178,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) glob->lastRowMarkId = 0; glob->transientPlan = false; glob->hasRowSecurity = false; + glob->suitableFor = PLAN_SUITABILITY_ALL_PURPOSE; /* Determine what fraction of the plan is likely to be scanned */ if (cursorOptions & CURSOR_OPT_FAST_PLAN) @@ -256,6 +257,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) result->invalItems = glob->invalItems; result->nParamExec = glob->nParamExec; result->hasRowSecurity = glob->hasRowSecurity; + result->suitableFor = glob->suitableFor; return result; } @@ -1087,10 +1089,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) int64 count_est = 0; double limit_tuples = -1.0; Plan *result_plan; + List *result_plan_list 
= NIL; List *current_pathkeys; double dNumGroups = 0; bool use_hashed_distinct = false; bool tested_hashed_distinct = false; + ListCell *lc; /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ if (parse->limitCount || parse->limitOffset) @@ -1169,6 +1173,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist); + + result_plan_list = list_make1(result_plan); } else { @@ -1178,6 +1184,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) bool need_tlist_eval = true; standard_qp_extra qp_extra; RelOptInfo *final_rel; + List *final_rel_list; Path *cheapest_path; Path *sorted_path; Path *best_path; @@ -1288,710 +1295,723 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * standard_qp_callback) pathkey representations of the query's sort * clause, distinct clause, etc. */ - final_rel = query_planner(root, sub_tlist, - standard_qp_callback, &qp_extra); - - /* - * Extract rowcount and width estimates for use below. - */ - path_rows = final_rel->rows; - path_width = final_rel->width; + final_rel_list = query_planner(root, sub_tlist, + standard_qp_callback, &qp_extra, false); - /* - * If there's grouping going on, estimate the number of result groups. - * We couldn't do this any earlier because it depends on relation size - * estimates that are created within query_planner(). - * - * Then convert tuple_fraction to fractional form if it is absolute, - * and if grouping or aggregation is involved, adjust tuple_fraction - * to describe the fraction of the underlying un-aggregated tuples - * that will be fetched. 
- */ - dNumGroups = 1; /* in case not grouping */ - - if (parse->groupClause) + foreach(lc, final_rel_list) { - List *groupExprs; - - groupExprs = get_sortgrouplist_exprs(parse->groupClause, - parse->targetList); - dNumGroups = estimate_num_groups(root, groupExprs, path_rows); - + final_rel = (RelOptInfo *) lfirst(lc); /* - * In GROUP BY mode, an absolute LIMIT is relative to the number - * of groups not the number of tuples. If the caller gave us a - * fraction, keep it as-is. (In both cases, we are effectively - * assuming that all the groups are about the same size.) + * Extract rowcount and width estimates for use below. */ - if (tuple_fraction >= 1.0) - tuple_fraction /= dNumGroups; + path_rows = final_rel->rows; + path_width = final_rel->width; /* - * If both GROUP BY and ORDER BY are specified, we will need two - * levels of sort --- and, therefore, certainly need to read all - * the tuples --- unless ORDER BY is a subset of GROUP BY. - * Likewise if we have both DISTINCT and GROUP BY, or if we have a - * window specification not compatible with the GROUP BY. - */ - if (!pathkeys_contained_in(root->sort_pathkeys, - root->group_pathkeys) || - !pathkeys_contained_in(root->distinct_pathkeys, - root->group_pathkeys) || - !pathkeys_contained_in(root->window_pathkeys, - root->group_pathkeys)) - tuple_fraction = 0.0; - } - else if (parse->hasAggs || root->hasHavingQual) - { - /* - * Ungrouped aggregate will certainly want to read all the tuples, - * and it will deliver a single result row (so leave dNumGroups - * set to 1). - */ - tuple_fraction = 0.0; - } - else if (parse->distinctClause) - { - /* - * Since there was no grouping or aggregation, it's reasonable to - * assume the UNIQUE filter has effects comparable to GROUP BY. - * (If DISTINCT is used with grouping, we ignore its effects for - * rowcount estimation purposes; this amounts to assuming the - * grouped rows are distinct already.) 
- */ - List *distinctExprs; - - distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, - parse->targetList); - dNumGroups = estimate_num_groups(root, distinctExprs, path_rows); - - /* - * Adjust tuple_fraction the same way as for GROUP BY, too. - */ - if (tuple_fraction >= 1.0) - tuple_fraction /= dNumGroups; - } - else - { - /* - * Plain non-grouped, non-aggregated query: an absolute tuple - * fraction can be divided by the number of tuples. + * If there's grouping going on, estimate the number of result groups. + * We couldn't do this any earlier because it depends on relation size + * estimates that are created within query_planner(). + * + * Then convert tuple_fraction to fractional form if it is absolute, + * and if grouping or aggregation is involved, adjust tuple_fraction + * to describe the fraction of the underlying un-aggregated tuples + * that will be fetched. */ - if (tuple_fraction >= 1.0) - tuple_fraction /= path_rows; - } + dNumGroups = 1; /* in case not grouping */ - /* - * Pick out the cheapest-total path as well as the cheapest presorted - * path for the requested pathkeys (if there is one). We should take - * the tuple fraction into account when selecting the cheapest - * presorted path, but not when selecting the cheapest-total path, - * since if we have to sort then we'll have to fetch all the tuples. - * (But there's a special case: if query_pathkeys is NIL, meaning - * order doesn't matter, then the "cheapest presorted" path will be - * the cheapest overall for the tuple fraction.) 
- */ - cheapest_path = final_rel->cheapest_total_path; - - sorted_path = - get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, - root->query_pathkeys, - NULL, - tuple_fraction); + if (parse->groupClause) + { + List *groupExprs; - /* Don't consider same path in both guises; just wastes effort */ - if (sorted_path == cheapest_path) - sorted_path = NULL; + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + dNumGroups = estimate_num_groups(root, groupExprs, path_rows); - /* - * Forget about the presorted path if it would be cheaper to sort the - * cheapest-total path. Here we need consider only the behavior at - * the tuple_fraction point. Also, limit_tuples is only relevant if - * not grouping/aggregating, so use root->limit_tuples in the - * cost_sort call. - */ - if (sorted_path) - { - Path sort_path; /* dummy for result of cost_sort */ + /* + * In GROUP BY mode, an absolute LIMIT is relative to the number + * of groups not the number of tuples. If the caller gave us a + * fraction, keep it as-is. (In both cases, we are effectively + * assuming that all the groups are about the same size.) + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= dNumGroups; - if (root->query_pathkeys == NIL || - pathkeys_contained_in(root->query_pathkeys, - cheapest_path->pathkeys)) - { - /* No sort needed for cheapest path */ - sort_path.startup_cost = cheapest_path->startup_cost; - sort_path.total_cost = cheapest_path->total_cost; + /* + * If both GROUP BY and ORDER BY are specified, we will need two + * levels of sort --- and, therefore, certainly need to read all + * the tuples --- unless ORDER BY is a subset of GROUP BY. + * Likewise if we have both DISTINCT and GROUP BY, or if we have a + * window specification not compatible with the GROUP BY. 
+ */ + if (!pathkeys_contained_in(root->sort_pathkeys, + root->group_pathkeys) || + !pathkeys_contained_in(root->distinct_pathkeys, + root->group_pathkeys) || + !pathkeys_contained_in(root->window_pathkeys, + root->group_pathkeys)) + tuple_fraction = 0.0; } - else + else if (parse->hasAggs || root->hasHavingQual) { - /* Figure cost for sorting */ - cost_sort(&sort_path, root, root->query_pathkeys, - cheapest_path->total_cost, - path_rows, path_width, - 0.0, work_mem, root->limit_tuples); + /* + * Ungrouped aggregate will certainly want to read all the tuples, + * and it will deliver a single result row (so leave dNumGroups + * set to 1). + */ + tuple_fraction = 0.0; } - - if (compare_fractional_path_costs(sorted_path, &sort_path, - tuple_fraction) > 0) + else if (parse->distinctClause) { - /* Presorted path is a loser */ - sorted_path = NULL; - } - } + /* + * Since there was no grouping or aggregation, it's reasonable to + * assume the UNIQUE filter has effects comparable to GROUP BY. + * (If DISTINCT is used with grouping, we ignore its effects for + * rowcount estimation purposes; this amounts to assuming the + * grouped rows are distinct already.) + */ + List *distinctExprs; - /* - * Consider whether we want to use hashing instead of sorting. - */ - if (parse->groupClause) - { - /* - * If grouping, decide whether to use sorted or hashed grouping. - */ - use_hashed_grouping = - choose_hashed_grouping(root, - tuple_fraction, limit_tuples, - path_rows, path_width, - cheapest_path, sorted_path, - dNumGroups, &agg_costs); - /* Also convert # groups to long int --- but 'ware overflow! */ - numGroups = (long) Min(dNumGroups, (double) LONG_MAX); - } - else if (parse->distinctClause && sorted_path && - !root->hasHavingQual && !parse->hasAggs && !activeWindows) - { - /* - * We'll reach the DISTINCT stage without any intermediate - * processing, so figure out whether we will want to hash or not - * so we can choose whether to use cheapest or sorted path. 
- */ - use_hashed_distinct = - choose_hashed_distinct(root, - tuple_fraction, limit_tuples, - path_rows, path_width, - cheapest_path->startup_cost, - cheapest_path->total_cost, - sorted_path->startup_cost, - sorted_path->total_cost, - sorted_path->pathkeys, - dNumGroups); - tested_hashed_distinct = true; - } + distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + dNumGroups = estimate_num_groups(root, distinctExprs, path_rows); - /* - * Select the best path. If we are doing hashed grouping, we will - * always read all the input tuples, so use the cheapest-total path. - * Otherwise, the comparison above is correct. - */ - if (use_hashed_grouping || use_hashed_distinct || !sorted_path) - best_path = cheapest_path; - else - best_path = sorted_path; + /* + * Adjust tuple_fraction the same way as for GROUP BY, too. + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= dNumGroups; + } + else + { + /* + * Plain non-grouped, non-aggregated query: an absolute tuple + * fraction can be divided by the number of tuples. + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= path_rows; + } - /* - * Check to see if it's possible to optimize MIN/MAX aggregates. If - * so, we will forget all the work we did so far to choose a "regular" - * path ... but we had to do it anyway to be able to tell which way is - * cheaper. - */ - result_plan = optimize_minmax_aggregates(root, - tlist, - &agg_costs, - best_path); - if (result_plan != NULL) - { - /* - * optimize_minmax_aggregates generated the full plan, with the - * right tlist, and it has no sort order. - */ - current_pathkeys = NIL; - } - else - { /* - * Normal case --- create a plan according to query_planner's - * results. + * Pick out the cheapest-total path as well as the cheapest presorted + * path for the requested pathkeys (if there is one). 
We should take + * the tuple fraction into account when selecting the cheapest + * presorted path, but not when selecting the cheapest-total path, + * since if we have to sort then we'll have to fetch all the tuples. + * (But there's a special case: if query_pathkeys is NIL, meaning + * order doesn't matter, then the "cheapest presorted" path will be + * the cheapest overall for the tuple fraction.) */ - bool need_sort_for_grouping = false; + cheapest_path = final_rel->cheapest_total_path; - result_plan = create_plan(root, best_path); - current_pathkeys = best_path->pathkeys; + sorted_path = + get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, + root->query_pathkeys, + NULL, + tuple_fraction); - /* Detect if we'll need an explicit sort for grouping */ - if (parse->groupClause && !use_hashed_grouping && - !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) - { - need_sort_for_grouping = true; - - /* - * Always override create_plan's tlist, so that we don't sort - * useless data from a "physical" tlist. - */ - need_tlist_eval = true; - } + /* Don't consider same path in both guises; just wastes effort */ + if (sorted_path == cheapest_path) + sorted_path = NULL; /* - * create_plan returns a plan with just a "flat" tlist of required - * Vars. Usually we need to insert the sub_tlist as the tlist of - * the top plan node. However, we can skip that if we determined - * that whatever create_plan chose to return will be good enough. + * Forget about the presorted path if it would be cheaper to sort the + * cheapest-total path. Here we need consider only the behavior at + * the tuple_fraction point. Also, limit_tuples is only relevant if + * not grouping/aggregating, so use root->limit_tuples in the + * cost_sort call. 
*/ - if (need_tlist_eval) + if (sorted_path) { - /* - * If the top-level plan node is one that cannot do expression - * evaluation and its existing target list isn't already what - * we need, we must insert a Result node to project the - * desired tlist. - */ - if (!is_projection_capable_plan(result_plan) && - !tlist_same_exprs(sub_tlist, result_plan->targetlist)) + Path sort_path; /* dummy for result of cost_sort */ + + if (root->query_pathkeys == NIL || + pathkeys_contained_in(root->query_pathkeys, + cheapest_path->pathkeys)) { - result_plan = (Plan *) make_result(root, - sub_tlist, - NULL, - result_plan); + /* No sort needed for cheapest path */ + sort_path.startup_cost = cheapest_path->startup_cost; + sort_path.total_cost = cheapest_path->total_cost; } else { - /* - * Otherwise, just replace the subplan's flat tlist with - * the desired tlist. - */ - result_plan->targetlist = sub_tlist; + /* Figure cost for sorting */ + cost_sort(&sort_path, root, root->query_pathkeys, + cheapest_path->total_cost, + path_rows, path_width, + 0.0, work_mem, root->limit_tuples); } + if (compare_fractional_path_costs(sorted_path, &sort_path, + tuple_fraction) > 0) + { + /* Presorted path is a loser */ + sorted_path = NULL; + } + } + + /* + * Consider whether we want to use hashing instead of sorting. + */ + if (parse->groupClause) + { /* - * Also, account for the cost of evaluation of the sub_tlist. - * See comments for add_tlist_costs_to_plan() for more info. + * If grouping, decide whether to use sorted or hashed grouping. */ - add_tlist_costs_to_plan(root, result_plan, sub_tlist); + use_hashed_grouping = + choose_hashed_grouping(root, + tuple_fraction, limit_tuples, + path_rows, path_width, + cheapest_path, sorted_path, + dNumGroups, &agg_costs); + /* Also convert # groups to long int --- but 'ware overflow! 
*/ + numGroups = (long) Min(dNumGroups, (double) LONG_MAX); } - else + else if (parse->distinctClause && sorted_path && + !root->hasHavingQual && !parse->hasAggs && !activeWindows) { /* - * Since we're using create_plan's tlist and not the one - * make_subplanTargetList calculated, we have to refigure any - * grouping-column indexes make_subplanTargetList computed. + * We'll reach the DISTINCT stage without any intermediate + * processing, so figure out whether we will want to hash or not + * so we can choose whether to use cheapest or sorted path. */ - locate_grouping_columns(root, tlist, result_plan->targetlist, - groupColIdx); + use_hashed_distinct = + choose_hashed_distinct(root, + tuple_fraction, limit_tuples, + path_rows, path_width, + cheapest_path->startup_cost, + cheapest_path->total_cost, + sorted_path->startup_cost, + sorted_path->total_cost, + sorted_path->pathkeys, + dNumGroups); + tested_hashed_distinct = true; } /* - * Insert AGG or GROUP node if needed, plus an explicit sort step - * if necessary. - * - * HAVING clause, if any, becomes qual of the Agg or Group node. + * Select the best path. If we are doing hashed grouping, we will + * always read all the input tuples, so use the cheapest-total path. + * Otherwise, the comparison above is correct. */ - if (use_hashed_grouping) + if (use_hashed_grouping || use_hashed_distinct || !sorted_path) + best_path = cheapest_path; + else + best_path = sorted_path; + + /* + * Check to see if it's possible to optimize MIN/MAX aggregates. If + * so, we will forget all the work we did so far to choose a "regular" + * path ... but we had to do it anyway to be able to tell which way is + * cheaper. 
+ */ + result_plan = optimize_minmax_aggregates(root, + tlist, + &agg_costs, + best_path); + if (result_plan != NULL) { - /* Hashed aggregate plan --- no sort needed */ - result_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - AGG_HASHED, - &agg_costs, - numGroupCols, - groupColIdx, - extract_grouping_ops(parse->groupClause), - numGroups, - result_plan); - /* Hashed aggregation produces randomly-ordered results */ + /* + * optimize_minmax_aggregates generated the full plan, with the + * right tlist, and it has no sort order. + */ current_pathkeys = NIL; } - else if (parse->hasAggs) + else { - /* Plain aggregate plan --- sort if needed */ - AggStrategy aggstrategy; + /* + * Normal case --- create a plan according to query_planner's + * results. + */ + bool need_sort_for_grouping = false; + + result_plan = create_plan(root, best_path); + current_pathkeys = best_path->pathkeys; - if (parse->groupClause) + /* Detect if we'll need an explicit sort for grouping */ + if (parse->groupClause && !use_hashed_grouping && + !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { - if (need_sort_for_grouping) + need_sort_for_grouping = true; + + /* + * Always override create_plan's tlist, so that we don't sort + * useless data from a "physical" tlist. + */ + need_tlist_eval = true; + } + + /* + * create_plan returns a plan with just a "flat" tlist of required + * Vars. Usually we need to insert the sub_tlist as the tlist of + * the top plan node. However, we can skip that if we determined + * that whatever create_plan chose to return will be good enough. + */ + if (need_tlist_eval) + { + /* + * If the top-level plan node is one that cannot do expression + * evaluation and its existing target list isn't already what + * we need, we must insert a Result node to project the + * desired tlist. 
+ */ + if (!is_projection_capable_plan(result_plan) && + !tlist_same_exprs(sub_tlist, result_plan->targetlist)) { - result_plan = (Plan *) - make_sort_from_groupcols(root, - parse->groupClause, - groupColIdx, - result_plan); - current_pathkeys = root->group_pathkeys; + result_plan = (Plan *) make_result(root, + sub_tlist, + NULL, + result_plan); + } + else + { + /* + * Otherwise, just replace the subplan's flat tlist with + * the desired tlist. + */ + result_plan->targetlist = sub_tlist; } - aggstrategy = AGG_SORTED; /* - * The AGG node will not change the sort ordering of its - * groups, so current_pathkeys describes the result too. + * Also, account for the cost of evaluation of the sub_tlist. + * See comments for add_tlist_costs_to_plan() for more info. */ + add_tlist_costs_to_plan(root, result_plan, sub_tlist); } else { - aggstrategy = AGG_PLAIN; - /* Result will be only one row anyway; no sort order */ - current_pathkeys = NIL; + /* + * Since we're using create_plan's tlist and not the one + * make_subplanTargetList calculated, we have to refigure any + * grouping-column indexes make_subplanTargetList computed. + */ + locate_grouping_columns(root, tlist, result_plan->targetlist, + groupColIdx); } - result_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - aggstrategy, - &agg_costs, - numGroupCols, - groupColIdx, - extract_grouping_ops(parse->groupClause), - numGroups, - result_plan); - } - else if (parse->groupClause) - { /* - * GROUP BY without aggregation, so insert a group node (plus - * the appropriate sort node, if necessary). + * Insert AGG or GROUP node if needed, plus an explicit sort step + * if necessary. * - * Add an explicit sort if we couldn't make the path come out - * the way the GROUP node needs it. + * HAVING clause, if any, becomes qual of the Agg or Group node. 
*/ - if (need_sort_for_grouping) + if (use_hashed_grouping) { - result_plan = (Plan *) - make_sort_from_groupcols(root, - parse->groupClause, - groupColIdx, - result_plan); - current_pathkeys = root->group_pathkeys; + /* Hashed aggregate plan --- no sort needed */ + result_plan = (Plan *) make_agg(root, + tlist, + (List *) parse->havingQual, + AGG_HASHED, + &agg_costs, + numGroupCols, + groupColIdx, + extract_grouping_ops(parse->groupClause), + numGroups, + result_plan); + /* Hashed aggregation produces randomly-ordered results */ + current_pathkeys = NIL; } + else if (parse->hasAggs) + { + /* Plain aggregate plan --- sort if needed */ + AggStrategy aggstrategy; - result_plan = (Plan *) make_group(root, - tlist, - (List *) parse->havingQual, - numGroupCols, - groupColIdx, - extract_grouping_ops(parse->groupClause), - dNumGroups, - result_plan); - /* The Group node won't change sort ordering */ - } - else if (root->hasHavingQual) - { - /* - * No aggregates, and no GROUP BY, but we have a HAVING qual. - * This is a degenerate case in which we are supposed to emit - * either 0 or 1 row depending on whether HAVING succeeds. - * Furthermore, there cannot be any variables in either HAVING - * or the targetlist, so we actually do not need the FROM - * table at all! We can just throw away the plan-so-far and - * generate a Result node. This is a sufficiently unusual - * corner case that it's not worth contorting the structure of - * this routine to avoid having to generate the plan in the - * first place. - */ - result_plan = (Plan *) make_result(root, - tlist, - parse->havingQual, - NULL); - } - } /* end of non-minmax-aggregate case */ - - /* - * Since each window function could require a different sort order, we - * stack up a WindowAgg node for each window, with sort steps between - * them as needed. 
- */ - if (activeWindows) - { - List *window_tlist; - ListCell *l; + if (parse->groupClause) + { + if (need_sort_for_grouping) + { + result_plan = (Plan *) + make_sort_from_groupcols(root, + parse->groupClause, + groupColIdx, + result_plan); + current_pathkeys = root->group_pathkeys; + } + aggstrategy = AGG_SORTED; + + /* + * The AGG node will not change the sort ordering of its + * groups, so current_pathkeys describes the result too. + */ + } + else + { + aggstrategy = AGG_PLAIN; + /* Result will be only one row anyway; no sort order */ + current_pathkeys = NIL; + } - /* - * If the top-level plan node is one that cannot do expression - * evaluation, we must insert a Result node to project the desired - * tlist. (In some cases this might not really be required, but - * it's not worth trying to avoid it. In particular, think not to - * skip adding the Result if the initial window_tlist matches the - * top-level plan node's output, because we might change the tlist - * inside the following loop.) Note that on second and subsequent - * passes through the following loop, the top-level node will be a - * WindowAgg which we know can project; so we only need to check - * once. - */ - if (!is_projection_capable_plan(result_plan)) - { - result_plan = (Plan *) make_result(root, - NIL, - NULL, - result_plan); - } + result_plan = (Plan *) make_agg(root, + tlist, + (List *) parse->havingQual, + aggstrategy, + &agg_costs, + numGroupCols, + groupColIdx, + extract_grouping_ops(parse->groupClause), + numGroups, + result_plan); + } + else if (parse->groupClause) + { + /* + * GROUP BY without aggregation, so insert a group node (plus + * the appropriate sort node, if necessary). + * + * Add an explicit sort if we couldn't make the path come out + * the way the GROUP node needs it. 
+ */ + if (need_sort_for_grouping) + { + result_plan = (Plan *) + make_sort_from_groupcols(root, + parse->groupClause, + groupColIdx, + result_plan); + current_pathkeys = root->group_pathkeys; + } - /* - * The "base" targetlist for all steps of the windowing process is - * a flat tlist of all Vars and Aggs needed in the result. (In - * some cases we wouldn't need to propagate all of these all the - * way to the top, since they might only be needed as inputs to - * WindowFuncs. It's probably not worth trying to optimize that - * though.) We also add window partitioning and sorting - * expressions to the base tlist, to ensure they're computed only - * once at the bottom of the stack (that's critical for volatile - * functions). As we climb up the stack, we'll add outputs for - * the WindowFuncs computed at each level. - */ - window_tlist = make_windowInputTargetList(root, + result_plan = (Plan *) make_group(root, tlist, - activeWindows); + (List *) parse->havingQual, + numGroupCols, + groupColIdx, + extract_grouping_ops(parse->groupClause), + dNumGroups, + result_plan); + /* The Group node won't change sort ordering */ + } + else if (root->hasHavingQual) + { + /* + * No aggregates, and no GROUP BY, but we have a HAVING qual. + * This is a degenerate case in which we are supposed to emit + * either 0 or 1 row depending on whether HAVING succeeds. + * Furthermore, there cannot be any variables in either HAVING + * or the targetlist, so we actually do not need the FROM + * table at all! We can just throw away the plan-so-far and + * generate a Result node. This is a sufficiently unusual + * corner case that it's not worth contorting the structure of + * this routine to avoid having to generate the plan in the + * first place. + */ + result_plan = (Plan *) make_result(root, + tlist, + parse->havingQual, + NULL); + } + } /* end of non-minmax-aggregate case */ /* - * The copyObject steps here are needed to ensure that each plan - * node has a separately modifiable tlist. 
(XXX wouldn't a - * shallow list copy do for that?) + * Since each window function could require a different sort order, we + * stack up a WindowAgg node for each window, with sort steps between + * them as needed. */ - result_plan->targetlist = (List *) copyObject(window_tlist); - - foreach(l, activeWindows) + if (activeWindows) { - WindowClause *wc = (WindowClause *) lfirst(l); - List *window_pathkeys; - int partNumCols; - AttrNumber *partColIdx; - Oid *partOperators; - int ordNumCols; - AttrNumber *ordColIdx; - Oid *ordOperators; - - window_pathkeys = make_pathkeys_for_window(root, - wc, - tlist); + List *window_tlist; + ListCell *l; /* - * This is a bit tricky: we build a sort node even if we don't - * really have to sort. Even when no explicit sort is needed, - * we need to have suitable resjunk items added to the input - * plan's tlist for any partitioning or ordering columns that - * aren't plain Vars. (In theory, make_windowInputTargetList - * should have provided all such columns, but let's not assume - * that here.) Furthermore, this way we can use existing - * infrastructure to identify which input columns are the - * interesting ones. + * If the top-level plan node is one that cannot do expression + * evaluation, we must insert a Result node to project the desired + * tlist. (In some cases this might not really be required, but + * it's not worth trying to avoid it. In particular, think not to + * skip adding the Result if the initial window_tlist matches the + * top-level plan node's output, because we might change the tlist + * inside the following loop.) Note that on second and subsequent + * passes through the following loop, the top-level node will be a + * WindowAgg which we know can project; so we only need to check + * once. 
*/ - if (window_pathkeys) - { - Sort *sort_plan; - - sort_plan = make_sort_from_pathkeys(root, - result_plan, - window_pathkeys, - -1.0); - if (!pathkeys_contained_in(window_pathkeys, - current_pathkeys)) - { - /* we do indeed need to sort */ - result_plan = (Plan *) sort_plan; - current_pathkeys = window_pathkeys; - } - /* In either case, extract the per-column information */ - get_column_info_for_window(root, wc, tlist, - sort_plan->numCols, - sort_plan->sortColIdx, - &partNumCols, - &partColIdx, - &partOperators, - &ordNumCols, - &ordColIdx, - &ordOperators); - } - else + if (!is_projection_capable_plan(result_plan)) { - /* empty window specification, nothing to sort */ - partNumCols = 0; - partColIdx = NULL; - partOperators = NULL; - ordNumCols = 0; - ordColIdx = NULL; - ordOperators = NULL; + result_plan = (Plan *) make_result(root, + NIL, + NULL, + result_plan); } - if (lnext(l)) - { - /* Add the current WindowFuncs to the running tlist */ - window_tlist = add_to_flat_tlist(window_tlist, - wflists->windowFuncs[wc->winref]); - } - else + /* + * The "base" targetlist for all steps of the windowing process is + * a flat tlist of all Vars and Aggs needed in the result. (In + * some cases we wouldn't need to propagate all of these all the + * way to the top, since they might only be needed as inputs to + * WindowFuncs. It's probably not worth trying to optimize that + * though.) We also add window partitioning and sorting + * expressions to the base tlist, to ensure they're computed only + * once at the bottom of the stack (that's critical for volatile + * functions). As we climb up the stack, we'll add outputs for + * the WindowFuncs computed at each level. + */ + window_tlist = make_windowInputTargetList(root, + tlist, + activeWindows); + + /* + * The copyObject steps here are needed to ensure that each plan + * node has a separately modifiable tlist. (XXX wouldn't a + * shallow list copy do for that?) 
+ */ + result_plan->targetlist = (List *) copyObject(window_tlist); + + foreach(l, activeWindows) { - /* Install the original tlist in the topmost WindowAgg */ - window_tlist = tlist; - } + WindowClause *wc = (WindowClause *) lfirst(l); + List *window_pathkeys; + int partNumCols; + AttrNumber *partColIdx; + Oid *partOperators; + int ordNumCols; + AttrNumber *ordColIdx; + Oid *ordOperators; + + window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist); + + /* + * This is a bit tricky: we build a sort node even if we don't + * really have to sort. Even when no explicit sort is needed, + * we need to have suitable resjunk items added to the input + * plan's tlist for any partitioning or ordering columns that + * aren't plain Vars. (In theory, make_windowInputTargetList + * should have provided all such columns, but let's not assume + * that here.) Furthermore, this way we can use existing + * infrastructure to identify which input columns are the + * interesting ones. + */ + if (window_pathkeys) + { + Sort *sort_plan; + + sort_plan = make_sort_from_pathkeys(root, + result_plan, + window_pathkeys, + -1.0); + if (!pathkeys_contained_in(window_pathkeys, + current_pathkeys)) + { + /* we do indeed need to sort */ + result_plan = (Plan *) sort_plan; + current_pathkeys = window_pathkeys; + } + /* In either case, extract the per-column information */ + get_column_info_for_window(root, wc, tlist, + sort_plan->numCols, + sort_plan->sortColIdx, + &partNumCols, + &partColIdx, + &partOperators, + &ordNumCols, + &ordColIdx, + &ordOperators); + } + else + { + /* empty window specification, nothing to sort */ + partNumCols = 0; + partColIdx = NULL; + partOperators = NULL; + ordNumCols = 0; + ordColIdx = NULL; + ordOperators = NULL; + } - /* ... 
and make the WindowAgg plan node */ - result_plan = (Plan *) - make_windowagg(root, - (List *) copyObject(window_tlist), - wflists->windowFuncs[wc->winref], - wc->winref, - partNumCols, - partColIdx, - partOperators, - ordNumCols, - ordColIdx, - ordOperators, - wc->frameOptions, - wc->startOffset, - wc->endOffset, - result_plan); + if (lnext(l)) + { + /* Add the current WindowFuncs to the running tlist */ + window_tlist = add_to_flat_tlist(window_tlist, + wflists->windowFuncs[wc->winref]); + } + else + { + /* Install the original tlist in the topmost WindowAgg */ + window_tlist = tlist; + } + + /* ... and make the WindowAgg plan node */ + result_plan = (Plan *) + make_windowagg(root, + (List *) copyObject(window_tlist), + wflists->windowFuncs[wc->winref], + wc->winref, + partNumCols, + partColIdx, + partOperators, + ordNumCols, + ordColIdx, + ordOperators, + wc->frameOptions, + wc->startOffset, + wc->endOffset, + result_plan); + } } - } + + result_plan_list = lappend(result_plan_list, result_plan); + } /* foreach final_rel_list */ } /* end of if (setOperations) */ - /* - * If there is a DISTINCT clause, add the necessary node(s). - */ - if (parse->distinctClause) + foreach(lc, result_plan_list) { - double dNumDistinctRows; - long numDistinctRows; + result_plan = (Plan *) lfirst(lc); /* - * If there was grouping or aggregation, use the current number of - * rows as the estimated number of DISTINCT rows (ie, assume the - * result was already mostly unique). If not, use the number of - * distinct-groups calculated previously. + * If there is a DISTINCT clause, add the necessary node(s). */ - if (parse->groupClause || root->hasHavingQual || parse->hasAggs) - dNumDistinctRows = result_plan->plan_rows; - else - dNumDistinctRows = dNumGroups; - - /* Also convert to long int --- but 'ware overflow! 
*/ - numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); - - /* Choose implementation method if we didn't already */ - if (!tested_hashed_distinct) + if (parse->distinctClause) { - /* - * At this point, either hashed or sorted grouping will have to - * work from result_plan, so we pass that as both "cheapest" and - * "sorted". - */ - use_hashed_distinct = - choose_hashed_distinct(root, - tuple_fraction, limit_tuples, - result_plan->plan_rows, - result_plan->plan_width, - result_plan->startup_cost, - result_plan->total_cost, - result_plan->startup_cost, - result_plan->total_cost, - current_pathkeys, - dNumDistinctRows); - } + double dNumDistinctRows; + long numDistinctRows; - if (use_hashed_distinct) - { - /* Hashed aggregate plan --- no sort needed */ - result_plan = (Plan *) make_agg(root, - result_plan->targetlist, - NIL, - AGG_HASHED, - NULL, - list_length(parse->distinctClause), - extract_grouping_cols(parse->distinctClause, - result_plan->targetlist), - extract_grouping_ops(parse->distinctClause), - numDistinctRows, - result_plan); - /* Hashed aggregation produces randomly-ordered results */ - current_pathkeys = NIL; - } - else - { /* - * Use a Unique node to implement DISTINCT. Add an explicit sort - * if we couldn't make the path come out the way the Unique node - * needs it. If we do have to sort, always sort by the more - * rigorous of DISTINCT and ORDER BY, to avoid a second sort - * below. However, for regular DISTINCT, don't sort now if we - * don't have to --- sorting afterwards will likely be cheaper, - * and also has the possibility of optimizing via LIMIT. But for - * DISTINCT ON, we *must* force the final sort now, else it won't - * have the desired behavior. + * If there was grouping or aggregation, use the current number of + * rows as the estimated number of DISTINCT rows (ie, assume the + * result was already mostly unique). If not, use the number of + * distinct-groups calculated previously. 
*/ - List *needed_pathkeys; - - if (parse->hasDistinctOn && - list_length(root->distinct_pathkeys) < - list_length(root->sort_pathkeys)) - needed_pathkeys = root->sort_pathkeys; + if (parse->groupClause || root->hasHavingQual || parse->hasAggs) + dNumDistinctRows = result_plan->plan_rows; else - needed_pathkeys = root->distinct_pathkeys; + dNumDistinctRows = dNumGroups; + + /* Also convert to long int --- but 'ware overflow! */ + numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); + + /* Choose implementation method if we didn't already */ + if (!tested_hashed_distinct) + { + /* + * At this point, either hashed or sorted grouping will have to + * work from result_plan, so we pass that as both "cheapest" and + * "sorted". + */ + use_hashed_distinct = + choose_hashed_distinct(root, + tuple_fraction, limit_tuples, + result_plan->plan_rows, + result_plan->plan_width, + result_plan->startup_cost, + result_plan->total_cost, + result_plan->startup_cost, + result_plan->total_cost, + current_pathkeys, + dNumDistinctRows); + } - if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) + if (use_hashed_distinct) + { + /* Hashed aggregate plan --- no sort needed */ + result_plan = (Plan *) make_agg(root, + result_plan->targetlist, + NIL, + AGG_HASHED, + NULL, + list_length(parse->distinctClause), + extract_grouping_cols(parse->distinctClause, + result_plan->targetlist), + extract_grouping_ops(parse->distinctClause), + numDistinctRows, + result_plan); + /* Hashed aggregation produces randomly-ordered results */ + current_pathkeys = NIL; + } + else { - if (list_length(root->distinct_pathkeys) >= + /* + * Use a Unique node to implement DISTINCT. Add an explicit sort + * if we couldn't make the path come out the way the Unique node + * needs it. If we do have to sort, always sort by the more + * rigorous of DISTINCT and ORDER BY, to avoid a second sort + * below. 
However, for regular DISTINCT, don't sort now if we + * don't have to --- sorting afterwards will likely be cheaper, + * and also has the possibility of optimizing via LIMIT. But for + * DISTINCT ON, we *must* force the final sort now, else it won't + * have the desired behavior. + */ + List *needed_pathkeys; + + if (parse->hasDistinctOn && + list_length(root->distinct_pathkeys) < list_length(root->sort_pathkeys)) - current_pathkeys = root->distinct_pathkeys; + needed_pathkeys = root->sort_pathkeys; else + needed_pathkeys = root->distinct_pathkeys; + + if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) { - current_pathkeys = root->sort_pathkeys; - /* Assert checks that parser didn't mess up... */ - Assert(pathkeys_contained_in(root->distinct_pathkeys, - current_pathkeys)); + if (list_length(root->distinct_pathkeys) >= + list_length(root->sort_pathkeys)) + current_pathkeys = root->distinct_pathkeys; + else + { + current_pathkeys = root->sort_pathkeys; + /* Assert checks that parser didn't mess up... */ + Assert(pathkeys_contained_in(root->distinct_pathkeys, + current_pathkeys)); + } + + result_plan = (Plan *) make_sort_from_pathkeys(root, + result_plan, + current_pathkeys, + -1.0); } + result_plan = (Plan *) make_unique(result_plan, + parse->distinctClause); + result_plan->plan_rows = dNumDistinctRows; + /* The Unique node won't change sort ordering */ + } + } + + /* + * If ORDER BY was given and we were not able to make the plan come out in + * the right order, add an explicit sort step. 
+ */ + if (parse->sortClause) + { + if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) + { result_plan = (Plan *) make_sort_from_pathkeys(root, result_plan, - current_pathkeys, - -1.0); + root->sort_pathkeys, + limit_tuples); + current_pathkeys = root->sort_pathkeys; } - - result_plan = (Plan *) make_unique(result_plan, - parse->distinctClause); - result_plan->plan_rows = dNumDistinctRows; - /* The Unique node won't change sort ordering */ } - } - /* - * If ORDER BY was given and we were not able to make the plan come out in - * the right order, add an explicit sort step. - */ - if (parse->sortClause) - { - if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) + /* + * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. + * (Note: we intentionally test parse->rowMarks not root->rowMarks here. + * If there are only non-locking rowmarks, they should be handled by the + * ModifyTable node instead.) + */ + if (parse->rowMarks) { - result_plan = (Plan *) make_sort_from_pathkeys(root, - result_plan, - root->sort_pathkeys, - limit_tuples); - current_pathkeys = root->sort_pathkeys; - } - } + result_plan = (Plan *) make_lockrows(result_plan, + root->rowMarks, + SS_assign_special_param(root)); - /* - * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. - * (Note: we intentionally test parse->rowMarks not root->rowMarks here. - * If there are only non-locking rowmarks, they should be handled by the - * ModifyTable node instead.) - */ - if (parse->rowMarks) - { - result_plan = (Plan *) make_lockrows(result_plan, - root->rowMarks, - SS_assign_special_param(root)); + /* + * The result can no longer be assumed sorted, since locking might + * cause the sort key columns to be replaced with new values. + */ + current_pathkeys = NIL; + } /* - * The result can no longer be assumed sorted, since locking might - * cause the sort key columns to be replaced with new values. 
+ * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node. */ - current_pathkeys = NIL; - } + if (limit_needed(parse)) + { + result_plan = (Plan *) make_limit(result_plan, + parse->limitOffset, + parse->limitCount, + offset_est, + count_est); + } - /* - * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node. - */ - if (limit_needed(parse)) - { - result_plan = (Plan *) make_limit(result_plan, - parse->limitOffset, - parse->limitCount, - offset_est, - count_est); - } + lfirst(lc) = result_plan; + } /* foreach result_plan_list */ /* * Return the actual output ordering in query_pathkeys for possible use by @@ -1999,7 +2019,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ root->query_pathkeys = current_pathkeys; - return result_plan; + /* if there is only one plan, then just return that plan */ + if (list_length(result_plan_list) == 1) + return (Plan *) linitial(result_plan_list); + + /* + * Otherwise we'd better add an AlternativePlan node to allow the executor + * to decide which plan to use. 
+ */ + else + return (Plan *) make_alternativeplan(result_plan_list); } /* diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 7703946..c0b7a34 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -435,6 +435,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) */ switch (nodeTag(plan)) { + case T_AlternativePlan: /* fix up every alternative plan in the list */ + { + AlternativePlan *aplan = (AlternativePlan *) plan; + ListCell *lc; + foreach(lc, aplan->planList) + { + Plan *subplan = (Plan *) lfirst(lc); /* distinct name: do not shadow the "plan" parameter */ + lfirst(lc) = set_plan_refs(root, subplan, rtoffset); /* store the result back: set_plan_refs returns the (possibly replaced) node */ + } + } + break; case T_SeqScan: { SeqScan *splan = (SeqScan *) plan; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 265c865..855bc96 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -25,7 +25,9 @@ #include "access/transam.h" #include "access/xlog.h" #include "catalog/catalog.h" +#include "catalog/pg_constraint.h" #include "catalog/heap.h" +#include "catalog/pg_type.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -38,6 +40,7 @@ #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "storage/bufmgr.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -89,6 +92,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + List *fkinfos = NIL; + Relation fkeyRel; + Relation fkeyRelIdx; + ScanKeyData fkeyScankey; + SysScanDesc fkeyScan; + HeapTuple tuple; /* * We need not lock the relation since it was already locked, either by @@ -384,6 +393,111 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, heap_close(relation, NoLock); + /* load foreign key constraints */ + ScanKeyInit(&fkeyScankey, + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + 
ObjectIdGetDatum(relationObjectId)); + + fkeyRel = heap_open(ConstraintRelationId, AccessShareLock); + fkeyRelIdx = index_open(ConstraintRelidIndexId, AccessShareLock); + fkeyScan = systable_beginscan_ordered(fkeyRel, fkeyRelIdx, NULL, 1, &fkeyScankey); + + while ((tuple = systable_getnext_ordered(fkeyScan, ForwardScanDirection)) != NULL) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple); + ForeignKeyInfo *fkinfo; + Datum adatum; + bool isNull; + ArrayType *arr; + int nelements; + + /* skip if not a foreign key */ + if (con->contype != CONSTRAINT_FOREIGN) + continue; + + /* we're not interested unless the fkey has been validated */ + if (!con->convalidated) + continue; + + fkinfo = (ForeignKeyInfo *) palloc(sizeof(ForeignKeyInfo)); + fkinfo->conindid = con->conindid; + fkinfo->confrelid = con->confrelid; + fkinfo->convalidated = con->convalidated; + fkinfo->conrelid = con->conrelid; + fkinfo->confupdtype = con->confupdtype; + fkinfo->confdeltype = con->confdeltype; + fkinfo->confmatchtype = con->confmatchtype; + + adatum = heap_getattr(tuple, Anum_pg_constraint_conkey, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null conkey for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + nelements = ARR_DIMS(arr)[0]; + if (ARR_NDIM(arr) != 1 || + nelements < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "conkey is not a 1-D smallint array"); + + fkinfo->conkey = (int16 *) ARR_DATA_PTR(arr); + fkinfo->conncols = nelements; + + adatum = heap_getattr(tuple, Anum_pg_constraint_confkey, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null confkey for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + nelements = ARR_DIMS(arr)[0]; + + if (ARR_NDIM(arr) != 1 || + nelements < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != INT2OID) + elog(ERROR, "confkey is not a 1-D smallint 
array"); + + /* sanity check */ + if (nelements != fkinfo->conncols) + elog(ERROR, "number of confkey elements does not equal conkey elements"); + + fkinfo->confkey = (int16 *) ARR_DATA_PTR(arr); + adatum = heap_getattr(tuple, Anum_pg_constraint_conpfeqop, + RelationGetDescr(fkeyRel), &isNull); + + if (isNull) + elog(ERROR, "null conpfeqop for constraint %u", + HeapTupleGetOid(tuple)); + + arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */ + nelements = ARR_DIMS(arr)[0]; + + if (ARR_NDIM(arr) != 1 || + nelements < 0 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != OIDOID) /* conpfeqop holds operator OIDs, unlike conkey/confkey */ + elog(ERROR, "conpfeqop is not a 1-D Oid array"); + + /* sanity check */ + if (nelements != fkinfo->conncols) + elog(ERROR, "number of conpfeqop elements does not equal conkey elements"); + + fkinfo->conpfeqop = (Oid *) ARR_DATA_PTR(arr); /* NOTE(review): may point into the scan tuple when nothing was detoasted; confirm it stays valid after systable_endscan_ordered() */ + + fkinfos = lappend(fkinfos, fkinfo); + } + + rel->fklist = fkinfos; + systable_endscan_ordered(fkeyScan); + index_close(fkeyRelIdx, AccessShareLock); + heap_close(fkeyRel, AccessShareLock); + /* * Allow a plugin to editorialize on the info we obtained from the * catalogs. 
Actions might include altering the assumed relation size, diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 8cfbea0..0be29e6 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -115,6 +115,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->lateral_relids = NULL; rel->lateral_referencers = NULL; rel->indexlist = NIL; + rel->fklist = NIL; rel->pages = 0; rel->tuples = 0; rel->allvisfrac = 0; @@ -127,6 +128,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->baserestrictcost.startup = 0; rel->baserestrictcost.per_tuple = 0; rel->joininfo = NIL; + rel->removal_flags = PLAN_SUITABILITY_ALL_PURPOSE; rel->has_eclass_joins = false; /* Check type of rtable entry */ @@ -377,6 +379,7 @@ build_join_rel(PlannerInfo *root, joinrel->lateral_relids = NULL; joinrel->lateral_referencers = NULL; joinrel->indexlist = NIL; + joinrel->fklist = NIL; joinrel->pages = 0; joinrel->tuples = 0; joinrel->allvisfrac = 0; @@ -389,6 +392,7 @@ build_join_rel(PlannerInfo *root, joinrel->baserestrictcost.startup = 0; joinrel->baserestrictcost.per_tuple = 0; joinrel->joininfo = NIL; + joinrel->removal_flags = PLAN_SUITABILITY_ALL_PURPOSE; joinrel->has_eclass_joins = false; /* diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 818c2f6..115e398 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -916,6 +916,33 @@ get_atttypetypmodcoll(Oid relid, AttrNumber attnum, ReleaseSysCache(tp); } +/* + * get_attnotnull + * + * Given the relation id and the attribute number, + * return the "attnotnull" field from the attribute relation. 
+ */ +bool +get_attnotnull(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + bool result; + + result = att_tup->attnotnull; /* copy the flag out before the cache entry is released */ + ReleaseSysCache(tp); + return result; + } + else + return false; /* no cache entry found for (relid, attnum) */ +} + /* ---------- COLLATION CACHE ---------- */ /* diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index d0c0dcc..2ae3ea0 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -181,6 +181,7 @@ extern void ExecBSTruncateTriggers(EState *estate, extern void ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo); +extern bool AfterTriggerQueueIsEmpty(void); extern void AfterTriggerBeginXact(void); extern void AfterTriggerBeginQuery(void); extern void AfterTriggerEndQuery(EState *estate); diff --git a/src/include/executor/nodeAlternativePlan.h b/src/include/executor/nodeAlternativePlan.h new file mode 100644 index 0000000..092f4ef --- /dev/null +++ b/src/include/executor/nodeAlternativePlan.h @@ -0,0 +1,25 @@ +/*------------------------------------------------------------------------- + * + * nodeAlternativePlan.h + * + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeAlternativePlan.h + * + *------------------------------------------------------------------------- + */ +#ifndef NODEALTERNATIVEPLAN_H +#define NODEALTERNATIVEPLAN_H + +#include "nodes/execnodes.h" + +extern PlanState *ExecInitAlternativePlan(AlternativePlan *node, + EState *estate, int eflags); +/* + * Note that this node is only ever seen during initialization of a plan and + * it has no state type. 
+ */ +#endif /* NODEALTERNATIVEPLAN_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 97ef0fc..668d426 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -77,6 +77,7 @@ typedef enum NodeTag T_SetOp, T_LockRows, T_Limit, + T_AlternativePlan, /* these aren't subclasses of Plan: */ T_NestLoopParam, T_PlanRowMark, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index b1dfa85..3018256 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -734,6 +734,10 @@ typedef enum RTEKind RTE_CTE /* common table expr (WITH list element) */ } RTEKind; +/* Bit flags to mark suitability of plans */ +#define PLAN_SUITABILITY_ALL_PURPOSE 0 +#define PLAN_SUITABILITY_FK_TRIGGER_EMPTY 1 + typedef struct RangeTblEntry { NodeTag type; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 316c9ce..a3d3127 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -70,8 +70,11 @@ typedef struct PlannedStmt int nParamExec; /* number of PARAM_EXEC Params used */ + int suitableFor; /* under which conditions can this plan be used */ + bool hasRowSecurity; /* row security applied? */ + } PlannedStmt; /* macro for fetching the Plan associated with a SubPlan node */ @@ -767,6 +770,20 @@ typedef struct LockRows int epqParam; /* ID of Param for EvalPlanQual re-eval */ } LockRows; + +/* ---------------- + * alternative plan node + * + * Stores a list of alternative plans and one + * all purpose plan. 
+ * ---------------- + */ +typedef struct AlternativePlan +{ + Plan plan; + List *planList; +} AlternativePlan; + /* ---------------- * limit node * diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 6845a40..d94339f 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -95,6 +95,8 @@ typedef struct PlannerGlobal int nParamExec; /* number of PARAM_EXEC Params used */ + int suitableFor; /* under which conditions can this plan be used */ + Index lastPHId; /* highest PlaceHolderVar ID assigned */ Index lastRowMarkId; /* highest PlanRowMark ID assigned */ @@ -103,6 +105,7 @@ typedef struct PlannerGlobal bool hasRowSecurity; /* row security applied? */ + } PlannerGlobal; /* macro for fetching the Plan associated with a SubPlan node */ @@ -359,6 +362,8 @@ typedef struct PlannerInfo * lateral_referencers - relids of rels that reference this one laterally * indexlist - list of IndexOptInfo nodes for relation's indexes * (always NIL if it's not a table) + * fklist - list of ForeignKeyInfo's for relation's foreign key + * constraints. 
(always NIL if it's not a table) * pages - number of disk pages in relation (zero if not a table) * tuples - number of tuples in relation (not considering restrictions) * allvisfrac - fraction of disk pages that are marked all-visible @@ -452,6 +457,7 @@ typedef struct RelOptInfo Relids lateral_relids; /* minimum parameterization of rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *fklist; /* list of ForeignKeyInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -469,6 +475,8 @@ typedef struct RelOptInfo QualCost baserestrictcost; /* cost of evaluating the above */ List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ + int removal_flags; /* it may be possible to not bother joining + * this relation at all */ bool has_eclass_joins; /* T means joininfo is incomplete */ } RelOptInfo; @@ -542,6 +550,51 @@ typedef struct IndexOptInfo bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ } IndexOptInfo; +/* + * ForeignKeyInfo + * Used to store pg_constraint records for foreign key constraints for use + * by the planner. + * + * conindid - The index which supports the foreign key + * + * confrelid - The relation that is referenced by this foreign key + * + * convalidated - True if the foreign key has been validated. + * + * conrelid - The Oid of the relation that the foreign key belongs to + * + * confupdtype - ON UPDATE action for when the referenced table is updated + * + * confdeltype - ON DELETE action, controls what to do when a record is + * deleted from the referenced table. 
+ * + * confmatchtype - foreign key match type, e.g. MATCH FULL, MATCH PARTIAL + * + * conncols - Number of columns defined in the foreign key + * + * conkey - An array of conncols elements to store the varattno of the + * columns on the referencing side of the foreign key + * + * confkey - An array of conncols elements to store the varattno of the + * columns on the referenced side of the foreign key + * + * conpfeqop - An array of conncols elements to store the operators for + * PK = FK comparisons + */ +typedef struct ForeignKeyInfo +{ + Oid conindid; /* index supporting this constraint */ + Oid confrelid; /* relation referenced by foreign key */ + bool convalidated; /* constraint has been validated? */ + Oid conrelid; /* relation this constraint constrains */ + char confupdtype; /* foreign key's ON UPDATE action */ + char confdeltype; /* foreign key's ON DELETE action */ + char confmatchtype; /* foreign key's match type */ + int conncols; /* number of columns in the foreign key */ + int16 *conkey; /* columns of conrelid that the constraint applies to */ + int16 *confkey; /* columns of confrelid that foreign key references */ + Oid *conpfeqop; /* operator list for comparing PK to FK */ +} ForeignKeyInfo; /* * EquivalenceClasses diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 6cad92e..7b040fa 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -37,7 +37,8 @@ typedef RelOptInfo *(*join_search_hook_type) (PlannerInfo *root, extern PGDLLIMPORT join_search_hook_type join_search_hook; -extern RelOptInfo *make_one_rel(PlannerInfo *root, List *joinlist); +extern RelOptInfo *make_one_rel(PlannerInfo *root, List *joinlist, + int removal_flags); extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels); @@ -119,6 +120,8 @@ extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, RelOptInfo *inner_rel); +extern Oid 
select_equality_operator(EquivalenceClass *ec, Oid lefttype, + Oid righttype); extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2); extern void add_child_rel_equivalences(PlannerInfo *root, AppendRelInfo *appinfo, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 082f7d7..7bcd93a 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -27,8 +27,9 @@ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); /* * prototypes for plan/planmain.c */ -extern RelOptInfo *query_planner(PlannerInfo *root, List *tlist, - query_pathkeys_callback qp_callback, void *qp_extra); +extern List *query_planner(PlannerInfo *root, List *tlist, + query_pathkeys_callback qp_callback, void *qp_extra, + bool all_purpose_plan_only); /* * prototypes for plan/planagg.c @@ -73,6 +74,7 @@ extern Group *make_group(PlannerInfo *root, List *tlist, List *qual, extern Plan *materialize_finished_plan(Plan *subplan); extern Unique *make_unique(Plan *lefttree, List *distinctList); extern LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam); +extern AlternativePlan *make_alternativeplan(List *planlist); extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount, int64 offset_est, int64 count_est); extern SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 2f5ede1..14e64fc 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -68,6 +68,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum); extern int32 get_atttypmod(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); +extern bool get_attnotnull(Oid relid, AttrNumber attnum); extern char *get_collation_name(Oid colloid); extern char *get_constraint_name(Oid conoid); extern Oid get_opclass_family(Oid 
opclass); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 2501184..e485554 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3276,6 +3276,171 @@ select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 (1 row) rollback; +begin work; +create temp table c ( + id int primary key +); +create temp table b ( + id int primary key, + c_id int not null, + val int not null, + constraint b_c_id_fkey foreign key (c_id) references c deferrable +); +create temp table a ( + id int primary key, + b_id int not null, + constraint a_b_id_fkey foreign key (b_id) references b deferrable +); +insert into c (id) values(1); +insert into b (id,c_id,val) values(2,1,10); +insert into a (id,b_id) values(3,2); +-- this should remove inner join to b +explain (costs off) +select a.* from a inner join b on a.b_id = b.id; + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +-- this should remove inner join to b and c +explain (costs off) +select a.* from a inner join b on a.b_id = b.id inner join c on b.c_id = c.id; + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +-- Ensure all of the target entries have their proper aliases. +select a.* from a inner join b on a.b_id = b.id inner join c on b.c_id = c.id; + id | b_id +----+------ + 3 | 2 +(1 row) + +-- change order of tables in query, this should generate the same plan as above. 
+explain (costs off) +select a.* from c inner join b on c.id = b.c_id inner join a on a.b_id = b.id; + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +-- inner join can't be removed due to b columns in the target list +explain (costs off) +select * from a inner join b on a.b_id = b.id; + QUERY PLAN +------------------------------ + Hash Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- this should not remove inner join to b due to quals restricting results from b +explain (costs off) +select a.* from a inner join b on a.b_id = b.id where b.val = 10; + QUERY PLAN +---------------------------------- + Hash Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: (val = 10) +(6 rows) + +-- this should not remove join to b +explain (costs off) +select a.* from a inner join b on a.b_id = b.id where b.val = b.id; + QUERY PLAN +---------------------------------- + Hash Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: (id = val) +(6 rows) + +-- this should not remove the join, no foreign key exists between a.id and b.id +explain (costs off) +select a.* from a inner join b on a.id = b.id; + QUERY PLAN +---------------------------- + Hash Join + Hash Cond: (a.id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- ensure a left joined rel can't remove an inner joined rel +explain (costs off) +select a.* from b left join a on b.id = a.b_id; + QUERY PLAN +------------------------------ + Hash Right Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(5 rows) + +-- Ensure we remove b, but don't try and remove c. c has no join condition. 
+explain (costs off) +select a.* from a inner join b on a.b_id = b.id cross join c; + QUERY PLAN +--------------------------- + Nested Loop + -> Seq Scan on c + -> Materialize + -> Seq Scan on a +(4 rows) + +set constraints b_c_id_fkey deferred; +-- join should be removed. +explain (costs off) +select b.* from b inner join c on b.c_id = c.id; + QUERY PLAN +--------------- + Seq Scan on b +(1 row) + +prepare ab as select b.* from b inner join c on b.c_id = c.id; +explain (costs off) +execute ab; + QUERY PLAN +--------------- + Seq Scan on b +(1 row) + +-- perform an update which will cause some pending fk triggers to be added +update c set id = 2 where id=1; +-- ensure inner join is no longer removed. +explain (costs off) +select b.* from b inner join c on b.c_id = c.id; + QUERY PLAN +------------------------------ + Hash Join + Hash Cond: (b.c_id = c.id) + -> Seq Scan on b + -> Hash + -> Seq Scan on c +(5 rows) + +explain (costs off) +execute ab; + QUERY PLAN +------------------------------ + Hash Join + Hash Cond: (b.c_id = c.id) + -> Seq Scan on b + -> Hash + -> Seq Scan on c +(5 rows) + +rollback; create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); insert into parent values (1, 10), (2, 20), (3, 30); diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 718e1d9..c3ee72e 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -977,6 +977,89 @@ select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 rollback; +begin work; + +create temp table c ( + id int primary key +); +create temp table b ( + id int primary key, + c_id int not null, + val int not null, + constraint b_c_id_fkey foreign key (c_id) references c deferrable +); +create temp table a ( + id int primary key, + b_id int not null, + constraint a_b_id_fkey foreign key (b_id) references b deferrable +); + +insert into c (id) values(1); +insert into b (id,c_id,val) values(2,1,10); 
+insert into a (id,b_id) values(3,2); + +-- this should remove inner join to b +explain (costs off) +select a.* from a inner join b on a.b_id = b.id; + +-- this should remove inner join to b and c +explain (costs off) +select a.* from a inner join b on a.b_id = b.id inner join c on b.c_id = c.id; + +-- Ensure all of the target entries have their proper aliases. +select a.* from a inner join b on a.b_id = b.id inner join c on b.c_id = c.id; + +-- change order of tables in query, this should generate the same plan as above. +explain (costs off) +select a.* from c inner join b on c.id = b.c_id inner join a on a.b_id = b.id; + +-- inner join can't be removed due to b columns in the target list +explain (costs off) +select * from a inner join b on a.b_id = b.id; + +-- this should not remove inner join to b due to quals restricting results from b +explain (costs off) +select a.* from a inner join b on a.b_id = b.id where b.val = 10; + +-- this should not remove join to b +explain (costs off) +select a.* from a inner join b on a.b_id = b.id where b.val = b.id; + +-- this should not remove the join, no foreign key exists between a.id and b.id +explain (costs off) +select a.* from a inner join b on a.id = b.id; + +-- ensure a left joined rel can't remove an inner joined rel +explain (costs off) +select a.* from b left join a on b.id = a.b_id; + +-- Ensure we remove b, but don't try and remove c. c has no join condition. +explain (costs off) +select a.* from a inner join b on a.b_id = b.id cross join c; + +set constraints b_c_id_fkey deferred; + +-- join should be removed. +explain (costs off) +select b.* from b inner join c on b.c_id = c.id; + +prepare ab as select b.* from b inner join c on b.c_id = c.id; + +explain (costs off) +execute ab; + +-- perform an update which will cause some pending fk triggers to be added +update c set id = 2 where id=1; + +-- ensure inner join is no longer removed. 
+explain (costs off) +select b.* from b inner join c on b.c_id = c.id; + +explain (costs off) +execute ab; + +rollback; + create temp table parent (k int primary key, pd int); create temp table child (k int unique, cd int); insert into parent values (1, 10), (2, 20), (3, 30);