Re: [HACKERS] Re: Improve OR conditions on joined columns (common star schema problem) - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: [HACKERS] Re: Improve OR conditions on joined columns (common star schema problem) |
Date | |
Msg-id | 740.1535047846@sss.pgh.pa.us Whole thread Raw |
In response to | Re: [HACKERS] Re: Improve OR conditions on joined columns (common star schema problem) (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: [HACKERS] Re: Improve OR conditions on joined columns (commonstar schema problem)
Re: [HACKERS] Re: Improve OR conditions on joined columns (commonstar schema problem) |
List | pgsql-hackers |
I wrote: > [ join-or-to-union-4.patch ] Rebased up to HEAD, per cfbot nagging. Still no substantive change from v2. regards, tom lane diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 6269f47..8935503 100644 *** a/src/backend/nodes/outfuncs.c --- b/src/backend/nodes/outfuncs.c *************** _outPlannerInfo(StringInfo str, const Pl *** 2310,2315 **** --- 2310,2316 ---- WRITE_FLOAT_FIELD(limit_tuples, "%.0f"); WRITE_UINT_FIELD(qual_security_level); WRITE_ENUM_FIELD(inhTargetKind, InheritanceKind); + WRITE_BOOL_FIELD(needBaseTids); WRITE_BOOL_FIELD(hasJoinRTEs); WRITE_BOOL_FIELD(hasLateralRTEs); WRITE_BOOL_FIELD(hasDeletedRTEs); diff --git a/src/backend/optimizer/plan/Makefile b/src/backend/optimizer/plan/Makefile index 88a9f7f..1db6bd5 100644 *** a/src/backend/optimizer/plan/Makefile --- b/src/backend/optimizer/plan/Makefile *************** top_builddir = ../../../.. *** 13,18 **** include $(top_builddir)/src/Makefile.global OBJS = analyzejoins.o createplan.o initsplan.o planagg.o planmain.o planner.o \ ! setrefs.o subselect.o include $(top_srcdir)/src/backend/common.mk --- 13,18 ---- include $(top_builddir)/src/Makefile.global OBJS = analyzejoins.o createplan.o initsplan.o planagg.o planmain.o planner.o \ ! planunionor.o setrefs.o subselect.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 0e73f9c..fdffb6b 100644 *** a/src/backend/optimizer/plan/analyzejoins.c --- b/src/backend/optimizer/plan/analyzejoins.c *************** clause_sides_match_join(RestrictInfo *ri *** 155,160 **** --- 155,163 ---- * cases, but we don't have the infrastructure to prove them.) We also * have to check that the inner side doesn't generate any variables needed * above the join. + * + * Note: making this significantly smarter might break planunionor.c. + * Study that file before doing so. */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index b05adc7..4486885 100644 *** a/src/backend/optimizer/plan/planmain.c --- b/src/backend/optimizer/plan/planmain.c *************** query_planner(PlannerInfo *root, List *t *** 212,217 **** --- 212,225 ---- add_placeholders_to_base_rels(root); /* + * Also, if we have a request to emit baserel CTIDs, add those to the + * baserel targetlists now. This likewise has to be done after join + * removal, because we only want CTIDs for rels that survive join removal. + */ + if (root->needBaseTids) + add_base_rel_ctids(root); + + /* * Construct the lateral reference sets now that we have finalized * PlaceHolderVar eval levels. */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 96bf060..36907a4 100644 *** a/src/backend/optimizer/plan/planner.c --- b/src/backend/optimizer/plan/planner.c *************** subquery_planner(PlannerGlobal *glob, Qu *** 623,628 **** --- 623,629 ---- root->minmax_aggs = NIL; root->qual_security_level = 0; root->inhTargetKind = INHKIND_NONE; + root->needBaseTids = false; root->hasRecursion = hasRecursion; if (hasRecursion) root->wt_param_id = SS_assign_special_param(root); *************** grouping_planner(PlannerInfo *root, bool *** 1797,1802 **** --- 1798,1804 ---- WindowFuncLists *wflists = NULL; List *activeWindows = NIL; grouping_sets_data *gset_data = NULL; + List *union_or_subpaths; standard_qp_extra qp_extra; /* A recursive query should always have setOperations */ *************** grouping_planner(PlannerInfo *root, bool *** 1874,1879 **** --- 1876,1889 ---- preprocess_minmax_aggregates(root, tlist); /* + * Preprocess join OR clauses that might be better handled as UNIONs. + * This likewise needs to be close to the query_planner() call. But + * it doesn't matter which of preprocess_minmax_aggregates() and this + * function we call first, because they treat disjoint sets of cases. + */ + union_or_subpaths = split_join_or_clauses(root); + + /* * Figure out whether there's a hard limit on the number of rows that * query_planner's result subplan needs to return. Even if we know a * hard limit overall, it doesn't apply if the query has any *************** grouping_planner(PlannerInfo *root, bool *** 1908,1913 **** --- 1918,1931 ---- standard_qp_callback, &qp_extra); /* + * If we found any way to convert a join OR clause to a union, finish + * up generating the path(s) for that, and add them into the topmost + * scan/join relation. + */ + if (union_or_subpaths) + finish_union_or_paths(root, current_rel, union_or_subpaths); + + /* * Convert the query's result tlist into PathTarget format. * * Note: it's desirable to not do this till after query_planner(), diff --git a/src/backend/optimizer/plan/planunionor.c b/src/backend/optimizer/plan/planunionor.c index ...08b545f . *** a/src/backend/optimizer/plan/planunionor.c --- b/src/backend/optimizer/plan/planunionor.c *************** *** 0 **** --- 1,667 ---- + /*------------------------------------------------------------------------- + * + * planunionor.c + * Consider whether join OR clauses can be converted to UNION queries. + * + * This module looks for OR clauses whose arms each reference a single + * query relation (but not all the same rel), and tries to generate a path + * representing conversion of such an OR clause into a UNION operation. + * For example, + * SELECT ... FROM a, b WHERE (cond-on-A OR cond-on-B) AND other-conds + * can be implemented as + * SELECT ... FROM a, b WHERE cond-on-A AND other-conds + * UNION + * SELECT ... FROM a, b WHERE cond-on-B AND other-conds + * given a suitable definition of "UNION" (one that won't merge rows that + * would have been separate in the original query output). Since this change + * converts join clauses into restriction clauses, the modified query can be + * much faster to run than the original, despite the duplication of effort + * involved and the extra UNION processing. It's particularly useful for + * star-schema queries where the OR arms reference different dimension tables; + * each separated query may be able to remove joins to all but one dimension + * table, and arrange that join to use an inner indexscan on the fact table. + * + * We must insist that the WHERE and JOIN/ON clauses contain no volatile + * functions, because of the likelihood that qual clauses will be evaluated + * more times than a naive programmer would expect. We need not restrict + * the SELECT's tlist, because that will be evaluated after the UNION. + * + * The current implementation of the UNION step is to de-duplicate using + * row CTIDs. A big limitation is that this only works on plain relations, + * and not for instance on foreign tables. Another problem is that we can + * only de-duplicate by sort/unique, not hashing; but that could be fixed + * if we write a hash opclass for TID. + * + * To allow join removal to happen, we can't reference the CTID column + * of an otherwise-removable relation. Therefore, this code proceeds by + * de-duplicating output rows using only the CTIDs of relations that are not + * removable in any UNION arm. It is not immediately obvious that that works + * at all, but it does, given one restriction. If a rel is removed in some + * arm, then it is not referenced above the joins in that arm (in particular, + * it's not used in that arm's version of the OR clause), and we were able + * to prove that removing it doesn't change the output rowcount in that arm. + * Therefore there's no need to consider it for de-duplication so far as that + * arm's output is concerned. The identical proof can be expected to apply + * in other arms, except in an arm that references that rel in its version + * of the OR clause. But in such an arm, we have effectively added a + * restriction clause to what is known in other arms, which means that the + * set of rows output by that rel can't increase compared to other arms. + * Therefore the situation in such an arm must be that including the rel + * could result in either zero or one output row, rather than exactly one + * output row as in other arms. So we still don't need to consider it for + * de-duplication. But there's a hole in this argument, which is that we + * must consider the effects of reduce_outer_joins() as well as + * remove_useless_joins(). Addition of a restriction clause could result in + * simplifying a FULL join into a LEFT join, which might allow join removal + * to happen against the right side of that join; but the same proof would + * fail in arms that didn't restrict the left side. We deal with this issue + * by not attempting union OR transformation if the query has any FULL joins. + * + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/planunionor.c + * + *------------------------------------------------------------------------- + */ + #include "postgres.h" + + #include "access/sysattr.h" + #include "catalog/pg_class.h" + #include "catalog/pg_operator.h" + #include "catalog/pg_type.h" + #include "miscadmin.h" + #include "nodes/makefuncs.h" + #include "optimizer/clauses.h" + #include "optimizer/cost.h" + #include "optimizer/pathnode.h" + #include "optimizer/planmain.h" + #include "optimizer/prep.h" + #include "optimizer/subselect.h" + #include "optimizer/tlist.h" + #include "optimizer/var.h" + + + static bool is_suitable_join_or_clause(BoolExpr *orclause, List **relids); + static bool is_query_safe_for_union_or_transform(PlannerInfo *root); + static List *create_union_or_subpaths(PlannerInfo *root, BoolExpr *orclause, + int n, List *armrelids); + static void union_or_qp_callback(PlannerInfo *root, void *extra); + + + /* + * split_join_or_clauses - create paths based on splitting join OR clauses + * + * This should be called by grouping_planner() just before it's ready to call + * query_planner(), because we generate simplified join paths by cloning the + * planner's state and invoking query_planner() on a modified version of + * the query parsetree. Thus, all preprocessing needed before query_planner() + * must already be done. Note however that we repeat reduce_outer_joins() + * because of the possibility that the simplified WHERE clause allows reduction + * of an outer join to inner-join form. That's okay for now, but maybe we + * should move the reduce_outer_joins() call into query_planner()? + * + * The result is a list (one entry per potential union OR path) of sublists of + * best paths for the inputs to the UNION step. Adding the UNION processing + * is retty mechanical, but we can't do it until we have a RelOptInfo for the + * top-level join rel. + */ + List * + split_join_or_clauses(PlannerInfo *root) + { + List *results = NIL; + Query *parse = root->parse; + bool checked_query = false; + ListCell *lc; + int n; + + /* + * At least for now, we restrict this optimization to plain SELECTs. + */ + if (parse->commandType != CMD_SELECT || + parse->rowMarks || + parse->setOperations) + return NIL; + + /* + * Reject if query contains any CTEs; copying them would break + * single-evaluation semantics. (In principle we could arrange for all + * UNION arms to read from a single instance of a CTE, but that's an + * improvement for another day, especially since we have no way to de-dup + * CTE outputs anyway.) + */ + if (parse->cteList) + return NIL; + + /* + * The query must reference multiple tables, else we certainly aren't + * going to find any suitable OR clauses. Do a quick check that there's + * more than one RTE. + */ + if (list_length(parse->rtable) < 2) + return NIL; + + /* + * Scan the top-level WHERE clause looking for suitable OR clauses, and + * for each one, generate paths for the UNION input sub-queries. There + * might be more than one suitable OR clause, in which case we can try the + * transformation for each one of them separately and add that list of + * paths to the results. + * + * XXX we should also search the JOIN/ON clauses of any top-level inner + * JOIN nodes, since those are semantically equivalent to WHERE. But it's + * hard to see how to do that without either copying the whole JOIN tree + * in advance or repeating the search after copying, and neither of those + * options seems attractive. + */ + n = 0; + foreach(lc, (List *) parse->jointree->quals) + { + Node *qual = (Node *) lfirst(lc); + List *armrelids; + + if (or_clause(qual) && + is_suitable_join_or_clause((BoolExpr *) qual, &armrelids)) + { + List *subpaths; + + /* + * Check that the query as a whole is safe for this optimization. + * We only need to do this once, but it's somewhat expensive, so + * don't do it till we find a candidate OR clause. + */ + if (!checked_query) + { + if (!is_query_safe_for_union_or_transform(root)) + return NIL; + checked_query = true; + } + /* OK, transform the query and create a list of sub-paths */ + subpaths = create_union_or_subpaths(root, (BoolExpr *) qual, + n, armrelids); + results = lappend(results, subpaths); + } + n++; + } + + return results; + } + + /* + * Make sure that a UNION subpath will emit the CTID columns for all its + * (surviving) baserels. This is called after we've done join removal in + * the UNION arm. + */ + void + add_base_rel_ctids(PlannerInfo *root) + { + Relids allbaserels; + List *vars; + int brelid; + + /* Find out the set of baserels that survived join removal */ + allbaserels = get_relids_in_jointree((Node *) root->parse->jointree, false); + /* For each such rel, make a Var for its CTID column */ + vars = NIL; + brelid = -1; + while ((brelid = bms_next_member(allbaserels, brelid)) >= 0) + { + Var *var; + + var = makeVar(brelid, + SelfItemPointerAttributeNumber, + TIDOID, + -1, + InvalidOid, + 0); + vars = lappend(vars, var); + } + /* Add to rel tlists if not present, and mark as needed at top level */ + add_vars_to_targetlist(root, vars, bms_make_singleton(0), false); + } + + /* + * Finish constructing Paths representing the UNION implementation of join + * OR clause(s), and attach them to "joinrel", which is the final scan/join + * relation returned by query_planner() for the conventional implementation of + * the query. "union_or_subpaths" is the output of split_join_or_clauses(). + */ + void + finish_union_or_paths(PlannerInfo *root, RelOptInfo *joinrel, + List *union_or_subpaths) + { + ListCell *lc; + + /* This loop iterates once per splittable OR clause */ + foreach(lc, union_or_subpaths) + { + List *subpaths = (List *) lfirst(lc); + List *common_exprs; + PathTarget *common_target; + Path *appendpath; + List *uniq_operators; + List *uniq_exprs; + UniquePath *pathnode; + Path sort_path; /* dummy for result of cost_sort */ + int numCols; + ListCell *lc2; + + /* + * Join removal in the sub-queries might have resulted in different + * sub-paths returning different sets of Vars, in particular we might + * not see the full set of artificially-added CTID Vars coming out of + * each sub-path. Fortunately, we only need the ones that are + * available from every sub-path. Since Append can't project, we need + * to build a pathtarget containing only the commonly available Vars, + * and force each sub-path to return that target. + * + * This coding assumes that the commonly available Vars will appear in + * the same order in each subpath target, which should be true but + * it's surely an implementation artifact. If it stops being true, we + * could fall back on list_intersection(), but that'd be O(N^3). + */ + common_exprs = (List *) + copyObject(((Path *) linitial(subpaths))->pathtarget->exprs); + for_each_cell(lc2, lnext(list_head(subpaths))) + { + Path *subpath = (Path *) lfirst(lc2); + ListCell *lcs; + ListCell *lcc; + ListCell *prevc; + + lcs = list_head(subpath->pathtarget->exprs); + prevc = NULL; + lcc = list_head(common_exprs); + while (lcc) + { + ListCell *nextc = lnext(lcc); + + if (lcs && equal(lfirst(lcs), lfirst(lcc))) + { + lcs = lnext(lcs); + prevc = lcc; + } + else + common_exprs = list_delete_cell(common_exprs, lcc, prevc); + lcc = nextc; + } + } + common_target = create_empty_pathtarget(); + common_target->exprs = common_exprs; + set_pathtarget_cost_width(root, common_target); + /* Now forcibly apply this target to each subpath */ + foreach(lc2, subpaths) + { + Path *subpath = (Path *) lfirst(lc2); + + lfirst(lc2) = create_projection_path(root, + joinrel, + subpath, + common_target); + } + + /* + * Generate Append path combining the sub-paths for this UNION. The + * Append path's pathtarget has to match what is actually coming out + * of the subpaths, since Append can't project. + */ + appendpath = (Path *) create_append_path(root, joinrel, subpaths, NIL, + NULL, 0, false, NIL, -1); + appendpath->pathtarget = common_target; + + /* + * Make the operator and expression lists needed for the Unique path. + * We need to unique-ify on every CTID that is commonly available from + * all the sub-paths. (See discussion at head of file.) + */ + uniq_operators = uniq_exprs = NIL; + foreach(lc2, common_exprs) + { + Var *var = (Var *) lfirst(lc2); + + if (IsA(var, Var) && + var->varattno == SelfItemPointerAttributeNumber && + var->varlevelsup == 0) + { + Assert(var->vartype == TIDOID); + uniq_operators = lappend_oid(uniq_operators, TIDEqualOperator); + uniq_exprs = lappend(uniq_exprs, var); + } + } + Assert(uniq_exprs != NIL); + + /* + * Generate a Unique path representing the de-duplication step. For + * now, we can only consider sort+unique implementation, since we lack + * hashing support for type "tid". + * + * XXX maybe refactor to share some code with create_unique_path()? + */ + pathnode = makeNode(UniquePath); + + pathnode->path.pathtype = T_Unique; + pathnode->path.parent = joinrel; + pathnode->path.pathtarget = appendpath->pathtarget; + pathnode->path.param_info = appendpath->param_info; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = joinrel->consider_parallel && + appendpath->parallel_safe; + pathnode->path.parallel_workers = appendpath->parallel_workers; + + /* + * Treat the output as unsorted, since it almost certainly doesn't + * match any useful pathkeys. + */ + pathnode->path.pathkeys = NIL; + + pathnode->subpath = appendpath; + pathnode->in_operators = uniq_operators; + pathnode->uniq_exprs = uniq_exprs; + + /* Estimate number of output rows */ + pathnode->path.rows = appendpath->rows; + numCols = list_length(uniq_exprs); + + /* + * Estimate cost for sort+unique implementation + */ + cost_sort(&sort_path, root, NIL, + appendpath->total_cost, + appendpath->rows, + appendpath->pathtarget->width, + 0.0, + work_mem, + -1.0); + + /* + * Charge one cpu_operator_cost per comparison per input tuple. We + * assume all columns get compared at most of the tuples. (XXX + * probably this is an overestimate.) This should agree with + * create_unique_path. + */ + sort_path.total_cost += cpu_operator_cost * appendpath->rows * numCols; + + pathnode->umethod = UNIQUE_PATH_SORT; + + pathnode->path.startup_cost = sort_path.startup_cost; + pathnode->path.total_cost = sort_path.total_cost; + + /* Attach it to the joinrel */ + add_path(joinrel, (Path *) pathnode); + } + + /* We need to refigure which is the cheapest path for the joinrel */ + set_cheapest(joinrel); + } + + /* + * Is this OR clause a suitable clause for splitting? + * + * Each of its arms must reference just one rel, and they must not all be + * the same rel. + * On success, pass back a list of the relids referenced by each OR arm, + * so we don't have to repeat the pull_varnos() work later. + */ + static bool + is_suitable_join_or_clause(BoolExpr *orclause, List **relids) + { + bool ok = false; + List *relidlist = NIL; + int firstrelid = 0; + ListCell *lc; + + *relids = NIL; /* prevent uninitialized-variable warnings */ + foreach(lc, orclause->args) + { + Node *qual = (Node *) lfirst(lc); + Relids varnos = pull_varnos(qual); + int relid; + + if (!bms_get_singleton_member(varnos, &relid)) + return false; /* this arm fails the sine qua non */ + if (relidlist == NIL) + firstrelid = relid; + else if (firstrelid != relid) + ok = true; /* arms reference more than one relid */ + relidlist = lappend_int(relidlist, relid); + } + *relids = relidlist; + return ok; + } + + /* + * Is query as a whole safe to apply union OR transformation to? + * This checks relatively-expensive conditions that we don't want to + * worry about until we've found a candidate OR clause. + */ + static bool + is_query_safe_for_union_or_transform(PlannerInfo *root) + { + Query *parse = root->parse; + Relids allbaserels; + ListCell *lc; + int relid; + + /* + * Must not have any volatile functions in FROM or WHERE (see notes at + * head of file). + */ + if (contain_volatile_functions((Node *) parse->jointree)) + return false; + + /* + * We insist that all baserels used in the query be plain relations, so + * that we can use their ctids as unique row identifiers in the UNION + * step. One could imagine ways to relax this later, for instance by + * forcibly adding WITH ORDINALITY to function RTEs. We'd have to examine + * each RTE anyway, though, to check for volatile functions. + */ + allbaserels = get_relids_in_jointree((Node *) parse->jointree, false); + relid = 0; + foreach(lc, parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + relid++; + + /* fail if query contains any FULL joins (see head of this file) */ + if (rte->rtekind == RTE_JOIN && rte->jointype == JOIN_FULL) + return false; + + /* otherwise, ignore RTEs that aren't referenced baserels */ + if (!bms_is_member(relid, allbaserels)) + continue; + + /* fail if not plain rel */ + if (rte->rtekind != RTE_RELATION) + return false; + /* fail if it doesn't have CTIDs */ + if (rte->relkind != RELKIND_RELATION && + rte->relkind != RELKIND_MATVIEW) + return false; + + /* disallow TABLESAMPLE (might be okay if repeatable?) */ + if (rte->tablesample) + return false; + + /* check for volatiles in security barrier quals */ + if (contain_volatile_functions((Node *) rte->securityQuals)) + return false; + } + + /* OK to proceed */ + return true; + } + + /* + * Split the query and the given OR clause into one UNION arm per relation + * mentioned in the OR clause, and make a list of best paths for the UNION + * arms. (Since the UNION step will lose any ordering or fast-start + * properties of the paths, there's no need to consider any but the + * cheapest-total path for each arm; hence it's okay to return just one path + * per arm.) + * + * "n" is the OR clause's index in the query's WHERE list. + * "armrelids" is the OR-arm-to-referenced-rel mapping. + */ + static List * + create_union_or_subpaths(PlannerInfo *root, BoolExpr *orclause, + int n, List *armrelids) + { + List *subpaths = NIL; + Relids orrels; + int relid; + ListCell *lc; + ListCell *lc2; + + /* + * There might be multiple OR arms referring to the same rel, which we + * should combine into a restriction OR clause. So first identify the set + * of rels used in the OR. + */ + orrels = NULL; + foreach(lc, armrelids) + orrels = bms_add_member(orrels, lfirst_int(lc)); + + /* Now, for each such rel, generate a path for a UNION arm */ + while ((relid = bms_first_member(orrels)) >= 0) + { + List *orarms; + PlannerInfo *subroot; + Query *parse; + List *subquery_quals; + bool hasOuterJoins; + RelOptInfo *final_rel; + Path *subpath; + int k; + ListCell *prev; + + /* Extract the OR arms for this rel, making copies for safety */ + orarms = NIL; + forboth(lc, orclause->args, lc2, armrelids) + { + Node *qual = (Node *) lfirst(lc); + int qualrelid = lfirst_int(lc2); + + if (qualrelid == relid) + orarms = lappend(orarms, copyObject(qual)); + } + Assert(orarms != NIL); + if (list_length(orarms) == 1) + { + /* + * When there's just a single arm for this rel (the typical case), + * it goes directly into the subquery's WHERE list. But it might + * be a sub-AND, in which case we must flatten it into a qual list + * to preserve AND/OR flatness. + */ + orarms = make_ands_implicit((Expr *) linitial(orarms)); + } + else + { + /* + * When there's more than one arm, convert back to an OR clause. + * No flatness worries here; the arms were already valid OR-list + * elements. + */ + orarms = list_make1(make_orclause(orarms)); + } + + /* Clone the planner's state */ + subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo)); + memcpy(subroot, root, sizeof(PlannerInfo)); + subroot->parse = parse = (Query *) copyObject(root->parse); + /* Making copies of these might be overkill, but be safe */ + subroot->processed_tlist = (List *) copyObject(root->processed_tlist); + subroot->append_rel_list = (List *) copyObject(root->append_rel_list); + /* Tell query_planner to expect full retrieval of UNION input */ + subroot->tuple_fraction = 1.0; + subroot->limit_tuples = -1.0; + + /* + * Remove the subquery's copy of the original OR clause, which we + * identify by its index in the WHERE clause list. + */ + subquery_quals = (List *) parse->jointree->quals; + k = 0; + prev = NULL; + foreach(lc, subquery_quals) + { + if (k == n) + { + subquery_quals = list_delete_cell(subquery_quals, lc, prev); + break; + } + k++; + prev = lc; + } + + /* And instead add the qual or quals we extracted from the OR clause */ + subquery_quals = list_concat(subquery_quals, orarms); + parse->jointree->quals = (Node *) subquery_quals; + + /* + * Ask for baserel CTIDs to be added to the output of the subquery. We + * only want CTIDs of rels that will survive join removal, so we can't + * add them now, as that would in itself prevent join removal. + */ + subroot->needBaseTids = true; + + /* Re-apply reduce_outer_joins() in case we can now reduce some */ + /* (XXX would be better if this just got moved into query_planner) */ + hasOuterJoins = false; + foreach(lc, parse->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + if (rte->rtekind == RTE_JOIN) + { + if (IS_OUTER_JOIN(rte->jointype)) + { + hasOuterJoins = true; + break; + } + } + } + if (hasOuterJoins) + reduce_outer_joins(subroot); + + /* Plan the modified query */ + final_rel = query_planner(subroot, subroot->processed_tlist, + union_or_qp_callback, NULL); + + /* + * Get the cheapest-total path for the subquery; there's little value + * in considering any others. + */ + subpath = final_rel->cheapest_total_path; + Assert(subpath); + + /* Add cheapest-total path to subpaths list */ + subpaths = lappend(subpaths, subpath); + } + + return subpaths; + } + + /* + * Compute query_pathkeys and other pathkeys during plan generation + */ + static void + union_or_qp_callback(PlannerInfo *root, void *extra) + { + /* + * Since the output of the subquery is going to go through a UNION step + * that destroys ordering, there's little need to worry about what its + * output order is. Hence, don't bother telling it about pathkeys that + * might apply to these later execution steps. + */ + root->group_pathkeys = NIL; + root->window_pathkeys = NIL; + root->distinct_pathkeys = NIL; + root->sort_pathkeys = NIL; + root->query_pathkeys = NIL; + } diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index c3f46a2..311f4f1 100644 *** a/src/backend/optimizer/prep/prepjointree.c --- b/src/backend/optimizer/prep/prepjointree.c *************** pull_up_simple_subquery(PlannerInfo *roo *** 915,920 **** --- 915,921 ---- subroot->minmax_aggs = NIL; subroot->qual_security_level = 0; subroot->inhTargetKind = INHKIND_NONE; + subroot->needBaseTids = false; subroot->hasRecursion = false; subroot->wt_param_id = -1; subroot->non_recursive_path = NULL; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 41caf87..f1105a5 100644 *** a/src/include/nodes/relation.h --- b/src/include/nodes/relation.h *************** typedef struct PlannerInfo *** 322,327 **** --- 322,328 ---- InheritanceKind inhTargetKind; /* indicates if the target relation is an * inheritance child or partition or a * partitioned table */ + bool needBaseTids; /* true to force outputting baserel CTIDs */ bool hasJoinRTEs; /* true if any RTEs are RTE_JOIN kind */ bool hasLateralRTEs; /* true if any RTEs are marked LATERAL */ bool hasDeletedRTEs; /* true if any RTE was deleted from jointree */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index c8ab028..5c0be2b 100644 *** a/src/include/optimizer/planmain.h --- b/src/include/optimizer/planmain.h *************** extern RelOptInfo *query_planner(Planner *** 46,51 **** --- 46,59 ---- extern void preprocess_minmax_aggregates(PlannerInfo *root, List *tlist); /* + * prototypes for plan/planunionor.c + */ + extern List *split_join_or_clauses(PlannerInfo *root); + extern void add_base_rel_ctids(PlannerInfo *root); + extern void finish_union_or_paths(PlannerInfo *root, RelOptInfo *joinrel, + List *union_or_subpaths); + + /* * prototypes for plan/createplan.c */ extern Plan *create_plan(PlannerInfo *root, Path *best_path);
pgsql-hackers by date: