From 1f1556b04d443220f92b846d247cf133a4564290 Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Mon, 7 Aug 2023 12:17:13 +0530 Subject: [PATCH 3/3] Construct child join's joininfo and restrictlist list from joining relations In build_child_join_rel() construct the joininfo list for the child join from joining relations instead of translating the joininfo list of parent. This will avoid multiple translations of the same RestrictInfo clause and bring build_child_join_rel() close to build_join_rel(). Instead of translating the parent restrictlist, build the child join restrictlist using build_joinrel_restrictlist(), from joininfo of the joining child relations and the equivalence classes. This avoids repeated RestrictInfo translations and also brings build_child_join_rel() closer to build_join_rel(). Add a function is_child_clauselist_complete() to make sure that every clause that should be in child joinrel's joininfo is there; nothing more or nothing less. We do this by making sure that the rinfo_serials of parent clauselist match that of child clause list. A more time and memory consuming check comparing the child clause list with an actual translation of parent clause list is added under a separate compile time flag for stronger check. Ashutosh Bapat Adjust joining child relations' joininfo list for child join In try_partitionwise_join(), we know the pair of child relations that will join each other. So adjust their joininfo list accordingly. This commit leaves one TODO in the code which needs to be addressed. Ashutosh Bapat --- src/backend/optimizer/path/equivclass.c | 6 + src/backend/optimizer/path/joinrels.c | 29 +-- src/backend/optimizer/util/relnode.c | 241 ++++++++++++++++++++++-- src/include/optimizer/pathnode.h | 11 +- 4 files changed, 253 insertions(+), 34 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 7fa502d6e2..4700b2ffad 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1391,6 +1391,12 @@ generate_join_implied_equalities(PlannerInfo *root, Bitmapset *matching_ecs; int i; + /* + * TODO: What if both the relations are child relations? as in child + * joins. For some reason this function seems to work well. It produces + * the right clause nonetheless. + */ + /* If inner rel is a child, extra setup work is needed */ if (IS_OTHER_REL(inner_rel)) { diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 015a0b3cbe..2525014cdd 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1645,27 +1645,16 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, child_rel2->relids), &nappinfos); - /* - * Construct restrictions applicable to the child join from those - * applicable to the parent join. - */ - child_restrictlist = - (List *) adjust_appendrel_attrs(root, - (Node *) parent_restrictlist, - nappinfos, appinfos); - /* Find or construct the child join's RelOptInfo */ - child_joinrel = joinrel->part_rels[cnt_parts]; - if (!child_joinrel) - { - child_joinrel = build_child_join_rel(root, child_rel1, child_rel2, - joinrel, child_restrictlist, - child_sjinfo); - joinrel->part_rels[cnt_parts] = child_joinrel; - joinrel->live_parts = bms_add_member(joinrel->live_parts, cnt_parts); - joinrel->all_partrels = bms_add_members(joinrel->all_partrels, - child_joinrel->relids); - } + child_joinrel = build_child_join_rel(root, child_rel1, child_rel2, + joinrel, cnt_parts, &child_restrictlist, + child_sjinfo); + Assert(is_child_clauselist_complete(child_restrictlist, + parent_restrictlist +#ifdef CHILD_JOIN_SANITY + , root, appinfos, nappinfos +#endif /* CHILD_JOIN_SANITY */ + )); /* Assert we got the right one */ Assert(bms_equal(child_joinrel->relids, diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 71327128e1..431d2df8d7 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -843,6 +843,168 @@ build_join_rel(PlannerInfo *root, return joinrel; } +/* + * is_child_clauselist_complete + * Does the given child clause list same as translation of parent clause list? +*/ +bool +is_child_clauselist_complete(List *child_clauselist, + List *parent_clauselist +#ifdef CHILD_JOIN_SANITY + , PlannerInfo *root, + AppendRelInfo **appinfos, + int nappinfos +#endif /* CHILD_JOIN_SANITY */ +) +{ + Bitmapset *parent_clause_ids = NULL; + Bitmapset *child_clause_ids = NULL; + ListCell *lc; + + if (list_length(child_clauselist) != list_length(parent_clauselist)) + { + elog(LOG, "child and parent clause list differ in length %d and %d respectively", + list_length(child_clauselist), list_length(parent_clauselist)); + return false; + } + + /* + * Compare parent and child clause ids. This should be enough to make sure + * that an appropriate translation of every parent clause is present in + * the child clause list. + */ + foreach(lc, parent_clauselist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + Assert(!bms_is_member(rinfo->rinfo_serial, parent_clause_ids)); + bms_add_member(parent_clause_ids, rinfo->rinfo_serial); + } + foreach(lc, child_clauselist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + Assert(!bms_is_member(rinfo->rinfo_serial, child_clause_ids)); + bms_add_member(child_clause_ids, rinfo->rinfo_serial); + } + + if (!bms_equal(child_clause_ids, parent_clause_ids)) + { + elog(LOG, "child and parent clauselists have different clauses %s and %s respectively", + bmsToString(child_clause_ids), bmsToString(parent_clause_ids)); + return false; + } + +#ifdef CHILD_JOIN_SANITY + + /* + * Carry out a complete check by translating parent clause list and + * comparing the translation with the child clause list. This consumes a + * lot of both time and memory. Hence under a separate compile time flag + * only when such a complete check is required. + */ + if (!equal(child_clauselist, + adjust_appendrel_attrs(root, (Node *) parent_clauselist, + nappinfos, appinfos))) + { + elog(LOG, "child clauses are not exact translations of parent clauses"); + return false; + } +#endif /* CHILD_JOIN_SANITY */ + + return true; +} + +/* + * adjust_inner_outer_child_joinlist + * + * When joininfo lists of child rels are created, it is not known which other + * child rels they will be join with. Their joininfo lists contain Vars of + * parent rels. When the child join rels are created in try_partitionwise_join, + * child join partners become known, thus the joininfo lists of the joining + * rels can be adjusted accordingly. This function does that. + * + * This function is called when building the child join rel. By that time the + * path creation of joining child rels is complete. Their joininfo lists are + * used only for building joininfo list or restrict list of upper joins. Those + * require clauses which contain the child vars and not parent vars. So replace + * the original clauses with their translations in-place. + * + * TODO: test this with GEQO. + */ +static void +adjust_inner_outer_child_joinlist(PlannerInfo *root, + RelOptInfo *parent_joinrel, + RelOptInfo *outer_rel, RelOptInfo *inner_rel, + AppendRelInfo **appinfos, int nappinfos) +{ + ListCell *lc; + /* Array for looking up RestrictInfo by its serial number. Allocate maximum possible. */ + RestrictInfo **duplicate_rinfos = + (RestrictInfo **) palloc0(sizeof(RestrictInfo *) * + root->last_rinfo_serial); + + foreach(lc, outer_rel->joininfo) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + /* + * If clause contains Vars of the parent, it will require translation. + * The clause will contain Vars of immediate parent whose relids are + * not available with the joining relids but they are available with + * the parent_joinrel of the caller. We can check overlap with that. + */ + if (bms_overlap(rinfo->required_relids, parent_joinrel->relids)) + { + RestrictInfo *child_rinfo = + castNode(RestrictInfo, adjust_appendrel_attrs(root, + (Node *) rinfo, + nappinfos, + appinfos)); + + lfirst(lc) = child_rinfo; + + Assert(bms_overlap(child_rinfo->required_relids, inner_rel->relids)); + Assert(!duplicate_rinfos[child_rinfo->rinfo_serial]); + duplicate_rinfos[child_rinfo->rinfo_serial] = child_rinfo; + } + } + + /* + * Translate the inner_rel clause in the same way but avoid duplicate + * translations. + */ + foreach(lc, inner_rel->joininfo) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + /* + * If clause contains Vars of the parent, it will require translation. + * The clause will contain Vars of immediate parent whose relids are + * not available with the joining relids but they are available with + * the parent_joinrel of the caller. We can check overlap with that. + */ + if (bms_overlap(rinfo->required_relids, parent_joinrel->relids)) + { + /* + * The required translation should be already available from the + * translations of the other relation. + */ + RestrictInfo *child_rinfo = duplicate_rinfos[rinfo->rinfo_serial]; + Assert(child_rinfo && bms_overlap(child_rinfo->required_relids, + outer_rel->relids)); +#ifdef CHILD_JOIN_SANITY + AssertLog(equal(child_rinfo, + adjust_appendrel_attrs(root, (Node *) rinfo, + nappinfos, appinfos))); +#endif /* CHILD_JOIN_SANITY */ + lfirst(lc) = child_rinfo; + } + } + + pfree(duplicate_rinfos); +} + /* * build_child_join_rel * Builds RelOptInfo representing join between given two child relations. @@ -859,9 +1021,10 @@ build_join_rel(PlannerInfo *root, RelOptInfo * build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, RelOptInfo *parent_joinrel, - List *restrictlist, SpecialJoinInfo *sjinfo) + int child_index, List **restrictlist_ptr, + SpecialJoinInfo *sjinfo) { - RelOptInfo *joinrel = makeNode(RelOptInfo); + RelOptInfo *joinrel = parent_joinrel->part_rels[child_index]; AppendRelInfo **appinfos; int nappinfos; @@ -871,6 +1034,22 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, /* The parent joinrel should have consider_partitionwise_join set. */ Assert(parent_joinrel->consider_partitionwise_join); + /* + * If we already have the join rel, return it. We need to build the + * restrictlist for this pair of joining relations. + */ + if (joinrel) + { + *restrictlist_ptr = build_joinrel_restrictlist(root, joinrel, + outer_rel, inner_rel, + sjinfo); + + return joinrel; + } + + /* Not present already, make one. */ + joinrel = makeNode(RelOptInfo); + /* * Find the AppendRelInfo structures for the child baserels. We'll need * these for computing the child join's relid set, and later for mapping @@ -948,11 +1127,19 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, build_child_join_reltarget(root, parent_joinrel, joinrel, nappinfos, appinfos); - /* Construct joininfo list. */ - joinrel->joininfo = (List *) adjust_appendrel_attrs(root, - (Node *) parent_joinrel->joininfo, - nappinfos, - appinfos); + /* + * Construct restrict and join clause lists for the new child joinrel. + */ + adjust_inner_outer_child_joinlist(root, parent_joinrel, outer_rel, inner_rel, appinfos, nappinfos); + *restrictlist_ptr = build_joinrel_restrictlist(root, joinrel, outer_rel, + inner_rel, sjinfo); + build_joinrel_joinlist(joinrel, outer_rel, inner_rel); + Assert(is_child_clauselist_complete(joinrel->joininfo, + parent_joinrel->joininfo +#ifdef CHILD_JOIN_SANITY + , root, appinfos, nappinfos +#endif /* CHILD_JOIN_SANITY */ + )); /* * Lateral relids referred in child join will be same as that referred in @@ -969,14 +1156,14 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, /* Is the join between partitions itself partitioned? */ build_joinrel_partition_info(root, joinrel, outer_rel, inner_rel, sjinfo, - restrictlist); + *restrictlist_ptr); /* Child joinrel is parallel safe if parent is parallel safe. */ joinrel->consider_parallel = parent_joinrel->consider_parallel; /* Set estimates of the child-joinrel's size. */ set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, - sjinfo, restrictlist); + sjinfo, *restrictlist_ptr); /* We build the join only once. */ Assert(!find_join_rel(root, joinrel->relids)); @@ -984,6 +1171,16 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, /* Add the relation to the PlannerInfo. */ add_join_rel(root, joinrel); + /* + * Add the child join rel into relevant arrays and lists in the parent + * join rel + */ + parent_joinrel->part_rels[child_index] = joinrel; + parent_joinrel->live_parts = bms_add_member(parent_joinrel->live_parts, + child_index); + parent_joinrel->all_partrels = bms_add_members(parent_joinrel->all_partrels, + joinrel->relids); + /* * We might need EquivalenceClass members corresponding to the child join, * so that we can represent sort pathkeys for it. As with children of @@ -1268,7 +1465,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, * RestrictInfo nodes are no longer context-dependent. Instead, just include * the original nodes in the lists made for the join relation. */ -static List * +List * build_joinrel_restrictlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, @@ -1312,6 +1509,27 @@ build_joinrel_joinlist(RelOptInfo *joinrel, { List *result; + /* + * In order to use this function for child joins, we need to + * + * 1. translate the joininfo of the input rels by relids of the now-known + * joining partner. + * + * 2. Some clauses which will ultimately end in restrictlist will have + * separate copies of each of the clause one in both the joining + * relations. This duplicate should be avoided. We might be able to avoid + * these copies here by looking at the rinfo_serial during translations + * since both the clauses are available at one place. But when building + * another joinrel for which the same clause becomes a restrict clause, we + * need to avoid translation too. E.g. in A join B join C where A.a = B.a + * and B.a = C.a, A.a = B.a will be part of the restrictlist of (AB) and + * A(BC), thus we will have to make sure that the translated clause A1.a = + * B1.a makes its way to BC's joinlist. + * + * 3. What should we do about the existing joininfo list in either + * relations? + */ + /* * Collect all the clauses that syntactically belong above this level, * eliminating any duplicates (important since we will see many of the @@ -1396,9 +1614,6 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel, { ListCell *l; - /* Expected to be called only for join between parent relations. */ - Assert(joinrel->reloptkind == RELOPT_JOINREL); - foreach(l, joininfo_list) { RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 001e75b5b7..8191c8ce38 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -337,7 +337,16 @@ extern ParamPathInfo *find_param_path_info(RelOptInfo *rel, extern Bitmapset *get_param_path_clause_serials(Path *path); extern RelOptInfo *build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, - RelOptInfo *parent_joinrel, List *restrictlist, + RelOptInfo *parent_joinrel, int child_index, + List **restrictlist_ptr, SpecialJoinInfo *sjinfo); +extern bool is_child_clauselist_complete(List *child_clauselist, + List *parent_clauselist +#ifdef CHILD_JOIN_SANITY + , PlannerInfo *root, + AppendRelInfo **appinfos, + int nappinfos +#endif /* CHILD_JOIN_SANITY */ +); #endif /* PATHNODE_H */ -- 2.25.1