diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index b7723481b0..0a95e7221d 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1067,7 +1067,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, * paths that produce those sort orderings). */ if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) - add_child_rel_equivalences(root, appinfo, rel, childrel); + add_child_rel_equivalences(root, &appinfo, 1, rel, childrel); childrel->has_eclass_joins = rel->has_eclass_joins; /* diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 688d9b0707..06d3771d20 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -2103,23 +2103,48 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root, /* * add_child_rel_equivalences - * Search for EC members that reference the parent_rel, and + * Search for EC members that reference the root parent of child_rel, and * add transformed members referencing the child_rel. * * Note that this function won't be called at all unless we have at least some * reason to believe that the EC members it generates will be useful. * - * parent_rel and child_rel could be derived from appinfo, but since the + * parent_rel and child_rel could be derived from appinfos, but since the * caller has already computed them, we might as well just pass them in. + * Note that parent_rel and child_rel are either BASEREL and OTHER_MEMBER_REL + * or JOINREL and OTHER_JOINREL, respectively. + * + * The AppendRelInfos that are passed in are not used at all if child_rel is + * not a direct child of parent_rel, because they contain mapping from the + * direct parent, whereas we'd like to translate from the root parent's ECs. 
+ * Still, having caller pass them in common cases that don't involve multi- + * level inheritance is great for performance, because + * adjust_appendrel_attrs_multilevel(), etc. have to look up AppendRelInfos + * from child relids whose overhead can quickly add up. */ void add_child_rel_equivalences(PlannerInfo *root, - AppendRelInfo *appinfo, + AppendRelInfo **appinfos, + int nappinfos, RelOptInfo *parent_rel, RelOptInfo *child_rel) { ListCell *lc1; + Relids top_parent_relids = child_rel->top_parent_relids; + Assert(IS_SIMPLE_REL(parent_rel) || IS_JOIN_REL(parent_rel)); + /* + * A given EC may be referenced from both a "baserel" whose expressions + * appear in it and also from the "joinrels" containing that "baserel". + * Whereas this function is always called for baserels, it's called for + * joinrels only if partitionwise join is used. + * + * Furthermore, a given expression in the EC may reference one or more + * baserels. If it references more than one, it must be transformed only by a parent + * joinrel which contains all of the referenced baserels. If, OTOH, + * it references only one baserel, it must be transformed only once, + * that is, with that baserel as the parent. + */ foreach(lc1, root->eq_classes) { EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); @@ -2137,7 +2162,9 @@ add_child_rel_equivalences(PlannerInfo *root, * No point in searching if child's topmost parent rel is not * mentioned in eclass. */ - if (!bms_is_subset(child_rel->top_parent_relids, cur_ec->ec_relids)) + if (!(bms_is_subset(child_rel->top_parent_relids, cur_ec->ec_relids) || + (IS_JOIN_REL(parent_rel) && + bms_overlap(top_parent_relids, cur_ec->ec_relids)))) continue; foreach(lc2, cur_ec->ec_members) @@ -2148,39 +2175,51 @@ add_child_rel_equivalences(PlannerInfo *root, continue; /* ignore consts here */ /* - * We consider only original EC members here, not - * already-transformed child members. 
Otherwise, if some original - * member expression references more than one appendrel, we'd get - * an O(N^2) explosion of useless derived expressions for - * combinations of children. + * There's no point in scanning child members, as their children + * are in turn obtained by applying multi-level translation + * to the top-level expressions, so we can stop after the last + * non-child EC member. */ if (cur_em->em_is_child) - continue; /* ignore children here */ + break; - /* Does this member reference child's topmost parent rel? */ - if (bms_overlap(cur_em->em_relids, child_rel->top_parent_relids)) + /* + * Does this member reference child's topmost parent rel? Any + * multi-relation expressions should be transformed only when + * needed and possible. + */ + if (bms_membership(cur_em->em_relids) == + bms_membership(top_parent_relids) && + bms_is_subset(cur_em->em_relids, child_rel->top_parent_relids)) { /* Yes, generate transformed child version */ Expr *child_expr; Relids new_relids; Relids new_nullable_relids; - if (parent_rel->reloptkind == RELOPT_BASEREL) + /* + * If the parent_rel is itself the topmost parent rel, transform + * directly. + */ + if (parent_rel->reloptkind == RELOPT_BASEREL || + parent_rel->reloptkind == RELOPT_JOINREL) { /* Simple single-level transformation */ child_expr = (Expr *) adjust_appendrel_attrs(root, (Node *) cur_em->em_expr, - 1, &appinfo); + nappinfos, appinfos); } else { + Assert(parent_rel->reloptkind == RELOPT_OTHER_MEMBER_REL || + parent_rel->reloptkind == RELOPT_OTHER_JOINREL); /* Must do multi-level transformation */ child_expr = (Expr *) adjust_appendrel_attrs_multilevel(root, (Node *) cur_em->em_expr, child_rel->relids, - child_rel->top_parent_relids); + top_parent_relids); } /* @@ -2190,22 +2229,20 @@ add_child_rel_equivalences(PlannerInfo *root, * don't want the child member to be marked as constant. 
*/ new_relids = bms_difference(cur_em->em_relids, - child_rel->top_parent_relids); + top_parent_relids); new_relids = bms_add_members(new_relids, child_rel->relids); /* - * And likewise for nullable_relids. Note this code assumes - * parent and child relids are singletons. + * For nullable_relids, we must selectively replace parent + * nullable relids with child ones. */ new_nullable_relids = cur_em->em_nullable_relids; - if (bms_overlap(new_nullable_relids, - child_rel->top_parent_relids)) - { - new_nullable_relids = bms_difference(new_nullable_relids, - child_rel->top_parent_relids); - new_nullable_relids = bms_add_members(new_nullable_relids, - child_rel->relids); - } + if (bms_overlap(new_nullable_relids, top_parent_relids)) + new_nullable_relids = + adjust_child_relids_multilevel(root, + new_nullable_relids, + child_rel->relids, + top_parent_relids); (void) add_eq_member(cur_ec, child_expr, new_relids, new_nullable_relids, diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 6054bd2b53..bf74abd188 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -848,6 +848,16 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, (Node *) parent_joinrel->joininfo, nappinfos, appinfos); + + /* + * If the parent joinrel has pending equivalence classes, so does the + * child. 
+ */ + if (parent_joinrel->has_eclass_joins || + has_useful_pathkeys(root, parent_joinrel)) + add_child_rel_equivalences(root, appinfos, nappinfos, + parent_joinrel, joinrel); + pfree(appinfos); /* diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 7345137d1d..832cc84bb4 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -150,7 +150,8 @@ extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root, ForeignKeyOptInfo *fkinfo, int colno); extern void add_child_rel_equivalences(PlannerInfo *root, - AppendRelInfo *appinfo, + AppendRelInfo **appinfos, + int nappinfos, RelOptInfo *parent_rel, RelOptInfo *child_rel); extern List *generate_implied_equalities_for_column(PlannerInfo *root, diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out index c448d85dec..003c8d1c24 100644 --- a/src/test/regress/expected/equivclass.out +++ b/src/test/regress/expected/equivclass.out @@ -439,3 +439,67 @@ explain (costs off) Filter: ((unique1 = unique1) OR (unique2 = unique2)) (2 rows) +-- Check that child merge join for a FULL OUTER join works correctly +SET enable_partitionwise_join TO on; +SET enable_partitionwise_aggregate TO on; +CREATE TABLE child_joins_ecs_testtab1 (a int); +INSERT INTO child_joins_ecs_testtab1 SELECT generate_series(1, 100); +CREATE TABLE child_joins_ecs_testtab2 (a int, b int) PARTITION BY RANGE (a); +CREATE TABLE child_joins_ecs_testtab2_p1 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (1) TO (10001); +CREATE TABLE child_joins_ecs_testtab2_p2 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (10001) TO (20001); +CREATE TABLE child_joins_ecs_testtab2_p3 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (20001) TO (30001); +INSERT INTO child_joins_ecs_testtab2 SELECT a, a % 100 + 1 FROM generate_series(1, 30000) a; +ANALYZE child_joins_ecs_testtab1, child_joins_ecs_testtab2; +-- this forces plan to be a specific shape +SET work_mem 
TO '0.1MB'; +SET max_parallel_workers_per_gather TO 0; +EXPLAIN (COSTS OFF) +SELECT child_joins_ecs_testtab1.* + FROM (SELECT a, b + FROM child_joins_ecs_testtab2 t1 FULL JOIN child_joins_ecs_testtab2 t2 USING(a, b) + WHERE a >= 1 AND a < 200000 + GROUP BY 1, 2) AS data + JOIN child_joins_ecs_testtab1 ON (child_joins_ecs_testtab1.a = data.b); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: ((COALESCE(t1.b, t2.b)) = child_joins_ecs_testtab1.a) + -> Group + Group Key: (COALESCE(t1.a, t2.a)), (COALESCE(t1.b, t2.b)) + -> Merge Append + Sort Key: (COALESCE(t1.a, t2.a)), (COALESCE(t1.b, t2.b)) + -> Group + Group Key: (COALESCE(t1.a, t2.a)), (COALESCE(t1.b, t2.b)) + -> Sort + Sort Key: (COALESCE(t1.a, t2.a)), (COALESCE(t1.b, t2.b)) + -> Hash Full Join + Hash Cond: ((t1.a = t2.a) AND (t1.b = t2.b)) + Filter: ((COALESCE(t1.a, t2.a) >= 1) AND (COALESCE(t1.a, t2.a) < 200000)) + -> Seq Scan on child_joins_ecs_testtab2_p1 t1 + -> Hash + -> Seq Scan on child_joins_ecs_testtab2_p1 t2 + -> Group + Group Key: (COALESCE(t1_1.a, t2_1.a)), (COALESCE(t1_1.b, t2_1.b)) + -> Sort + Sort Key: (COALESCE(t1_1.a, t2_1.a)), (COALESCE(t1_1.b, t2_1.b)) + -> Hash Full Join + Hash Cond: ((t1_1.a = t2_1.a) AND (t1_1.b = t2_1.b)) + Filter: ((COALESCE(t1_1.a, t2_1.a) >= 1) AND (COALESCE(t1_1.a, t2_1.a) < 200000)) + -> Seq Scan on child_joins_ecs_testtab2_p2 t1_1 + -> Hash + -> Seq Scan on child_joins_ecs_testtab2_p2 t2_1 + -> Group + Group Key: (COALESCE(t1_2.a, t2_2.a)), (COALESCE(t1_2.b, t2_2.b)) + -> Sort + Sort Key: (COALESCE(t1_2.a, t2_2.a)), (COALESCE(t1_2.b, t2_2.b)) + -> Hash Full Join + Hash Cond: ((t1_2.a = t2_2.a) AND (t1_2.b = t2_2.b)) + Filter: ((COALESCE(t1_2.a, t2_2.a) >= 1) AND (COALESCE(t1_2.a, t2_2.a) < 200000)) + -> Seq Scan on child_joins_ecs_testtab2_p3 t1_2 + -> Hash + -> Seq Scan on child_joins_ecs_testtab2_p3 t2_2 + -> Materialize + -> Seq Scan on 
child_joins_ecs_testtab1 +(38 rows) + +DROP TABLE child_joins_ecs_testtab1, child_joins_ecs_testtab2; diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql index 85aa65de39..bd792dfc3c 100644 --- a/src/test/regress/sql/equivclass.sql +++ b/src/test/regress/sql/equivclass.sql @@ -262,3 +262,26 @@ explain (costs off) -- this could be converted, but isn't at present explain (costs off) select * from tenk1 where unique1 = unique1 or unique2 = unique2; + +-- Check that child merge join for a FULL OUTER join works correctly +SET enable_partitionwise_join TO on; +SET enable_partitionwise_aggregate TO on; +CREATE TABLE child_joins_ecs_testtab1 (a int); +INSERT INTO child_joins_ecs_testtab1 SELECT generate_series(1, 100); +CREATE TABLE child_joins_ecs_testtab2 (a int, b int) PARTITION BY RANGE (a); +CREATE TABLE child_joins_ecs_testtab2_p1 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (1) TO (10001); +CREATE TABLE child_joins_ecs_testtab2_p2 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (10001) TO (20001); +CREATE TABLE child_joins_ecs_testtab2_p3 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (20001) TO (30001); +INSERT INTO child_joins_ecs_testtab2 SELECT a, a % 100 + 1 FROM generate_series(1, 30000) a; +ANALYZE child_joins_ecs_testtab1, child_joins_ecs_testtab2; +-- this forces plan to be a specific shape +SET work_mem TO '0.1MB'; +SET max_parallel_workers_per_gather TO 0; +EXPLAIN (COSTS OFF) +SELECT child_joins_ecs_testtab1.* + FROM (SELECT a, b + FROM child_joins_ecs_testtab2 t1 FULL JOIN child_joins_ecs_testtab2 t2 USING(a, b) + WHERE a >= 1 AND a < 200000 + GROUP BY 1, 2) AS data + JOIN child_joins_ecs_testtab1 ON (child_joins_ecs_testtab1.a = data.b); +DROP TABLE child_joins_ecs_testtab1, child_joins_ecs_testtab2;