From 885b2e87f22ab939e652f6610fba97e4e49c46c6 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Sun, 19 Jan 2020 07:44:18 +0000 Subject: [PATCH] Fix up partitionwise join. --- src/backend/optimizer/path/equivclass.c | 102 +++++++++++++++++++++++++++ src/backend/optimizer/util/relnode.c | 19 +++-- src/include/optimizer/paths.h | 4 ++ src/test/regress/expected/partition_join.out | 38 ++++++++++ src/test/regress/sql/partition_join.sql | 4 ++ 5 files changed, 161 insertions(+), 6 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 4ef1254..76fd9c9 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1269,6 +1269,108 @@ generate_join_implied_equalities_for_ecs(PlannerInfo *root, } /* + * generate_join_implied_equalities_for_all + * Create any EC-derived joinclauses of form 'outer_em = inner_em'. + * + * This is used when building partition info for joinrel. + */ +List * +generate_join_implied_equalities_for_all(PlannerInfo *root, + Relids join_relids, + Relids outer_relids, + Relids inner_relids) +{ + List *result = NIL; + Bitmapset * matching_ecs; + int i; + + /* + * Get all eclasses in common between inner_relids and outer_relids + */ + matching_ecs = get_common_eclass_indexes(root, inner_relids, + outer_relids); + + i = -1; + while ((i = bms_next_member(matching_ecs, i)) >= 0) + { + EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i); + List *outer_members = NIL; + List *inner_members = NIL; + ListCell *lc1; + + /* Do not consider this EC if it's ec_broken */ + if (ec->ec_broken) + continue; + + /* Single-member ECs won't generate any deductions */ + if (list_length(ec->ec_members) <= 1) + continue; + + /* + * First, scan the EC to identify member values that are computable at the + * outer rel or at the inner rel. 
+ */ + foreach(lc1, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1); + + /* + * We don't need to check explicitly for child EC members. This test + * against join_relids will cause them to be ignored except when + * considering a child inner rel, which is what we want. + */ + if (!bms_is_subset(cur_em->em_relids, join_relids)) + continue; /* not computable yet, or wrong child */ + + if (bms_is_subset(cur_em->em_relids, outer_relids)) + outer_members = lappend(outer_members, cur_em); + else if (bms_is_subset(cur_em->em_relids, inner_relids)) + inner_members = lappend(inner_members, cur_em); + } + + /* + * Next, build a joinclause for every outer/inner member pair that has an + * equality operator. Any such pairing is valid, since we know this EC is + * not ec_broken. + */ + if (outer_members && inner_members) + { + RestrictInfo *rinfo; + + foreach(lc1, outer_members) + { + EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc1); + ListCell *lc2; + + foreach(lc2, inner_members) + { + EquivalenceMember *inner_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + + eq_op = select_equality_operator(ec, + outer_em->em_datatype, + inner_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; + + /* + * Create clause, setting parent_ec to mark it as redundant with other + * joinclauses + */ + rinfo = create_join_clause(root, ec, eq_op, + outer_em, inner_em, + ec); + + result = lappend(result, rinfo); + } + } + } + } + + return result; +} + +/* * generate_join_implied_equalities for a still-valid EC */ static List * diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 374f938..adf75f5 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -55,7 +55,7 @@ static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel, static void set_foreign_rel_properties(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); static void add_join_rel(PlannerInfo *root, 
RelOptInfo *joinrel); -static void build_joinrel_partition_info(RelOptInfo *joinrel, +static void build_joinrel_partition_info(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, List *restrictlist, JoinType jointype); static void build_child_join_reltarget(PlannerInfo *root, @@ -706,7 +706,7 @@ build_join_rel(PlannerInfo *root, joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel); /* Store the partition information. */ - build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist, + build_joinrel_partition_info(root, joinrel, outer_rel, inner_rel, restrictlist, sjinfo->jointype); /* @@ -870,7 +870,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->has_eclass_joins = parent_joinrel->has_eclass_joins; /* Is the join between partitions itself partitioned? */ - build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist, + build_joinrel_partition_info(root, joinrel, outer_rel, inner_rel, restrictlist, jointype); /* Child joinrel is parallel safe if parent is parallel safe. */ @@ -1613,9 +1613,9 @@ find_param_path_info(RelOptInfo *rel, Relids required_outer) * the join relation. 
*/ static void -build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, - RelOptInfo *inner_rel, List *restrictlist, - JoinType jointype) +build_joinrel_partition_info(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outer_rel, RelOptInfo *inner_rel, + List *restrictlist, JoinType jointype) { int partnatts; int cnt; @@ -1628,6 +1628,13 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, return; } + restrictlist = + list_concat_unique_ptr(generate_join_implied_equalities_for_all(root, + joinrel->relids, + outer_rel->relids, + inner_rel->relids), + restrictlist); + /* * We can only consider this join as an input to further partitionwise * joins if (a) the input relations are partitioned and have diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9ab73bd..ab19ea3 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -140,6 +140,10 @@ extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, RelOptInfo *inner_rel); +extern List *generate_join_implied_equalities_for_all(PlannerInfo *root, + Relids join_relids, + Relids outer_relids, + Relids inner_relids); extern List *generate_join_implied_equalities_for_ecs(PlannerInfo *root, List *eclasses, Relids join_relids, diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index b3fbe47..20346f3 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -62,6 +62,44 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 450 | 0450 | 450 | 0450 (4 rows) +EXPLAIN (COSTS OFF) +SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.a AND t1.a = t2.b ORDER BY t1.a, t2.b; + QUERY PLAN +--------------------------------------------------------------- + Sort + Sort Key: t1.a + -> Append + -> Merge Join + Merge Cond: (t1_1.a = t2_1.a) + -> 
Index Scan using iprt1_p1_a on prt1_p1 t1_1 + -> Sort + Sort Key: t2_1.b + -> Seq Scan on prt2_p1 t2_1 + Filter: (a = b) + -> Hash Join + Hash Cond: (t1_2.a = t2_2.a) + -> Seq Scan on prt1_p2 t1_2 + -> Hash + -> Seq Scan on prt2_p2 t2_2 + Filter: (a = b) + -> Hash Join + Hash Cond: (t1_3.a = t2_3.a) + -> Seq Scan on prt1_p3 t1_3 + -> Hash + -> Seq Scan on prt2_p3 t2_3 + Filter: (a = b) +(22 rows) + +SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.a AND t1.a = t2.b ORDER BY t1.a, t2.b; + a | c | b | c +----+------+----+------ + 0 | 0000 | 0 | 0000 + 6 | 0006 | 6 | 0006 + 12 | 0012 | 12 | 0012 + 18 | 0018 | 18 | 0018 + 24 | 0024 | 24 | 0024 +(5 rows) + -- left outer join, with whole-row reference; partitionwise join does not apply EXPLAIN (COSTS OFF) SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 575ba7b..30c981e 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -34,6 +34,10 @@ EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b; SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b; +EXPLAIN (COSTS OFF) +SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.a AND t1.a = t2.b ORDER BY t1.a, t2.b; +SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.a AND t1.a = t2.b ORDER BY t1.a, t2.b; + -- left outer join, with whole-row reference; partitionwise join does not apply EXPLAIN (COSTS OFF) SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; -- 2.7.4