From e1fb93b999f5d1336840f218563caaa1ae23a4dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=80=E6=8C=83?=
Date: Thu, 7 May 2020 08:36:29 +0800
Subject: [PATCH v8 6/7] Join removal at run-time with UniqueKey.

We delay the join removal until build_join_rel time to get the benefits of
UniqueKey and to bypass some limitations of the current remove_useless_join.
However, this new strategy can't be a replacement for the current one, since
at run time it only knows about two relations, so it can't handle something
like

SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id) ON (a.b_id = b.id);

which can be handled by the current strategy.
---
 src/backend/optimizer/path/joinrels.c |   8 +-
 src/backend/optimizer/util/relnode.c  | 278 +++++++++++++++++++++++++-
 src/include/optimizer/pathnode.h      |   4 +-
 src/test/regress/expected/join.out    |  39 ++++
 src/test/regress/sql/join.sql         |  26 +++
 5 files changed, 348 insertions(+), 7 deletions(-)

diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index c0d2332caf..1e6816d1b1 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -691,6 +691,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 	SpecialJoinInfo sjinfo_data;
 	RelOptInfo *joinrel;
 	List	   *restrictlist;
+	bool		innerrel_removed = false;
 
 	/* We should never try to join two overlapping sets of rels. */
 	Assert(!bms_overlap(rel1->relids, rel2->relids));
@@ -744,7 +745,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 	 * goes with this particular joining.
 	 */
 	joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
-							 &restrictlist);
+							 &restrictlist, &innerrel_removed);
 
 	/*
 	 * If we've already proven this join is empty, we needn't consider any
@@ -756,9 +757,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 		return joinrel;
 	}
 
+	if (!innerrel_removed)
 	/* Add paths to the join relation. */
-	populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
-								restrictlist);
+		populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
+									restrictlist);
 
 	bms_free(joinrelids);
 
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 591a0a3957..ac086930d6 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -22,6 +22,7 @@
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/inherit.h"
+#include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/placeholder.h"
@@ -74,6 +75,11 @@ static void build_child_join_reltarget(PlannerInfo *root,
 									   int nappinfos,
 									   AppendRelInfo **appinfos);
 
+static bool join_canbe_removed(PlannerInfo *root,
+							   SpecialJoinInfo *sjinfo,
+							   RelOptInfo *joinrel,
+							   RelOptInfo *innerrel,
+							   List *restrictlist);
 
 /*
  * setup_simple_rel_arrays
@@ -580,7 +586,8 @@ build_join_rel(PlannerInfo *root,
 			   RelOptInfo *outer_rel,
 			   RelOptInfo *inner_rel,
 			   SpecialJoinInfo *sjinfo,
-			   List **restrictlist_ptr)
+			   List **restrictlist_ptr,
+			   bool *innerrel_removed)
 {
 	RelOptInfo *joinrel;
 	List	   *restrictlist;
@@ -719,6 +726,64 @@ build_join_rel(PlannerInfo *root,
 	 */
 	joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel);
 
+	if (join_canbe_removed(root, sjinfo,
+						   joinrel, inner_rel,
+						   restrictlist))
+	{
+		ListCell *lc;
+
+		joinrel->rows = outer_rel->rows;
+		joinrel->consider_startup = outer_rel->consider_startup;
+		joinrel->consider_param_startup = outer_rel->consider_param_startup;
+		joinrel->consider_parallel = outer_rel->consider_parallel;
+
+		/* Rely on the projection path to reduce the tlist. */
+		joinrel->reltarget = outer_rel->reltarget;
+
+		joinrel->direct_lateral_relids = outer_rel->direct_lateral_relids;
+		joinrel->lateral_relids = outer_rel->lateral_relids;
+
+		joinrel->unique_for_rels = outer_rel->unique_for_rels;
+		joinrel->non_unique_for_rels = outer_rel->non_unique_for_rels;
+		joinrel->baserestrictinfo = outer_rel->baserestrictinfo;
+		joinrel->baserestrictcost = outer_rel->baserestrictcost;
+		joinrel->baserestrict_min_security = outer_rel->baserestrict_min_security;
+		joinrel->uniquekeys = outer_rel->uniquekeys;
+		joinrel->consider_partitionwise_join = outer_rel->consider_partitionwise_join;
+		joinrel->top_parent_relids = outer_rel->top_parent_relids;
+
+		/* Some scan paths need to know which base relation to scan and use the
+		 * relid field for that, so we have to use outer_rel->relid.
+		 */
+		joinrel->relid = outer_rel->relid;
+
+		/* Those same paths also assert that rtekind is RTE_RELATION, so we
+		 * need to set it here as well.
+		 */
+		joinrel->rtekind = RTE_RELATION;
+
+		/* Copy each path so its parent can point to this joinrel; the originals can't be updated in place. */
+		foreach(lc, outer_rel->pathlist)
+		{
+			Size sz = size_of_path(lfirst(lc));
+			Path *path = palloc(sz);
+			memcpy(path, lfirst(lc), sz);
+			path->parent = joinrel;
+			add_path(joinrel, path);
+		}
+
+		foreach(lc, outer_rel->partial_pathlist)
+		{
+			Size sz = size_of_path(lfirst(lc));
+			Path *path = palloc(sz);
+			memcpy(path, lfirst(lc), sz);
+			path->parent = joinrel;
+			add_partial_path(joinrel, path);
+		}
+		*innerrel_removed = true;
+	}
+	else
+	{
 	/* Store the partition information. */
 	build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist,
 								 sjinfo->jointype);
@@ -747,7 +812,7 @@ build_join_rel(PlannerInfo *root,
 		is_parallel_safe(root, (Node *) restrictlist) &&
 		is_parallel_safe(root, (Node *) joinrel->reltarget->exprs))
 		joinrel->consider_parallel = true;
-
+	}
 	/* Add the joinrel to the PlannerInfo. */
 	add_join_rel(root, joinrel);
 
@@ -760,11 +825,18 @@ build_join_rel(PlannerInfo *root,
 	if (root->join_rel_level)
 	{
 		Assert(root->join_cur_level > 0);
-		Assert(root->join_cur_level <= bms_num_members(joinrel->relids));
+		// Assert(root->join_cur_level <= bms_num_members(joinrel->relids));
 		root->join_rel_level[root->join_cur_level] =
 			lappend(root->join_rel_level[root->join_cur_level], joinrel);
 	}
 
+	/* elog(INFO, "lev-%d Build JoinRel (%s) with %s and %s, inner is removed: %d", */
+	/* 	 root->join_cur_level, */
+	/* 	 bmsToString(joinrelids), */
+	/* 	 bmsToString(outer_rel->relids), */
+	/* 	 bmsToString(inner_rel->relids), */
+	/* 	 joinrel->removed); */
+
 	return joinrel;
 }
 
@@ -2028,3 +2100,203 @@ build_child_join_reltarget(PlannerInfo *root,
 	childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple;
 	childrel->reltarget->width = parentrel->reltarget->width;
 }
+
+/*
+ * join_canbe_removed
+ *	  Decide whether the join of innerrel into joinrel can be skipped.
+ *
+ * Returns true only if the join is a LEFT JOIN, innerrel has unique keys,
+ * joinrel's target list references no innerrel Vars, every clause in
+ * restrictlist is a mergejoinable join clause, and
+ * relation_has_uniquekeys_for() accepts the innerrel-side expressions.
+ */
+static bool
+join_canbe_removed(PlannerInfo *root,
+				   SpecialJoinInfo *sjinfo,
+				   RelOptInfo *joinrel,
+				   RelOptInfo *innerrel,
+				   List *restrictlist)
+{
+	Bitmapset  *vars;
+	List	   *exprs = NIL;
+	ListCell   *lc;
+	Bitmapset  *tmp;
+	bool		res;
+
+	if (sjinfo->jointype != JOIN_LEFT)
+		return false;
+
+	if (innerrel->uniquekeys == NIL)
+		return false;
+
+	/*
+	 * Check whether any innerrel column is still needed in the join output.
+	 */
+
+	vars = pull_varnos((Node*)joinrel->reltarget->exprs);
+	tmp = bms_intersect(vars, innerrel->relids);
+	if (!bms_is_empty(tmp))
+		return false;
+
+	foreach(lc, restrictlist)
+	{
+		RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+		if (rinfo->can_join)
+		{
+			if (rinfo->mergeopfamilies != NIL)
+			{
+				if (bms_is_subset(rinfo->left_relids, innerrel->relids))
+					exprs = lappend(exprs, get_leftop(rinfo->clause));
+				else if (bms_is_subset(rinfo->right_relids, innerrel->relids))
+					exprs = lappend(exprs, get_rightop(rinfo->clause));
+				else
+					Assert(false);
+			}
+			else
+				/* Not a mergeable join clause, so we have to keep the join */
+				return false;
+		}
+		else
+		{
+			/*
+			 * The clause is not a join clause and was not pushed down to a
+			 * base relation's baserestrictinfo, so it must be an ON clause.
+			 * Example: SELECT .. FROM t1 left join t2 on t1.a = 10;
+			 * In this case we can't remove the join either.
+			 */
+			return false;
+		}
+	}
+	res = relation_has_uniquekeys_for(root, innerrel, exprs, true);
+	return res;
+}
+
+
+/*
+ * size_of_path
+ *	  Return the size in bytes of the concrete struct for path's node type.
+ *
+ * Raises an ERROR for a node tag that is not listed below.
+ */
+size_t
+size_of_path(Path *path)
+{
+	switch(path->type)
+	{
+		case T_Path:
+			return sizeof(Path);
+		case T_IndexPath:
+			return sizeof(IndexPath);
+		case T_BitmapHeapPath:
+			return sizeof(BitmapHeapPath);
+		case T_TidPath:
+			return sizeof(TidPath);
+		case T_SubqueryScanPath:
+			return sizeof(SubqueryScanPath);
+		case T_ForeignPath:
+			return sizeof(ForeignPath);
+		case T_CustomPath:
+			return sizeof(CustomPath);
+
+
+		case T_NestPath:
+			return sizeof(NestPath);
+
+
+		case T_MergePath:
+			return sizeof(MergePath);
+
+
+		case T_HashPath:
+			return sizeof(HashPath);
+
+
+		case T_AppendPath:
+			return sizeof(AppendPath);
+
+
+		case T_MergeAppendPath:
+			return sizeof(MergeAppendPath);
+
+
+		case T_GroupResultPath:
+			return sizeof(GroupResultPath);
+
+
+		case T_MaterialPath:
+			return sizeof(MaterialPath);
+
+
+		case T_UniquePath:
+			return sizeof(UniquePath);
+
+
+		case T_GatherPath:
+			return sizeof(GatherPath);
+
+
+		case T_GatherMergePath:
+			return sizeof(GatherMergePath);
+
+
+		case T_ProjectionPath:
+			return sizeof(ProjectionPath);
+
+
+		case T_ProjectSetPath:
+			return sizeof(ProjectSetPath);
+
+
+		case T_SortPath:
+			return sizeof(SortPath);
+
+
+		case T_IncrementalSortPath:
+			return sizeof(IncrementalSortPath);
+
+
+		case T_GroupPath:
+			return sizeof(GroupPath);
+
+
+		case T_UpperUniquePath:
+			return sizeof(UpperUniquePath);
+
+
+		case T_AggPath:
+			return sizeof(AggPath);
+
+
+		case T_GroupingSetsPath:
+			return sizeof(GroupingSetsPath);
+
+
+		case T_MinMaxAggPath:
+			return sizeof(MinMaxAggPath);
+
+
+		case T_WindowAggPath:
+			return sizeof(WindowAggPath);
+
+
+		case T_SetOpPath:
+			return sizeof(SetOpPath);
+
+
+		case T_RecursiveUnionPath:
+			return sizeof(RecursiveUnionPath);
+
+
+		case T_LockRowsPath:
+			return sizeof(LockRowsPath);
+		case T_ModifyTablePath:
+			return sizeof(ModifyTablePath);
+		case T_LimitPath:
+			return sizeof(LimitPath);
+		default:
+			elog(ERROR, "unrecognized path type: %d",
+				 (int) path->type);
+			break;
+	}
+	return 0;
+}
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 715a24ad29..254961b2b4 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -294,7 +294,8 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
 								  RelOptInfo *outer_rel,
 								  RelOptInfo *inner_rel,
 								  SpecialJoinInfo *sjinfo,
-								  List **restrictlist_ptr);
+								  List **restrictlist_ptr,
+								  bool *innerrel_removed);
 extern Relids min_join_parameterization(PlannerInfo *root,
 										Relids joinrelids,
 										RelOptInfo *outer_rel,
@@ -321,4 +322,5 @@ extern RelOptInfo *build_child_join_rel(PlannerInfo *root,
 										RelOptInfo *parent_joinrel, List *restrictlist,
 										SpecialJoinInfo *sjinfo, JoinType jointype);
 
+extern size_t size_of_path(Path *path);
 #endif							/* PATHNODE_H */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 8378936eda..b59c9a73ae 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6283,3 +6283,42 @@ where exists (select 1 from j3
 (13 rows)
 
 drop table j3;
+create table m1 (a int primary key, b int, c int);
+create table m2 (a int primary key, b int, c int);
+create table m3 (a int primary key, b int, c int);
+explain (verbose, costs off)
+select t1.a
+from m3 t1
+left join (select m1.a from m1, m2 where m1.b = m2.a) t2
+on (t1.a = t2.a);
+        QUERY PLAN        
+--------------------------
+ Seq Scan on public.m3 t1
+   Output: t1.a
+(2 rows)
+
+explain (verbose, costs off)
+select m1.*
+from m1 left join m2
+on (m1.a = m2.a)
+and m1.b in (select b from m3);
+         QUERY PLAN         
+----------------------------
+ Seq Scan on public.m1
+   Output: m1.a, m1.b, m1.c
+(2 rows)
+
+explain (verbose, costs off)
+select m1.*
+from m1 left join m2
+on m1.b = m2.a
+and m2.b in (select b from m3);
+         QUERY PLAN         
+----------------------------
+ Seq Scan on public.m1
+   Output: m1.a, m1.b, m1.c
+(2 rows)
+
+drop table m1;
+drop table m2;
+drop table m3;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 3312542411..317354547d 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2169,3 +2169,29 @@ where exists (select 1 from j3
       and t1.unique1 < 1;
 
 drop table j3;
+
+create table m1 (a int primary key, b int, c int);
+create table m2 (a int primary key, b int, c int);
+create table m3 (a int primary key, b int, c int);
+
+explain (verbose, costs off)
+select t1.a
+from m3 t1
+left join (select m1.a from m1, m2 where m1.b = m2.a) t2
+on (t1.a = t2.a);
+
+explain (verbose, costs off)
+select m1.*
+from m1 left join m2
+on (m1.a = m2.a)
+and m1.b in (select b from m3);
+
+explain (verbose, costs off)
+select m1.*
+from m1 left join m2
+on m1.b = m2.a
+and m2.b in (select b from m3);
+
+drop table m1;
+drop table m2;
+drop table m3;
-- 
2.21.0