From ffcb6ade688d1956c365df4d8368da5c8a721a65 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Wed, 2 Aug 2023 16:09:55 +0800 Subject: [PATCH v3] Postpone reparameterization of paths until when creating plans --- src/backend/optimizer/path/joinpath.c | 55 +++------- src/backend/optimizer/plan/createplan.c | 16 +++ src/backend/optimizer/util/pathnode.c | 106 ++++++++++++++++--- src/include/nodes/pathnodes.h | 13 +++ src/include/optimizer/pathnode.h | 1 + src/test/regress/expected/partition_join.out | 60 +++++++++++ src/test/regress/sql/partition_join.sql | 12 +++ 7 files changed, 205 insertions(+), 58 deletions(-) diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 059e605e04..b10c2cd815 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -30,19 +30,6 @@ /* Hook for plugins to get control in add_paths_to_joinrel() */ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; -/* - * Paths parameterized by the parent can be considered to be parameterized by - * any of its child. - */ -#define PATH_PARAM_BY_PARENT(path, rel) \ - ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \ - (rel)->top_parent_relids)) -#define PATH_PARAM_BY_REL_SELF(path, rel) \ - ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) - -#define PATH_PARAM_BY_REL(path, rel) \ - (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel)) - static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, @@ -794,24 +781,17 @@ try_nestloop_path(PlannerInfo *root, pathkeys, required_outer)) { /* - * If the inner path is parameterized, it is parameterized by the - * topmost parent of the outer rel, not the outer rel itself. Fix - * that. + * If the inner path is parameterized by the topmost parent of the + * outer rel rather than the outer rel itself, we need to fix that. We + * will perform the translation in createplan.c. For now we need to + * check whether we can translate the inner path, and if not avoid + * creating nestloop path. */ - if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent)) + if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) && + !path_is_reparameterizable_by_child(inner_path)) { - inner_path = reparameterize_path_by_child(root, inner_path, - outer_path->parent); - - /* - * If we could not translate the path, we can't create nest loop - * path. - */ - if (!inner_path) - { - bms_free(required_outer); - return; - } + bms_free(required_outer); + return; } add_path(joinrel, (Path *) @@ -886,20 +866,11 @@ try_partial_nestloop_path(PlannerInfo *root, return; /* - * If the inner path is parameterized, it is parameterized by the topmost - * parent of the outer rel, not the outer rel itself. Fix that. + * See the comments in try_nestloop_path. */ - if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent)) - { - inner_path = reparameterize_path_by_child(root, inner_path, - outer_path->parent); - - /* - * If we could not translate the path, we can't create nest loop path. - */ - if (!inner_path) - return; - } + if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) && + !path_is_reparameterizable_by_child(inner_path)) + return; /* Might be good enough to be worth trying, so let's try it. */ add_partial_path(joinrel, (Path *) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index af48109058..a107bde294 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -29,6 +29,7 @@ #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/paramassign.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/placeholder.h" #include "optimizer/plancat.h" @@ -4327,6 +4328,21 @@ create_nestloop_plan(PlannerInfo *root, List *nestParams; Relids saveOuterRels = root->curOuterRels; + /* + * If the inner path is parameterized by the topmost parent of the + * outer rel rather than the outer rel itself, fix that. + */ + if (PATH_PARAM_BY_PARENT(best_path->jpath.innerjoinpath, + best_path->jpath.outerjoinpath->parent)) + { + best_path->jpath.innerjoinpath = + reparameterize_path_by_child(root, + best_path->jpath.innerjoinpath, + best_path->jpath.outerjoinpath->parent); + + Assert(best_path->jpath.innerjoinpath != NULL); + } + /* NestLoop can project, so no need to be picky about child tlists */ outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 754f0b9f34..a93ac22660 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2430,6 +2430,16 @@ create_nestloop_path(PlannerInfo *root, { NestPath *pathnode = makeNode(NestPath); Relids inner_req_outer = PATH_REQ_OUTER(inner_path); + Relids outerrelids; + + /* + * Paths are parameterized by top-level parents, so run parameterization + * tests on the parent relids. + */ + if (outer_path->parent->top_parent_relids) + outerrelids = outer_path->parent->top_parent_relids; + else + outerrelids = outer_path->parent->relids; /* * If the inner path is parameterized by the outer, we must drop any @@ -2439,7 +2449,7 @@ create_nestloop_path(PlannerInfo *root, * estimates for this path. We detect such clauses by checking for serial * number match to clauses already enforced in the inner path. */ - if (bms_overlap(inner_req_outer, outer_path->parent->relids)) + if (bms_overlap(inner_req_outer, outerrelids)) { Bitmapset *enforced_serials = get_param_path_clause_serials(inner_path); List *jclauses = NIL; @@ -4018,6 +4028,44 @@ reparameterize_path(PlannerInfo *root, Path *path, return NULL; } +/* + * path_is_reparameterizable_by_child + * Given a path parameterized by the parent of a child relation, check to + * see if it can be translated to be parameterized by child relation. + * + * Currently, only a few path types are supported here, though more could be + * added at need. Any addition or reduction in supported path types needs to + * be reflected in reparameterize_path_by_child(). + */ +bool +path_is_reparameterizable_by_child(Path *path) +{ + switch (nodeTag(path)) + { + case T_Path: + case T_IndexPath: + case T_BitmapHeapPath: + case T_BitmapAndPath: + case T_BitmapOrPath: + case T_ForeignPath: + case T_CustomPath: + case T_NestPath: + case T_MergePath: + case T_HashPath: + case T_AppendPath: + case T_MaterialPath: + case T_MemoizePath: + case T_GatherPath: + return true; + default: + + /* We don't know how to reparameterize this path. */ + return false; + } + + return false; +} + /* * reparameterize_path_by_child * Given a path parameterized by the parent of the given child relation, @@ -4035,7 +4083,12 @@ reparameterize_path(PlannerInfo *root, Path *path, * members are copied as they are. * * Currently, only a few path types are supported here, though more could be - * added at need. We return NULL if we can't reparameterize the given path. + * added at need. Any addition or reduction in supported path types needs to + * be reflected in path_is_reparameterizable_by_child(). We return NULL if we + * can't reparameterize the given path. + * + * Note that this function can only be called at createplan time, because it + * may modify RTEs on the fly. */ Path * reparameterize_path_by_child(PlannerInfo *root, Path *path, @@ -4046,11 +4099,11 @@ reparameterize_path_by_child(PlannerInfo *root, Path *path, ( (newnode) = makeNode(nodetype), \ memcpy((newnode), (node), sizeof(nodetype)) ) -#define ADJUST_CHILD_ATTRS(node) \ +#define ADJUST_CHILD_ATTRS(node, fieldtype) \ ((node) = \ - (List *) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \ - child_rel, \ - child_rel->top_parent)) + (fieldtype) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \ + child_rel, \ + child_rel->top_parent)) #define REPARAMETERIZE_CHILD_PATH(path) \ do { \ @@ -4083,6 +4136,11 @@ do { \ !bms_overlap(PATH_REQ_OUTER(path), child_rel->top_parent_relids)) return path; + /* + * The path should be reparameterizable, otherwise it would not come here. + */ + Assert(path_is_reparameterizable_by_child(path)); + /* * If possible, reparameterize the given path, making a copy. * @@ -4098,7 +4156,23 @@ do { \ switch (nodeTag(path)) { case T_Path: - FLAT_COPY_PATH(new_path, path, Path); + { + FLAT_COPY_PATH(new_path, path, Path); + + if (path->pathtype == T_SampleScan) + { + Index scan_relid = path->parent->relid; + RangeTblEntry *rte; + + /* it should be a base rel with a tablesample clause... */ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RELATION); + Assert(rte->tablesample != NULL); + + ADJUST_CHILD_ATTRS(rte->tablesample, TableSampleClause *); + } + } break; case T_IndexPath: @@ -4106,7 +4180,7 @@ do { \ IndexPath *ipath; FLAT_COPY_PATH(ipath, path, IndexPath); - ADJUST_CHILD_ATTRS(ipath->indexclauses); + ADJUST_CHILD_ATTRS(ipath->indexclauses, List *); new_path = (Path *) ipath; } break; @@ -4186,7 +4260,7 @@ do { \ jpath = (JoinPath *) npath; REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath); REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath); - ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo); + ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo, List *); new_path = (Path *) npath; } break; @@ -4201,8 +4275,8 @@ do { \ jpath = (JoinPath *) mpath; REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath); REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath); - ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo); - ADJUST_CHILD_ATTRS(mpath->path_mergeclauses); + ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo, List *); + ADJUST_CHILD_ATTRS(mpath->path_mergeclauses, List *); new_path = (Path *) mpath; } break; @@ -4217,8 +4291,8 @@ do { \ jpath = (JoinPath *) hpath; REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath); REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath); - ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo); - ADJUST_CHILD_ATTRS(hpath->path_hashclauses); + ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo, List *); + ADJUST_CHILD_ATTRS(hpath->path_hashclauses, List *); new_path = (Path *) hpath; } break; @@ -4249,7 +4323,7 @@ do { \ FLAT_COPY_PATH(mpath, path, MemoizePath); REPARAMETERIZE_CHILD_PATH(mpath->subpath); - ADJUST_CHILD_ATTRS(mpath->param_exprs); + ADJUST_CHILD_ATTRS(mpath->param_exprs, List *); new_path = (Path *) mpath; } break; @@ -4300,7 +4374,7 @@ do { \ new_ppi->ppi_req_outer = bms_copy(required_outer); new_ppi->ppi_rows = old_ppi->ppi_rows; new_ppi->ppi_clauses = old_ppi->ppi_clauses; - ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses); + ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses, List *); new_ppi->ppi_serials = bms_copy(old_ppi->ppi_serials); rel->ppilist = lappend(rel->ppilist, new_ppi); @@ -4319,7 +4393,7 @@ do { \ child_rel->top_parent_relids)) { new_path->pathtarget = copy_pathtarget(new_path->pathtarget); - ADJUST_CHILD_ATTRS(new_path->pathtarget->exprs); + ADJUST_CHILD_ATTRS(new_path->pathtarget->exprs, List *); } return new_path; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index a1dc1d07e1..1656628623 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1637,6 +1637,19 @@ typedef struct Path #define PATH_REQ_OUTER(path) \ ((path)->param_info ? (path)->param_info->ppi_req_outer : (Relids) NULL) +/* + * Paths parameterized by the parent can be considered to be parameterized by + * any of its child. + */ +#define PATH_PARAM_BY_PARENT(path, rel) \ + ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \ + (rel)->top_parent_relids)) +#define PATH_PARAM_BY_REL_SELF(path, rel) \ + ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) + +#define PATH_PARAM_BY_REL(path, rel) \ + (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel)) + /*---------- * IndexPath represents an index scan over a single index. * diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 001e75b5b7..85f5562033 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -295,6 +295,7 @@ extern Path *reparameterize_path(PlannerInfo *root, Path *path, double loop_count); extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path, RelOptInfo *child_rel); +extern bool path_is_reparameterizable_by_child(Path *path); /* * prototypes for relnode.c diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 6560fe2416..a11f738411 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -505,6 +505,31 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL 550 | | (12 rows) +-- lateral reference in sample scan +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 JOIN LATERAL + (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a; + QUERY PLAN +------------------------------------------------------------- + Append + -> Nested Loop + -> Seq Scan on prt1_p1 t1_1 + -> Sample Scan on prt1_p1 t2_1 + Sampling: system (t1_1.a) REPEATABLE (t1_1.b) + Filter: (t1_1.a = a) + -> Nested Loop + -> Seq Scan on prt1_p2 t1_2 + -> Sample Scan on prt1_p2 t2_2 + Sampling: system (t1_2.a) REPEATABLE (t1_2.b) + Filter: (t1_2.a = a) + -> Nested Loop + -> Seq Scan on prt1_p3 t1_3 + -> Sample Scan on prt1_p3 t2_3 + Sampling: system (t1_3.a) REPEATABLE (t1_3.b) + Filter: (t1_3.a = a) +(16 rows) + -- bug with inadequate sort key representation SET enable_partitionwise_aggregate TO true; SET enable_hashjoin TO false; @@ -1944,6 +1969,41 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL 550 | 0 | 0002 | | | | | (12 rows) +-- partitionwise join with lateral reference in sample scan +EXPLAIN (COSTS OFF) +SELECT * FROM prt1_l t1 JOIN LATERAL + (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s ON + t1.a = s.a AND t1.b = s.b AND t1.c = s.c; + QUERY PLAN +---------------------------------------------------------------------------------------- + Append + -> Nested Loop + -> Seq Scan on prt1_l_p1 t1_1 + -> Sample Scan on prt1_l_p1 t2_1 + Sampling: system (t1_1.a) REPEATABLE (t1_1.b) + Filter: ((t1_1.a = a) AND (t1_1.b = b) AND ((t1_1.c)::text = (c)::text)) + -> Nested Loop + -> Seq Scan on prt1_l_p2_p1 t1_2 + -> Sample Scan on prt1_l_p2_p1 t2_2 + Sampling: system (t1_2.a) REPEATABLE (t1_2.b) + Filter: ((t1_2.a = a) AND (t1_2.b = b) AND ((t1_2.c)::text = (c)::text)) + -> Nested Loop + -> Seq Scan on prt1_l_p2_p2 t1_3 + -> Sample Scan on prt1_l_p2_p2 t2_3 + Sampling: system (t1_3.a) REPEATABLE (t1_3.b) + Filter: ((t1_3.a = a) AND (t1_3.b = b) AND ((t1_3.c)::text = (c)::text)) + -> Nested Loop + -> Seq Scan on prt1_l_p3_p1 t1_4 + -> Sample Scan on prt1_l_p3_p1 t2_4 + Sampling: system (t1_4.a) REPEATABLE (t1_4.b) + Filter: ((t1_4.a = a) AND (t1_4.b = b) AND ((t1_4.c)::text = (c)::text)) + -> Nested Loop + -> Seq Scan on prt1_l_p3_p2 t1_5 + -> Sample Scan on prt1_l_p3_p2 t2_5 + Sampling: system (t1_5.a) REPEATABLE (t1_5.b) + Filter: ((t1_5.a = a) AND (t1_5.b = b) AND ((t1_5.c)::text = (c)::text)) +(26 rows) + -- join with one side empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c; diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 48daf3aee3..e2daab03fb 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -100,6 +100,12 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a; +-- lateral reference in sample scan +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 JOIN LATERAL + (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a; + -- bug with inadequate sort key representation SET enable_partitionwise_aggregate TO true; SET enable_hashjoin TO false; @@ -387,6 +393,12 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a; +-- partitionwise join with lateral reference in sample scan +EXPLAIN (COSTS OFF) +SELECT * FROM prt1_l t1 JOIN LATERAL + (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s ON + t1.a = s.a AND t1.b = s.b AND t1.c = s.c; + -- join with one side empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c; -- 2.31.0