From 871b5f32fc489c6d30718755ce5e09dbf8416073 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Wed, 2 Aug 2023 16:09:55 +0800 Subject: [PATCH v4] Postpone reparameterization of paths until when creating plans --- src/backend/optimizer/path/joinpath.c | 55 ++---- src/backend/optimizer/plan/createplan.c | 16 ++ src/backend/optimizer/util/pathnode.c | 189 ++++++++++++++++++- src/include/nodes/pathnodes.h | 13 ++ src/include/optimizer/pathnode.h | 1 + src/test/regress/expected/partition_join.out | 88 +++++++++ src/test/regress/sql/partition_join.sql | 24 +++ 7 files changed, 340 insertions(+), 46 deletions(-) diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 821d282497..af8a2527d5 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -29,19 +29,6 @@ /* Hook for plugins to get control in add_paths_to_joinrel() */ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; -/* - * Paths parameterized by the parent can be considered to be parameterized by - * any of its child. - */ -#define PATH_PARAM_BY_PARENT(path, rel) \ - ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \ - (rel)->top_parent_relids)) -#define PATH_PARAM_BY_REL_SELF(path, rel) \ - ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) - -#define PATH_PARAM_BY_REL(path, rel) \ - (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel)) - static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, @@ -779,24 +766,17 @@ try_nestloop_path(PlannerInfo *root, pathkeys, required_outer)) { /* - * If the inner path is parameterized, it is parameterized by the - * topmost parent of the outer rel, not the outer rel itself. Fix - * that. + * If the inner path is parameterized by the topmost parent of the + * outer rel rather than the outer rel itself, we need to fix that. We + * will perform the translation in createplan.c. 
For now we need to + * check whether we can translate the inner path, and if not avoid + * creating nestloop path. */ - if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent)) + if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) && + !path_is_reparameterizable_by_child(inner_path)) { - inner_path = reparameterize_path_by_child(root, inner_path, - outer_path->parent); - - /* - * If we could not translate the path, we can't create nest loop - * path. - */ - if (!inner_path) - { - bms_free(required_outer); - return; - } + bms_free(required_outer); + return; } add_path(joinrel, (Path *) @@ -871,20 +851,11 @@ try_partial_nestloop_path(PlannerInfo *root, return; /* - * If the inner path is parameterized, it is parameterized by the topmost - * parent of the outer rel, not the outer rel itself. Fix that. + * See the comments in try_nestloop_path. */ - if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent)) - { - inner_path = reparameterize_path_by_child(root, inner_path, - outer_path->parent); - - /* - * If we could not translate the path, we can't create nest loop path. - */ - if (!inner_path) - return; - } + if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) && + !path_is_reparameterizable_by_child(inner_path)) + return; /* Might be good enough to be worth trying, so let's try it. 
*/ add_partial_path(joinrel, (Path *) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 34ca6d4ac2..303c7a8451 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -29,6 +29,7 @@ #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/paramassign.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/placeholder.h" #include "optimizer/plancat.h" @@ -4346,6 +4347,21 @@ create_nestloop_plan(PlannerInfo *root, List *nestParams; Relids saveOuterRels = root->curOuterRels; + /* + * If the inner path is parameterized by the topmost parent of the + * outer rel rather than the outer rel itself, fix that. + */ + if (PATH_PARAM_BY_PARENT(best_path->jpath.innerjoinpath, + best_path->jpath.outerjoinpath->parent)) + { + best_path->jpath.innerjoinpath = + reparameterize_path_by_child(root, + best_path->jpath.innerjoinpath, + best_path->jpath.outerjoinpath->parent); + + Assert(best_path->jpath.innerjoinpath != NULL); + } + /* NestLoop can project, so no need to be picky about child tlists */ outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 211ba65389..2fa9fd315a 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -56,6 +56,7 @@ static int append_startup_cost_compare(const ListCell *a, const ListCell *b); static List *reparameterize_pathlist_by_child(PlannerInfo *root, List *pathlist, RelOptInfo *child_rel); +static bool pathlist_is_reparameterizable_by_child(List *pathlist); /***************************************************************************** @@ -2436,6 +2437,16 @@ create_nestloop_path(PlannerInfo *root, { NestPath *pathnode = makeNode(NestPath); Relids inner_req_outer = PATH_REQ_OUTER(inner_path); + Relids outerrelids; + + /* + * Paths are 
parameterized by top-level parents, so run parameterization + * tests on the parent relids. + */ + if (outer_path->parent->top_parent_relids) + outerrelids = outer_path->parent->top_parent_relids; + else + outerrelids = outer_path->parent->relids; /* * If the inner path is parameterized by the outer, we must drop any @@ -2445,7 +2456,7 @@ create_nestloop_path(PlannerInfo *root, * estimates for this path. We detect such clauses by checking for serial * number match to clauses already enforced in the inner path. */ - if (bms_overlap(inner_req_outer, outer_path->parent->relids)) + if (bms_overlap(inner_req_outer, outerrelids)) { Bitmapset *enforced_serials = get_param_path_clause_serials(inner_path); List *jclauses = NIL; @@ -4024,6 +4035,130 @@ reparameterize_path(PlannerInfo *root, Path *path, return NULL; } +/* + * path_is_reparameterizable_by_child + * Given a path parameterized by the parent of a child relation, check to + * see if it can be translated to be parameterized by the child relation. + * + * Currently, only a few path types are supported here, though more could be + * added at need. Any addition or reduction in supported path types needs to + * be reflected in reparameterize_path_by_child(). 
+ */ +bool +path_is_reparameterizable_by_child(Path *path) +{ + +#define CHILD_PATH_IS_REPARAMETERIZABLE(path) \ +do { \ + if (!path_is_reparameterizable_by_child((path))) \ + return false; \ +} while(0) + +#define CHILD_PATH_LIST_IS_REPARAMETERIZABLE(pathlist) \ +do { \ + if (!pathlist_is_reparameterizable_by_child((pathlist))) \ + return false; \ +} while(0) + + switch (nodeTag(path)) + { + case T_Path: + case T_IndexPath: + break; + + case T_BitmapHeapPath: + { + BitmapHeapPath *bhpath = (BitmapHeapPath *) path; + + CHILD_PATH_IS_REPARAMETERIZABLE(bhpath->bitmapqual); + } + break; + + case T_BitmapAndPath: + { + BitmapAndPath *bapath = (BitmapAndPath *) path; + + CHILD_PATH_LIST_IS_REPARAMETERIZABLE(bapath->bitmapquals); + } + break; + + case T_BitmapOrPath: + { + BitmapOrPath *bopath = (BitmapOrPath *) path; + + CHILD_PATH_LIST_IS_REPARAMETERIZABLE(bopath->bitmapquals); + } + break; + + case T_ForeignPath: + { + ForeignPath *fpath = (ForeignPath *) path; + + if (fpath->fdw_outerpath) + CHILD_PATH_IS_REPARAMETERIZABLE(fpath->fdw_outerpath); + } + break; + + case T_CustomPath: + { + CustomPath *cpath = (CustomPath *) path; + + CHILD_PATH_LIST_IS_REPARAMETERIZABLE(cpath->custom_paths); + } + break; + + case T_NestPath: + case T_MergePath: + case T_HashPath: + { + JoinPath *jpath = (JoinPath *) path; + + CHILD_PATH_IS_REPARAMETERIZABLE(jpath->outerjoinpath); + CHILD_PATH_IS_REPARAMETERIZABLE(jpath->innerjoinpath); + } + break; + + case T_AppendPath: + { + AppendPath *apath = (AppendPath *) path; + + CHILD_PATH_LIST_IS_REPARAMETERIZABLE(apath->subpaths); + } + break; + + case T_MaterialPath: + { + MaterialPath *mpath = (MaterialPath *) path; + + CHILD_PATH_IS_REPARAMETERIZABLE(mpath->subpath); + } + break; + + case T_MemoizePath: + { + MemoizePath *mpath = (MemoizePath *) path; + + CHILD_PATH_IS_REPARAMETERIZABLE(mpath->subpath); + } + break; + + case T_GatherPath: + { + GatherPath *gpath = (GatherPath *) path; + + CHILD_PATH_IS_REPARAMETERIZABLE(gpath->subpath); + 
} + break; + + default: + + /* We don't know how to reparameterize this path. */ + return false; + } + + return true; +} + /* * reparameterize_path_by_child * Given a path parameterized by the parent of the given child relation, @@ -4041,7 +4176,12 @@ reparameterize_path(PlannerInfo *root, Path *path, * members are copied as they are. * * Currently, only a few path types are supported here, though more could be - * added at need. We return NULL if we can't reparameterize the given path. + * added at need. Any addition or reduction in supported path types needs to + * be reflected in path_is_reparameterizable_by_child(). We return NULL if we + * can't reparameterize the given path. + * + * Note that this function can only be called at createplan time, because it + * may modify RTEs on the fly. */ Path * reparameterize_path_by_child(PlannerInfo *root, Path *path, @@ -4054,7 +4194,7 @@ reparameterize_path_by_child(PlannerInfo *root, Path *path, #define ADJUST_CHILD_ATTRS(node) \ ((node) = \ - (List *) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \ + (void *) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \ child_rel, \ child_rel->top_parent)) @@ -4089,6 +4229,11 @@ do { \ !bms_overlap(PATH_REQ_OUTER(path), child_rel->top_parent_relids)) return path; + /* + * The path should be reparameterizable, otherwise it would not come here. + */ + Assert(path_is_reparameterizable_by_child(path)); + /* * If possible, reparameterize the given path, making a copy. * @@ -4104,7 +4249,23 @@ do { \ switch (nodeTag(path)) { case T_Path: - FLAT_COPY_PATH(new_path, path, Path); + { + FLAT_COPY_PATH(new_path, path, Path); + + if (path->pathtype == T_SampleScan) + { + Index scan_relid = path->parent->relid; + RangeTblEntry *rte; + + /* it should be a base rel with a tablesample clause... 
*/ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_RELATION); + Assert(rte->tablesample != NULL); + + ADJUST_CHILD_ATTRS(rte->tablesample); + } + } break; case T_IndexPath: @@ -4363,3 +4524,23 @@ reparameterize_pathlist_by_child(PlannerInfo *root, return result; } + +/* + * pathlist_is_reparameterizable_by_child + * Helper function to check if a list of paths could be reparameterizable. + */ +static bool +pathlist_is_reparameterizable_by_child(List *pathlist) +{ + ListCell *lc; + + foreach(lc, pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (!path_is_reparameterizable_by_child(path)) + return false; + } + + return true; +} diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 5702fbba60..15b12f968c 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1637,6 +1637,19 @@ typedef struct Path #define PATH_REQ_OUTER(path) \ ((path)->param_info ? (path)->param_info->ppi_req_outer : (Relids) NULL) +/* + * Paths parameterized by the parent can be considered to be parameterized by + * any of its children. + */ +#define PATH_PARAM_BY_PARENT(path, rel) \ + ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \ + (rel)->top_parent_relids)) +#define PATH_PARAM_BY_REL_SELF(path, rel) \ + ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) + +#define PATH_PARAM_BY_REL(path, rel) \ + (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel)) + /*---------- * IndexPath represents an index scan over a single index. 
* diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 6e557bebc4..4d47e98310 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -298,6 +298,7 @@ extern Path *reparameterize_path(PlannerInfo *root, Path *path, double loop_count); extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path, RelOptInfo *child_rel); +extern bool path_is_reparameterizable_by_child(Path *path); /* * prototypes for relnode.c diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 6560fe2416..629d655c18 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -505,6 +505,59 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL 550 | | (12 rows) +-- lateral reference in sample scan +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 JOIN LATERAL + (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a; + QUERY PLAN +------------------------------------------------------------- + Append + -> Nested Loop + -> Seq Scan on prt1_p1 t1_1 + -> Sample Scan on prt1_p1 t2_1 + Sampling: system (t1_1.a) REPEATABLE (t1_1.b) + Filter: (t1_1.a = a) + -> Nested Loop + -> Seq Scan on prt1_p2 t1_2 + -> Sample Scan on prt1_p2 t2_2 + Sampling: system (t1_2.a) REPEATABLE (t1_2.b) + Filter: (t1_2.a = a) + -> Nested Loop + -> Seq Scan on prt1_p3 t1_3 + -> Sample Scan on prt1_p3 t2_3 + Sampling: system (t1_3.a) REPEATABLE (t1_3.b) + Filter: (t1_3.a = a) +(16 rows) + +-- recursively check the child path(s) to tell if a path is reparameterizable +-- in this test case we'd have a MemoizePath whose subpath is TidPath, which is not reparameterizable +SET enable_indexscan TO false; +SET enable_seqscan TO false; +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.a AS t1a, * FROM prt1 t2 WHERE t1.ctid = t2.ctid) s ON t1.a = s.a; + QUERY PLAN 
+--------------------------------------------------------- + Nested Loop Left Join + -> Append + -> Index Scan using iprt1_p1_a on prt1_p1 t1_1 + -> Index Scan using iprt1_p2_a on prt1_p2 t1_2 + -> Index Scan using iprt1_p3_a on prt1_p3 t1_3 + -> Append + -> Tid Scan on prt1_p1 t2_1 + TID Cond: (t1.ctid = ctid) + Filter: (t1.a = a) + -> Tid Scan on prt1_p2 t2_2 + TID Cond: (t1.ctid = ctid) + Filter: (t1.a = a) + -> Tid Scan on prt1_p3 t2_3 + TID Cond: (t1.ctid = ctid) + Filter: (t1.a = a) +(15 rows) + +RESET enable_indexscan; +RESET enable_seqscan; -- bug with inadequate sort key representation SET enable_partitionwise_aggregate TO true; SET enable_hashjoin TO false; @@ -1944,6 +1997,41 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL 550 | 0 | 0002 | | | | | (12 rows) +-- partitionwise join with lateral reference in sample scan +EXPLAIN (COSTS OFF) +SELECT * FROM prt1_l t1 JOIN LATERAL + (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s ON + t1.a = s.a AND t1.b = s.b AND t1.c = s.c; + QUERY PLAN +---------------------------------------------------------------------------------------- + Append + -> Nested Loop + -> Seq Scan on prt1_l_p1 t1_1 + -> Sample Scan on prt1_l_p1 t2_1 + Sampling: system (t1_1.a) REPEATABLE (t1_1.b) + Filter: ((t1_1.a = a) AND (t1_1.b = b) AND ((t1_1.c)::text = (c)::text)) + -> Nested Loop + -> Seq Scan on prt1_l_p2_p1 t1_2 + -> Sample Scan on prt1_l_p2_p1 t2_2 + Sampling: system (t1_2.a) REPEATABLE (t1_2.b) + Filter: ((t1_2.a = a) AND (t1_2.b = b) AND ((t1_2.c)::text = (c)::text)) + -> Nested Loop + -> Seq Scan on prt1_l_p2_p2 t1_3 + -> Sample Scan on prt1_l_p2_p2 t2_3 + Sampling: system (t1_3.a) REPEATABLE (t1_3.b) + Filter: ((t1_3.a = a) AND (t1_3.b = b) AND ((t1_3.c)::text = (c)::text)) + -> Nested Loop + -> Seq Scan on prt1_l_p3_p1 t1_4 + -> Sample Scan on prt1_l_p3_p1 t2_4 + Sampling: system (t1_4.a) REPEATABLE (t1_4.b) + Filter: ((t1_4.a = a) AND (t1_4.b = b) AND ((t1_4.c)::text = (c)::text)) + -> Nested Loop + -> Seq 
Scan on prt1_l_p3_p2 t1_5 + -> Sample Scan on prt1_l_p3_p2 t2_5 + Sampling: system (t1_5.a) REPEATABLE (t1_5.b) + Filter: ((t1_5.a = a) AND (t1_5.b = b) AND ((t1_5.c)::text = (c)::text)) +(26 rows) + -- join with one side empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c; diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 48daf3aee3..9e5660b0c0 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -100,6 +100,24 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a; +-- lateral reference in sample scan +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 JOIN LATERAL + (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s + ON t1.a = s.a; + +-- recursively check the child path(s) to tell if a path is reparameterizable +-- in this test case we'd have a MemoizePath whose subpath is TidPath, which is not reparameterizable +SET enable_indexscan TO false; +SET enable_seqscan TO false; + +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 t1 LEFT JOIN LATERAL + (SELECT t1.a AS t1a, * FROM prt1 t2 WHERE t1.ctid = t2.ctid) s ON t1.a = s.a; + +RESET enable_indexscan; +RESET enable_seqscan; + -- bug with inadequate sort key representation SET enable_partitionwise_aggregate TO true; SET enable_hashjoin TO false; @@ -387,6 +405,12 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a; +-- partitionwise join with lateral reference in sample scan +EXPLAIN (COSTS 
OFF) +SELECT * FROM prt1_l t1 JOIN LATERAL + (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s ON + t1.a = s.a AND t1.b = s.b AND t1.c = s.c; + -- join with one side empty EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c; -- 2.31.0