From 6e33c6aa030a88ae4d0eacd73bd8a238ac416aef Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Mon, 20 Nov 2023 10:08:28 +0800 Subject: [PATCH v3] Propagate pathkeys from CTEs up to the outer query --- src/backend/optimizer/path/allpaths.c | 17 ++++++++++++++++- src/backend/optimizer/plan/planner.c | 1 + src/backend/optimizer/plan/subselect.c | 16 ++++++++++------ src/backend/optimizer/prep/prepjointree.c | 1 + src/backend/optimizer/util/pathnode.c | 5 +++-- src/include/nodes/pathnodes.h | 5 +++++ src/include/optimizer/pathnode.h | 2 +- src/test/regress/expected/with.out | 17 +++++++++++++++++ src/test/regress/sql/with.sql | 7 +++++++ 9 files changed, 61 insertions(+), 10 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 84c4de488a..d4791d128e 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -2882,6 +2882,8 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) ListCell *lc; int plan_id; Relids required_outer; + Path *ctepath; + List *pathkeys; /* * Find the referenced CTE, and locate the plan previously made for it. @@ -2921,6 +2923,19 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* Mark rel with estimated output rows, width, etc */ set_cte_size_estimates(root, rel, cteplan->plan_rows); + /* + * Locate the best path previously made for the referenced CTE. We need to + * know its pathkeys. + */ + Assert(list_length(cteroot->cte_plan_ids) == list_length(cteroot->cte_paths)); + ctepath = (Path *) list_nth(cteroot->cte_paths, ndx); + + /* Convert the pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, + rel, + ctepath->pathkeys, + cteplan->targetlist); + /* * We don't support pushing join clauses into the quals of a CTE scan, but * it could still have required parameterization due to LATERAL refs in @@ -2929,7 +2944,7 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) required_outer = rel->lateral_relids; /* Generate appropriate path */ - add_path(rel, create_ctescan_path(root, rel, required_outer)); + add_path(rel, create_ctescan_path(root, rel, pathkeys, required_outer)); } /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 2e2458b128..b9a2dbb88c 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -643,6 +643,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->planner_cxt = CurrentMemoryContext; root->init_plans = NIL; root->cte_plan_ids = NIL; + root->cte_paths = NIL; root->multiexpr_params = NIL; root->join_domains = NIL; root->eq_classes = NIL; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 3115d79ad9..ca73694da6 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -885,16 +885,17 @@ hash_ok_operator(OpExpr *expr) * unreferenced SELECT), "inline" it to create a regular sub-SELECT-in-FROM, * or convert it to an initplan. * - * A side effect is to fill in root->cte_plan_ids with a list that - * parallels root->parse->cteList and provides the subplan ID for - * each CTE's initplan, or a dummy ID (-1) if we didn't make an initplan. + * A side effect is to fill in root->cte_plan_ids and root->cte_paths with + * lists that parallel root->parse->cteList and provide the subplan ID and + * best path for each CTE, or a dummy ID (-1) and a dummy Path (NULL) if we + * didn't make a subplan. */ void SS_process_ctes(PlannerInfo *root) { ListCell *lc; - Assert(root->cte_plan_ids == NIL); + Assert(root->cte_plan_ids == NIL && root->cte_paths == NIL); foreach(lc, root->parse->cteList) { @@ -913,8 +914,9 @@ SS_process_ctes(PlannerInfo *root) */ if (cte->cterefcount == 0 && cmdType == CMD_SELECT) { - /* Make a dummy entry in cte_plan_ids */ + /* Make a dummy entry in cte_plan_ids and cte_paths */ root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1); + root->cte_paths = lappend(root->cte_paths, NULL); continue; } @@ -960,8 +962,9 @@ SS_process_ctes(PlannerInfo *root) !contain_volatile_functions(cte->ctequery)) { inline_cte(root, cte); - /* Make a dummy entry in cte_plan_ids */ + /* Make a dummy entry in cte_plan_ids and cte_paths */ root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1); + root->cte_paths = lappend(root->cte_paths, NULL); continue; } @@ -1051,6 +1054,7 @@ SS_process_ctes(PlannerInfo *root) root->init_plans = lappend(root->init_plans, splan); root->cte_plan_ids = lappend_int(root->cte_plan_ids, splan->plan_id); + root->cte_paths = lappend(root->cte_paths, best_path); /* Label the subplan for EXPLAIN purposes */ splan->plan_name = psprintf("CTE %s", cte->ctename); diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index aa83dd3636..a572c22556 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -990,6 +990,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, subroot->planner_cxt = CurrentMemoryContext; subroot->init_plans = NIL; subroot->cte_plan_ids = NIL; + subroot->cte_paths = NIL; subroot->multiexpr_params = NIL; subroot->join_domains = NIL; subroot->eq_classes = NIL; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 8dbf790e89..b23422f9aa 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2113,7 +2113,8 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, * returning the pathnode. */ Path * -create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) +create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, + List *pathkeys, Relids required_outer) { Path *pathnode = makeNode(Path); @@ -2125,7 +2126,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) pathnode->parallel_aware = false; pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; - pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */ + pathnode->pathkeys = pathkeys; cost_ctescan(pathnode, root, rel, pathnode->param_info); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 534692bee1..5fdb136557 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -301,6 +301,11 @@ struct PlannerInfo */ List *cte_plan_ids; + /* + * per-CTE-item list of Paths (or NULL if no Path was made for that CTE) + */ + List *cte_paths; + /* List of Lists of Params for MULTIEXPR subquery outputs */ List *multiexpr_params; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index c43d97b48a..837bf6ffdc 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -115,7 +115,7 @@ extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, extern Path *create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, - Relids required_outer); + List *pathkeys, Relids required_outer); extern Path *create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); extern Path *create_resultscan_path(PlannerInfo *root, RelOptInfo *rel, diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out index 7d796ea69c..bbb77e6bbb 100644 --- a/src/test/regress/expected/with.out +++ b/src/test/regress/expected/with.out @@ -672,6 +672,23 @@ select count(*) from tenk1 a Index Cond: (unique1 = x.unique1) (10 rows) +-- test that pathkeys from a materialized CTE are propagated up to the +-- outer query +explain (costs off) +with x as materialized (select unique1 from tenk1 b order by unique1) +select count(*) from tenk1 a + where unique1 in (select * from x); + QUERY PLAN +------------------------------------------------------------ + Aggregate + CTE x + -> Index Only Scan using tenk1_unique1 on tenk1 b + -> Merge Semi Join + Merge Cond: (a.unique1 = x.unique1) + -> Index Only Scan using tenk1_unique1 on tenk1 a + -> CTE Scan on x +(7 rows) + -- SEARCH clause create temp table graph0( f int, t int, label text ); insert into graph0 values diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql index f8a213e357..3b904d89b7 100644 --- a/src/test/regress/sql/with.sql +++ b/src/test/regress/sql/with.sql @@ -359,6 +359,13 @@ with x as materialized (insert into tenk1 default values returning unique1) select count(*) from tenk1 a where unique1 in (select * from x); +-- test that pathkeys from a materialized CTE are propagated up to the +-- outer query +explain (costs off) +with x as materialized (select unique1 from tenk1 b order by unique1) +select count(*) from tenk1 a + where unique1 in (select * from x); + -- SEARCH clause create temp table graph0( f int, t int, label text ); -- 2.31.0