From 6dba9a08ec483463b5323079f386e482e9f009c3 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 21 Dec 2023 19:01:15 +0200 Subject: [PATCH v1 4/5] Omit columns from final tlist that were only needed to represent ORDER BY --- src/backend/optimizer/path/pathkeys.c | 13 ++-- src/backend/optimizer/plan/createplan.c | 4 +- src/backend/optimizer/plan/planagg.c | 2 + src/backend/optimizer/plan/planner.c | 55 ++++++++++++++ src/backend/optimizer/util/plancat.c | 6 ++ src/backend/optimizer/util/tlist.c | 20 +++++ src/include/nodes/pathnodes.h | 3 + src/include/optimizer/optimizer.h | 2 + src/test/regress/expected/create_index.out | 2 +- src/test/regress/expected/gist.out | 6 +- src/test/regress/expected/groupingsets.out | 8 +- src/test/regress/expected/limit.out | 16 ++-- src/test/regress/expected/sqljson.out | 12 +-- src/test/regress/expected/subselect.out | 2 +- src/test/regress/expected/window.out | 88 +++++++++++----------- 15 files changed, 163 insertions(+), 76 deletions(-) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index fdb60aaa8d2..d95eed80a29 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -877,13 +877,15 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, TargetEntry *tle; Var *outer_var; + /* Is the TLE actually available to the outer query? */ if (sub_eclass->ec_sortref == 0) /* can't happen */ elog(ERROR, "volatile EquivalenceClass has no sortref"); - tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist); - Assert(tle); - /* Is TLE actually available to the outer query? 
*/ - outer_var = find_var_for_subquery_tle(rel, tle); - if (outer_var) + tle = get_sortgroupref_tle_noerr(sub_eclass->ec_sortref, subquery_tlist); + if (tle) + { + outer_var = find_var_for_subquery_tle(rel, tle); + if (outer_var) + /* XXX: funny indentation just to avoid unnecessary churn in review; needs to be pgindented */ { /* We can represent this sub_pathkey */ EquivalenceMember *sub_member; @@ -922,6 +924,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, sub_pathkey->pk_strategy, sub_pathkey->pk_nulls_first); } + } } else { diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 5842d31bdcd..b4a672a371e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -354,7 +354,7 @@ create_plan(PlannerInfo *root, Path *best_path) * nodes don't have a tlist matching the querytree targetlist. */ if (!IsA(plan, ModifyTable)) - apply_tlist_labeling(plan->targetlist, root->processed_tlist); + apply_tlist_labeling(plan->targetlist, root->final_tlist); /* * Attach any initPlans created in this query level to the topmost plan @@ -2819,7 +2819,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST); /* Transfer resname/resjunk labeling, too, to keep executor happy */ - apply_tlist_labeling(subplan->targetlist, root->processed_tlist); + apply_tlist_labeling(subplan->targetlist, root->final_tlist); plan = make_modifytable(root, subplan, diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index 624afae6bce..05a6202c422 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -420,6 +420,8 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, final_rel = query_planner(subroot, minmax_qp_callback, NULL); + subroot->final_tlist = subroot->processed_tlist; + /* * Since we didn't go through subquery_planner() to 
handle the subquery, * we have to do some of the same cleanup it would do, in particular cope diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 6f45efde21d..e1c56a42607 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1301,6 +1301,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) List *final_targets; List *final_targets_contain_srfs; bool final_target_parallel_safe; + PathTarget *very_final_target; RelOptInfo *current_rel; RelOptInfo *final_rel; FinalPathExtraData extra; @@ -1358,6 +1359,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) postprocess_setop_tlist(copyObject(root->processed_tlist), parse->targetList); + root->final_tlist = NIL; + foreach (lc, root->processed_tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk == JUNK_SORT_GROUP_COL || tle->resjunk == JUNK_PLANNER_ONLY) + continue; + + root->final_tlist = lappend(root->final_tlist, tle); + } + /* Also extract the PathTarget form of the setop result tlist */ final_target = current_rel->cheapest_total_path->pathtarget; @@ -1746,6 +1758,44 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) final_rel->useridiscurrent = current_rel->useridiscurrent; final_rel->fdwroutine = current_rel->fdwroutine; + /* + * If the target list contains any junk columns that are not needed in the + * executor, project them away. 'very_final_target' is the target list + * with such columns removed. + * + * We used to let the executor filter these away with a "junk filter", but + * if the junk columns are expensive to compute, it's better not to compute + * them in the first place. Usually we need such columns anyway, but one + * case where we can avoid some real work is if we use an index to satisfy + * the ORDER BY. + * + * XXX: The executor still has a junk filter and would filter these away + * if we didn't. 
We could work a little harder here, and also add a + * projection on top of the possible LockRows node, to remove any junk + * columns created for row marks. With that, we could get rid of the junk + * filter in the executor altogether. + */ + { + List *very_final_tlist = NIL; + + foreach (lc, root->processed_tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk == JUNK_SORT_GROUP_COL || tle->resjunk == JUNK_PLANNER_ONLY) + continue; + + very_final_tlist = lappend(very_final_tlist, tle); + } + if (list_length(very_final_tlist) != list_length(root->processed_tlist)) + very_final_target = create_pathtarget(root, very_final_tlist); + else + very_final_target = final_target; + + /* Stash the final tlist that we will produce for create_plan() */ + root->final_tlist = very_final_tlist; + } + /* * Generate paths for the final_rel. Insert all surviving paths, with * LockRows, Limit, and/or ModifyTable steps added if needed. @@ -1754,6 +1804,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) { Path *path = (Path *) lfirst(lc); + /* see comment above */ + if (very_final_target != final_target) + path = apply_projection_to_path(root, final_rel, + path, very_final_target); + /* * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. * (Note: we intentionally test parse->rowMarks not root->rowMarks diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 219040b52f0..0d736e2a5b3 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1774,7 +1774,13 @@ build_physical_tlist(PlannerInfo *root, RelOptInfo *rel) /* * A resjunk column of the subquery can be reflected as * resjunk in the physical tlist; we need not punt. + * + * Subquery planner will filter out these junk columns + * from the final plan, so reflect that here. 
*/ + if (tle->resjunk == JUNK_SORT_GROUP_COL || tle->resjunk == JUNK_PLANNER_ONLY) + continue; + var = makeVarFromTargetEntry(varno, tle); tlist = lappend(tlist, diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index a2a343e960e..c28bcb9cdea 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -358,6 +358,26 @@ get_sortgroupref_tle(Index sortref, List *targetList) return NULL; /* keep compiler quiet */ } +/* + * get_sortgroupref_tle_noerr + * As above, but return NULL rather than throwing an error if not found. + */ +TargetEntry * +get_sortgroupref_tle_noerr(Index sortref, List *targetList) +{ + ListCell *l; + + foreach(l, targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->ressortgroupref == sortref) + return tle; + } + + return NULL; +} + /* * get_sortgroupclause_tle * Find the targetlist entry matching the given SortGroupClause diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index ed85dc7414b..c8668e0ee47 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -452,6 +452,9 @@ struct PlannerInfo */ List *processed_tlist; + /* same as processed_tlist, but with the planner JUNK_* cols removed */ + List *final_tlist; + /* * For UPDATE, this list contains the target table's attribute numbers to * which the first N entries of processed_tlist are to be assigned. 
(Any diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index da50044bf14..7514432b8c0 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -169,6 +169,8 @@ extern bool predicate_refuted_by(List *predicate_list, List *clause_list, extern int count_nonjunk_tlist_entries(List *tlist); extern TargetEntry *get_sortgroupref_tle(Index sortref, List *targetList); +extern TargetEntry *get_sortgroupref_tle_noerr(Index sortref, + List *targetList); extern TargetEntry *get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList); extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause, diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 1e2222929f1..a12d38a8c8e 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -464,7 +464,7 @@ SELECT * FROM point_tbl ORDER BY f1 <-> '0,1'; QUERY PLAN ------------------------------------------------------ Index Only Scan using gpointind on pg_temp.point_tbl - Output: f1, (f1 <-> '(0,1)'::point) + Output: f1 Order By: (point_tbl.f1 <-> '(0,1)'::point) (3 rows) diff --git a/src/test/regress/expected/gist.out b/src/test/regress/expected/gist.out index 018b76f21d1..ed6722f48f7 100644 --- a/src/test/regress/expected/gist.out +++ b/src/test/regress/expected/gist.out @@ -81,7 +81,7 @@ order by p <-> point(0.201, 0.201); QUERY PLAN --------------------------------------------------------------- Index Only Scan using gist_tbl_point_index on public.gist_tbl - Output: p, (p <-> '(0.201,0.201)'::point) + Output: p Index Cond: (gist_tbl.p <@ '(0.5,0.5),(0,0)'::box) Order By: (gist_tbl.p <-> '(0.201,0.201)'::point) (4 rows) @@ -380,9 +380,9 @@ select p from gist_tbl order by circle(p,1) <-> point(0,0) limit 1; QUERY PLAN ------------------------------------------------------------------------------------ Limit - Output: p, ((circle(p, '1'::double precision) <-> 
'(0,0)'::point)) + Output: p -> Index Only Scan using gist_tbl_multi_index on public.gist_tbl - Output: p, (circle(p, '1'::double precision) <-> '(0,0)'::point) + Output: p Order By: ((circle(gist_tbl.p, '1'::double precision)) <-> '(0,0)'::point) (5 rows) diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index a3b9aaca84c..113f7a72b1f 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -472,10 +472,10 @@ select grouping(ss.x) from int8_tbl i1 cross join lateral (select (select i1.q1) as x) ss group by ss.x; - QUERY PLAN ------------------------------------------------- + QUERY PLAN +-------------------------------------------- GroupAggregate - Output: GROUPING((SubPlan 1)), ((SubPlan 2)) + Output: GROUPING((SubPlan 1)) Group Key: ((SubPlan 2)) -> Sort Output: ((SubPlan 2)), i1.q1 @@ -505,7 +505,7 @@ group by ss.x; QUERY PLAN -------------------------------------------- GroupAggregate - Output: (SubPlan 2), ((SubPlan 3)) + Output: (SubPlan 2) Group Key: ((SubPlan 3)) -> Sort Output: ((SubPlan 3)), i1.q1 diff --git a/src/test/regress/expected/limit.out b/src/test/regress/expected/limit.out index a2cd0f9f5b8..60e572c53f2 100644 --- a/src/test/regress/expected/limit.out +++ b/src/test/regress/expected/limit.out @@ -401,12 +401,12 @@ select currval('testseq'); explain (verbose, costs off) select unique1, unique2, nextval('testseq') from tenk1 order by tenthous limit 10; - QUERY PLAN --------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------- Limit - Output: unique1, unique2, (nextval('testseq'::regclass)), tenthous + Output: unique1, unique2, (nextval('testseq'::regclass)) -> Result - Output: unique1, unique2, nextval('testseq'::regclass), tenthous + Output: unique1, unique2, nextval('testseq'::regclass) -> Sort Output: unique1, unique2, tenthous Sort Key: tenk1.tenthous 
@@ -535,12 +535,12 @@ order by s2 desc; explain (verbose, costs off) select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 from tenk1 group by thousand order by thousand limit 3; - QUERY PLAN -------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------- Limit - Output: (sum(tenthous)), (((sum(tenthous))::double precision + (random() * '0'::double precision))), thousand + Output: (sum(tenthous)), (((sum(tenthous))::double precision + (random() * '0'::double precision))) -> GroupAggregate - Output: sum(tenthous), ((sum(tenthous))::double precision + (random() * '0'::double precision)), thousand + Output: sum(tenthous), ((sum(tenthous))::double precision + (random() * '0'::double precision)) Group Key: tenk1.thousand -> Index Only Scan using tenk1_thous_tenthous on public.tenk1 Output: thousand, tenthous diff --git a/src/test/regress/expected/sqljson.out b/src/test/regress/expected/sqljson.out index 5e7da96be5e..505d7f46500 100644 --- a/src/test/regress/expected/sqljson.out +++ b/src/test/regress/expected/sqljson.out @@ -1002,10 +1002,10 @@ FROM generate_series(1,5) i; EXPLAIN (VERBOSE, COSTS OFF) SELECT JSON_OBJECTAGG(i: ('111' || i)::bytea FORMAT JSON WITH UNIQUE RETURNING text) OVER (PARTITION BY i % 2) FROM generate_series(1,5) i; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ WindowAgg - Output: JSON_OBJECTAGG(i : (('111'::text || (i)::text))::bytea FORMAT JSON WITH UNIQUE KEYS RETURNING text) OVER (?), ((i % 2)) + Output: JSON_OBJECTAGG(i : (('111'::text || (i)::text))::bytea FORMAT JSON WITH UNIQUE KEYS RETURNING text) OVER (?) 
-> Sort Output: ((i % 2)), i Sort Key: ((i.i % 2)) @@ -1038,10 +1038,10 @@ FROM generate_series(1,5) i; EXPLAIN (VERBOSE, COSTS OFF) SELECT JSON_ARRAYAGG(('111' || i)::bytea FORMAT JSON NULL ON NULL RETURNING text) OVER (PARTITION BY i % 2) FROM generate_series(1,5) i; - QUERY PLAN --------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- WindowAgg - Output: JSON_ARRAYAGG((('111'::text || (i)::text))::bytea FORMAT JSON NULL ON NULL RETURNING text) OVER (?), ((i % 2)) + Output: JSON_ARRAYAGG((('111'::text || (i)::text))::bytea FORMAT JSON NULL ON NULL RETURNING text) OVER (?) -> Sort Output: ((i % 2)), i Sort Key: ((i.i % 2)) diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index a3a4d03d104..6cdc1ffcf87 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1636,7 +1636,7 @@ select * from explain_sq_limit(); explain_sq_limit ---------------------------------------------------------------- Limit (actual rows=3 loops=1) - -> Subquery Scan on x (actual rows=3 loops=1) + -> Result (actual rows=3 loops=1) -> Sort (actual rows=3 loops=1) Sort Key: sq_limit.c1, sq_limit.pk Sort Method: top-N heapsort Memory: xxx diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 2201740c185..2f0ff7b08db 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -3731,10 +3731,10 @@ SELECT count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN CURRENT ROW AND CURRENT ROW) cnt FROM empsalary; - QUERY PLAN ------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------- 
WindowAgg - Output: empno, depname, (row_number() OVER (?)), (rank() OVER (?)), count(*) OVER (?), enroll_date + Output: empno, depname, (row_number() OVER (?)), (rank() OVER (?)), count(*) OVER (?) -> WindowAgg Output: depname, enroll_date, empno, row_number() OVER (?), rank() OVER (?) -> Sort @@ -3777,16 +3777,15 @@ SELECT * FROM min(salary) OVER (PARTITION BY depname || 'A', depname) depminsalary FROM empsalary) emp WHERE depname = 'sales'; - QUERY PLAN --------------------------------------------------------------------------- - Subquery Scan on emp + QUERY PLAN +-------------------------------------------------------------------- + WindowAgg -> WindowAgg - -> WindowAgg - -> Sort - Sort Key: (((empsalary.depname)::text || 'A'::text)) - -> Seq Scan on empsalary - Filter: ((depname)::text = 'sales'::text) -(7 rows) + -> Sort + Sort Key: (((empsalary.depname)::text || 'A'::text)) + -> Seq Scan on empsalary + Filter: ((depname)::text = 'sales'::text) +(6 rows) -- pushdown is unsafe because there's a PARTITION BY clause without depname: EXPLAIN (COSTS OFF) @@ -4147,23 +4146,22 @@ SELECT * FROM ntile(2) OVER (PARTITION BY depname) nt -- w2 FROM empsalary ) e WHERE rn <= 1 AND c1 <= 3 AND nt < 2; - QUERY PLAN ------------------------------------------------------------------------------------------------ - Subquery Scan on e - -> WindowAgg - Filter: (((row_number() OVER (?)) <= 1) AND ((ntile(2) OVER (?)) < 2)) - Run Condition: (count(empsalary.salary) OVER (?) <= 3) - -> Sort - Sort Key: (((empsalary.depname)::text || ''::text)) - -> WindowAgg - Run Condition: ((row_number() OVER (?) <= 1) AND (ntile(2) OVER (?) 
< 2)) - -> Sort - Sort Key: empsalary.depname - -> WindowAgg - -> Sort - Sort Key: ((''::text || (empsalary.depname)::text)) - -> Seq Scan on empsalary -(14 rows) + QUERY PLAN +----------------------------------------------------------------------------------------- + WindowAgg + Filter: (((row_number() OVER (?)) <= 1) AND ((ntile(2) OVER (?)) < 2)) + Run Condition: (count(empsalary.salary) OVER (?) <= 3) + -> Sort + Sort Key: (((empsalary.depname)::text || ''::text)) + -> WindowAgg + Run Condition: ((row_number() OVER (?) <= 1) AND (ntile(2) OVER (?) < 2)) + -> Sort + Sort Key: empsalary.depname + -> WindowAgg + -> Sort + Sort Key: ((''::text || (empsalary.depname)::text)) + -> Seq Scan on empsalary +(13 rows) -- Ensure we correctly filter out all of the run conditions from each window SELECT * FROM @@ -4268,16 +4266,15 @@ SELECT * FROM min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary FROM empsalary) emp WHERE depname = 'sales'; - QUERY PLAN ----------------------------------------------------------------------- - Subquery Scan on emp + QUERY PLAN +---------------------------------------------------------------- + WindowAgg -> WindowAgg - -> WindowAgg - -> Sort - Sort Key: empsalary.empno, empsalary.enroll_date - -> Seq Scan on empsalary - Filter: ((depname)::text = 'sales'::text) -(7 rows) + -> Sort + Sort Key: empsalary.empno, empsalary.enroll_date + -> Seq Scan on empsalary + Filter: ((depname)::text = 'sales'::text) +(6 rows) -- Ensure that the evaluation order of the WindowAggs results in the WindowAgg -- with the same sort order that's required by the ORDER BY is evaluated last. 
@@ -5330,14 +5327,13 @@ AS $$ WINDOW w AS (ORDER BY s ROWS BETWEEN CURRENT ROW AND GROUP_SIZE FOLLOWING) $$ LANGUAGE SQL STABLE; EXPLAIN (costs off) SELECT * FROM pg_temp.f(2); - QUERY PLAN ------------------------------------------------------- - Subquery Scan on f - -> WindowAgg - -> Sort - Sort Key: s.s - -> Function Scan on generate_series s -(5 rows) + QUERY PLAN +------------------------------------------------ + WindowAgg + -> Sort + Sort Key: s.s + -> Function Scan on generate_series s +(4 rows) SELECT * FROM pg_temp.f(2); f -- 2.39.2