Re: MergeAppend could consider sorting cheapest child path - Mailing list pgsql-hackers
From | Bruce Momjian |
---|---|
Subject | Re: MergeAppend could consider sorting cheapest child path |
Date | |
Msg-id | ZxBUPQA3fPSmBPV7@momjian.us Whole thread Raw |
In response to | MergeAppend could consider sorting cheapest child path (Alexander Pyhalov <a.pyhalov@postgrespro.ru>) |
Responses |
Re: MergeAppend could consider sorting cheapest child path
|
List | pgsql-hackers |
Is this still being considered? --------------------------------------------------------------------------- On Tue, Jun 18, 2024 at 07:45:09PM +0300, Alexander Pyhalov wrote: > Hi. > > Now when planner finds suitable pathkeys in generate_orderedappend_paths(), > it uses them, even if explicit sort of the cheapest child path could be more > efficient. > > We encountered this issue on partitioned table with two indexes, where one > is suitable for sorting, and another is good for selecting data. MergeAppend > was generated > with subpaths doing index scan on suitably ordered index and filtering a lot > of data. > The suggested fix allows MergeAppend to consider sorting on cheapest > childrel total path as an alternative. > > -- > Best regards, > Alexander Pyhalov, > Postgres Professional > From d5eb3d326d83e2ca47c17552fcc6fd27b6b98d4e Mon Sep 17 00:00:00 2001 > From: Alexander Pyhalov <a.pyhalov@postgrespro.ru> > Date: Tue, 18 Jun 2024 15:56:18 +0300 > Subject: [PATCH] MergeAppend could consider using sorted best path. > > This helps when index with suitable pathkeys is not > good for filtering data. > --- > .../postgres_fdw/expected/postgres_fdw.out | 6 +- > src/backend/optimizer/path/allpaths.c | 21 +++++ > src/test/regress/expected/inherit.out | 45 +++++++++- > src/test/regress/expected/partition_join.out | 87 +++++++++++-------- > src/test/regress/expected/union.out | 6 +- > src/test/regress/sql/inherit.sql | 17 ++++ > 6 files changed, 141 insertions(+), 41 deletions(-) > > diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out > index ea566d50341..687591e4a97 100644 > --- a/contrib/postgres_fdw/expected/postgres_fdw.out > +++ b/contrib/postgres_fdw/expected/postgres_fdw.out > @@ -10074,13 +10074,15 @@ SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a > -> Nested Loop > Join Filter: (t1.a = t2.b) > -> Append > - -> Foreign Scan on ftprt1_p1 t1_1 > + -> Sort > + Sort Key: t1_1.a > + -> Foreign Scan on ftprt1_p1 t1_1 > -> Foreign Scan on ftprt1_p2 t1_2 > -> Materialize > -> Append > -> Foreign Scan on ftprt2_p1 t2_1 > -> Foreign Scan on ftprt2_p2 t2_2 > -(10 rows) > +(12 rows) > > SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; > a | b > diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c > index 4895cee9944..827bc469269 100644 > --- a/src/backend/optimizer/path/allpaths.c > +++ b/src/backend/optimizer/path/allpaths.c > @@ -1845,6 +1845,27 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, > /* Assert we do have an unparameterized path for this child */ > Assert(cheapest_total->param_info == NULL); > } > + else > + { > + /* > + * Even if we found necessary pathkeys, using unsorted path > + * can be more efficient. > + */ > + Path sort_path; > + > + cost_sort(&sort_path, > + root, > + pathkeys, > + childrel->cheapest_total_path->total_cost, > + childrel->cheapest_total_path->rows, > + childrel->cheapest_total_path->pathtarget->width, > + 0.0, > + work_mem, > + -1.0 /* need all tuples to sort them */ ); > + > + if (compare_path_costs(&sort_path, cheapest_total, TOTAL_COST) < 0) > + cheapest_total = childrel->cheapest_total_path; > + } > > /* > * When building a fractional path, determine a cheapest > diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out > index ad732134148..16e78c8d2ff 100644 > --- a/src/test/regress/expected/inherit.out > +++ b/src/test/regress/expected/inherit.out > @@ -1555,6 +1555,7 @@ insert into matest2 (name) values ('Test 4'); > insert into matest3 (name) values ('Test 5'); > insert into matest3 (name) values ('Test 6'); > set enable_indexscan = off; -- force use of seqscan/sort, so no merge > +set enable_sort = off; -- avoid sorting below MergeAppend > explain (verbose, costs off) select * from matest0 order by 1-id; > QUERY PLAN > ------------------------------------------------------------ > @@ -1608,6 +1609,7 @@ select min(1-id) from matest0; > (1 row) > > reset enable_indexscan; > +reset enable_sort; > set enable_seqscan = off; -- plan with fewest seqscans should be merge > set enable_parallel_append = off; -- Don't let parallel-append interfere > explain (verbose, costs off) select * from matest0 order by 1-id; > @@ -1702,7 +1704,9 @@ order by t1.b limit 10; > Merge Cond: (t1.b = t2.b) > -> Merge Append > Sort Key: t1.b > - -> Index Scan using matest0i on matest0 t1_1 > + -> Sort > + Sort Key: t1_1.b > + -> Seq Scan on matest0 t1_1 > -> Index Scan using matest1i on matest1 t1_2 > -> Materialize > -> Merge Append > @@ -1711,7 +1715,7 @@ order by t1.b limit 10; > Filter: (c = d) > -> Index Scan using matest1i on matest1 t2_2 > Filter: (c = d) > -(14 rows) > +(16 rows) > > reset enable_nestloop; > drop table matest0 cascade; > @@ -2663,6 +2667,43 @@ explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c; > > reset enable_bitmapscan; > drop table mcrparted; > +-- Check that sort path can be used by MergeAppend even when there are suitable pathkeys > +create table hash_parted (i int, j int, k int) partition by hash(i); > +create table hash_parted_1 partition of hash_parted for values with (modulus 4, remainder 0); > +create table hash_parted_2 partition of hash_parted for values with (modulus 4, remainder 1); > +create table hash_parted_3 partition of hash_parted for values with (modulus 4, remainder 2); > +create table hash_parted_4 partition of hash_parted for values with (modulus 4, remainder 3); > +--create table hash_parted_5 partition of hash_parted for values with (modulus 6, remainder 4); > +--create table hash_parted_6 partition of hash_parted for values with (modulus 6, remainder 5); > +create index on hash_parted(j); > +create index on hash_parted(k); > +insert into hash_parted select i, i, i from generate_series(1,10000) i; > +analyze hash_parted; > +explain (costs off) select * from hash_parted where k<100 order by j limit 100; > + QUERY PLAN > +------------------------------------------------------------------------- > + Limit > + -> Merge Append > + Sort Key: hash_parted.j > + -> Sort > + Sort Key: hash_parted_1.j > + -> Index Scan using hash_parted_1_k_idx on hash_parted_1 > + Index Cond: (k < 100) > + -> Sort > + Sort Key: hash_parted_2.j > + -> Index Scan using hash_parted_2_k_idx on hash_parted_2 > + Index Cond: (k < 100) > + -> Sort > + Sort Key: hash_parted_3.j > + -> Index Scan using hash_parted_3_k_idx on hash_parted_3 > + Index Cond: (k < 100) > + -> Sort > + Sort Key: hash_parted_4.j > + -> Index Scan using hash_parted_4_k_idx on hash_parted_4 > + Index Cond: (k < 100) > +(19 rows) > + > +drop table hash_parted; > -- Ensure LIST partitions allow an Append to be used instead of a MergeAppend > create table bool_lp (b bool) partition by list(b); > create table bool_lp_true partition of bool_lp for values in(true); > diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out > index 6d07f86b9bc..80d480d33d5 100644 > --- a/src/test/regress/expected/partition_join.out > +++ b/src/test/regress/expected/partition_join.out > @@ -1309,28 +1309,32 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 > -- This should generate a partitionwise join, but currently fails to > EXPLAIN (COSTS OFF) > SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a =t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; > - QUERY PLAN > ------------------------------------------------------------ > - Incremental Sort > + QUERY PLAN > +----------------------------------------------------------------- > + Sort > Sort Key: prt1.a, prt2.b > - Presorted Key: prt1.a > - -> Merge Left Join > - Merge Cond: (prt1.a = prt2.b) > - -> Sort > - Sort Key: prt1.a > - -> Append > - -> Seq Scan on prt1_p1 prt1_1 > - Filter: ((a < 450) AND (b = 0)) > - -> Seq Scan on prt1_p2 prt1_2 > - Filter: ((a < 450) AND (b = 0)) > - -> Sort > - Sort Key: prt2.b > - -> Append > + -> Merge Right Join > + Merge Cond: (prt2.b = prt1.a) > + -> Append > + -> Sort > + Sort Key: prt2_1.b > -> Seq Scan on prt2_p2 prt2_1 > Filter: (b > 250) > + -> Sort > + Sort Key: prt2_2.b > -> Seq Scan on prt2_p3 prt2_2 > Filter: (b > 250) > -(19 rows) > + -> Materialize > + -> Append > + -> Sort > + Sort Key: prt1_1.a > + -> Seq Scan on prt1_p1 prt1_1 > + Filter: ((a < 450) AND (b = 0)) > + -> Sort > + Sort Key: prt1_2.a > + -> Seq Scan on prt1_p2 prt1_2 > + Filter: ((a < 450) AND (b = 0)) > +(23 rows) > > SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a =t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b; > a | b > @@ -1350,25 +1354,33 @@ SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * > -- partitionwise join does not apply > EXPLAIN (COSTS OFF) > SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a; > - QUERY PLAN > ------------------------------------------------------------------------------------------ > + QUERY PLAN > +------------------------------------------------------------------ > Merge Join > - Merge Cond: ((t1.a = t2.b) AND (((((t1.*)::prt1))::text) = ((((t2.*)::prt2))::text))) > - -> Sort > - Sort Key: t1.a, ((((t1.*)::prt1))::text) > - -> Result > - -> Append > - -> Seq Scan on prt1_p1 t1_1 > - -> Seq Scan on prt1_p2 t1_2 > - -> Seq Scan on prt1_p3 t1_3 > - -> Sort > - Sort Key: t2.b, ((((t2.*)::prt2))::text) > - -> Result > - -> Append > + Merge Cond: (t1.a = t2.b) > + Join Filter: ((((t2.*)::prt2))::text = (((t1.*)::prt1))::text) > + -> Append > + -> Sort > + Sort Key: t1_1.a > + -> Seq Scan on prt1_p1 t1_1 > + -> Sort > + Sort Key: t1_2.a > + -> Seq Scan on prt1_p2 t1_2 > + -> Sort > + Sort Key: t1_3.a > + -> Seq Scan on prt1_p3 t1_3 > + -> Materialize > + -> Append > + -> Sort > + Sort Key: t2_1.b > -> Seq Scan on prt2_p1 t2_1 > + -> Sort > + Sort Key: t2_2.b > -> Seq Scan on prt2_p2 t2_2 > + -> Sort > + Sort Key: t2_3.b > -> Seq Scan on prt2_p3 t2_3 > -(16 rows) > +(24 rows) > > SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a; > a | b > @@ -4906,21 +4918,26 @@ EXPLAIN (COSTS OFF) > SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b) WHERE t1.b >= 125 AND t1.b < 225 ORDERBY t1.a, t1.b; > QUERY PLAN > -------------------------------------------------------------------- > - Sort > + Merge Append > Sort Key: t1.a, t1.b > - -> Append > + -> Sort > + Sort Key: t1_1.a, t1_1.b > -> Hash Join > Hash Cond: ((t1_1.a = t2_1.a) AND (t1_1.b = t2_1.b)) > -> Seq Scan on alpha_neg_p1 t1_1 > Filter: ((b >= 125) AND (b < 225)) > -> Hash > -> Seq Scan on beta_neg_p1 t2_1 > + -> Sort > + Sort Key: t1_2.a, t1_2.b > -> Hash Join > Hash Cond: ((t2_2.a = t1_2.a) AND (t2_2.b = t1_2.b)) > -> Seq Scan on beta_neg_p2 t2_2 > -> Hash > -> Seq Scan on alpha_neg_p2 t1_2 > Filter: ((b >= 125) AND (b < 225)) > + -> Sort > + Sort Key: t1_4.a, t1_4.b > -> Hash Join > Hash Cond: ((t2_4.a = t1_4.a) AND (t2_4.b = t1_4.b)) > -> Append > @@ -4935,7 +4952,7 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2 > Filter: ((b >= 125) AND (b < 225)) > -> Seq Scan on alpha_pos_p3 t1_6 > Filter: ((b >= 125) AND (b < 225)) > -(29 rows) > +(34 rows) > > SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b) WHERE t1.b >= 125 AND t1.b < 225 ORDERBY t1.a, t1.b; > a | b | c | a | b | c > diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out > index 0fd0e1c38b3..4c1c173d8e6 100644 > --- a/src/test/regress/expected/union.out > +++ b/src/test/regress/expected/union.out > @@ -1195,12 +1195,14 @@ select event_id > ---------------------------------------------------------- > Merge Append > Sort Key: events.event_id > - -> Index Scan using events_pkey on events > + -> Sort > + Sort Key: events.event_id > + -> Seq Scan on events > -> Sort > Sort Key: events_1.event_id > -> Seq Scan on events_child events_1 > -> Index Scan using other_events_pkey on other_events > -(7 rows) > +(9 rows) > > drop table events_child, events, other_events; > reset enable_indexonlyscan; > diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql > index e3bcfdb181e..5331e49283f 100644 > --- a/src/test/regress/sql/inherit.sql > +++ b/src/test/regress/sql/inherit.sql > @@ -586,11 +586,13 @@ insert into matest3 (name) values ('Test 5'); > insert into matest3 (name) values ('Test 6'); > > set enable_indexscan = off; -- force use of seqscan/sort, so no merge > +set enable_sort = off; -- avoid sorting below MergeAppend > explain (verbose, costs off) select * from matest0 order by 1-id; > select * from matest0 order by 1-id; > explain (verbose, costs off) select min(1-id) from matest0; > select min(1-id) from matest0; > reset enable_indexscan; > +reset enable_sort; > > set enable_seqscan = off; -- plan with fewest seqscans should be merge > set enable_parallel_append = off; -- Don't let parallel-append interfere > @@ -976,6 +978,21 @@ reset enable_bitmapscan; > > drop table mcrparted; > > +-- Check that sort path can be used by MergeAppend even when there are suitable pathkeys > +create table hash_parted (i int, j int, k int) partition by hash(i); > +create table hash_parted_1 partition of hash_parted for values with (modulus 4, remainder 0); > +create table hash_parted_2 partition of hash_parted for values with (modulus 4, remainder 1); > +create table hash_parted_3 partition of hash_parted for values with (modulus 4, remainder 2); > +create table hash_parted_4 partition of hash_parted for values with (modulus 4, remainder 3); > +--create table hash_parted_5 partition of hash_parted for values with (modulus 6, remainder 4); > +--create table hash_parted_6 partition of hash_parted for values with (modulus 6, remainder 5); > +create index on hash_parted(j); > +create index on hash_parted(k); > +insert into hash_parted select i, i, i from generate_series(1,10000) i; > +analyze hash_parted; > +explain (costs off) select * from hash_parted where k<100 order by j limit 100; > +drop table hash_parted; > + > -- Ensure LIST partitions allow an Append to be used instead of a MergeAppend > create table bool_lp (b bool) partition by list(b); > create table bool_lp_true partition of bool_lp for values in(true); > -- > 2.34.1 > -- Bruce Momjian <bruce@momjian.us> https://momjian.us EDB https://enterprisedb.com When a patient asks the doctor, "Am I going to die?", he means "Am I going to die soon?"
pgsql-hackers by date: