Re: MergeAppend could consider sorting cheapest child path - Mailing list pgsql-hackers

From Bruce Momjian
Subject Re: MergeAppend could consider sorting cheapest child path
Date
Msg-id ZxBUPQA3fPSmBPV7@momjian.us
Whole thread Raw
In response to MergeAppend could consider sorting cheapest child path  (Alexander Pyhalov <a.pyhalov@postgrespro.ru>)
Responses Re: MergeAppend could consider sorting cheapest child path
List pgsql-hackers
Is this still being considered?

---------------------------------------------------------------------------

On Tue, Jun 18, 2024 at 07:45:09PM +0300, Alexander Pyhalov wrote:
> Hi.
> 
> Now when planner finds suitable pathkeys in generate_orderedappend_paths(),
> it uses them, even if explicit sort of the cheapest child path could be more
> efficient.
> 
> We encountered this issue on partitioned table with two indexes, where one
> is suitable for sorting, and another is good for selecting data. MergeAppend
> was generated
> with subpaths doing index scan on suitably ordered index and filtering a lot
> of data.
> The suggested fix allows MergeAppend to consider sorting on cheapest
> childrel total path as an alternative.
> 
> -- 
> Best regards,
> Alexander Pyhalov,
> Postgres Professional

> From d5eb3d326d83e2ca47c17552fcc6fd27b6b98d4e Mon Sep 17 00:00:00 2001
> From: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
> Date: Tue, 18 Jun 2024 15:56:18 +0300
> Subject: [PATCH] MergeAppend could consider using sorted best path.
> 
> This helps when index with suitable pathkeys is not
> good for filtering data.
> ---
>  .../postgres_fdw/expected/postgres_fdw.out    |  6 +-
>  src/backend/optimizer/path/allpaths.c         | 21 +++++
>  src/test/regress/expected/inherit.out         | 45 +++++++++-
>  src/test/regress/expected/partition_join.out  | 87 +++++++++++--------
>  src/test/regress/expected/union.out           |  6 +-
>  src/test/regress/sql/inherit.sql              | 17 ++++
>  6 files changed, 141 insertions(+), 41 deletions(-)
> 
> diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
> index ea566d50341..687591e4a97 100644
> --- a/contrib/postgres_fdw/expected/postgres_fdw.out
> +++ b/contrib/postgres_fdw/expected/postgres_fdw.out
> @@ -10074,13 +10074,15 @@ SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a
>     ->  Nested Loop
>           Join Filter: (t1.a = t2.b)
>           ->  Append
> -               ->  Foreign Scan on ftprt1_p1 t1_1
> +               ->  Sort
> +                     Sort Key: t1_1.a
> +                     ->  Foreign Scan on ftprt1_p1 t1_1
>                 ->  Foreign Scan on ftprt1_p2 t1_2
>           ->  Materialize
>                 ->  Append
>                       ->  Foreign Scan on ftprt2_p1 t2_1
>                       ->  Foreign Scan on ftprt2_p2 t2_2
> -(10 rows)
> +(12 rows)
>  
>  SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF
t1;
>    a  |  b  
> diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
> index 4895cee9944..827bc469269 100644
> --- a/src/backend/optimizer/path/allpaths.c
> +++ b/src/backend/optimizer/path/allpaths.c
> @@ -1845,6 +1845,27 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
>                  /* Assert we do have an unparameterized path for this child */
>                  Assert(cheapest_total->param_info == NULL);
>              }
> +            else
> +            {
> +                /*
> +                 * Even if we found necessary pathkeys, using unsorted path
> +                 * can be more efficient.
> +                 */
> +                Path        sort_path;
> +
> +                cost_sort(&sort_path,
> +                          root,
> +                          pathkeys,
> +                          childrel->cheapest_total_path->total_cost,
> +                          childrel->cheapest_total_path->rows,
> +                          childrel->cheapest_total_path->pathtarget->width,
> +                          0.0,
> +                          work_mem,
> +                          -1.0 /* need all tuples to sort them */ );
> +
> +                if (compare_path_costs(&sort_path, cheapest_total, TOTAL_COST) < 0)
> +                    cheapest_total = childrel->cheapest_total_path;
> +            }
>  
>              /*
>               * When building a fractional path, determine a cheapest
> diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
> index ad732134148..16e78c8d2ff 100644
> --- a/src/test/regress/expected/inherit.out
> +++ b/src/test/regress/expected/inherit.out
> @@ -1555,6 +1555,7 @@ insert into matest2 (name) values ('Test 4');
>  insert into matest3 (name) values ('Test 5');
>  insert into matest3 (name) values ('Test 6');
>  set enable_indexscan = off;  -- force use of seqscan/sort, so no merge
> +set enable_sort = off; -- avoid sorting below MergeAppend
>  explain (verbose, costs off) select * from matest0 order by 1-id;
>                           QUERY PLAN                         
>  ------------------------------------------------------------
> @@ -1608,6 +1609,7 @@ select min(1-id) from matest0;
>  (1 row)
>  
>  reset enable_indexscan;
> +reset enable_sort;
>  set enable_seqscan = off;  -- plan with fewest seqscans should be merge
>  set enable_parallel_append = off; -- Don't let parallel-append interfere
>  explain (verbose, costs off) select * from matest0 order by 1-id;
> @@ -1702,7 +1704,9 @@ order by t1.b limit 10;
>           Merge Cond: (t1.b = t2.b)
>           ->  Merge Append
>                 Sort Key: t1.b
> -               ->  Index Scan using matest0i on matest0 t1_1
> +               ->  Sort
> +                     Sort Key: t1_1.b
> +                     ->  Seq Scan on matest0 t1_1
>                 ->  Index Scan using matest1i on matest1 t1_2
>           ->  Materialize
>                 ->  Merge Append
> @@ -1711,7 +1715,7 @@ order by t1.b limit 10;
>                             Filter: (c = d)
>                       ->  Index Scan using matest1i on matest1 t2_2
>                             Filter: (c = d)
> -(14 rows)
> +(16 rows)
>  
>  reset enable_nestloop;
>  drop table matest0 cascade;
> @@ -2663,6 +2667,43 @@ explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c;
>  
>  reset enable_bitmapscan;
>  drop table mcrparted;
> +-- Check that sort path can be used by MergeAppend even when there are suitable pathkeys
> +create table hash_parted (i int, j int, k int) partition by hash(i);
> +create table hash_parted_1 partition of hash_parted for values with (modulus 4, remainder 0);
> +create table hash_parted_2 partition of hash_parted for values with (modulus 4, remainder 1);
> +create table hash_parted_3 partition of hash_parted for values with (modulus 4, remainder 2);
> +create table hash_parted_4 partition of hash_parted for values with (modulus 4, remainder 3);
> +--create table hash_parted_5 partition of hash_parted for values with (modulus 6, remainder 4);
> +--create table hash_parted_6 partition of hash_parted for values with (modulus 6, remainder 5);
> +create index on hash_parted(j);
> +create index on hash_parted(k);
> +insert into hash_parted select i, i, i from generate_series(1,10000) i;
> +analyze hash_parted;
> +explain (costs off) select * from hash_parted where k<100 order by j limit 100;
> +                               QUERY PLAN                                
> +-------------------------------------------------------------------------
> + Limit
> +   ->  Merge Append
> +         Sort Key: hash_parted.j
> +         ->  Sort
> +               Sort Key: hash_parted_1.j
> +               ->  Index Scan using hash_parted_1_k_idx on hash_parted_1
> +                     Index Cond: (k < 100)
> +         ->  Sort
> +               Sort Key: hash_parted_2.j
> +               ->  Index Scan using hash_parted_2_k_idx on hash_parted_2
> +                     Index Cond: (k < 100)
> +         ->  Sort
> +               Sort Key: hash_parted_3.j
> +               ->  Index Scan using hash_parted_3_k_idx on hash_parted_3
> +                     Index Cond: (k < 100)
> +         ->  Sort
> +               Sort Key: hash_parted_4.j
> +               ->  Index Scan using hash_parted_4_k_idx on hash_parted_4
> +                     Index Cond: (k < 100)
> +(19 rows)
> +
> +drop table hash_parted;
>  -- Ensure LIST partitions allow an Append to be used instead of a MergeAppend
>  create table bool_lp (b bool) partition by list(b);
>  create table bool_lp_true partition of bool_lp for values in(true);
> diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
> index 6d07f86b9bc..80d480d33d5 100644
> --- a/src/test/regress/expected/partition_join.out
> +++ b/src/test/regress/expected/partition_join.out
> @@ -1309,28 +1309,32 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
>  -- This should generate a partitionwise join, but currently fails to
>  EXPLAIN (COSTS OFF)
>  SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a
=t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
 
> -                        QUERY PLAN                         
> ------------------------------------------------------------
> - Incremental Sort
> +                           QUERY PLAN                            
> +-----------------------------------------------------------------
> + Sort
>     Sort Key: prt1.a, prt2.b
> -   Presorted Key: prt1.a
> -   ->  Merge Left Join
> -         Merge Cond: (prt1.a = prt2.b)
> -         ->  Sort
> -               Sort Key: prt1.a
> -               ->  Append
> -                     ->  Seq Scan on prt1_p1 prt1_1
> -                           Filter: ((a < 450) AND (b = 0))
> -                     ->  Seq Scan on prt1_p2 prt1_2
> -                           Filter: ((a < 450) AND (b = 0))
> -         ->  Sort
> -               Sort Key: prt2.b
> -               ->  Append
> +   ->  Merge Right Join
> +         Merge Cond: (prt2.b = prt1.a)
> +         ->  Append
> +               ->  Sort
> +                     Sort Key: prt2_1.b
>                       ->  Seq Scan on prt2_p2 prt2_1
>                             Filter: (b > 250)
> +               ->  Sort
> +                     Sort Key: prt2_2.b
>                       ->  Seq Scan on prt2_p3 prt2_2
>                             Filter: (b > 250)
> -(19 rows)
> +         ->  Materialize
> +               ->  Append
> +                     ->  Sort
> +                           Sort Key: prt1_1.a
> +                           ->  Seq Scan on prt1_p1 prt1_1
> +                                 Filter: ((a < 450) AND (b = 0))
> +                     ->  Sort
> +                           Sort Key: prt1_2.a
> +                           ->  Seq Scan on prt1_p2 prt1_2
> +                                 Filter: ((a < 450) AND (b = 0))
> +(23 rows)
>  
>  SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a
=t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
 
>    a  |  b  
> @@ -1350,25 +1354,33 @@ SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT *
>  -- partitionwise join does not apply
>  EXPLAIN (COSTS OFF)
>  SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
> -                                       QUERY PLAN                                        
> ------------------------------------------------------------------------------------------
> +                            QUERY PLAN                            
> +------------------------------------------------------------------
>   Merge Join
> -   Merge Cond: ((t1.a = t2.b) AND (((((t1.*)::prt1))::text) = ((((t2.*)::prt2))::text)))
> -   ->  Sort
> -         Sort Key: t1.a, ((((t1.*)::prt1))::text)
> -         ->  Result
> -               ->  Append
> -                     ->  Seq Scan on prt1_p1 t1_1
> -                     ->  Seq Scan on prt1_p2 t1_2
> -                     ->  Seq Scan on prt1_p3 t1_3
> -   ->  Sort
> -         Sort Key: t2.b, ((((t2.*)::prt2))::text)
> -         ->  Result
> -               ->  Append
> +   Merge Cond: (t1.a = t2.b)
> +   Join Filter: ((((t2.*)::prt2))::text = (((t1.*)::prt1))::text)
> +   ->  Append
> +         ->  Sort
> +               Sort Key: t1_1.a
> +               ->  Seq Scan on prt1_p1 t1_1
> +         ->  Sort
> +               Sort Key: t1_2.a
> +               ->  Seq Scan on prt1_p2 t1_2
> +         ->  Sort
> +               Sort Key: t1_3.a
> +               ->  Seq Scan on prt1_p3 t1_3
> +   ->  Materialize
> +         ->  Append
> +               ->  Sort
> +                     Sort Key: t2_1.b
>                       ->  Seq Scan on prt2_p1 t2_1
> +               ->  Sort
> +                     Sort Key: t2_2.b
>                       ->  Seq Scan on prt2_p2 t2_2
> +               ->  Sort
> +                     Sort Key: t2_3.b
>                       ->  Seq Scan on prt2_p3 t2_3
> -(16 rows)
> +(24 rows)
>  
>  SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
>   a  | b  
> @@ -4906,21 +4918,26 @@ EXPLAIN (COSTS OFF)
>  SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b) WHERE t1.b >= 125 AND t1.b < 225
ORDERBY t1.a, t1.b;
 
>                               QUERY PLAN                             
>  --------------------------------------------------------------------
> - Sort
> + Merge Append
>     Sort Key: t1.a, t1.b
> -   ->  Append
> +   ->  Sort
> +         Sort Key: t1_1.a, t1_1.b
>           ->  Hash Join
>                 Hash Cond: ((t1_1.a = t2_1.a) AND (t1_1.b = t2_1.b))
>                 ->  Seq Scan on alpha_neg_p1 t1_1
>                       Filter: ((b >= 125) AND (b < 225))
>                 ->  Hash
>                       ->  Seq Scan on beta_neg_p1 t2_1
> +   ->  Sort
> +         Sort Key: t1_2.a, t1_2.b
>           ->  Hash Join
>                 Hash Cond: ((t2_2.a = t1_2.a) AND (t2_2.b = t1_2.b))
>                 ->  Seq Scan on beta_neg_p2 t2_2
>                 ->  Hash
>                       ->  Seq Scan on alpha_neg_p2 t1_2
>                             Filter: ((b >= 125) AND (b < 225))
> +   ->  Sort
> +         Sort Key: t1_4.a, t1_4.b
>           ->  Hash Join
>                 Hash Cond: ((t2_4.a = t1_4.a) AND (t2_4.b = t1_4.b))
>                 ->  Append
> @@ -4935,7 +4952,7 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2
>                                   Filter: ((b >= 125) AND (b < 225))
>                             ->  Seq Scan on alpha_pos_p3 t1_6
>                                   Filter: ((b >= 125) AND (b < 225))
> -(29 rows)
> +(34 rows)
>  
>  SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b) WHERE t1.b >= 125 AND t1.b < 225
ORDERBY t1.a, t1.b;
 
>   a  |  b  |  c   | a  |  b  |  c   
> diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
> index 0fd0e1c38b3..4c1c173d8e6 100644
> --- a/src/test/regress/expected/union.out
> +++ b/src/test/regress/expected/union.out
> @@ -1195,12 +1195,14 @@ select event_id
>  ----------------------------------------------------------
>   Merge Append
>     Sort Key: events.event_id
> -   ->  Index Scan using events_pkey on events
> +   ->  Sort
> +         Sort Key: events.event_id
> +         ->  Seq Scan on events
>     ->  Sort
>           Sort Key: events_1.event_id
>           ->  Seq Scan on events_child events_1
>     ->  Index Scan using other_events_pkey on other_events
> -(7 rows)
> +(9 rows)
>  
>  drop table events_child, events, other_events;
>  reset enable_indexonlyscan;
> diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
> index e3bcfdb181e..5331e49283f 100644
> --- a/src/test/regress/sql/inherit.sql
> +++ b/src/test/regress/sql/inherit.sql
> @@ -586,11 +586,13 @@ insert into matest3 (name) values ('Test 5');
>  insert into matest3 (name) values ('Test 6');
>  
>  set enable_indexscan = off;  -- force use of seqscan/sort, so no merge
> +set enable_sort = off; -- avoid sorting below MergeAppend
>  explain (verbose, costs off) select * from matest0 order by 1-id;
>  select * from matest0 order by 1-id;
>  explain (verbose, costs off) select min(1-id) from matest0;
>  select min(1-id) from matest0;
>  reset enable_indexscan;
> +reset enable_sort;
>  
>  set enable_seqscan = off;  -- plan with fewest seqscans should be merge
>  set enable_parallel_append = off; -- Don't let parallel-append interfere
> @@ -976,6 +978,21 @@ reset enable_bitmapscan;
>  
>  drop table mcrparted;
>  
> +-- Check that sort path can be used by MergeAppend even when there are suitable pathkeys
> +create table hash_parted (i int, j int, k int) partition by hash(i);
> +create table hash_parted_1 partition of hash_parted for values with (modulus 4, remainder 0);
> +create table hash_parted_2 partition of hash_parted for values with (modulus 4, remainder 1);
> +create table hash_parted_3 partition of hash_parted for values with (modulus 4, remainder 2);
> +create table hash_parted_4 partition of hash_parted for values with (modulus 4, remainder 3);
> +--create table hash_parted_5 partition of hash_parted for values with (modulus 6, remainder 4);
> +--create table hash_parted_6 partition of hash_parted for values with (modulus 6, remainder 5);
> +create index on hash_parted(j);
> +create index on hash_parted(k);
> +insert into hash_parted select i, i, i from generate_series(1,10000) i;
> +analyze hash_parted;
> +explain (costs off) select * from hash_parted where k<100 order by j limit 100;
> +drop table hash_parted;
> +
>  -- Ensure LIST partitions allow an Append to be used instead of a MergeAppend
>  create table bool_lp (b bool) partition by list(b);
>  create table bool_lp_true partition of bool_lp for values in(true);
> -- 
> 2.34.1
> 


-- 
  Bruce Momjian  <bruce@momjian.us>        https://momjian.us
  EDB                                      https://enterprisedb.com

  When a patient asks the doctor, "Am I going to die?", he means 
  "Am I going to die soon?"



pgsql-hackers by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: minor doc issue in 9.16.2.1.1. Boolean Predicate Check Expressions
Next
From: Andy Fan
Date:
Subject: Re: Considering fractional paths in Append node