Postgresql 14/15/16/17 partition pruning on dependent table during join - Mailing list pgsql-performance

From Stepan Yankevych
Subject Postgresql 14/15/16/17 partition pruning on dependent table during join
Date
Msg-id VE1PR03MB566482158A33D918D1F9019A92562@VE1PR03MB5664.eurprd03.prod.outlook.com
Whole thread Raw
Responses Re: Postgresql 14/15/16/17 partition pruning on dependent table during join
List pgsql-performance
Partition pruning is not pushing predicate into dependent table during join in some cases. 
See example. Predicate highlighted in red 

explain select *
from public.orders co  
left join public.execution e on e.order_id = co.order_id and e.exec_date_id >= co.create_date_id
where co.order_text in ('Order 5259 - F968FDC8')
and co.create_date_id = 20241021


Nested Loop Left Join  (cost=0.70..18262.53 rows=3093 width=94)
  ->  Index Scan using orders_20241021_order_text_idx on orders_20241021 co  (cost=0.41..8.43 rows=1 width=52)
        Index Cond: ((order_text)::text = 'Order 5259 - F968FDC8'::text)
        Filter: (create_date_id = 20241021)
  ->  Append  (cost=0.29..18253.87 rows=23 width=42)
        ->  Index Scan using execution_20241001_exec_date_id_order_id_idx on execution_20241001 e_1  (cost=0.29..295.91 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241002_exec_date_id_order_id_idx on execution_20241002 e_2  (cost=0.29..335.56 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241003_exec_date_id_order_id_idx on execution_20241003 e_3  (cost=0.29..380.27 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241004_exec_date_id_order_id_idx on execution_20241004 e_4  (cost=0.42..1018.57 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241007_exec_date_id_order_id_idx on execution_20241007 e_5  (cost=0.29..456.19 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241008_exec_date_id_order_id_idx on execution_20241008 e_6  (cost=0.29..501.43 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241009_exec_date_id_order_id_idx on execution_20241009 e_7  (cost=0.29..540.54 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241010_exec_date_id_order_id_idx on execution_20241010 e_8  (cost=0.29..576.55 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241011_exec_date_id_order_id_idx on execution_20241011 e_9  (cost=0.42..1594.20 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241014_exec_date_id_order_id_idx on execution_20241014 e_10  (cost=0.29..520.20 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241015_exec_date_id_order_id_idx on execution_20241015 e_11  (cost=0.29..536.32 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241016_exec_date_id_order_id_idx on execution_20241016 e_12  (cost=0.29..575.94 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241017_exec_date_id_order_id_idx on execution_20241017 e_13  (cost=0.29..601.98 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241018_exec_date_id_order_id_idx on execution_20241018 e_14  (cost=0.42..1584.26 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241021_exec_date_id_order_id_idx on execution_20241021 e_15  (cost=0.29..521.06 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241022_exec_date_id_order_id_idx on execution_20241022 e_16  (cost=0.29..536.90 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241023_exec_date_id_order_id_idx on execution_20241023 e_17  (cost=0.29..577.02 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241024_exec_date_id_order_id_idx on execution_20241024 e_18  (cost=0.29..597.01 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241025_exec_date_id_order_id_idx on execution_20241025 e_19  (cost=0.42..1600.91 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241028_exec_date_id_order_id_idx on execution_20241028 e_20  (cost=0.29..522.06 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241029_exec_date_id_order_id_idx on execution_20241029 e_21  (cost=0.29..535.84 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241030_exec_date_id_order_id_idx on execution_20241030 e_22  (cost=0.29..581.54 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
        ->  Index Scan using execution_20241031_exec_date_id_order_id_idx on execution_20241031 e_23  (cost=0.42..3263.48 rows=1 width=42)
              Index Cond: ((exec_date_id >= co.create_date_id) AND (order_id = co.order_id))
  1. We have two range partitioned by date_id (YYYYMMDD) tables orders and execution  
  2. One order can have a few executions 
  3. each execution can happen after order not earlier 
  4. During join both tables , having condition on date_id of the order we scan one partition only - AS EXPECTED 
  5. We expect to scan execution partitions starting from execution_20241021 till the last partition- BUT as we can see on the execution plan we scan all the partitions of execution. In that case we definitely do not need to scan partition 20241001 .. 20241020
  6. Execution plan looks good as soon as we change e.exec_date_id >= co.create_date_id to e.exec_date_id >= 20241021 /* the same constant we apply to order table */
  7. Can such change (query rewrite) be done by optimizer ? I guess yes 
  8. P.S. Having condition on equal in join  e.exec_date_id = co.create_date_id  - works as expected: one partition only on both tables 
The issue happens only in case we have > ,  >= ,  < , <= as join predicate on partitioned column 



See test case to reproduce . 

drop table if exists public.orders;
drop table if exists public.execution;

CREATE TABLE public.orders (
order_id int8 NOT NULL,
instrument_id int8 NULL,
account_id int4 NULL,
order_text varchar(256) NULL,
pg_db_create_time timestamp DEFAULT clock_timestamp() NULL,
create_date_id int4 not NULL
)
PARTITION BY RANGE (create_date_id);
CREATE INDEX orders_order_text_idx ON ONLY public.orders USING btree (order_text);
CREATE UNIQUE INDEX orders_order_id_idx ON ONLY public.orders USING btree (order_id, create_date_id);

CREATE TABLE public.orders_20241001 PARTITION OF public.orders  FOR VALUES FROM (20241001) TO (20241002);
CREATE TABLE public.orders_20241002 PARTITION OF public.orders  FOR VALUES FROM (20241002) TO (20241003);
CREATE TABLE public.orders_20241003 PARTITION OF public.orders  FOR VALUES FROM (20241003) TO (20241004);
CREATE TABLE public.orders_20241004 PARTITION OF public.orders  FOR VALUES FROM (20241004) TO (20241007);
CREATE TABLE public.orders_20241007 PARTITION OF public.orders  FOR VALUES FROM (20241007) TO (20241008);
CREATE TABLE public.orders_20241008 PARTITION OF public.orders  FOR VALUES FROM (20241008) TO (20241009);
CREATE TABLE public.orders_20241009 PARTITION OF public.orders  FOR VALUES FROM (20241009) TO (20241010);
CREATE TABLE public.orders_20241010 PARTITION OF public.orders  FOR VALUES FROM (20241010) TO (20241011);
CREATE TABLE public.orders_20241011 PARTITION OF public.orders  FOR VALUES FROM (20241011) TO (20241014);
CREATE TABLE public.orders_20241014 PARTITION OF public.orders  FOR VALUES FROM (20241014) TO (20241015);
CREATE TABLE public.orders_20241015 PARTITION OF public.orders  FOR VALUES FROM (20241015) TO (20241016);
CREATE TABLE public.orders_20241016 PARTITION OF public.orders  FOR VALUES FROM (20241016) TO (20241017);
CREATE TABLE public.orders_20241017 PARTITION OF public.orders  FOR VALUES FROM (20241017) TO (20241018);
CREATE TABLE public.orders_20241018 PARTITION OF public.orders  FOR VALUES FROM (20241018) TO (20241021);
CREATE TABLE public.orders_20241021 PARTITION OF public.orders  FOR VALUES FROM (20241021) TO (20241022);
CREATE TABLE public.orders_20241022 PARTITION OF public.orders  FOR VALUES FROM (20241022) TO (20241023);
CREATE TABLE public.orders_20241023 PARTITION OF public.orders  FOR VALUES FROM (20241023) TO (20241024);
CREATE TABLE public.orders_20241024 PARTITION OF public.orders  FOR VALUES FROM (20241024) TO (20241025);
CREATE TABLE public.orders_20241025 PARTITION OF public.orders  FOR VALUES FROM (20241025) TO (20241028);
CREATE TABLE public.orders_20241028 PARTITION OF public.orders  FOR VALUES FROM (20241028) TO (20241029);
CREATE TABLE public.orders_20241029 PARTITION OF public.orders  FOR VALUES FROM (20241029) TO (20241030);
CREATE TABLE public.orders_20241030 PARTITION OF public.orders  FOR VALUES FROM (20241030) TO (20241031);
CREATE TABLE public.orders_20241031 PARTITION OF public.orders  FOR VALUES FROM (20241031) TO (20241101);


CREATE TABLE public.execution (
exec_id int8 NOT NULL,
order_id int8 NOT NULL,
exec_date_id int4 NOT NULL,
exec_time timestamp(6) NOT NULL,
   qty int8 NULL,
price numeric NULL
)
PARTITION BY RANGE (exec_date_id);
CREATE INDEX execution_exec_date_id_order_id_idx ON ONLY public.execution USING btree (exec_date_id, order_id);
CREATE unique INDEX execution_pk ON ONLY public.execution USING btree (exec_id, exec_date_id);

CREATE TABLE public.execution_20241001 PARTITION OF public.execution  FOR VALUES FROM (20241001) TO (20241002);
CREATE TABLE public.execution_20241002 PARTITION OF public.execution  FOR VALUES FROM (20241002) TO (20241003);
CREATE TABLE public.execution_20241003 PARTITION OF public.execution  FOR VALUES FROM (20241003) TO (20241004);
CREATE TABLE public.execution_20241004 PARTITION OF public.execution  FOR VALUES FROM (20241004) TO (20241007);
CREATE TABLE public.execution_20241007 PARTITION OF public.execution  FOR VALUES FROM (20241007) TO (20241008);
CREATE TABLE public.execution_20241008 PARTITION OF public.execution  FOR VALUES FROM (20241008) TO (20241009);
CREATE TABLE public.execution_20241009 PARTITION OF public.execution  FOR VALUES FROM (20241009) TO (20241010);
CREATE TABLE public.execution_20241010 PARTITION OF public.execution  FOR VALUES FROM (20241010) TO (20241011);
CREATE TABLE public.execution_20241011 PARTITION OF public.execution  FOR VALUES FROM (20241011) TO (20241014);
CREATE TABLE public.execution_20241014 PARTITION OF public.execution  FOR VALUES FROM (20241014) TO (20241015);
CREATE TABLE public.execution_20241015 PARTITION OF public.execution  FOR VALUES FROM (20241015) TO (20241016);
CREATE TABLE public.execution_20241016 PARTITION OF public.execution  FOR VALUES FROM (20241016) TO (20241017);
CREATE TABLE public.execution_20241017 PARTITION OF public.execution  FOR VALUES FROM (20241017) TO (20241018);
CREATE TABLE public.execution_20241018 PARTITION OF public.execution  FOR VALUES FROM (20241018) TO (20241021);
CREATE TABLE public.execution_20241021 PARTITION OF public.execution  FOR VALUES FROM (20241021) TO (20241022);
CREATE TABLE public.execution_20241022 PARTITION OF public.execution  FOR VALUES FROM (20241022) TO (20241023);
CREATE TABLE public.execution_20241023 PARTITION OF public.execution  FOR VALUES FROM (20241023) TO (20241024);
CREATE TABLE public.execution_20241024 PARTITION OF public.execution  FOR VALUES FROM (20241024) TO (20241025);
CREATE TABLE public.execution_20241025 PARTITION OF public.execution  FOR VALUES FROM (20241025) TO (20241028);
CREATE TABLE public.execution_20241028 PARTITION OF public.execution  FOR VALUES FROM (20241028) TO (20241029);
CREATE TABLE public.execution_20241029 PARTITION OF public.execution  FOR VALUES FROM (20241029) TO (20241030);
CREATE TABLE public.execution_20241030 PARTITION OF public.execution  FOR VALUES FROM (20241030) TO (20241031);
CREATE TABLE public.execution_20241031 PARTITION OF public.execution  FOR VALUES FROM (20241031) TO (20241101);

-- generate data
INSERT INTO public.orders (order_id, instrument_id, account_id, order_text, create_date_id)
SELECT
   s,  -- order_id as a sequential number starting from 1
   (RANDOM() * 1000000)::int,  -- Random instrument_id
   (RANDOM() * 10000)::int,  -- Random account_id
   'Order ' || s || ' - ' || upper(left( md5(s::text),  floor(random() * 10)::int)) AS order_text,
   20241001 + (s % 31)  -- create_date_id cycling between 20241001 and 20241031
FROM generate_series(1, 1000000) s
where (20241001 + (s % 31))::int not in (20241005, 20241006, 20241012, 20241013, 20241019, 20241020, 20241026, 20241027 );


INSERT INTO public.execution (exec_id, order_id, exec_date_id, exec_time, qty, price)
WITH ordered_data AS (  SELECT order_id,
       create_date_id
   from  public.orders),
execution_data AS (  SELECT
       order_id,
       create_date_id,
       generate_series(1, (RANDOM() * 3 + 1)::int) AS exec_num  -- Generates between 1 and 3 executions per order
   FROM
       ordered_data),
exec_ids ASSELECT
       ROW_NUMBER() OVER (ORDER BY order_id, exec_num) AS exec_id,  -- Generates a unique exec_id
       order_id,
       create_date_id,
       exec_num
   FROM
       execution_data)
SELECT
   exec_id,
   order_id,
   CASE
       WHEN exec_num = 1 THEN create_date_id
       ELSE create_date_id + (RANDOM() * 10)::int  -- Randomly adds up to 10 days to the original date for additional records
   END AS exec_date_id,
   clock_timestamp() - (RANDOM() * INTERVAL '10 days') AS exec_time,  -- Random timestamp within the last 10 days
   (RANDOM() * 1000)::int AS qty,  -- Random quantity
   (RANDOM() * 100)::numeric(10,2) AS price  -- Random price
FROM
   exec_ids;


Stepan Yankevych


pgsql-performance by date:

Previous
From: Ba Jinsheng
Date:
Subject: Re: Unexpected Performance for the Function simplify_function
Next
From: Vijaykumar Jain
Date:
Subject: Re: Postgresql 14/15/16/17 partition pruning on dependent table during join