Join for the parent table will not leverage the index scan - Mailing list pgsql-admin

From Shrikant Bhende
Subject Join for the parent table will not leverage the index scan
Date
Msg-id CAMTQpJAKR66kUqZmcc+-VgkPtT6VRPVAghio=X5Kb8i2pphCbw@mail.gmail.com
Whole thread Raw
Responses Re: Join for the parent table will not leverage the index scan  (Vijaykumar Jain <vijaykumarjain.github@gmail.com>)
Re: Join for the parent table will not leverage the index scan  (Shrikant Bhende <shrikantpostgresql@gmail.com>)
Re: Join for the parent table will not leverage the index scan  (Ron <ronljohnsonjr@gmail.com>)
List pgsql-admin

We have two tables, both have ~36 partitions.
This is my first query, which attempts to join the parent tables on the columns that have been used to create multi-column indexes for all partitions (multi-column indexes defined on session_id, detail_id in that order):

PostgreSQL version : 10
-- query

explain

select * from dm_ci360.page_details pd
inner join dm_ci360.page_details_ext pde
 on (pd.session_id = pde.session_id
 and pd.detail_id = pde.detail_id)
where pd.session_dt >= now()::date-2;

-- plan

Hash Join  (cost=36927955.75..100106605.07 rows=11 width=8355)

  Hash Cond: (((pd.session_id)::text = (pde.session_id)::text) AND ((pd.detail_id)::text = (pde.detail_id)::text))

  ->  Append  (cost=1297.33..571870.20 rows=630535 width=8130)

        ->  Bitmap Heap Scan on page_details_null pd  (cost=1297.33..240207.85 rows=69259 width=8220)

              Recheck Cond: (session_dt >= ((now())::date - 2))

              ->  Bitmap Index Scan on page_details_null_session_dt_idx  (cost=0.00..1280.02 rows=69259 width=0)

                    Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m3_session_dt_idx on page_details_m3 pd_1  (cost=0.44..4.71 rows=1 width=8216)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m2_session_dt_idx on page_details_m2 pd_2  (cost=0.43..102.79 rows=621 width=8253)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m1_session_dt_idx on page_details_m1 pd_3  (cost=0.44..4.55 rows=1 width=8281)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_5_session_dt_idx on page_details_5 pd_4  (cost=0.57..51622.53 rows=196574 width=8202)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Bitmap Heap Scan on page_details_31 pd_5  (cost=982.61..194857.98 rows=52391 width=7721)

              Recheck Cond: (session_dt >= ((now())::date - 2))

              ->  Bitmap Index Scan on page_details_31_session_dt_idx1  (cost=0.00..969.51 rows=52391 width=0)

                    Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_49_session_dt_idx on page_details_49 pd_6  (cost=0.45..10991.08 rows=50039 width=7721)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_52_session_dt_idx on page_details_52 pd_7  (cost=0.44..2337.94 rows=10222 width=8297)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_59_session_dt_idx on page_details_59 pd_8  (cost=0.57..24790.17 rows=50148 width=8276)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_60_session_dt_idx on page_details_60 pd_9  (cost=0.44..7292.41 rows=4259 width=8263)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_61_session_dt_idx on page_details_61 pd_10  (cost=0.43..309.10 rows=704 width=8182)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_63_session_dt_idx on page_details_63 pd_11  (cost=0.43..13.20 rows=25 width=8250)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_64_session_dt_idx on page_details_64 pd_12  (cost=0.44..5.41 rows=1 width=8314)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_66_session_dt_idx on page_details_66 pd_13  (cost=0.42..4.49 rows=1 width=8277)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_67_session_dt_idx on page_details_67 pd_14  (cost=0.42..58.62 rows=325 width=8227)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_68_session_dt_idx on page_details_68 pd_15  (cost=0.42..35.20 rows=111 width=8259)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_78_session_dt_idx on page_details_78 pd_16  (cost=0.44..1738.10 rows=4608 width=8263)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_98_session_dt_idx on page_details_98 pd_17  (cost=0.44..2157.31 rows=7136 width=8225)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_101_session_dt_idx on page_details_101 pd_18  (cost=0.44..2956.03 rows=9241 width=8205)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_130_session_dt_idx on page_details_130 pd_19  (cost=0.44..1683.53 rows=19656 width=8199)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m100_session_dt_idx on page_details_m100 pd_20  (cost=0.43..4.96 rows=1 width=8225)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m4_session_dt_idx on page_details_m4 pd_21  (cost=0.43..659.89 rows=6097 width=8195)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_134_session_dt_idx on page_details_134 pd_22  (cost=0.45..4666.25 rows=50409 width=8176)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_58_session_dt_idx on page_details_58 pd_23  (cost=0.43..7.05 rows=1 width=8362)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_69_session_dt_idx on page_details_69 pd_24  (cost=0.43..125.03 rows=348 width=8268)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_100_session_dt_idx on page_details_100 pd_25  (cost=0.43..381.13 rows=466 width=8195)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_114_session_dt_idx on page_details_114 pd_26  (cost=0.43..7.80 rows=1 width=8183)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m5_session_dt_idx on page_details_m5 pd_27  (cost=0.57..4505.18 rows=50872 width=8225)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_32_session_dt_idx on page_details_32 pd_28  (cost=0.43..8.44 rows=1 width=8807)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_23_session_dt_idx on page_details_23 pd_29  (cost=0.29..7.47 rows=1 width=9327)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m6_session_dt_idx on page_details_m6 pd_30  (cost=0.44..769.32 rows=7135 width=7814)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m7_session_dt_idx on page_details_m7 pd_31  (cost=0.44..11400.50 rows=14694 width=8203)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m8_session_dt_idx on page_details_m8 pd_32  (cost=0.44..6568.86 rows=8045 width=8200)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_144_session_dt_idx on page_details_144 pd_33  (cost=0.29..21.16 rows=162 width=8189)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_143_session_dt_idx on page_details_143 pd_34  (cost=0.43..1477.00 rows=16135 width=8197)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_145_session_dt_idx on page_details_145 pd_35  (cost=0.29..87.17 rows=844 width=8226)

              Index Cond: (session_dt >= ((now())::date - 2))

  ->  Hash  (cost=16117516.77..16117516.77 rows=449927377 width=226)

        ->  Append  (cost=0.00..16117516.77 rows=449927377 width=226)

              ->  Seq Scan on page_details_ext_null pde  (cost=0.00..1467668.52 rows=40845752 width=226)

              ->  Seq Scan on page_details_ext_m3 pde_1  (cost=0.00..347732.32 rows=9980132 width=226)

              ->  Seq Scan on page_details_ext_m2 pde_2  (cost=0.00..26146.27 rows=715527 width=226)

              ->  Seq Scan on page_details_ext_m1 pde_3  (cost=0.00..180093.08 rows=4793908 width=226)

              ->  Seq Scan on page_details_ext_5 pde_4  (cost=0.00..3056150.08 rows=82652608 width=226)

              ->  Seq Scan on page_details_ext_31 pde_5  (cost=0.00..4177416.08 rows=124953208 width=226)

              ->  Seq Scan on page_details_ext_49 pde_6  (cost=0.00..1034364.84 rows=28538484 width=226)

              ->  Seq Scan on page_details_ext_52 pde_7  (cost=0.00..575529.72 rows=16018272 width=226)

              ->  Seq Scan on page_details_ext_59 pde_8  (cost=0.00..1226617.78 rows=33054378 width=226)

              ->  Seq Scan on page_details_ext_60 pde_9  (cost=0.00..206925.60 rows=5920260 width=226)

              ->  Seq Scan on page_details_ext_61 pde_10  (cost=0.00..10599.63 rows=296463 width=226)

              ->  Seq Scan on page_details_ext_63 pde_11  (cost=0.00..32993.45 rows=898145 width=226)

              ->  Seq Scan on page_details_ext_64 pde_12  (cost=0.00..211220.00 rows=6212500 width=226)

              ->  Seq Scan on page_details_ext_66 pde_13  (cost=0.00..4888.44 rows=127544 width=226)

              ->  Seq Scan on page_details_ext_67 pde_14  (cost=0.00..5121.02 rows=135402 width=226)

              ->  Seq Scan on page_details_ext_68 pde_15  (cost=0.00..4284.18 rows=116218 width=226)

              ->  Seq Scan on page_details_ext_78 pde_16  (cost=0.00..135663.64 rows=3705564 width=226)

              ->  Seq Scan on page_details_ext_98 pde_17  (cost=0.00..232421.44 rows=6556944 width=226)

              ->  Seq Scan on page_details_ext_101 pde_18  (cost=0.00..189844.81 rows=5231481 width=226)

              ->  Seq Scan on page_details_ext_130 pde_19  (cost=0.00..318011.40 rows=8558640 width=226)

              ->  Seq Scan on page_details_ext_m100 pde_20  (cost=0.00..4914.04 rows=143504 width=226)

              ->  Seq Scan on page_details_ext_m4 pde_21  (cost=0.00..75878.68 rows=1994468 width=226)

              ->  Seq Scan on page_details_ext_134 pde_22  (cost=0.00..724264.36 rows=19192136 width=226)

              ->  Seq Scan on page_details_ext_58 pde_23  (cost=0.00..12171.94 rows=361394 width=331)

              ->  Seq Scan on page_details_ext_69 pde_24  (cost=0.00..6464.95 rows=173295 width=226)

              ->  Seq Scan on page_details_ext_100 pde_25  (cost=0.00..9217.39 rows=255239 width=226)

              ->  Seq Scan on page_details_ext_114 pde_26  (cost=0.00..11702.69 rows=358469 width=226)

              ->  Seq Scan on page_details_ext_m5 pde_27  (cost=0.00..1274434.84 rows=33330284 width=226)

              ->  Seq Scan on page_details_ext_23 pde_28  (cost=0.00..822.03 rows=25503 width=378)

              ->  Seq Scan on page_details_ext_32 pde_29  (cost=0.00..7011.56 rows=217756 width=378)

              ->  Seq Scan on page_details_ext_m6 pde_30  (cost=0.00..177039.58 rows=4616658 width=226)

              ->  Seq Scan on page_details_ext_m7 pde_31  (cost=0.00..188264.43 rows=5143643 width=226)

              ->  Seq Scan on page_details_ext_m8 pde_32  (cost=0.00..139185.56 rows=3799056 width=226)

              ->  Seq Scan on page_details_ext_144 pde_33  (cost=0.00..687.46 rows=16246 width=226)

              ->  Seq Scan on page_details_ext_143 pde_34  (cost=0.00..41053.82 rows=971482 width=226)

              ->  Seq Scan on page_details_ext_145 pde_35  (cost=0.00..711.14 rows=16814 width=226)

 

The only place an index scan is used is to filter down page_details. In my second query,
when explicitly joining single partitions an index scan is used to perform filtering, and the join as well:

--query

explain

select * from dm_ci360.page_details_5 pd
inner join dm_ci360.page_details_ext_5 pde
on (pd.session_id = pde.session_id
and pd.detail_id = pde.detail_id)
where pd.session_dt >= now()::date-2;


--plan

Nested Loop  (cost=1.27..1685980.38 rows=1 width=8428)

  ->  Index Scan using page_details_5_session_dt_idx on page_details_5 pd  (cost=0.57..51267.67 rows=196574 width=8202)

        Index Cond: (session_dt >= ((now())::date - 2))

  ->  Index Scan using page_details_ext_5_session_id_detail_id_idx on page_details_ext_5 pde  (cost=0.69..8.31 rows=1 width=226)

        Index Cond: (((session_id)::text = (pd.session_id)::text) AND ((detail_id)::text = (pd.detail_id)::text))

I just cannot figure out why the join for the parent table will not leverage the index,
but the join for a single partition will. We are running on Postgres 10, and the row counts for the tables can be found below:

Row Count
page_details -> ~413M
page_details_ext -> ~450M

Table DDL

-- page_details

CREATE TABLE dm_ci360.page_details (
    dml_id int4 NULL,
    dml_ts timestamp NULL,
    dml_ts_utc timestamp NULL,
    dml_type varchar(1) NULL,
    dwh_job_id int4 NULL,
    ins_dwh_job_id int4 NULL,
    upd_dwh_job_id int4 NULL,
    deleted_flag varchar(1) NULL,
    processed_dttm timestamp NULL,
    valid_from_dttm timestamp(0) NULL,
    valid_to_dttm timestamp(0) NULL,
    ccd varchar(2000) NULL,
    client_id int4 NULL,
    detail_id varchar(32) NULL,
    session_id varchar(29) NULL,
    visit_id varchar(32) NULL,
    window_size_txt varchar(20) NULL,
    session_dt date NULL,
    page_url_txt varchar(1332) NULL,
    domain_nm varchar(165) NULL,
    bytes_sent_cnt int4 NULL,
    page_load_sec_cnt int8 NULL,
    page_complete_sec_cnt int8 NULL,
    protocol_nm varchar(8) NULL,
    page_desc varchar(1332) NULL,
    class1_id varchar(650) NULL,
    class2_id varchar(650) NULL,
    class3_id varchar(650) NULL,
    class4_id varchar(650) NULL,
    class5_id varchar(650) NULL,
    class6_id varchar(650) NULL,
    class7_id varchar(650) NULL,
    class8_id varchar(650) NULL,
    class9_id varchar(650) NULL,
    class10_id varchar(650) NULL,
    class11_id varchar(650) NULL,
    class12_id varchar(650) NULL,
    class13_id varchar(650) NULL,
    class14_id varchar(650) NULL,
    class15_id varchar(650) NULL,
    url_domain varchar(215) NULL,
    identity_id varchar(36) NULL,
    detail_dttm timestamp NULL,
    detail_dttm_tz timestamp NULL,
    load_dttm timestamp NULL,
    session_dt_tz date NULL,
    detail_id_hex varchar(32) NULL,
    visit_id_hex varchar(32) NULL,
    session_id_hex varchar(29) NULL
)
PARTITION BY LIST (client_id);

CREATE TABLE dm_ci360.page_details_100 PARTITION OF dm_ci360.page_details FOR VALUES IN (100);

CREATE INDEX page_details_100_detail_dttm_idx ON dm_ci360.page_details_100 USING btree (detail_dttm)
CREATE INDEX page_details_100_identity_id_idx ON dm_ci360.page_details_100 USING btree (identity_id)
CREATE INDEX page_details_100_session_dt_idx ON dm_ci360.page_details_100 USING btree (session_dt)
CREATE INDEX page_details_100_session_id_detail_id_idx ON dm_ci360.page_details_100 USING btree (session_id, detail_id)

-- page_details_ext

CREATE TABLE dm_ci360.page_details_ext (
    dml_id int4 NULL,
    dml_ts timestamp NULL,
    dml_ts_utc timestamp NULL,
    dml_type varchar(1) NULL,
    dwh_job_id int4 NULL,
    ins_dwh_job_id int4 NULL,
    upd_dwh_job_id int4 NULL,
    deleted_flag varchar(1) NULL,
    processed_dttm timestamp NULL,
    valid_from_dttm timestamp(0) NULL,
    valid_to_dttm timestamp(0) NULL,
    ccd varchar(2000) NULL,
    client_id int4 NULL,
    detail_id varchar(32) NULL,
    session_id varchar(29) NULL,
    active_sec_spent_on_page_cnt int4 NULL,
    seconds_spent_on_page_cnt int4 NULL,
    load_dttm timestamp NULL,
    detail_id_hex varchar(32) NULL,
    session_id_hex varchar(29) NULL
)
PARTITION BY LIST (client_id);

CREATE TABLE dm_ci360.page_details_ext_100 PARTITION OF dm_ci360.page_details_ext FOR VALUES IN (100);

CREATE INDEX page_details_ext_100_session_id_detail_id_idx ON dm_ci360.page_details_ext_100 USING btree (session_id, detail_id)
CREATE INDEX page_details_ext_100_session_id_detail_id_idx ON dm_ci360.page_details_ext_100 USING btree (session_id, detail_id)


Thanks

pgsql-admin by date:

Previous
From: otar shavadze
Date:
Subject: Index usage statistic for last X hours
Next
From: Vijaykumar Jain
Date:
Subject: Re: Join for the parent table will not leverage the index scan