Re: transitive pruning optimization on the right side of a join for partition tables - Mailing list pgsql-general

From Ondrej Ivanič
Subject Re: transitive pruning optimization on the right side of a join for partition tables
Date
Msg-id CAM6mieK4ZZf1+RSCqh91-vPxZUDaNK1v6=DL62vqR_=VpZr78g@mail.gmail.com
Whole thread Raw
In response to Re: transitive pruning optimization on the right side of a join for partition tables  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-general
Hi,

On 1 October 2012 01:14, Tom Lane <tgl@sss.pgh.pa.us> wrote:
> "Waldo, Ethan" <ewaldo@healthetechs.com> writes:
>> This query does a sequence scan and append across all the partition tables:
>> select "dates"."date_description" FROM "myfact" as "myfact", "dates" as "dates" where "myfact"."recorded_on_id" =
"dates"."recorded_on_id"and "dates"."recorded_on_id" IN ('4617', '4618', '4619', '4620', '4621', '4622', '4623',
'4624','4625', '4626', '4627', '4628', '4629', '4630', '4631', '4632', '4633', '4634', '4635', '4636', '4637', '4638',
'4639','4640', '4641', '4642', '4643', '4644', '4645', '4646', '4647'); 
>
> When I try that in 9.1, I get a plan with inner indexscans for each
> child table; which, while not exactly what you're asking for, should
> perform well enough when the fact table is large enough that
> partitioning is actually a useful activity.

I do not have 9.1 handy but this is from 9.0.4:

db=# EXPLAIN select dates.datetime, myfact.recorded_on_id from myfact,
dates where myfact.recorded_on_id = dates.recorded_on_id and
dates.recorded_on_id in (1813, 1814);
                                         QUERY PLAN
--------------------------------------------------------------------------------------------
 Hash Join  (cost=33.46..86124.15 rows=2858831 width=16)
   Hash Cond: (public.myfact.recorded_on_id = public.dates.recorded_on_id)
   ->  Append  (cost=0.00..46245.73 rows=3001773 width=8)
         ->  Seq Scan on myfact  (cost=0.00..27.70 rows=1770 width=8)
         ->  Seq Scan on myfact_y2004w51 myfact  (cost=0.00..15406.01
rows=1000001 width=8)
         ->  Seq Scan on myfact_y2004w52 myfact  (cost=0.00..15406.01
rows=1000001 width=8)
         ->  Seq Scan on myfact_y2004w53 myfact  (cost=0.00..15406.01
rows=1000001 width=8)
   ->  Hash  (cost=33.21..33.21 rows=20 width=16)
         ->  Append  (cost=0.00..33.21 rows=20 width=16)
               ->  Seq Scan on dates  (cost=0.00..32.12 rows=18 width=16)
                     Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
               ->  Seq Scan on dates_y2004w51 dates  (cost=0.00..1.09
rows=2 width=16)
                     Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
(13 rows)

Postgres can't infer that myfact.recorded_on_id is in (1813, 1814)
from the join condition (myfact.recorded_on_id = dates.recorded_on_id)
hence all partitons are included (myfact_y2004w51, myfact_y2004w53,
myfact_y2004w53). Adding "myfact.recorded_on_id in (1813, 1814)"
creates much better plan:

db=# EXPLAIN select dates.datetime, myfact.recorded_on_id from myfact,
dates where myfact.recorded_on_id = dates.recorded_on_id and
dates.recorded_on_id in (1813, 1814) and myfact.recorded_on_id in
(1813, 1814);
                                                   QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
 Hash Join  (cost=33.46..18878.72 rows=296673 width=16)
   Hash Cond: (public.myfact.recorded_on_id = public.dates.recorded_on_id)
   ->  Append  (cost=0.00..14710.38 rows=311507 width=8)
         ->  Seq Scan on myfact  (cost=0.00..32.12 rows=18 width=8)
               Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
         ->  Bitmap Heap Scan on myfact_y2004w51 myfact
(cost=5378.64..14678.25 rows=311489 width=8)
               Recheck Cond: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
               ->  Bitmap Index Scan on myfact_y2004w51_recorded_on_id
 (cost=0.00..5300.77 rows=311489 width=0)
                     Index Cond: (recorded_on_id = ANY
('{1813,1814}'::bigint[]))
   ->  Hash  (cost=33.21..33.21 rows=20 width=16)
         ->  Append  (cost=0.00..33.21 rows=20 width=16)
               ->  Seq Scan on dates  (cost=0.00..32.12 rows=18 width=16)
                     Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
               ->  Seq Scan on dates_y2004w51 dates  (cost=0.00..1.09
rows=2 width=16)
                     Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
(15 rows)


--------------
create table myfact (
    id bigint not null,
    recorded_on_id bigint not null
);

create table myfact_y2004w51 (
    check (recorded_on_id >= 1812 and recorded_on_id <= 1818),
    primary key (id)
) inherits (myfact);
create table myfact_y2004w52 (
    check (recorded_on_id >= 1819 and recorded_on_id <= 1825),
    primary key (id)
) inherits (myfact);
create table myfact_y2004w53 (
    check (recorded_on_id >= 1826 and recorded_on_id <= 1832),
    primary key (id)
) inherits (myfact);

create table dates (
    datetime timestamp without time zone not null,
    recorded_on_id bigint not null
);

create table dates_y2004w51 (
    check (recorded_on_id >= 1812 and recorded_on_id <= 1818),
    primary key (datetime)
) inherits (dates);
create table dates_y2004w52 (
    check (recorded_on_id >= 1819 and recorded_on_id <= 1825),
    primary key (datetime)
) inherits (dates);
create table dates_y2004w53 (
    check (recorded_on_id >= 1826 and recorded_on_id <= 1832),
    primary key (datetime)
) inherits (dates);

insert into myfact_y2004w51 select s.i, (random() * 6 + 1812)::integer
from generate_series(0, 1000000, 1) as s(i);
insert into myfact_y2004w52 select s.i, (random() * 6 + 1819)::integer
from generate_series(0, 1000000, 1) as s(i);
insert into myfact_y2004w53 select s.i, (random() * 6 + 1826)::integer
from generate_series(0, 1000000, 1) as s(i);

insert into dates_y2004w51 select distinct on (recorded_on_id)
to_timestamp(recorded_on_id), recorded_on_id from myfact_y2004w51;
insert into dates_y2004w52 select distinct on (recorded_on_id)
to_timestamp(recorded_on_id), recorded_on_id from myfact_y2004w52;
insert into dates_y2004w53 select distinct on (recorded_on_id)
to_timestamp(recorded_on_id), recorded_on_id from myfact_y2004w53;

create index myfact_y2004w51_recorded_on_id on myfact_y2004w51(recorded_on_id);
create index myfact_y2004w52_recorded_on_id on myfact_y2004w52(recorded_on_id);
create index myfact_y2004w53_recorded_on_id on myfact_y2004w53(recorded_on_id);

create index dates_y2004w51_recorded_on_id on dates_y2004w51(recorded_on_id);
create index dates_y2004w52_recorded_on_id on dates_y2004w52(recorded_on_id);
create index dates_y2004w53_recorded_on_id on dates_y2004w53(recorded_on_id);

ANALYZE;ANALYZE;ANALYZE;ANALYZE;ANALYZE;

EXPLAIN select dates.datetime, myfact.recorded_on_id from myfact,
dates where myfact.recorded_on_id = dates.recorded_on_id and
dates.recorded_on_id in (1813, 1814);
EXPLAIN select dates.datetime, myfact.recorded_on_id from myfact,
dates where myfact.recorded_on_id = dates.recorded_on_id and
dates.recorded_on_id in (1813, 1814) and myfact.recorded_on_id in
(1813, 1814);
----------------------

--
Ondrej Ivanic
(ondrej.ivanic@gmail.com)


pgsql-general by date:

Previous
From: Jeff Janes
Date:
Subject: Re: Would my postgresql 8.4.12 profit from doubling RAM?
Next
From: Rajesh Kumar Mallah
Date:
Subject: enforcing transaction mode for read-write queries.