Re: break table into portions for writing to separate files - Mailing list pgsql-general

From Szymon Guz
Subject Re: break table into portions for writing to separate files
Date
Msg-id CAFjNrYsqdrhSnr7+-nZ=UqWzqdEPkrCOnjxyRJ8bCM88go1T_A@mail.gmail.com
In response to Re: break table into portions for writing to separate files  (Seb <spluque@gmail.com>)
List pgsql-general



On 1 May 2014 22:50, Seb <spluque@gmail.com> wrote:
On Thu, 1 May 2014 22:31:46 +0200,
Szymon Guz <mabewlun@gmail.com> wrote:

[...]

> Can you show us the query plan for the queries you are using, the view
> definition, and how you query that view?

Thanks for your help with this. Here's the view definition (eliding
similar column references):

---<--------------------cut here---------------start------------------->---
CREATE OR REPLACE VIEW public.daily_flux_2013 AS
 WITH pre AS (
         SELECT mot.time_study, ...,
            wind.wind_speed_u, ...,
            op.op_analyzer_status, ...,
            count(wind.wind_speed_u) OVER w AS nwind
           FROM daily_motion_2013 mot
           JOIN daily_wind3d_analog_2013 wind USING (time_study)
           JOIN daily_opath_2013 op USING (time_study)
           JOIN (SELECT generate_series('2013-07-28 00:00:00'::timestamp without time zone,
                                        '2013-09-13 00:00:00'::timestamp without time zone,
                                        '00:20:00'::interval) AS time_20min) ts_20min
             ON mot.time_study >= ts_20min.time_20min
            AND mot.time_study < (ts_20min.time_20min + '00:20:00'::interval)
         WINDOW w AS (PARTITION BY ts_20min.time_20min ORDER BY ts_20min.time_20min)
        )
 SELECT pre.time_study, ...
   FROM pre
  WHERE pre.nwind = 12000
  ORDER BY pre.time_study;
---<--------------------cut here---------------end--------------------->---

Here, mot, wind, and op are views that are similarly constructed
(i.e. they use generate_series() and join a few tables).  The WHERE
clause outputs only the 20-minute periods where every 0.1-second
record is available (as determined by the wind_speed_u column).
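
For reference, here is a minimal self-contained sketch of the same
complete-window pattern; the table and column names are hypothetical,
not the actual schema:

---<--------------------cut here---------------start------------------->---
-- Illustrative table of 0.1 s samples (not the poster's real schema).
CREATE TABLE samples (t timestamp PRIMARY KEY, wind_speed_u float8);

-- Count the samples per 20-minute bucket and keep only complete
-- buckets: 20 min * 60 s/min * 10 samples/s = 12000 rows.
WITH pre AS (
    SELECT s.t, s.wind_speed_u,
           count(s.wind_speed_u) OVER (PARTITION BY ts.t20) AS nwind
      FROM samples s
      JOIN (SELECT generate_series('2013-07-28'::timestamp,
                                   '2013-09-13'::timestamp,
                                   '20 minutes'::interval) AS t20) ts
        ON s.t >= ts.t20 AND s.t < ts.t20 + interval '20 minutes'
)
SELECT t, wind_speed_u
  FROM pre
 WHERE nwind = 12000
 ORDER BY t;
---<--------------------cut here---------------end--------------------->---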

I'm selecting from this view simply as 'SELECT * FROM daily_flux_2013',
which shows this query plan (lengthy output from pgAdmin):

---<--------------------cut here---------------start------------------->---
"Sort  (cost=29182411.29..29182411.57 rows=111 width=976)"
"  Sort Key: pre.time_study"
"  CTE pre"
"    ->  WindowAgg  (cost=29181518.64..29181907.52 rows=22222 width=434)"
"          ->  Sort  (cost=29181518.64..29181574.19 rows=22222 width=434)"
"                Sort Key: (generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:20:00'::interval))"
"                ->  Nested Loop  (cost=22171519.20..29179914.24 rows=22222 width=434)"
"                      Join Filter: (((generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:00:00.1'::interval)) >= (generate_series('2013-07-28 00:00:00'::timestamp without time zone, ' (...)"
"                      ->  Result  (cost=0.00..5.01 rows=1000 width=0)"
"                      ->  Materialize  (cost=22171519.20..29175899.74 rows=200 width=426)"
"                            ->  Merge Join  (cost=22171519.20..29175898.74 rows=200 width=426)"
"                                  Merge Cond: ((generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:00:00.1'::interval)) = (generate_series('2013-07-28 00:00:00'::timestamp without tim (...)"
"                                  ->  Merge Join  (cost=9360527.55..12865370.87 rows=200 width=123)"
"                                        Merge Cond: ((generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:00:00.1'::interval)) = (generate_series('2013-07-28 00:00:00'::timestamp witho (...)"
"                                        ->  Unique  (cost=8625.16..8626.84 rows=200 width=50)"
"                                              ->  Sort  (cost=8625.16..8626.00 rows=337 width=50)"
"                                                    Sort Key: (generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:00:00.1'::interval))"
"                                                    ->  Nested Loop  (cost=0.57..8611.01 rows=337 width=50)"
"                                                          ->  Result  (cost=0.00..5.01 rows=1000 width=0)"
"                                                          ->  Index Scan using motion_series_time_idx on motion_series mot  (cost=0.57..8.59 rows=1 width=50)"
"                                                                Index Cond: ("time" = (generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:00:00.1'::interval)))"
"                                                                Filter: (logging_group_id = 33)"
"                                        ->  Materialize  (cost=9351902.39..12856739.03 rows=200 width=73)"
"                                              ->  Unique  (cost=9351902.39..12856736.53 rows=200 width=73)"
"                                                    ->  Merge Join  (cost=9351902.39..12369954.70 rows=194712730 width=73)"
"                                                          Merge Cond: ((generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:00:00.1'::interval)) = w."time")"
"                                                          ->  Sort  (cost=64.84..67.34 rows=1000 width=8)"
"                                                                Sort Key: (generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:00:00.1'::interval))"
"                                                                ->  Result  (cost=0.00..5.01 rows=1000 width=0)"
"                                                          ->  Materialize  (cost=9351837.55..9546550.28 rows=38942546 width=73)"
"                                                                ->  Sort  (cost=9351837.55..9449193.92 rows=38942546 width=73)"
"                                                                      Sort Key: w."time""
"                                                                      ->  Append  (cost=0.00..2711828.47 rows=38942546 width=73)"
"                                                                            ->  Seq Scan on wind3d_series w  (cost=0.00..0.00 rows=1 width=236)"
"                                                                                  Filter: (((stream_type)::text = 'analog'::text) AND (logging_group_id = 33))"
"                                                                            ->  Bitmap Heap Scan on wind3d_series_analog w_1  (cost=728917.29..2711828.47 rows=38942545 width=73)"
"                                                                                  Recheck Cond: (logging_group_id = 33)"
"                                                                                  Filter: ((stream_type)::text = 'analog'::text)"
"                                                                                  ->  Bitmap Index Scan on fki_wind3d_series_analog_logging_group_id_fkey  (cost=0.00..719181.65 rows=38942545 width=0)"
"                                                                                        Index Cond: (logging_group_id = 33)"
"                                  ->  Materialize  (cost=12810991.66..16310524.87 rows=200 width=319)"
"                                        ->  Unique  (cost=12810991.66..16310522.37 rows=200 width=319)"
"                                              ->  Merge Join  (cost=12810991.66..15824477.13 rows=194418095 width=319)"
"                                                    Merge Cond: ((generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:00:00.1'::interval)) = op."time")"
"                                                    ->  Sort  (cost=64.84..67.34 rows=1000 width=8)"
"                                                          Sort Key: (generate_series('2013-07-28 00:00:00'::timestamp without time zone, '2013-09-13 00:00:00'::timestamp without time zone, '00:00:00.1'::interval))"
"                                                          ->  Result  (cost=0.00..5.01 rows=1000 width=0)"
"                                                    ->  Materialize  (cost=12810926.82..13005344.91 rows=38883619 width=319)"
"                                                          ->  Sort  (cost=12810926.82..12908135.87 rows=38883619 width=319)"
"                                                                Sort Key: op."time""
"                                                                ->  Append  (cost=0.00..2194298.15 rows=38883619 width=319)"
"                                                                      ->  Seq Scan on open_path_series op  (cost=0.00..0.00 rows=1 width=556)"
"                                                                            Filter: ((NOT is_shroud) AND (logging_group_id = 33))"
"                                                                      ->  Index Scan using fki_open_path_series_noshroud_logging_group_id_fkey on open_path_series_noshroud op_1  (cost=0.57..2194298.15 rows=38883618 width=319)"
"                                                                            Index Cond: (logging_group_id = 33)"
"                                                                            Filter: (NOT is_shroud)"
"  ->  CTE Scan on pre  (cost=0.00..500.00 rows=111 width=976)"
"        Filter: (nwind = 12000)"
---<--------------------cut here---------------end--------------------->---


--
Seb




In this form it is quite unreadable. Could you paste the plan into http://explain.depesz.com/ and provide the URL of the page?
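
For example, something like this in psql produces text you can paste
there (assuming PostgreSQL 9.0+ for the parenthesized options; note
that EXPLAIN ANALYZE actually executes the query, so it may take a
while on a view this heavy):

---<--------------------cut here---------------start------------------->---
EXPLAIN (ANALYZE, BUFFERS) SELECT * FROM daily_flux_2013;
---<--------------------cut here---------------end--------------------->---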

thanks,
Szymon
