Re: Performance die when COPYing to table with bigint PK - Mailing list pgsql-performance

From Robert Ayrapetyan
Subject Re: Performance die when COPYing to table with bigint PK
Date
Msg-id CAAboi9t8i_+W=c-bWCeYsc4mwYyT7+mGQ5QymtO6MLcH96YdqA@mail.gmail.com
Whole thread Raw
In response to Re: Performance die when COPYing to table with bigint PK  ("Kevin Grittner" <Kevin.Grittner@wicourts.gov>)
List pgsql-performance
Hi.

Timings for your test:

foo=# create table bi (big bigint not null, medium int not null);
CREATE TABLE
foo=# insert into bi with x(n) as (select generate_series(1, 1000000))
foo-# select n + 5000000000, n from x;
INSERT 0 1000000
foo=# \timing on
Timing is on.
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 211.205 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 2789.607 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 206.712 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 2959.679 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 594.584 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 3651.206 ms
foo=# create unique index bi_medium on bi (medium);
CREATE INDEX
Time: 781.407 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 42.177 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 5671.883 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 139.418 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 5668.894 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 204.479 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 6530.010 ms
foo=# drop index bi_medium;
DROP INDEX
Time: 212.038 ms
foo=# create unique index bi_big on bi (big);
CREATE INDEX
Time: 650.492 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 39.818 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 8093.276 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 282.165 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 5988.694 ms
foo=# truncate table bi; insert into bi with x(n) as (select
TRUNCATE TABLE
Time: 245.859 ms
foo(# generate_series(1, 1000000)) select n + 5000000000, n from x;
INSERT 0 1000000
Time: 5702.236 ms
foo=# \timing off
Timing is off.


Now please perform mine:

CREATE TABLESPACE tblsp_ix LOCATION '/foo';
CREATE SCHEMA test;
CREATE TABLE test.t
(
    id_big bigint, --PRIMARY KEY USING INDEX TABLESPACE tblsp_ix,
    ts timestamp NOT NULL,
    ip inet,
    id_medium integer NOT NULL,
    id_small smallint NOT NULL,
    id_smalll smallint NOT NULL
);
CREATE INDEX ix_t ON test.t
    USING btree (ts, ip, id_medium, id_small) TABLESPACE tblsp_ix;

gen_data.csh
-------------cut here-----------------------------------------------------------
#!/bin/tcsh
set f = $1
set lines_cnt = $2
rm ${f}
set id_big = -2147483648
set time_t = 1000000000
set ts = `date -r ${time_t}`
set ip = "127.0.0.1"
set id_medium = -2147483648
set id_small = 0
set echo_style = both
while ( $lines_cnt > 0 )
    echo "${id_big}\t${ts}\t${ip}\t${id_medium}\t${id_small}\t${id_small}"
>> ${f}
    @ id_big = ${id_big} + 1
    @ time_t = ${time_t} + 1
    @ id_medium = ${id_medium} + 1
    @ lines_cnt = ${lines_cnt} - 1
end
exit 0
-------------cut here-----------------------------------------------------------

time ./gen_data.csh app.data 100000
9.564u 2.487s 0:12.05 99.9%     420+1113k 0+51io 0pf+0w

copy_data.csh
-------------cut here-----------------------------------------------------------
#!/bin/tcsh
set f = $1
set cnt = $2
while ( $cnt > 0 )
    time psql -d foo -c "COPY test.t(id_big, ts, ip, id_medium,
id_small, id_smalll) from '$f'"
    @ cnt = ${cnt} - 1
end
exit 0
-------------cut here-----------------------------------------------------------

time copy_data.csh /aaa/app.data 100
...
0.000u 0.027s 0:01.55 1.2%      474+1254k 0+0io 0pf+0w
COPY 100000
...
(~1-3 sec for every of 100 iterations with 3-4 spikes to 5 secs max)

CREATE INDEX ix_t_big ON test.t USING btree (id_big) TABLESPACE tblsp_ix;

time copy_data.csh /aaa/app.data 100
(the show begins from iteration # ~20):
COPY 100000
0.000u 0.005s 0:20.70 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:06.50 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:03.44 0.8%      704+514k 0+0io 0pf+0w
COPY 100000
0.007u 0.029s 0:04.55 0.4%      808+1746k 0+0io 0pf+0w
COPY 100000
0.005u 0.000s 0:03.60 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.005u 0.000s 0:02.55 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:03.03 0.9%      469+197k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:03.85 0.7%      526+1393k 0+0io 0pf+0w
COPY 100000
0.005u 0.000s 0:06.66 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:02.73 1.0%      526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:11.85 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.022s 0:02.56 0.7%      492+1238k 0+0io 0pf+0w
COPY 100000
0.007u 0.022s 0:02.46 0.8%      650+1328k 0+0io 0pf+0w
COPY 100000
0.006u 0.031s 0:04.71 0.6%      692+525k 0+0io 0pf+0w
COPY 100000
0.000u 0.039s 0:29.10 0.1%      526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:36.29 0.0%      538+1164k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:43.77 0.0%      526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 1:01.94 0.0%      538+1164k 0+0io 0pf+0w
COPY 100000
0.007u 0.029s 0:13.99 0.1%      808+2074k 0+0io 0pf+0w
COPY 100000
0.003u 0.005s 0:46.02 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.031s 0:45.58 0.0%      316+836k 0+0io 0pf+0w
COPY 100000
0.000u 0.038s 1:00.39 0.0%      526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:24.38 0.1%      538+1164k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:41.32 0.0%      538+1382k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:46.13 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.005u 0.000s 0:43.15 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:45.59 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 1:54.92 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 2:22.47 0.0%      538+1382k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 1:40.65 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.006u 0.020s 1:43.52 0.0%      650+1328k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 1:43.33 0.0%      538+1164k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 1:47.00 0.0%      526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 2:18.94 0.0%      538+1164k 0+0io 0pf+0w

from that moment all iterations went for more then 1 min and I interrupted test.

DROP INDEX test.ix_t_big;

time copy_data.csh /aaa/app.data 100
COPY 100000
0.000u 0.005s 0:02.42 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.007u 0.029s 0:01.88 1.0%      808+2074k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:01.83 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:01.75 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.005s 0:01.82 0.0%      0+0k 0+0io 0pf+0w
COPY 100000
0.000u 0.037s 0:01.81 1.6%      526+1393k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:01.84 1.6%      538+1164k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:01.86 1.6%      421+1114k 0+0io 0pf+0w
COPY 100000
0.000u 0.036s 0:01.77 1.6%      538+1164k 0+0io 0pf+0w
...
Everything returned back to good perfomance state.

With number of rows > 50 mln all numbers in test with index on bigint column
are multiplied on 20, while without index even on 200 mln rows speed
remains constant (1-2 sec per 100k rows file).

P.S. tried same with 2 columns (bigint and int) - it didn't produced such effect
probably because data volume has critical effect.


On Tue, Aug 2, 2011 at 8:41 PM, Kevin Grittner
<Kevin.Grittner@wicourts.gov> wrote:
> Robert Ayrapetyan <robert.ayrapetyan@comodo.com> wrote:
>
>> So I'm still convinced - this bug relates to FreeBSD 64-bit + UFS
>> + bigint column index
>> (some of these may be superfluous, but I have no resources to
>> check on different platforms with different filesystems).
>
> Linux 64 bit XFS bigint column index only shows a slightly longer
> run time for bigint versus int here.  What timings do you get for
> the insert statements if you run the following in your environment?
>
> create table bi (big bigint not null, medium int not null);
> insert into bi with x(n) as (select generate_series(1, 1000000)
> select n + 5000000000, n from x;
> \timing on
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> create unique index bi_medium on bi (medium);
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> drop index bi_medium;
> create unique index bi_big on bi (big);
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> truncate table bi; insert into bi with x(n) as (select
> generate_series(1, 1000000)) select n + 5000000000, n from x;
> \timing off
> drop table bi;
>
> Here's what I get:
>
> Time: 1629.141 ms
> Time: 1638.060 ms
> Time: 1711.833 ms
>
> Time: 4151.953 ms
> Time: 4602.679 ms
> Time: 5107.259 ms
>
> Time: 4654.060 ms
> Time: 5158.157 ms
> Time: 5101.110 ms
>
> -Kevin
>



--
Ayrapetyan Robert,
Comodo Anti-Malware Data Processing Analysis and Management System (CAMDPAMS)
http://repo-qa.camdpams.odessa.office.comodo.net/mediawiki/index.php

pgsql-performance by date:

Previous
From: Li Jin
Date:
Subject: Re: Performance penalty when using WITH
Next
From: Craig Ringer
Date:
Subject: Re: Parameters for PostgreSQL