Very slow inserts when using postgres_fdw + declarative partitioning - Mailing list pgsql-bugs

From Hardik Bansal
Subject Very slow inserts when using postgres_fdw + declarative partitioning
Date
Msg-id CACnz+Q1q0+2KoJam9LyNMk8JmdC6qYHXWB895Wu2xcpoip18xQ@mail.gmail.com
Whole thread Raw
Responses Re: Very slow inserts when using postgres_fdw + declarative partitioning  (Etsuro Fujita <etsuro.fujita@gmail.com>)
List pgsql-bugs
We have been trying to partition postgresql database on google cloud using the inbuilt Postgresql declarative partitioning and `postgres_fdw` as explained [here][1]. Each machine that we have consists of 4 CPU cores and 8GB RAM with 10GB of disk.

We are running commands to create partition as follow:

Shard 1:

CREATE TABLE message_1 (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
);

CREATE TABLE message_2 (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
);

Shard 2:  

CREATE TABLE message_3 (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
);

CREATE TABLE message_4 (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
);  

Source machine: 

CREATE SERVER shard_1 FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host 'shard_1_ip', dbname 'shard_1_db', port '5432');
CREATE SERVER shard_2 FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host 'shard_2_ip', dbname 'shard_2_db', port '5432');

CREATE USER MAPPING for source_user SERVER shard_1 OPTIONS (user 'shard_1_user', password 'shard_1_user_password');
CREATE USER MAPPING for source_user SERVER shard_2 OPTIONS (user 'shard_2_user', password 'shard_2_user_password');

CREATE TABLE room (
    id SERIAL PRIMARY KEY,
    name character varying(20) NOT NULL,
    created_at timestamp with time zone NOT NULL,
    updated_at timestamp with time zone NOT NULL,
    is_active boolean NOT NULL
);

insert into room (
    name, created_at, updated_at, is_active
)
select
    concat('Room_', floor(random() * 400000 + 1)::int, '_', floor(random() * 400000 + 1)::int),
    i,
    i,
    TRUE
from generate_series('2019-01-01 00:00:00'::timestamp, '2019-4-30 01:00:00', '5 seconds') as s(i);

CREATE TABLE message (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
) PARTITION BY HASH (room_no_id);

CREATE FOREIGN TABLE message_1
    PARTITION OF message
    FOR VALUES WITH (MODULUS 4, REMAINDER 1)
    SERVER shard_1;

CREATE FOREIGN TABLE message_2
    PARTITION OF message
    FOR VALUES WITH (MODULUS 4, REMAINDER 2)
    SERVER shard_1;

CREATE FOREIGN TABLE message_3
    PARTITION OF message
    FOR VALUES WITH (MODULUS 4, REMAINDER 3)
    SERVER shard_2;

CREATE FOREIGN TABLE message_4
    PARTITION OF message
    FOR VALUES WITH (MODULUS 4, REMAINDER 0)
    SERVER shard_2;


Problem:

The problem we are facing is that when we are trying to insert data using following query:

insert into message (
    m_type, content, is_received, is_seen, is_active, created_at, room_no_id, sender_id
)                                
select                                      
    'TEXT',                                                                                    
    CASE WHEN s.i % 2 = 0 THEN 'text 1'
        ELSE 'text 2'
    end,                                        
    TRUE,                      
    TRUE,                      
    TRUE,                        
    dr.created_at + s.i * (interval '1 hour'),
    dr.id,
    CASE WHEN s.i % 2 = 0 THEN split_part(dr.name, '_', 2)::int                                  
        ELSE split_part(dr.name, '_', 3)::int
    end,
from room as dr, generate_series(0, 10) as s(i);


It is taking nearly 1 hour 50 minutes to insert around 20 million entries. When we are not sharding the table, it takes around 8 minutes to perform the same. So, that is basically 14 times slower than without sharding. Are we missing anything here or inserts are that slow in sharding using this method?

Citus seems to be performing better in insert as described in this [video][2], so it seems a little odd to me that sharding will actually degrade the performance by this much. I can understand it migh not have as good performance as citus but why so much low performance.

Thanks in advance!!!


  [1]: https://pgdash.io/blog/postgres-11-sharding.html
  [2]: https://youtu.be/g3H4nGsJsl0

pgsql-bugs by date:

Previous
From: Tom Lane
Date:
Subject: Re: PostgreSQL always uses own versions of *printf(), but does not export them
Next
From: Wilm Hoyer
Date:
Subject: AW: Stranger Character Input Bug