Re: How batch processing works - Mailing list pgsql-general

From Lok P
Subject Re: How batch processing works
Date
Msg-id CAKna9VYNiTpaQkEnRRsN7P2PX2NU6vpYZF376NFvWv8LhzfX3g@mail.gmail.com
Whole thread Raw
In response to Re: How batch processing works  (Ron Johnson <ronljohnsonjr@gmail.com>)
Responses Re: How batch processing works
List pgsql-general


On Thu, Sep 19, 2024 at 11:31 AM Ron Johnson <ronljohnsonjr@gmail.com> wrote:

[snip] 

Method-4

INSERT INTO parent_table VALUES  (1, 'a'), (2, 'a');
INSERT INTO child_table VALUES   (1,1, 'a'), (1,2, 'a');
commit;

If I knew that I had to load a structured input data file (even if it had parent and child records), this is how I'd do it (but probably first try and see if "in-memory COPY INTO" is such a thing).



I was trying to reproduce this behaviour using row by row commit vs just batch commit vs true batch insert as you mentioned, i am not able to see any difference between "batch commit" and "true batch insert" response. Am I missing anything?

CREATE TABLE debug_log (
    method1 TEXT,
    start_time TIMESTAMP,
    end_time TIMESTAMP,
    elapsed_time INTERVAL
);

CREATE TABLE parent_table (
    id SERIAL PRIMARY KEY,
    name TEXT
);


CREATE TABLE parent_table2 (
    id SERIAL PRIMARY KEY,
    name TEXT
);


CREATE TABLE parent_table3 (
    id SERIAL PRIMARY KEY,
    name TEXT
);
DO $$
DECLARE
    num_inserts INTEGER := 100000;
    batch_size INTEGER := 50;
    start_time TIMESTAMP;
    end_time TIMESTAMP;
    elapsed_time INTERVAL;
    i INTEGER;
BEGIN
    -- Method 1: Individual Inserts with Commit after every Row
    start_time := clock_timestamp();

    FOR i IN 1..num_inserts LOOP
        INSERT INTO parent_table VALUES (i, 'a');
        COMMIT;  
    END LOOP;

    end_time := clock_timestamp();
    elapsed_time := end_time - start_time;
    INSERT INTO debug_log (method1, start_time, end_time, elapsed_time)
    VALUES ('Method 1: Individual Inserts with Commit after every Row', start_time, end_time, elapsed_time);

    -- Method 2: Individual Inserts with Commit after 100 Rows
    start_time := clock_timestamp();

    FOR i IN 1..num_inserts LOOP
        INSERT INTO parent_table2 VALUES (i, 'a');
        -- Commit after every 100 rows
        IF i % batch_size = 0 THEN
            COMMIT;  
        END IF;
    END LOOP;

    -- Final commit if not already committed
   commit;

    end_time := clock_timestamp();
    elapsed_time := end_time - start_time;
    INSERT INTO debug_log (method1, start_time, end_time, elapsed_time)
    VALUES ('Method 2: Individual Inserts with Commit after 100 Rows', start_time, end_time, elapsed_time);

    -- Method 3: Batch Inserts with Commit after all
    start_time := clock_timestamp();

    FOR i IN 1..(num_inserts / batch_size) LOOP
        INSERT INTO parent_table3 VALUES
            (1 + (i - 1) * batch_size, 'a'),
            (2 + (i - 1) * batch_size, 'a'),
            (3 + (i - 1) * batch_size, 'a'),
            (4 + (i - 1) * batch_size, 'a'),
            (5 + (i - 1) * batch_size, 'a'),
            (6 + (i - 1) * batch_size, 'a'),
            (7 + (i - 1) * batch_size, 'a'),
            (8 + (i - 1) * batch_size, 'a'),
            (9 + (i - 1) * batch_size, 'a'),
            (10 + (i - 1) * batch_size, 'a'),
            (11 + (i - 1) * batch_size, 'a'),
            (12 + (i - 1) * batch_size, 'a'),
            (13 + (i - 1) * batch_size, 'a'),
            (14 + (i - 1) * batch_size, 'a'),
            (15 + (i - 1) * batch_size, 'a'),
            (16 + (i - 1) * batch_size, 'a'),
            (17 + (i - 1) * batch_size, 'a'),
            (18 + (i - 1) * batch_size, 'a'),
            (19 + (i - 1) * batch_size, 'a'),
            (20 + (i - 1) * batch_size, 'a'),
            (21 + (i - 1) * batch_size, 'a'),
            (22 + (i - 1) * batch_size, 'a'),
            (23 + (i - 1) * batch_size, 'a'),
            (24 + (i - 1) * batch_size, 'a'),
            (25 + (i - 1) * batch_size, 'a'),
            (26 + (i - 1) * batch_size, 'a'),
            (27 + (i - 1) * batch_size, 'a'),
            (28 + (i - 1) * batch_size, 'a'),
            (29 + (i - 1) * batch_size, 'a'),
            (30 + (i - 1) * batch_size, 'a'),
            (31 + (i - 1) * batch_size, 'a'),
            (32 + (i - 1) * batch_size, 'a'),
            (33 + (i - 1) * batch_size, 'a'),
            (34 + (i - 1) * batch_size, 'a'),
            (35 + (i - 1) * batch_size, 'a'),
            (36 + (i - 1) * batch_size, 'a'),
            (37 + (i - 1) * batch_size, 'a'),
            (38 + (i - 1) * batch_size, 'a'),
            (39 + (i - 1) * batch_size, 'a'),
            (40 + (i - 1) * batch_size, 'a'),
            (41 + (i - 1) * batch_size, 'a'),
            (42 + (i - 1) * batch_size, 'a'),
            (43 + (i - 1) * batch_size, 'a'),
            (44 + (i - 1) * batch_size, 'a'),
            (45 + (i - 1) * batch_size, 'a'),
            (46 + (i - 1) * batch_size, 'a'),
            (47 + (i - 1) * batch_size, 'a'),
            (48 + (i - 1) * batch_size, 'a'),
            (49 + (i - 1) * batch_size, 'a'),
            (50 + (i - 1) * batch_size, 'a'));
COMMIT;
    END LOOP;

    COMMIT;  -- Final commit for all
    end_time := clock_timestamp();
    elapsed_time := end_time - start_time;
    INSERT INTO debug_log (method1, start_time, end_time, elapsed_time)
    VALUES ('Method 3: Batch Inserts with Commit after All', start_time, end_time, elapsed_time);

END $$;


 

pgsql-general by date:

Previous
From: veem v
Date:
Subject: Re: IO related waits
Next
From: Ron Johnson
Date:
Subject: Re: How batch processing works