Re: How batch processing works - Mailing list pgsql-general
From | Lok P |
---|---|
Subject | Re: How batch processing works |
Date | |
Msg-id | CAKna9VYNiTpaQkEnRRsN7P2PX2NU6vpYZF376NFvWv8LhzfX3g@mail.gmail.com Whole thread Raw |
In response to | Re: How batch processing works (Ron Johnson <ronljohnsonjr@gmail.com>) |
Responses |
Re: How batch processing works
|
List | pgsql-general |
On Thu, Sep 19, 2024 at 11:31 AM Ron Johnson <ronljohnsonjr@gmail.com> wrote:
[snip]
Method-4
INSERT INTO parent_table VALUES (1, 'a'), (2, 'a');
INSERT INTO child_table VALUES (1,1, 'a'), (1,2, 'a');
commit;If I knew that I had to load a structured input data file (even if it had parent and child records), this is how I'd do it (but probably first try and see if "in-memory COPY INTO" is such a thing).
CREATE TABLE debug_log (
method1 TEXT,
start_time TIMESTAMP,
end_time TIMESTAMP,
elapsed_time INTERVAL
);
CREATE TABLE parent_table (
id SERIAL PRIMARY KEY,
name TEXT
);
CREATE TABLE parent_table2 (
id SERIAL PRIMARY KEY,
name TEXT
);
CREATE TABLE parent_table3 (
id SERIAL PRIMARY KEY,
name TEXT
);
DO $$
DECLARE
num_inserts INTEGER := 100000;
batch_size INTEGER := 50;
start_time TIMESTAMP;
end_time TIMESTAMP;
elapsed_time INTERVAL;
i INTEGER;
BEGIN
-- Method 1: Individual Inserts with Commit after every Row
start_time := clock_timestamp();
FOR i IN 1..num_inserts LOOP
INSERT INTO parent_table VALUES (i, 'a');
COMMIT;
END LOOP;
end_time := clock_timestamp();
elapsed_time := end_time - start_time;
INSERT INTO debug_log (method1, start_time, end_time, elapsed_time)
VALUES ('Method 1: Individual Inserts with Commit after every Row', start_time, end_time, elapsed_time);
-- Method 2: Individual Inserts with Commit after 100 Rows
start_time := clock_timestamp();
FOR i IN 1..num_inserts LOOP
INSERT INTO parent_table2 VALUES (i, 'a');
-- Commit after every 100 rows
IF i % batch_size = 0 THEN
COMMIT;
END IF;
END LOOP;
-- Final commit if not already committed
commit;
end_time := clock_timestamp();
elapsed_time := end_time - start_time;
INSERT INTO debug_log (method1, start_time, end_time, elapsed_time)
VALUES ('Method 2: Individual Inserts with Commit after 100 Rows', start_time, end_time, elapsed_time);
-- Method 3: Batch Inserts with Commit after all
start_time := clock_timestamp();
FOR i IN 1..(num_inserts / batch_size) LOOP
INSERT INTO parent_table3 VALUES
(1 + (i - 1) * batch_size, 'a'),
(2 + (i - 1) * batch_size, 'a'),
(3 + (i - 1) * batch_size, 'a'),
(4 + (i - 1) * batch_size, 'a'),
(5 + (i - 1) * batch_size, 'a'),
(6 + (i - 1) * batch_size, 'a'),
(7 + (i - 1) * batch_size, 'a'),
(8 + (i - 1) * batch_size, 'a'),
(9 + (i - 1) * batch_size, 'a'),
(10 + (i - 1) * batch_size, 'a'),
(11 + (i - 1) * batch_size, 'a'),
(12 + (i - 1) * batch_size, 'a'),
(13 + (i - 1) * batch_size, 'a'),
(14 + (i - 1) * batch_size, 'a'),
(15 + (i - 1) * batch_size, 'a'),
(16 + (i - 1) * batch_size, 'a'),
(17 + (i - 1) * batch_size, 'a'),
(18 + (i - 1) * batch_size, 'a'),
(19 + (i - 1) * batch_size, 'a'),
(20 + (i - 1) * batch_size, 'a'),
(21 + (i - 1) * batch_size, 'a'),
(22 + (i - 1) * batch_size, 'a'),
(23 + (i - 1) * batch_size, 'a'),
(24 + (i - 1) * batch_size, 'a'),
(25 + (i - 1) * batch_size, 'a'),
(26 + (i - 1) * batch_size, 'a'),
(27 + (i - 1) * batch_size, 'a'),
(28 + (i - 1) * batch_size, 'a'),
(29 + (i - 1) * batch_size, 'a'),
(30 + (i - 1) * batch_size, 'a'),
(31 + (i - 1) * batch_size, 'a'),
(32 + (i - 1) * batch_size, 'a'),
(33 + (i - 1) * batch_size, 'a'),
(34 + (i - 1) * batch_size, 'a'),
(35 + (i - 1) * batch_size, 'a'),
(36 + (i - 1) * batch_size, 'a'),
(37 + (i - 1) * batch_size, 'a'),
(38 + (i - 1) * batch_size, 'a'),
(39 + (i - 1) * batch_size, 'a'),
(40 + (i - 1) * batch_size, 'a'),
(41 + (i - 1) * batch_size, 'a'),
(42 + (i - 1) * batch_size, 'a'),
(43 + (i - 1) * batch_size, 'a'),
(44 + (i - 1) * batch_size, 'a'),
(45 + (i - 1) * batch_size, 'a'),
(46 + (i - 1) * batch_size, 'a'),
(47 + (i - 1) * batch_size, 'a'),
(48 + (i - 1) * batch_size, 'a'),
(49 + (i - 1) * batch_size, 'a'),
(50 + (i - 1) * batch_size, 'a'));
COMMIT;
END LOOP;
COMMIT; -- Final commit for all
end_time := clock_timestamp();
elapsed_time := end_time - start_time;
INSERT INTO debug_log (method1, start_time, end_time, elapsed_time)
VALUES ('Method 3: Batch Inserts with Commit after All', start_time, end_time, elapsed_time);
END $$;
pgsql-general by date: