Re: How batch processing works

From: Lok P <loknath(dot)73(at)gmail(dot)com>
To: Ron Johnson <ronljohnsonjr(at)gmail(dot)com>
Cc: pgsql-general <pgsql-general(at)lists(dot)postgresql(dot)org>
Subject: Re: How batch processing works
Date: 2024-09-19 09:24:16
Message-ID: CAKna9VYNiTpaQkEnRRsN7P2PX2NU6vpYZF376NFvWv8LhzfX3g@mail.gmail.com
Lists: pgsql-general

On Thu, Sep 19, 2024 at 11:31 AM Ron Johnson <ronljohnsonjr(at)gmail(dot)com>
wrote:

>
> [snip]
>
>>
>> Method-4
>>
>> INSERT INTO parent_table VALUES (1, 'a'), (2, 'a');
>> INSERT INTO child_table VALUES (1,1, 'a'), (1,2, 'a');
>> commit;
>>
>
> If I knew that I had to load a structured input data file (even if it had
> parent and child records), this is how I'd do it (but probably first try
> and see if "in-memory COPY INTO" is such a thing).
>
>
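
(Side note on the COPY suggestion above: for the sample parent rows in Method-4, I
believe a COPY-based load would look roughly like the sketch below in psql. This is
only a sketch -- it assumes parent_table has the (id, name) columns I use in my test
further down, and I have not timed it here. COPY sends all the rows in one statement,
so there is no per-row round trip.)

COPY parent_table (id, name) FROM STDIN WITH (FORMAT csv);
1,a
2,a
\.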

I was trying to reproduce this behaviour by comparing row-by-row commit vs. batch
commit vs. true batch insert as you mentioned, but I am not able to see any
difference in response time between "batch commit" and "true batch insert". Am I
missing anything?

CREATE TABLE debug_log (
method1 TEXT,
start_time TIMESTAMP,
end_time TIMESTAMP,
elapsed_time INTERVAL
);

CREATE TABLE parent_table (
id SERIAL PRIMARY KEY,
name TEXT
);

CREATE TABLE parent_table2 (
id SERIAL PRIMARY KEY,
name TEXT
);

CREATE TABLE parent_table3 (
id SERIAL PRIMARY KEY,
name TEXT
);

DO $$
DECLARE
num_inserts INTEGER := 100000;
batch_size INTEGER := 50;
start_time TIMESTAMP;
end_time TIMESTAMP;
elapsed_time INTERVAL;
i INTEGER;
BEGIN
-- Method 1: Individual Inserts with Commit after every Row
start_time := clock_timestamp();

FOR i IN 1..num_inserts LOOP
INSERT INTO parent_table VALUES (i, 'a');
COMMIT;
END LOOP;

end_time := clock_timestamp();
elapsed_time := end_time - start_time;
INSERT INTO debug_log (method1, start_time, end_time, elapsed_time)
VALUES ('Method 1: Individual Inserts with Commit after every Row',
start_time, end_time, elapsed_time);

-- Method 2: Individual Inserts with Commit after every 50 Rows
start_time := clock_timestamp();

FOR i IN 1..num_inserts LOOP
INSERT INTO parent_table2 VALUES (i, 'a');
-- Commit after every batch_size (50) rows
IF i % batch_size = 0 THEN
COMMIT;
END IF;
END LOOP;

-- Final commit if not already committed
COMMIT;

end_time := clock_timestamp();
elapsed_time := end_time - start_time;
INSERT INTO debug_log (method1, start_time, end_time, elapsed_time)
VALUES ('Method 2: Individual Inserts with Commit after every 50 Rows',
start_time, end_time, elapsed_time);

-- Method 3: Multi-row Batch Inserts (50 rows per INSERT) with Commit after each batch
start_time := clock_timestamp();

FOR i IN 1..(num_inserts / batch_size) LOOP
INSERT INTO parent_table3 VALUES
(1 + (i - 1) * batch_size, 'a'),
(2 + (i - 1) * batch_size, 'a'),
(3 + (i - 1) * batch_size, 'a'),
(4 + (i - 1) * batch_size, 'a'),
(5 + (i - 1) * batch_size, 'a'),
(6 + (i - 1) * batch_size, 'a'),
(7 + (i - 1) * batch_size, 'a'),
(8 + (i - 1) * batch_size, 'a'),
(9 + (i - 1) * batch_size, 'a'),
(10 + (i - 1) * batch_size, 'a'),
(11 + (i - 1) * batch_size, 'a'),
(12 + (i - 1) * batch_size, 'a'),
(13 + (i - 1) * batch_size, 'a'),
(14 + (i - 1) * batch_size, 'a'),
(15 + (i - 1) * batch_size, 'a'),
(16 + (i - 1) * batch_size, 'a'),
(17 + (i - 1) * batch_size, 'a'),
(18 + (i - 1) * batch_size, 'a'),
(19 + (i - 1) * batch_size, 'a'),
(20 + (i - 1) * batch_size, 'a'),
(21 + (i - 1) * batch_size, 'a'),
(22 + (i - 1) * batch_size, 'a'),
(23 + (i - 1) * batch_size, 'a'),
(24 + (i - 1) * batch_size, 'a'),
(25 + (i - 1) * batch_size, 'a'),
(26 + (i - 1) * batch_size, 'a'),
(27 + (i - 1) * batch_size, 'a'),
(28 + (i - 1) * batch_size, 'a'),
(29 + (i - 1) * batch_size, 'a'),
(30 + (i - 1) * batch_size, 'a'),
(31 + (i - 1) * batch_size, 'a'),
(32 + (i - 1) * batch_size, 'a'),
(33 + (i - 1) * batch_size, 'a'),
(34 + (i - 1) * batch_size, 'a'),
(35 + (i - 1) * batch_size, 'a'),
(36 + (i - 1) * batch_size, 'a'),
(37 + (i - 1) * batch_size, 'a'),
(38 + (i - 1) * batch_size, 'a'),
(39 + (i - 1) * batch_size, 'a'),
(40 + (i - 1) * batch_size, 'a'),
(41 + (i - 1) * batch_size, 'a'),
(42 + (i - 1) * batch_size, 'a'),
(43 + (i - 1) * batch_size, 'a'),
(44 + (i - 1) * batch_size, 'a'),
(45 + (i - 1) * batch_size, 'a'),
(46 + (i - 1) * batch_size, 'a'),
(47 + (i - 1) * batch_size, 'a'),
(48 + (i - 1) * batch_size, 'a'),
(49 + (i - 1) * batch_size, 'a'),
(50 + (i - 1) * batch_size, 'a'));
COMMIT;
END LOOP;

COMMIT; -- Final commit (each batch was already committed inside the loop)
end_time := clock_timestamp();
elapsed_time := end_time - start_time;
INSERT INTO debug_log (method1, start_time, end_time, elapsed_time)
VALUES ('Method 3: Multi-row Batch Inserts with Commit after Each Batch', start_time,
end_time, elapsed_time);

END $$;
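
The three timings can then be compared by reading them back from debug_log, e.g.:

SELECT method1, elapsed_time
FROM debug_log
ORDER BY start_time;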
