From 4106f2563681012b37681a6701ac55a43979e55b Mon Sep 17 00:00:00 2001 From: Greg Nancarrow Date: Fri, 12 Feb 2021 12:58:46 +1100 Subject: [PATCH v17 4/4] Parallel INSERT and/or SELECT for "INSERT INTO ... SELECT ..." - tests and documentation updates. --- doc/src/sgml/parallel.sgml | 83 +++++++++-- src/test/regress/expected/insert_parallel.out | 136 +++++++++--------- src/test/regress/sql/insert_parallel.sql | 24 ++-- 3 files changed, 155 insertions(+), 88 deletions(-) diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml index cec1329e25..039ac18f81 100644 --- a/doc/src/sgml/parallel.sgml +++ b/doc/src/sgml/parallel.sgml @@ -141,14 +141,16 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; - The query writes any data or locks any database rows. If a query - contains a data-modifying operation either at the top level or within - a CTE, no parallel plans for that query will be generated. As an - exception, the commands CREATE TABLE ... AS, SELECT - INTO, and CREATE MATERIALIZED VIEW which create a new - table and populate it can use a parallel plan. Another exception is the command - INSERT INTO ... SELECT ... which can use a parallel plan for - the underlying SELECT part of the query. + The query locks any database rows, or writes data within a CTE or using + a parallel-unsupported data-modifying operation. Currently, the only + parallel-supported data-modifying operations are + INSERT INTO ... SELECT ..., and the table creation + and population commands CREATE TABLE ... AS, + SELECT INTO, and + CREATE MATERIALIZED VIEW. If a query contains a + parallel-unsupported data-modifying operation at the top level, or any + data-modifying operation within a CTE, no parallel plans for that query + will be generated. 
@@ -426,6 +428,71 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; + + Parallel Insert + + + When an INSERT statement uses an underlying + SELECT query to supply the rows to be inserted, a + parallel query plan may be generated for which the work of both data + retrieval and data insertion is divided amongst the workers. In this + case, each worker handles insertion of its portion of the rows retrieved + by the underlying SELECT query. + + + + Parallel INSERT is not supported in all situations. The + operations invoked by the INSERT statement must be + parallel-safe, including those that are invoked as a result of certain + features of the target table. + + + + For example, any of the following will prevent the use of parallel + INSERT in the query plan: + + + + + + A parallel query plan can't be generated for the underlying + SELECT, because, for example, the + SELECT statement uses a parallel-unsafe function. + + + + + The INSERT statement uses an ON CONFLICT DO UPDATE clause. + + + + + The target table is a foreign or temporary table. + + + + + The target table has a foreign key, or has a parallel-unsafe trigger, + index expression, column default expression or check constraint. + + + + + The target table is a partitioned table with a parallel-unsafe partition + key expression or support function. + + + + + + Where the target table is a foreign or temporary table, or the above target + table features are determined to be, at worst, parallel-restricted, rather + than parallel-unsafe, at least a parallel table scan may be used in the + query plan for the INSERT statement. For more information + about parallel safety, see the Parallel Safety section. 
+ + + Parallel Plan Tips diff --git a/src/test/regress/expected/insert_parallel.out b/src/test/regress/expected/insert_parallel.out index 638eafd2d7..a5a309aff1 100644 --- a/src/test/regress/expected/insert_parallel.out +++ b/src/test/regress/expected/insert_parallel.out @@ -77,14 +77,14 @@ create table para_insert_f1 ( ); -- -- Test INSERT with underlying query. --- (should create plan with parallel SELECT, Gather parent node) +-- (should create plan with parallel INSERT+SELECT, Gather parent node) -- explain (costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; QUERY PLAN ---------------------------------------- - Insert on para_insert_p1 - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Insert on para_insert_p1 -> Parallel Seq Scan on tenk1 (4 rows) @@ -120,7 +120,7 @@ select count(*) from (select distinct cmin,xmin from para_insert_p1) as dt; -- -- Test INSERT with ordered underlying query. --- (should create plan with parallel SELECT, GatherMerge parent node) +-- (should create plan with INSERT + parallel SELECT, GatherMerge parent node) -- truncate para_insert_p1 cascade; NOTICE: truncate cascades to table "para_insert_f1" @@ -167,15 +167,15 @@ select count(*) from (select distinct cmin,xmin from para_insert_p1) as dt; -- -- Test INSERT with RETURNING clause. 
--- (should create plan with parallel SELECT, Gather parent node) +-- (should create plan with parallel INSERT+SELECT, Gather parent node) -- create table test_data1(like test_data); explain (costs off) insert into test_data1 select * from test_data where a = 10 returning a as data; QUERY PLAN -------------------------------------------- - Insert on test_data1 - -> Gather - Workers Planned: 3 + Gather + Workers Planned: 3 + -> Insert on test_data1 -> Parallel Seq Scan on test_data Filter: (a = 10) (5 rows) @@ -260,9 +260,9 @@ NOTICE: truncate cascades to table "para_insert_f1" explain (costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1 where unique1 <= 2500; QUERY PLAN ----------------------------------------- - Insert on para_insert_p1 - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Insert on para_insert_p1 -> Parallel Seq Scan on tenk1 Filter: (unique1 <= 2500) (5 rows) @@ -299,9 +299,9 @@ NOTICE: truncate cascades to table "para_insert_f1" explain (costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1 where unique1 <= 2500; QUERY PLAN ----------------------------------------- - Insert on para_insert_p1 - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Insert on para_insert_p1 -> Parallel Seq Scan on tenk1 Filter: (unique1 <= 2500) (5 rows) @@ -341,9 +341,9 @@ create table test_conflict_table(id serial primary key, somedata int); explain (costs off) insert into test_conflict_table(id, somedata) select a, a from test_data; QUERY PLAN -------------------------------------------- - Insert on test_conflict_table - -> Gather - Workers Planned: 3 + Gather + Workers Planned: 3 + -> Insert on test_conflict_table -> Parallel Seq Scan on test_data (4 rows) @@ -391,9 +391,9 @@ NOTICE: truncate cascades to table "para_insert_f1" explain (costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1 where unique1 >= 7500; QUERY PLAN 
------------------------------------------------------ - Insert on para_insert_p1 - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Insert on para_insert_p1 -> Parallel Bitmap Heap Scan on tenk1 Recheck Cond: (unique1 >= 7500) -> Bitmap Index Scan on tenk1_unique1 @@ -426,9 +426,9 @@ create table a_star_data(aa int); explain (costs off) insert into a_star_data select aa from a_star where aa > 10; QUERY PLAN -------------------------------------------------------- - Insert on a_star_data - -> Gather - Workers Planned: 3 + Gather + Workers Planned: 3 + -> Insert on a_star_data -> Parallel Append -> Parallel Seq Scan on d_star a_star_4 Filter: (aa > 10) @@ -462,9 +462,9 @@ NOTICE: truncate cascades to table "para_insert_f1" explain (costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1 where unique1 >= 500; QUERY PLAN -------------------------------------------------------------- - Insert on para_insert_p1 - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Insert on para_insert_p1 -> Parallel Index Scan using tenk1_unique1 on tenk1 Index Cond: (unique1 >= 500) (5 rows) @@ -500,9 +500,9 @@ NOTICE: truncate cascades to table "para_insert_f1" explain (costs off) insert into para_insert_p1 select unique1 from tenk1 where unique1 >= 500; QUERY PLAN ------------------------------------------------------------------- - Insert on para_insert_p1 - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Insert on para_insert_p1 -> Parallel Index Only Scan using tenk1_unique1 on tenk1 Index Cond: (unique1 >= 500) (5 rows) @@ -540,9 +540,9 @@ reset enable_bitmapscan; explain (costs off) insert into names3 select * from names; QUERY PLAN ---------------------------------------- - Insert on names3 - -> Gather - Workers Planned: 3 + Gather + Workers Planned: 3 + -> Insert on names3 -> Parallel Seq Scan on names (4 rows) @@ -614,21 +614,21 @@ select * from names4 order by fullname_parallel_restricted(first_name, 
last_name -- -- Test INSERT with underlying query - and RETURNING (no projection) --- (should create a parallel plan; parallel SELECT) +-- (should create a parallel plan; parallel INSERT+SELECT) -- create table names5 (like names); explain (costs off) insert into names5 select * from names returning *; QUERY PLAN ---------------------------------------- - Insert on names5 - -> Gather - Workers Planned: 3 + Gather + Workers Planned: 3 + -> Insert on names5 -> Parallel Seq Scan on names (4 rows) -- -- Test INSERT with underlying ordered query - and RETURNING (no projection) --- (should create a parallel plan; parallel SELECT) +-- (should create a parallel plan; INSERT + parallel SELECT) -- create table names6 (like names); explain (costs off) insert into names6 select * from names order by last_name returning *; @@ -657,7 +657,7 @@ insert into names6 select * from names order by last_name returning *; -- -- Test INSERT with underlying ordered query - and RETURNING (with projection) --- (should create a parallel plan; parallel SELECT) +-- (should create a parallel plan; INSERT + parallel SELECT) -- create table names7 (like names); explain (costs off) insert into names7 select * from names order by last_name returning last_name || ', ' || first_name as last_name_then_first_name; @@ -710,14 +710,14 @@ insert into temp_names select * from names; -- d: safe default -- -- --- No column defaults, should use parallel SELECT +-- No column defaults, should use parallel INSERT+SELECT -- explain (costs off) insert into testdef(a,b,c,d) select a,a*2,a*4,a*8 from test_data; QUERY PLAN -------------------------------------------- - Insert on testdef - -> Gather - Workers Planned: 3 + Gather + Workers Planned: 3 + -> Insert on testdef -> Parallel Seq Scan on test_data (4 rows) @@ -766,7 +766,7 @@ select * from testdef order by a; truncate testdef; -- --- Parallel restricted column default, should use parallel SELECT +-- Parallel restricted column default, should use INSERT + 
parallel SELECT -- explain (costs off) insert into testdef(a,b,d) select a,a*2,a*8 from test_data; QUERY PLAN @@ -795,14 +795,14 @@ select * from testdef order by a; truncate testdef; -- --- Parallel safe column default, should use parallel SELECT +-- Parallel safe column default, should use parallel INSERT+SELECT -- explain (costs off) insert into testdef(a,b,c) select a,a*2,a*4 from test_data; QUERY PLAN -------------------------------------------- - Insert on testdef - -> Gather - Workers Planned: 3 + Gather + Workers Planned: 3 + -> Insert on testdef -> Parallel Seq Scan on test_data (4 rows) @@ -859,9 +859,9 @@ create table parttable1_2 partition of parttable1 for values from (5000) to (100 explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1; QUERY PLAN ---------------------------------------- - Insert on parttable1 - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Insert on parttable1 -> Parallel Seq Scan on tenk1 (4 rows) @@ -923,9 +923,9 @@ create table table_check_a(a int4 check (check_a(a)), b name); explain (costs off) insert into table_check_a select unique1, stringu1 from tenk1; QUERY PLAN ---------------------------------------- - Insert on table_check_a - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Insert on table_check_a -> Parallel Seq Scan on tenk1 (4 rows) @@ -962,7 +962,7 @@ select count(*), sum(a) from table_check_b; -- -- Test INSERT into table with before+after parallel-safe stmt-level triggers --- (should create a parallel SELECT plan; +-- (should create a parallel INSERT+SELECT plan; -- stmt-level before+after triggers should fire) -- create table names_with_safe_trigger (like names); @@ -983,11 +983,11 @@ create trigger insert_before_trigger_safe before insert on names_with_safe_trigg create trigger insert_after_trigger_safe after insert on names_with_safe_trigger for each statement execute procedure insert_after_trigger_safe(); explain (costs off) insert into 
names_with_safe_trigger select * from names; - QUERY PLAN ----------------------------------------- - Insert on names_with_safe_trigger - -> Gather - Workers Planned: 3 + QUERY PLAN +----------------------------------------- + Gather + Workers Planned: 3 + -> Insert on names_with_safe_trigger -> Parallel Seq Scan on names (4 rows) @@ -1028,7 +1028,7 @@ NOTICE: hello from insert_before_trigger_unsafe NOTICE: hello from insert_after_trigger_unsafe -- -- Test INSERT into table with before+after parallel-restricted stmt-level trigger --- (should create a parallel plan with parallel SELECT; +-- (should create a parallel plan with INSERT + parallel SELECT; -- stmt-level before+after triggers should fire) -- create table names_with_restricted_trigger (like names); @@ -1085,13 +1085,13 @@ create table rp1 partition of rp for values from (minvalue) to (0); create table rp2 partition of rp for values from (0) to (maxvalue); create table foo (a) as select unique1 from tenk1; prepare q as insert into rp select * from foo where a%2 = 0; --- should create a parallel plan +-- should create a plan with parallel INSERT+SELECT explain (costs off) execute q; QUERY PLAN -------------------------------------- - Insert on rp - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Insert on rp -> Parallel Seq Scan on foo Filter: ((a % 2) = 0) (5 rows) @@ -1118,9 +1118,9 @@ insert into insert_toast_table_data select i, rpad('T', 16384, 'ABCDEFGH') from explain (costs off) insert into insert_toast_table select index, data from insert_toast_table_data; QUERY PLAN ---------------------------------------------------------- - Insert on insert_toast_table - -> Gather - Workers Planned: 3 + Gather + Workers Planned: 3 + -> Insert on insert_toast_table -> Parallel Seq Scan on insert_toast_table_data (4 rows) diff --git a/src/test/regress/sql/insert_parallel.sql b/src/test/regress/sql/insert_parallel.sql index cb02ba89e4..9183032c07 100644 --- a/src/test/regress/sql/insert_parallel.sql 
+++ b/src/test/regress/sql/insert_parallel.sql @@ -97,7 +97,7 @@ create table para_insert_f1 ( -- -- Test INSERT with underlying query. --- (should create plan with parallel SELECT, Gather parent node) +-- (should create plan with parallel INSERT+SELECT, Gather parent node) -- explain (costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1; insert into para_insert_p1 select unique1, stringu1 from tenk1; @@ -109,7 +109,7 @@ select count(*) from (select distinct cmin,xmin from para_insert_p1) as dt; -- -- Test INSERT with ordered underlying query. --- (should create plan with parallel SELECT, GatherMerge parent node) +-- (should create plan with INSERT + parallel SELECT, GatherMerge parent node) -- truncate para_insert_p1 cascade; explain (costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1 order by unique1; @@ -122,7 +122,7 @@ select count(*) from (select distinct cmin,xmin from para_insert_p1) as dt; -- -- Test INSERT with RETURNING clause. --- (should create plan with parallel SELECT, Gather parent node) +-- (should create plan with parallel INSERT+SELECT, Gather parent node) -- create table test_data1(like test_data); explain (costs off) insert into test_data1 select * from test_data where a = 10 returning a as data; @@ -266,14 +266,14 @@ select * from names4 order by fullname_parallel_restricted(first_name, last_name -- -- Test INSERT with underlying query - and RETURNING (no projection) --- (should create a parallel plan; parallel SELECT) +-- (should create a parallel plan; parallel INSERT+SELECT) -- create table names5 (like names); explain (costs off) insert into names5 select * from names returning *; -- -- Test INSERT with underlying ordered query - and RETURNING (no projection) --- (should create a parallel plan; parallel SELECT) +-- (should create a parallel plan; INSERT + parallel SELECT) -- create table names6 (like names); explain (costs off) insert into names6 select * from names order by last_name returning *; 
@@ -281,7 +281,7 @@ insert into names6 select * from names order by last_name returning *; -- -- Test INSERT with underlying ordered query - and RETURNING (with projection) --- (should create a parallel plan; parallel SELECT) +-- (should create a parallel plan; INSERT + parallel SELECT) -- create table names7 (like names); explain (costs off) insert into names7 select * from names order by last_name returning last_name || ', ' || first_name as last_name_then_first_name; @@ -307,7 +307,7 @@ insert into temp_names select * from names; -- -- --- No column defaults, should use parallel SELECT +-- No column defaults, should use parallel INSERT+SELECT -- explain (costs off) insert into testdef(a,b,c,d) select a,a*2,a*4,a*8 from test_data; insert into testdef(a,b,c,d) select a,a*2,a*4,a*8 from test_data; @@ -323,7 +323,7 @@ select * from testdef order by a; truncate testdef; -- --- Parallel restricted column default, should use parallel SELECT +-- Parallel restricted column default, should use INSERT + parallel SELECT -- explain (costs off) insert into testdef(a,b,d) select a,a*2,a*8 from test_data; insert into testdef(a,b,d) select a,a*2,a*8 from test_data; @@ -331,7 +331,7 @@ select * from testdef order by a; truncate testdef; -- --- Parallel safe column default, should use parallel SELECT +-- Parallel safe column default, should use parallel INSERT+SELECT -- explain (costs off) insert into testdef(a,b,c) select a,a*2,a*4 from test_data; insert into testdef(a,b,c) select a,a*2,a*4 from test_data; @@ -415,7 +415,7 @@ select count(*), sum(a) from table_check_b; -- -- Test INSERT into table with before+after parallel-safe stmt-level triggers --- (should create a parallel SELECT plan; +-- (should create a parallel INSERT+SELECT plan; -- stmt-level before+after triggers should fire) -- create table names_with_safe_trigger (like names); @@ -465,7 +465,7 @@ insert into names_with_unsafe_trigger select * from names; -- -- Test INSERT into table with before+after 
parallel-restricted stmt-level trigger --- (should create a parallel plan with parallel SELECT; +-- (should create a parallel plan with INSERT + parallel SELECT; -- stmt-level before+after triggers should fire) -- create table names_with_restricted_trigger (like names); @@ -511,7 +511,7 @@ create table rp1 partition of rp for values from (minvalue) to (0); create table rp2 partition of rp for values from (0) to (maxvalue); create table foo (a) as select unique1 from tenk1; prepare q as insert into rp select * from foo where a%2 = 0; --- should create a parallel plan +-- should create a plan with parallel INSERT+SELECT explain (costs off) execute q; create or replace function make_table_bar () returns trigger language -- 2.27.0