From 33ba9f8f52300564a4410ebe753ef10284fc34f0 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sat, 30 Oct 2021 10:46:42 -0700 Subject: [PATCH v2 1/2] Fix parallel VACUUM confusion with smaller indexes. Commit b4af70cb, which simplified state managed by VACUUM, performed minor refactoring of parallel VACUUM in passing. This had an unintended consequence: it made parallel VACUUM neglect to VACUUM indexes that happened to have been deemed too small to be worth vacuuming. This typically happened with partial indexes on tables with several other indexes. There had to be at least three indexes on the table to hit the bug: the smaller index itself, plus two additional indexes that both exceed the min_parallel_index_scan_size cutoff. Cases with just one additional index would not run into trouble, since the parallel VACUUM cost model requires two larger-than-cutoff indexes on the table for parallelism to be used at all. Test case based on tests from a larger patch to test parallel VACUUM by Masahiko Sawada. Many thanks to Kamigishi Rei for her invaluable help with tracking this problem down. Author: Peter Geoghegan Author: Masahiko Sawada Reported-By: Kamigishi Rei Reported-By: Andrew Gierth Diagnosed-By: Andres Freund Bug: #17245 Discussion: https://postgr.es/m/17245-ddf06aaf85735f36@postgresql.org Discussion: https://postgr.es/m/20211030023740.qbnsl2xaoh2grq3d@alap3.anarazel.de Backpatch: 14-, where VACUUM parallel VACUUM was refactored. --- src/include/commands/vacuum.h | 2 +- src/backend/access/heap/vacuumlazy.c | 65 ++++++++++++------- src/test/regress/expected/vacuum_parallel.out | 51 +++++++++++++++ src/test/regress/parallel_schedule | 1 + src/test/regress/sql/vacuum_parallel.sql | 48 ++++++++++++++ 5 files changed, 141 insertions(+), 26 deletions(-) create mode 100644 src/test/regress/expected/vacuum_parallel.out create mode 100644 src/test/regress/sql/vacuum_parallel.sql diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index bf3126aa9..4cfd52eaf 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -40,7 +40,7 @@ /* * bulkdelete can be performed in parallel. This option can be used by - * IndexAm's that need to scan the index to delete the tuples. + * index AMs that need to scan indexes to delete tuples. */ #define VACUUM_OPTION_PARALLEL_BULKDEL (1 << 0) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 05221cc1d..e0fb895e5 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -452,7 +452,7 @@ static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen); static int compute_parallel_vacuum_workers(LVRelState *vacrel, int nrequested, - bool *can_parallel_vacuum); + bool *will_parallel_vacuum); static void update_index_statistics(LVRelState *vacrel); static LVParallelState *begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks, @@ -2636,8 +2636,8 @@ do_parallel_lazy_vacuum_all_indexes(LVRelState *vacrel) vacrel->lps->lvshared->first_time = false; /* - * We can only provide an approximate value of num_heap_tuples in vacuum - * cases. + * We can only provide an approximate value of num_heap_tuples in parallel + * VACUUM, at least until cleanup runs. This matches serial VACUUM. */ vacrel->lps->lvshared->reltuples = vacrel->old_live_tuples; vacrel->lps->lvshared->estimated_count = true; @@ -2825,7 +2825,7 @@ do_parallel_processing(LVRelState *vacrel, LVShared *lvshared) if (idx >= vacrel->nindexes) break; - /* Get the index statistics of this index from DSM */ + /* Get the index statistics space from DSM, if any */ shared_istat = parallel_stats_for_idx(lvshared, idx); /* Skip indexes not participating in parallelism */ @@ -2858,8 +2858,14 @@ do_parallel_processing(LVRelState *vacrel, LVShared *lvshared) } /* - * Vacuum or cleanup indexes that can be processed by only the leader process - * because these indexes don't support parallel operation at that phase. + * Perform parallel processing of indexes in leader process. + * + * Handles index vacuuming (or index cleanup) for indexes that are not + * parallel safe for bulk deletes (or are unsafe for bulk cleanup). + * + * Also performs processing of smaller indexes that fell under the cutoff, + * min_parallel_index_scan_size. This typically happens with a partial index + * on a large table with several other non-partial indexes. */ static void do_serial_processing_for_unsafe_indexes(LVRelState *vacrel, LVShared *lvshared) @@ -2879,17 +2885,15 @@ do_serial_processing_for_unsafe_indexes(LVRelState *vacrel, LVShared *lvshared) IndexBulkDeleteResult *istat; shared_istat = parallel_stats_for_idx(lvshared, idx); - - /* Skip already-complete indexes */ - if (shared_istat != NULL) - continue; - indrel = vacrel->indrels[idx]; /* - * We're only here for the unsafe indexes + * We're only here for the subset of indexes that are either too small + * to be worth vacuuming in parallel workers (and so have no slot for + * statistics in DSM), or indexes that are parallel unsafe. */ - if (parallel_processing_is_safe(indrel, lvshared)) + if (shared_istat != NULL && + parallel_processing_is_safe(indrel, lvshared)) continue; /* Do vacuum or cleanup of the index */ @@ -3730,12 +3734,12 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, * nrequested is the number of parallel workers that user requested. If * nrequested is 0, we compute the parallel degree based on nindexes, that is * the number of indexes that support parallel vacuum. This function also - * sets can_parallel_vacuum to remember indexes that participate in parallel + * sets will_parallel_vacuum to remember indexes that participate in parallel * vacuum. */ static int compute_parallel_vacuum_workers(LVRelState *vacrel, int nrequested, - bool *can_parallel_vacuum) + bool *will_parallel_vacuum) { int nindexes_parallel = 0; int nindexes_parallel_bulkdel = 0; @@ -3761,7 +3765,7 @@ compute_parallel_vacuum_workers(LVRelState *vacrel, int nrequested, RelationGetNumberOfBlocks(indrel) < min_parallel_index_scan_size) continue; - can_parallel_vacuum[idx] = true; + will_parallel_vacuum[idx] = true; if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0) nindexes_parallel_bulkdel++; @@ -3839,7 +3843,7 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks, LVDeadTuples *dead_tuples; BufferUsage *buffer_usage; WalUsage *wal_usage; - bool *can_parallel_vacuum; + bool *will_parallel_vacuum; long maxtuples; Size est_shared; Size est_deadtuples; @@ -3857,15 +3861,15 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks, /* * Compute the number of parallel vacuum workers to launch */ - can_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes); + will_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes); parallel_workers = compute_parallel_vacuum_workers(vacrel, nrequested, - can_parallel_vacuum); + will_parallel_vacuum); /* Can't perform vacuum in parallel */ if (parallel_workers <= 0) { - pfree(can_parallel_vacuum); + pfree(will_parallel_vacuum); return lps; } @@ -3893,7 +3897,7 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks, Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE); /* Skip indexes that don't participate in parallel vacuum */ - if (!can_parallel_vacuum[idx]) + if (!will_parallel_vacuum[idx]) continue; if (indrel->rd_indam->amusemaintenanceworkmem) @@ -3970,7 +3974,7 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks, memset(shared->bitmap, 0x00, BITMAPLEN(nindexes)); for (int idx = 0; idx < nindexes; idx++) { - if (!can_parallel_vacuum[idx]) + if (!will_parallel_vacuum[idx]) continue; /* Set NOT NULL as this index does support parallelism */ @@ -4013,7 +4017,7 @@ begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks, PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery); } - pfree(can_parallel_vacuum); + pfree(will_parallel_vacuum); return lps; } @@ -4043,8 +4047,14 @@ end_parallel_vacuum(LVRelState *vacrel) shared_istat = parallel_stats_for_idx(lps->lvshared, idx); /* - * Skip unused slot. The statistics of this index are already stored - * in local memory. + * The statistics of this index must already be stored in local + * memory. It must have been processed by the leader during a call to + * do_serial_processing_for_unsafe_indexes(). + * + * XXX: Actually, there might be no statistics at all -- index AMs + * sometimes return NULL from amvacuumcleanup. That makes it hard to + * assert that we really called amvacuumcleanup in the leader from + * here. */ if (shared_istat == NULL) continue; @@ -4068,6 +4078,11 @@ end_parallel_vacuum(LVRelState *vacrel) /* * Return shared memory statistics for index at offset 'getidx', if any + * + * Returning NULL indicates that compute_parallel_vacuum_workers() determined + * that the index is totally unsuitable target for all parallel processing up + * front. For example, the index might be < min_parallel_index_scan_size + * cutoff. */ static LVSharedIndStats * parallel_stats_for_idx(LVShared *lvshared, int getidx) diff --git a/src/test/regress/expected/vacuum_parallel.out b/src/test/regress/expected/vacuum_parallel.out new file mode 100644 index 000000000..3b40b824c --- /dev/null +++ b/src/test/regress/expected/vacuum_parallel.out @@ -0,0 +1,51 @@ +SET max_parallel_maintenance_workers TO 4; +SET min_parallel_index_scan_size TO '128kB'; +DROP TABLE IF EXISTS parallel_vacuum_table; +NOTICE: table "parallel_vacuum_table" does not exist, skipping +-- Bug #17245: Make sure that we don't totally fail to VACUUM individual indexes that +-- happen to be below min_parallel_index_scan_size during parallel VACUUM: +CREATE TABLE parallel_vacuum_table (a int) WITH (autovacuum_enabled = off); +INSERT INTO parallel_vacuum_table SELECT i from generate_series(1, 10000) i; +-- Parallel VACUUM will never be used unless there are at least two indexes +-- that exceed min_parallel_index_scan_size. Create two such indexes, and +-- a third index that is smaller than min_parallel_index_scan_size. +CREATE INDEX regular_sized_index ON parallel_vacuum_table(a); +CREATE INDEX typically_sized_index ON parallel_vacuum_table(a); +-- Note: vacuum_in_leader_small_index can apply deduplication, making it ~3x +-- smaller than the other indexes +CREATE INDEX vacuum_in_leader_small_index ON parallel_vacuum_table((1)); +-- Verify (as best we can) that the cost model for parallel VACUUM +-- will make our VACUUM run in parallel, while always leaving it up to the +-- parallel leader to handle the vacuum_in_leader_small_index index: +SELECT EXISTS ( +SELECT 1 +FROM pg_class +WHERE oid = 'vacuum_in_leader_small_index'::regclass AND + pg_relation_size(oid) < + pg_size_bytes(current_setting('min_parallel_index_scan_size')) +) as leader_will_handle_small_index; + leader_will_handle_small_index +-------------------------------- + t +(1 row) + +SELECT count(*) as trigger_vacuum_reliably_indexes +FROM pg_class +WHERE oid in ('regular_sized_index'::regclass, 'typically_sized_index'::regclass) AND + pg_relation_size(oid) >= + pg_size_bytes(current_setting('min_parallel_index_scan_size')); + trigger_vacuum_reliably_indexes +--------------------------------- + 2 +(1 row) + +-- Parallel VACUUM with B-Tree page deletions, ambulkdelete calls: +DELETE FROM parallel_vacuum_table; +VACUUM (PARALLEL 4, INDEX_CLEANUP ON) parallel_vacuum_table; +-- Since vacuum_in_leader_small_index uses deduplication, we expect an +-- assertion failure with bug #17245 (in the absence of bugfix): +INSERT INTO parallel_vacuum_table SELECT i FROM generate_series(1, 10000) i; +RESET max_parallel_maintenance_workers; +RESET min_parallel_index_scan_size; +-- Deliberately don't drop table, to get further coverage from tools like +-- pg_amcheck in some testing scenarios diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 7be89178f..017e962fe 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -96,6 +96,7 @@ test: rules psql psql_crosstab amutils stats_ext collate.linux.utf8 # run by itself so it can run parallel workers test: select_parallel test: write_parallel +test: vacuum_parallel # no relation related tests can be put in this group test: publication subscription diff --git a/src/test/regress/sql/vacuum_parallel.sql b/src/test/regress/sql/vacuum_parallel.sql new file mode 100644 index 000000000..7918455a4 --- /dev/null +++ b/src/test/regress/sql/vacuum_parallel.sql @@ -0,0 +1,48 @@ +SET max_parallel_maintenance_workers TO 4; +SET min_parallel_index_scan_size TO '128kB'; + +DROP TABLE IF EXISTS parallel_vacuum_table; + +-- Bug #17245: Make sure that we don't totally fail to VACUUM individual indexes that +-- happen to be below min_parallel_index_scan_size during parallel VACUUM: +CREATE TABLE parallel_vacuum_table (a int) WITH (autovacuum_enabled = off); +INSERT INTO parallel_vacuum_table SELECT i from generate_series(1, 10000) i; + +-- Parallel VACUUM will never be used unless there are at least two indexes +-- that exceed min_parallel_index_scan_size. Create two such indexes, and +-- a third index that is smaller than min_parallel_index_scan_size. +CREATE INDEX regular_sized_index ON parallel_vacuum_table(a); +CREATE INDEX typically_sized_index ON parallel_vacuum_table(a); +-- Note: vacuum_in_leader_small_index can apply deduplication, making it ~3x +-- smaller than the other indexes +CREATE INDEX vacuum_in_leader_small_index ON parallel_vacuum_table((1)); + +-- Verify (as best we can) that the cost model for parallel VACUUM +-- will make our VACUUM run in parallel, while always leaving it up to the +-- parallel leader to handle the vacuum_in_leader_small_index index: +SELECT EXISTS ( +SELECT 1 +FROM pg_class +WHERE oid = 'vacuum_in_leader_small_index'::regclass AND + pg_relation_size(oid) < + pg_size_bytes(current_setting('min_parallel_index_scan_size')) +) as leader_will_handle_small_index; +SELECT count(*) as trigger_vacuum_reliably_indexes +FROM pg_class +WHERE oid in ('regular_sized_index'::regclass, 'typically_sized_index'::regclass) AND + pg_relation_size(oid) >= + pg_size_bytes(current_setting('min_parallel_index_scan_size')); + +-- Parallel VACUUM with B-Tree page deletions, ambulkdelete calls: +DELETE FROM parallel_vacuum_table; +VACUUM (PARALLEL 4, INDEX_CLEANUP ON) parallel_vacuum_table; + +-- Since vacuum_in_leader_small_index uses deduplication, we expect an +-- assertion failure with bug #17245 (in the absence of bugfix): +INSERT INTO parallel_vacuum_table SELECT i FROM generate_series(1, 10000) i; + +RESET max_parallel_maintenance_workers; +RESET min_parallel_index_scan_size; + +-- Deliberately don't drop table, to get further coverage from tools like +-- pg_amcheck in some testing scenarios -- 2.30.2