From 90c02dd6e38f7c1e6c9cdf5b9725e0a5add5327c Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Fri, 25 Oct 2019 22:47:41 +0900 Subject: [PATCH v33 2/3] Add parallel option to VACUUM command This change adds PARALLEL option to VACUUM command that enable us to perform index vacuuming and index cleanup with background workers. Individual indexes is processed by one vacuum process. Therefore parallel vacuum can be used when the table has at least two indexes and it cannot specify larger parallel degree than the number of indexes that the table has. The parallel degree is either specified by user or determined based on the number of indexes that the table has, and further limited by max_parallel_maintenance_workers. The table size and index size don't affect it. --- doc/src/sgml/config.sgml | 14 +- doc/src/sgml/ref/vacuum.sgml | 45 + src/backend/access/heap/vacuumlazy.c | 1214 +++++++++++++++++++++++-- src/backend/access/transam/parallel.c | 4 + src/backend/commands/vacuum.c | 109 ++- src/backend/postmaster/autovacuum.c | 2 + src/bin/psql/tab-complete.c | 2 +- src/include/access/heapam.h | 6 + src/include/commands/vacuum.h | 5 + src/test/regress/expected/vacuum.out | 26 + src/test/regress/sql/vacuum.sql | 25 + 11 files changed, 1340 insertions(+), 112 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f83770350e..90ac399228 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2289,13 +2289,13 @@ include_dir 'conf.d' Sets the maximum number of parallel workers that can be - started by a single utility command. Currently, the only - parallel utility command that supports the use of parallel - workers is CREATE INDEX, and only when - building a B-tree index. Parallel workers are taken from the - pool of processes established by , limited by . Note that the requested + started by a single utility command. Currently, the parallel + utility commands that support the use of parallel workers are + CREATE INDEX only when building a B-tree index, + and VACUUM without FULL + option. Parallel workers are taken from the pool of processes + established by , limited + by . Note that the requested number of workers may not actually be available at run time. If this occurs, the utility operation will run with fewer workers than expected. The default value is 2. Setting this diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml index f9b0fb8794..ae086b976b 100644 --- a/doc/src/sgml/ref/vacuum.sgml +++ b/doc/src/sgml/ref/vacuum.sgml @@ -34,6 +34,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ boolean ] INDEX_CLEANUP [ boolean ] TRUNCATE [ boolean ] + PARALLEL [ integer ] and table_and_columns is: @@ -223,6 +224,32 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ integer + background workers (for the detail of each vacuum phases, please + refer to ). If the parallel degree + integer is omitted, + then VACUUM decides the number of workers based + on number of indexes that support parallel vacuum operation on the + relation which is further limited by + . Please note + that it is not guaranteed that the number of parallel worker specified + in integer will be used + during execution. It is possible for a vacuum to run with fewer workers + than specified, or even with no workers at all. Only one worker can + be used per index. So parallel workers are launched only when there + are at least 2 indexes in the table. Workers for + vacuum launches before starting each phases and exit at the end of + the phase. These behaviors might change in a future release. This + option can not use with FULL option. + + + + boolean @@ -237,6 +264,18 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ integer + + + Specifies a positive integer value passed to the selected option. + The integer value can + also be omitted, in which case the default value of the selected + option is used. + + + + table_name @@ -316,6 +355,12 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ offset)) +#define IndStatsIsNull(s, i) \ + (!(((LVShared *)(s))->bitmap[(i) >> 3] & (1 << ((i) & 0x07)))) + +/* + * Variables for cost-based vacuum delay for parallel index vacuuming. + * The basic idea of cost-based vacuum delay for parallel index vacuuming + * is to allow all parallel vacuum workers including the leader process + * to have a shared view of cost related parameters (mainly VacuumCostBalance) + * and allow each worker to update it and then based on that decide + * whether it needs to sleep. Besides, we allow any worker to sleep + * only if it has performed the I/O above a certain threshold, which is + * calculated based on the number of active workers (VacuumActiveNWorkers), + * and the overall cost balance is more than VacuumCostLimit set by the + * system. Then we will allow the worker to sleep proportional to the work + * done and reduce the VacuumSharedCostBalance by the amount which is + * consumed by the current worker (VacuumCostBalanceLocal). This can + * avoid letting the workers sleep which has done less or no I/O as compared + * to other workers, and therefore can ensure that workers who are doing + * more I/O got throttled more. + */ +pg_atomic_uint32 *VacuumSharedCostBalance = NULL; +pg_atomic_uint32 *VacuumActiveNWorkers = NULL; +int VacuumCostBalanceLocal = 0; + +/* + * Struct for an index bulk-deletion statistic used for parallel lazy + * vacuum. This is allocated in the DSM segment. IndexBulkDeleteResult + * follows at end of struct. + */ +typedef struct LVSharedIndStats +{ + Size size; + bool updated; /* are the stats updated */ + + /* Index bulk-deletion result data follows at end of struct */ +} LVSharedIndStats; +#define SizeOfSharedIndStats(s) \ + (sizeof(LVSharedIndStats) + ((LVSharedIndStats *)(s))->size) +#define GetIndexBulkDeleteResult(s) \ + ((IndexBulkDeleteResult *)((char *)(s) + sizeof(LVSharedIndStats))) + +/* Struct for parallel lazy vacuum */ +typedef struct LVParallelState +{ + ParallelContext *pcxt; + + /* Shared information among parallel vacuum workers */ + LVShared *lvshared; + + /* + * The number of indexes that do NOT support parallel + * index bulk-deletion and parallel index cleanup respectively. + */ + int nindexes_nonparallel_bulkdel; + int nindexes_nonparallel_cleanup; + + /* + * Always true except for a debugging case where + * PARALLEL_VACUUM_DISABLE_LEADER_PARTICIPATION are defined. + */ + bool leaderparticipates; +} LVParallelState; + typedef struct LVRelStats { /* useindex = true means two-pass strategy; false means one-pass */ @@ -128,11 +321,7 @@ typedef struct LVRelStats BlockNumber pages_removed; double tuples_deleted; BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ - /* List of TIDs of tuples we intend to delete */ - /* NB: this list is ordered by TID address */ - int num_dead_tuples; /* current # of entries */ - int max_dead_tuples; /* # slots allocated in array */ - ItemPointer dead_tuples; /* array of ItemPointerData */ + LVDeadTuples *dead_tuples; int num_index_scans; TransactionId latestRemovedXid; bool lock_waiter_detected; @@ -155,12 +344,11 @@ static void lazy_scan_heap(Relation onerel, VacuumParams *params, bool aggressive); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup); -static void lazy_vacuum_index(Relation indrel, - IndexBulkDeleteResult **stats, - LVRelStats *vacrelstats); +static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, + LVDeadTuples *dead_tuples, double reltuples); static void lazy_cleanup_index(Relation indrel, - IndexBulkDeleteResult *stats, - LVRelStats *vacrelstats); + IndexBulkDeleteResult **stats, + double reltuples, bool estimated_count); static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer); static bool should_attempt_truncation(VacuumParams *params, @@ -169,12 +357,39 @@ static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats); static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks); -static void lazy_record_dead_tuple(LVRelStats *vacrelstats, +static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr); static bool lazy_tid_reaped(ItemPointer itemptr, void *state); static int vac_cmp_itemptr(const void *left, const void *right); static bool heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen); +static LVParallelState *begin_parallel_vacuum(LVRelStats *vacrelstats, Oid relid, + BlockNumber nblocks, Relation *Irel, + int nindexes, int nrequested); +static void end_parallel_vacuum(LVParallelState *lps, Relation *Irel, int nindexes, + IndexBulkDeleteResult **stats); +static void prepare_index_statistics(LVShared *lvshared, Relation *Irel, int nindexes, + int nworkers); +static void lazy_vacuum_indexes(LVRelStats *vacrelstats, Relation *Irel, + int nindexes, IndexBulkDeleteResult **stats, + LVParallelState *lps); +static void lazy_cleanup_indexes(LVRelStats *vacrelstats, Relation *Irel, + int nindexes, IndexBulkDeleteResult **stats, + LVParallelState *lps); +static void lazy_parallel_vacuum_or_cleanup_indexes(LVRelStats *vacrelstats, Relation *Irel, + int nindexes, IndexBulkDeleteResult **stats, + LVParallelState *lps); +static void vacuum_or_cleanup_indexes_worker(Relation *Irel, int nindexes, + IndexBulkDeleteResult **stats, + LVShared *lvshared, + LVDeadTuples *dead_tuples); +static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats, + int nindexes); +static LVSharedIndStats *get_indstats(LVShared *lvshared, int n); +static int compute_parallel_workers(Relation *Irel, int nindexes, int nrequested); +static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex); +static bool skip_parallel_index_vacuum(Relation indrel, bool for_cleanup, + bool first_time); /* @@ -488,6 +703,18 @@ vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats) * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap * to reclaim dead line pointers. * + * If the table has at least two indexes and parallel lazy vacuum is + * requested, we execute both index vacuuming and index cleanup with + * parallel workers. In parallel lazy vacuum, we enter parallel mode and + * then create both the parallel context and the DSM segment before starting + * heap scan so that we can record dead tuples to the DSM segment. All + * parallel workers are launched at beginning of index vacuuming and index + * cleanup and they exit once done with all indexes. At the end of this + * function we exit from parallel mode. Index bulk-deletion results are + * stored in the DSM segment and update index statistics as a whole after + * exited from parallel mode since all writes are not allowed during parallel + * mode. + * * If there are no indexes then we can reclaim line pointers on the fly; * dead line pointers need only be retained until all index pointers that * reference them have been killed. @@ -496,6 +723,8 @@ static void lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive) { + LVParallelState *lps = NULL; + LVDeadTuples *dead_tuples; BlockNumber nblocks, blkno; HeapTupleData tuple; @@ -518,6 +747,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, bool skipping_blocks; xl_heap_freeze_tuple *frozen; StringInfoData buf; + int parallel_workers = 0; const int initprog_index[] = { PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_TOTAL_HEAP_BLKS, @@ -553,13 +783,41 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vacrelstats->nonempty_pages = 0; vacrelstats->latestRemovedXid = InvalidTransactionId; - lazy_space_alloc(vacrelstats, nblocks); + /* + * If parallel lazy vacuum is requested and we vacuum indexes, compute + * the number of parallel vacuum worker to launch. + */ + if (params->nworkers >= 0 && vacrelstats->useindex) + parallel_workers = compute_parallel_workers(Irel, nindexes, + params->nworkers); + + if (parallel_workers > 0) + { + /* + * Enter parallel mode, create the parallel context and allocate the + * DSM segment. + */ + lps = begin_parallel_vacuum(vacrelstats, + RelationGetRelid(onerel), + nblocks, Irel, nindexes, + parallel_workers); + } + else + { + /* + * Use single process vacuum. We allocate the memory space for dead + * tuples locally. + */ + lazy_space_alloc(vacrelstats, nblocks); + } + + dead_tuples = vacrelstats->dead_tuples; frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage); /* Report that we're scanning the heap, advertising total # of blocks */ initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP; initprog_val[1] = nblocks; - initprog_val[2] = vacrelstats->max_dead_tuples; + initprog_val[2] = dead_tuples->max_tuples; pgstat_progress_update_multi_param(3, initprog_index, initprog_val); /* @@ -737,8 +995,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * If we are close to overrunning the available space for dead-tuple * TIDs, pause and do a cycle of vacuuming before we tackle this page. */ - if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage && - vacrelstats->num_dead_tuples > 0) + if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage && + dead_tuples->num_tuples > 0) { const int hvp_index[] = { PROGRESS_VACUUM_PHASE, @@ -766,10 +1024,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, PROGRESS_VACUUM_PHASE_VACUUM_INDEX); /* Remove index entries */ - for (i = 0; i < nindexes; i++) - lazy_vacuum_index(Irel[i], - &indstats[i], - vacrelstats); + lazy_vacuum_indexes(vacrelstats, Irel, nindexes, indstats, lps); /* * Report that we are now vacuuming the heap. We also increase @@ -789,7 +1044,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * not to reset latestRemovedXid since we want that value to be * valid. */ - vacrelstats->num_dead_tuples = 0; + dead_tuples->num_tuples = 0; vacrelstats->num_index_scans++; /* @@ -985,7 +1240,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, has_dead_tuples = false; nfrozen = 0; hastup = false; - prev_dead_count = vacrelstats->num_dead_tuples; + prev_dead_count = dead_tuples->num_tuples; maxoff = PageGetMaxOffsetNumber(page); /* @@ -1024,7 +1279,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, */ if (ItemIdIsDead(itemid)) { - lazy_record_dead_tuple(vacrelstats, &(tuple.t_self)); + lazy_record_dead_tuple(dead_tuples, &(tuple.t_self)); all_visible = false; continue; } @@ -1170,7 +1425,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, if (tupgone) { - lazy_record_dead_tuple(vacrelstats, &(tuple.t_self)); + lazy_record_dead_tuple(dead_tuples, &(tuple.t_self)); HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data, &vacrelstats->latestRemovedXid); tups_vacuumed += 1; @@ -1240,7 +1495,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * doing a second scan. Also we don't do that but forget dead tuples * when index cleanup is disabled. */ - if (!vacrelstats->useindex && vacrelstats->num_dead_tuples > 0) + if (!vacrelstats->useindex && dead_tuples->num_tuples > 0) { if (nindexes == 0) { @@ -1269,7 +1524,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * not to reset latestRemovedXid since we want that value to be * valid. */ - vacrelstats->num_dead_tuples = 0; + dead_tuples->num_tuples = 0; /* * Periodically do incremental FSM vacuuming to make newly-freed @@ -1384,7 +1639,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * page, so remember its free space as-is. (This path will always be * taken if there are no indexes.) */ - if (vacrelstats->num_dead_tuples == prev_dead_count) + if (dead_tuples->num_tuples == prev_dead_count) RecordPageWithFreeSpace(onerel, blkno, freespace); } @@ -1418,7 +1673,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* If any tuples need to be deleted, perform final vacuum cycle */ /* XXX put a threshold on min number of tuples here? */ - if (vacrelstats->num_dead_tuples > 0) + if (dead_tuples->num_tuples > 0) { const int hvp_index[] = { PROGRESS_VACUUM_PHASE, @@ -1434,10 +1689,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, PROGRESS_VACUUM_PHASE_VACUUM_INDEX); /* Remove index entries */ - for (i = 0; i < nindexes; i++) - lazy_vacuum_index(Irel[i], - &indstats[i], - vacrelstats); + lazy_vacuum_indexes(vacrelstats, Irel, nindexes, indstats, lps); /* Report that we are now vacuuming the heap */ hvp_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_HEAP; @@ -1463,12 +1715,19 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_INDEX_CLEANUP); - /* Do post-vacuum cleanup and statistics update for each index */ + /* Do post-vacuum cleanup */ if (vacrelstats->useindex) - { - for (i = 0; i < nindexes; i++) - lazy_cleanup_index(Irel[i], indstats[i], vacrelstats); - } + lazy_cleanup_indexes(vacrelstats, Irel, nindexes, indstats, lps); + + /* + * End parallel mode before updating index statistics as we cannot write + * during parallel mode. + */ + if (ParallelVacuumIsActive(lps)) + end_parallel_vacuum(lps, Irel, nindexes, indstats); + + /* Update index statistics */ + update_index_statistics(Irel, indstats, nindexes); /* If no indexes, make log report that lazy_vacuum_heap would've made */ if (vacuumed_pages) @@ -1534,7 +1793,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) npages = 0; tupindex = 0; - while (tupindex < vacrelstats->num_dead_tuples) + while (tupindex < vacrelstats->dead_tuples->num_tuples) { BlockNumber tblk; Buffer buf; @@ -1543,7 +1802,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) vacuum_delay_point(); - tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]); + tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]); buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, vac_strategy); if (!ConditionalLockBufferForCleanup(buf)) @@ -1591,6 +1850,7 @@ static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer) { + LVDeadTuples *dead_tuples = vacrelstats->dead_tuples; Page page = BufferGetPage(buffer); OffsetNumber unused[MaxOffsetNumber]; int uncnt = 0; @@ -1601,16 +1861,16 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, START_CRIT_SECTION(); - for (; tupindex < vacrelstats->num_dead_tuples; tupindex++) + for (; tupindex < dead_tuples->num_tuples; tupindex++) { BlockNumber tblk; OffsetNumber toff; ItemId itemid; - tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]); + tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]); if (tblk != blkno) break; /* past end of tuples for this block */ - toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]); + toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]); itemid = PageGetItemId(page, toff); ItemIdSetUnused(itemid); unused[uncnt++] = toff; @@ -1731,19 +1991,336 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup) return false; } +/* + * Perform index vacuuming or index cleanup with parallel workers. This function + * must be used by the parallel vacuum leader process. The caller must set + * lps->lvshared->for_cleanup to indicate whether vacuuming or cleanup. + */ +static void +lazy_parallel_vacuum_or_cleanup_indexes(LVRelStats *vacrelstats, Relation *Irel, + int nindexes, IndexBulkDeleteResult **stats, + LVParallelState *lps) +{ + int nindexes_remains; + + Assert(!IsParallelWorker()); + Assert(ParallelVacuumIsActive(lps)); + Assert(nindexes > 0); + + /* Enable shared cost balance */ + VacuumSharedCostBalance = &(lps->lvshared->cost_balance); + VacuumActiveNWorkers = &(lps->lvshared->active_nworkers); + + /* + * Set up shared cost balance and the number of active workers for + * vacuum delay. + */ + pg_atomic_write_u32(VacuumSharedCostBalance, VacuumCostBalance); + pg_atomic_write_u32(VacuumActiveNWorkers, 0); + + /* + * Reset the local value so that we compute cost balance during + * parallel index vacuuming. + */ + VacuumCostBalance = 0; + VacuumCostBalanceLocal = 0; + + /* Launch all workers */ + LaunchParallelWorkers(lps->pcxt); + + if (lps->lvshared->for_cleanup) + ereport(elevel, + (errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)", + "launched %d parallel vacuum workers for index cleanup (planned: %d)", + lps->pcxt->nworkers_launched), + lps->pcxt->nworkers_launched, lps->pcxt->nworkers))); + else + ereport(elevel, + (errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)", + "launched %d parallel vacuum workers for index vacuuming (planned: %d)", + lps->pcxt->nworkers_launched), + lps->pcxt->nworkers_launched, lps->pcxt->nworkers))); + + /* + * Increment the active worker count. We cannot decrement until the + * all parallel workers finish. + */ + pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1); + + /* + * Join as parallel workers. The leader process alone does that in + * case where no workers launched. + */ + if (lps->leaderparticipates || lps->pcxt->nworkers_launched == 0) + vacuum_or_cleanup_indexes_worker(Irel, nindexes, stats, lps->lvshared, + vacrelstats->dead_tuples); + + /* + * Here, the indexes that had been skipped during parallel index vacuuming + * are remaining. If there are such indexes the leader process does vacuum + * or cleanup them one by one. + */ + nindexes_remains = nindexes - pg_atomic_read_u32(&(lps->lvshared->nprocessed)); + if (nindexes_remains > 0) + { + int i; +#ifdef USE_ASSERT_CHECKING + int nprocessed = 0; +#endif + + for (i = 0; i < nindexes; i++) + { + bool processed = !skip_parallel_index_vacuum(Irel[i], + lps->lvshared->for_cleanup, + lps->lvshared->first_time); + + /* Skip the already processed indexes */ + if (processed) + continue; + + if (lps->lvshared->for_cleanup) + lazy_cleanup_index(Irel[i], &stats[i], + vacrelstats->new_rel_tuples, + vacrelstats->tupcount_pages < vacrelstats->rel_pages); + else + lazy_vacuum_index(Irel[i], &stats[i], vacrelstats->dead_tuples, + vacrelstats->old_live_tuples); +#ifdef USE_ASSERT_CHECKING + nprocessed++; +#endif + } +#ifdef USE_ASSERT_CHECKING + Assert(nprocessed == nindexes_remains); +#endif + } + + /* + * We have completed the index vacuum so decrement the active worker + * count. + */ + pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1); + + /* Wait for all vacuum workers to finish */ + WaitForParallelWorkersToFinish(lps->pcxt); + + /* Take over the shared balance value to heap scan */ + VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance); + + /* Disable shared cost balance for vacuum delay */ + VacuumSharedCostBalance = NULL; + VacuumActiveNWorkers = NULL; + + /* + * In cleanup case we don't need to reinitialize the parallel + * context as no more index vacuuming and index cleanup will be + * performed after that. + */ + if (!lps->lvshared->for_cleanup) + { + /* Reset the processing counts */ + pg_atomic_write_u32(&(lps->lvshared->idx), 0); + pg_atomic_write_u32(&(lps->lvshared->nprocessed), 0); + + /* + * Reinitialize the parallel context to relaunch parallel workers + * for the next execution. + */ + ReinitializeParallelDSM(lps->pcxt); + } +} + +/* + * Index vacuuming and index cleanup routine used by parallel vacuum + * worker processes including the leader process. After finished each + * indexes this function copies the index statistics returned from + * ambulkdelete and amvacuumcleanup to the DSM segment. + */ +static void +vacuum_or_cleanup_indexes_worker(Relation *Irel, int nindexes, + IndexBulkDeleteResult **stats, + LVShared *lvshared, + LVDeadTuples *dead_tuples) +{ + /* Loop until all indexes are vacuumed */ + for (;;) + { + int idx; + LVSharedIndStats *shared_indstats; + IndexBulkDeleteResult *bulkdelete_res; + + /* Get an index number to process */ + idx = pg_atomic_fetch_add_u32(&(lvshared->idx), 1); + + /* Done for all indexes? */ + if (idx >= nindexes) + break; + + /* + * Skip if this index doesn't support parallel execution + * at this time. + */ + if (skip_parallel_index_vacuum(Irel[idx], lvshared->for_cleanup, + lvshared->first_time)) + continue; + + /* Get index statistics struct of this index */ + shared_indstats = get_indstats(lvshared, idx); + + /* Skip if this index doesn't support parallel index vacuuming */ + if (shared_indstats == NULL) + continue; + + /* Get the space for IndexBulkDeleteResult */ + bulkdelete_res = GetIndexBulkDeleteResult(shared_indstats); + + /* + * Update the pointer to the corresponding bulk-deletion result + * if someone has already updated it. + */ + if (shared_indstats->updated && stats[idx] == NULL) + stats[idx] = bulkdelete_res; + + /* Increment the processing count */ + pg_atomic_add_fetch_u32(&(lvshared->nprocessed), 1); + + /* Do vacuum or cleanup one index */ + if (lvshared->for_cleanup) + lazy_cleanup_index(Irel[idx], &(stats[idx]), lvshared->reltuples, + lvshared->estimated_count); + else + lazy_vacuum_index(Irel[idx], &(stats[idx]), dead_tuples, + lvshared->reltuples); + + /* + * Copy the index bulk-deletion result returned from ambulkdelete + * and amvacuumcleanup to the DSM segment if it's the first time to + * get it from them, because they allocate it locally and it's + * possible that an index will be vacuumed by the different vacuum + * process at the next time. The copying the result normally + * happens only after the first time of index vacuuming. From the + * second time, we pass the result on the DSM segment so that they + * then update it directly. + * + * Since all vacuum workers write the bulk-deletion result at + * different slots we can write them without locking. + */ + if (!shared_indstats->updated && stats[idx] != NULL) + { + memcpy(bulkdelete_res, stats[idx], shared_indstats->size); + shared_indstats->updated = true; + + /* + * no longer need the locally allocated result and now + * stats[idx] points to the DSM segment. + */ + pfree(stats[idx]); + stats[idx] = bulkdelete_res; + } + } +} + +/* + * Cleanup indexes. This function must be used by the parallel vacuum + * leader process in parallel vacuum case. + */ +static void +lazy_cleanup_indexes(LVRelStats *vacrelstats, Relation *Irel, + int nindexes, IndexBulkDeleteResult **stats, + LVParallelState *lps) +{ + int idx; + + Assert(!IsParallelWorker()); + Assert(nindexes > 0); + + /* + * If parallel vacuum is active we perform index cleanup with parallel + * workers. + */ + if (ParallelVacuumIsActive(lps)) + { + /* Tell parallel workers to do index cleanup */ + lps->lvshared->for_cleanup = true; + lps->lvshared->first_time = + (vacrelstats->num_index_scans == 0); + + /* + * Now we can provide a better estimate of total number of + * surviving tuples (we assume indexes are more interested in that + * than in the number of nominally live tuples). + */ + lps->lvshared->reltuples = vacrelstats->new_rel_tuples; + lps->lvshared->estimated_count = + (vacrelstats->tupcount_pages < vacrelstats->rel_pages); + + lazy_parallel_vacuum_or_cleanup_indexes(vacrelstats, Irel, nindexes, + stats, lps); + } + else + { + for (idx = 0; idx < nindexes; idx++) + lazy_cleanup_index(Irel[idx], &stats[idx], + vacrelstats->new_rel_tuples, + vacrelstats->tupcount_pages < vacrelstats->rel_pages); + } +} + +/* + * Vacuum indexes. This function must be used by the parallel vacuum leader + * process in parallel vacuum case. + */ +static void +lazy_vacuum_indexes(LVRelStats *vacrelstats, Relation *Irel, + int nindexes, IndexBulkDeleteResult **stats, + LVParallelState *lps) +{ + int idx; + + Assert(!IsParallelWorker()); + Assert(nindexes > 0); + + /* + * If parallel vacuum is active we perform index vacuuming with + * parallel workers. + */ + if (ParallelVacuumIsActive(lps)) + { + /* Tell parallel workers to do index vacuuming */ + lps->lvshared->for_cleanup = false; + lps->lvshared->first_time = false; + + /* + * We can only provide an approximate value of num_heap_tuples in + * vacuum cases. + */ + lps->lvshared->reltuples = vacrelstats->old_live_tuples; + lps->lvshared->estimated_count = true; + + lazy_parallel_vacuum_or_cleanup_indexes(vacrelstats, Irel, nindexes, + stats, lps); + } + else + { + for (idx = 0; idx < nindexes; idx++) + lazy_vacuum_index(Irel[idx], &stats[idx], vacrelstats->dead_tuples, + vacrelstats->old_live_tuples); + } +} /* * lazy_vacuum_index() -- vacuum one index relation. * * Delete all the index entries pointing to tuples listed in * vacrelstats->dead_tuples, and update running statistics. + * reltuples is the number of heap tuples to be passed to the + * bulk delete callback. */ static void -lazy_vacuum_index(Relation indrel, - IndexBulkDeleteResult **stats, - LVRelStats *vacrelstats) +lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, + LVDeadTuples *dead_tuples, double reltuples) { IndexVacuumInfo ivinfo; + char *msgfmt; PGRUsage ru0; pg_rusage_init(&ru0); @@ -1753,30 +2330,38 @@ lazy_vacuum_index(Relation indrel, ivinfo.report_progress = false; ivinfo.estimated_count = true; ivinfo.message_level = elevel; - /* We can only provide an approximate value of num_heap_tuples here */ - ivinfo.num_heap_tuples = vacrelstats->old_live_tuples; + ivinfo.num_heap_tuples = reltuples; ivinfo.strategy = vac_strategy; /* Do bulk deletion */ *stats = index_bulk_delete(&ivinfo, *stats, - lazy_tid_reaped, (void *) vacrelstats); + lazy_tid_reaped, (void *) dead_tuples); + + if (IsParallelWorker()) + msgfmt = gettext_noop("scanned index \"%s\" to remove %d row versions by parallel vacuum worker"); + else + msgfmt = gettext_noop("scanned index \"%s\" to remove %d row versions"); ereport(elevel, - (errmsg("scanned index \"%s\" to remove %d row versions", + (errmsg(msgfmt, RelationGetRelationName(indrel), - vacrelstats->num_dead_tuples), + dead_tuples->num_tuples), errdetail_internal("%s", pg_rusage_show(&ru0)))); } /* * lazy_cleanup_index() -- do post-vacuum cleanup for one index relation. + * + * reltuples is the number of heap tuples and estimated_count is true + * if the reltuples is an estimated value. */ static void lazy_cleanup_index(Relation indrel, - IndexBulkDeleteResult *stats, - LVRelStats *vacrelstats) + IndexBulkDeleteResult **stats, + double reltuples, bool estimated_count) { IndexVacuumInfo ivinfo; + char *msgfmt; PGRUsage ru0; pg_rusage_init(&ru0); @@ -1784,49 +2369,62 @@ lazy_cleanup_index(Relation indrel, ivinfo.index = indrel; ivinfo.analyze_only = false; ivinfo.report_progress = false; - ivinfo.estimated_count = (vacrelstats->tupcount_pages < vacrelstats->rel_pages); + ivinfo.estimated_count = estimated_count; ivinfo.message_level = elevel; - /* - * Now we can provide a better estimate of total number of surviving - * tuples (we assume indexes are more interested in that than in the - * number of nominally live tuples). - */ - ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples; + ivinfo.num_heap_tuples = reltuples; ivinfo.strategy = vac_strategy; - stats = index_vacuum_cleanup(&ivinfo, stats); + *stats = index_vacuum_cleanup(&ivinfo, *stats); - if (!stats) + if (!(*stats)) return; - /* - * Now update statistics in pg_class, but only if the index says the count - * is accurate. - */ - if (!stats->estimated_count) - vac_update_relstats(indrel, - stats->num_pages, - stats->num_index_tuples, - 0, - false, - InvalidTransactionId, - InvalidMultiXactId, - false); + if (IsParallelWorker()) + msgfmt = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages, reported by parallel vacuum worker"); + else + msgfmt = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages"); ereport(elevel, - (errmsg("index \"%s\" now contains %.0f row versions in %u pages", + (errmsg(msgfmt, RelationGetRelationName(indrel), - stats->num_index_tuples, - stats->num_pages), + (*stats)->num_index_tuples, + (*stats)->num_pages), errdetail("%.0f index row versions were removed.\n" "%u index pages have been deleted, %u are currently reusable.\n" "%s.", - stats->tuples_removed, - stats->pages_deleted, stats->pages_free, + (*stats)->tuples_removed, + (*stats)->pages_deleted, (*stats)->pages_free, pg_rusage_show(&ru0)))); +} + +/* + * Update index statistics in pg_class if the statistics is accurate. + */ +static void +update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats, + int nindexes) +{ + int i; - pfree(stats); + Assert(!IsInParallelMode()); + + for (i = 0; i < nindexes; i++) + { + if (stats[i] == NULL || stats[i]->estimated_count) + continue; + + /* Update index statistics */ + vac_update_relstats(Irel[i], + stats[i]->num_pages, + stats[i]->num_index_tuples, + 0, + false, + InvalidTransactionId, + InvalidMultiXactId, + false); + pfree(stats[i]); + } } /* @@ -2134,19 +2732,17 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) } /* - * lazy_space_alloc - space allocation decisions for lazy vacuum - * - * See the comments at the head of this file for rationale. + * Return the maximum number of dead tuples we can record. */ -static void -lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) +static long +compute_max_dead_tuples(BlockNumber relblocks, bool useindex) { long maxtuples; int vac_work_mem = IsAutoVacuumWorkerProcess() && autovacuum_work_mem != -1 ? autovacuum_work_mem : maintenance_work_mem; - if (vacrelstats->useindex) + if (useindex) { maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData); maxtuples = Min(maxtuples, INT_MAX); @@ -2160,34 +2756,49 @@ lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) maxtuples = Max(maxtuples, MaxHeapTuplesPerPage); } else - { maxtuples = MaxHeapTuplesPerPage; - } - vacrelstats->num_dead_tuples = 0; - vacrelstats->max_dead_tuples = (int) maxtuples; - vacrelstats->dead_tuples = (ItemPointer) - palloc(maxtuples * sizeof(ItemPointerData)); + return maxtuples; +} + +/* + * lazy_space_alloc - space allocation decisions for lazy vacuum + * + * See the comments at the head of this file for rationale. + */ +static void +lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) +{ + LVDeadTuples *dead_tuples = NULL; + long maxtuples; + + maxtuples = compute_max_dead_tuples(relblocks, vacrelstats->useindex); + + dead_tuples = (LVDeadTuples *) + palloc(SizeOfLVDeadTuples + maxtuples * sizeof(ItemPointerData)); + dead_tuples->num_tuples = 0; + dead_tuples->max_tuples = (int) maxtuples; + + vacrelstats->dead_tuples = dead_tuples; } /* * lazy_record_dead_tuple - remember one deletable tuple */ static void -lazy_record_dead_tuple(LVRelStats *vacrelstats, - ItemPointer itemptr) +lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr) { /* * The array shouldn't overflow under normal behavior, but perhaps it * could if we are given a really small maintenance_work_mem. In that * case, just forget the last few tuples (we'll get 'em next time). */ - if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples) + if (dead_tuples->num_tuples < dead_tuples->max_tuples) { - vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr; - vacrelstats->num_dead_tuples++; + dead_tuples->itemptrs[dead_tuples->num_tuples] = *itemptr; + dead_tuples->num_tuples++; pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES, - vacrelstats->num_dead_tuples); + dead_tuples->num_tuples); } } @@ -2201,12 +2812,12 @@ lazy_record_dead_tuple(LVRelStats *vacrelstats, static bool lazy_tid_reaped(ItemPointer itemptr, void *state) { - LVRelStats *vacrelstats = (LVRelStats *) state; + LVDeadTuples *dead_tuples = (LVDeadTuples *) state; ItemPointer res; res = (ItemPointer) bsearch((void *) itemptr, - (void *) vacrelstats->dead_tuples, - vacrelstats->num_dead_tuples, + (void *) dead_tuples->itemptrs, + dead_tuples->num_tuples, sizeof(ItemPointerData), vac_cmp_itemptr); @@ -2354,3 +2965,406 @@ heap_page_is_all_visible(Relation rel, Buffer buf, return all_visible; } + +/* + * Compute the number of parallel worker processes to request. Both index + * vacuuming and index cleanup can be executed together with parallel workers. + * The relation sizes of table and indexes don't affect to the parallel + * degree for now. nrequested is the number of parallel workers that user + * requested. If nrequested is 0 we compute the parallel degree based on + * nindexes that is the number of indexes that support parallel index + * vacuuming. + */ +static int +compute_parallel_workers(Relation *Irel, int nindexes, int nrequested) +{ + bool leaderparticipates = true; + int nindexes_to_vacuum = 0; + int parallel_workers; + int i; + + Assert(nrequested >= 0); + + /* Return immediately when parallelism disabled */ + if (max_parallel_maintenance_workers == 0) + return 0; + + /* + * Compute the number of indexes that can participate to parallel index + * vacuuming. + */ + for (i = 0; i < nindexes; i++) + { + if (Irel[i]->rd_indam->amparallelvacuumoptions != + VACUUM_OPTION_NO_PARALLEL) + nindexes_to_vacuum++; + } + + /* No index supports parallel index vacuuming */ + if (nindexes_to_vacuum == 0) + return 0; + +#ifdef PARALLEL_VACUUM_DISABLE_LEADER_PARTICIPATION + leaderparticipates = false; +#endif + + /* The leader process takes one index */ + if (leaderparticipates) + nindexes_to_vacuum--; + + /* Compute the parallel degree */ + parallel_workers = (nrequested > 0) ? + Min(nrequested, nindexes_to_vacuum) : nindexes_to_vacuum; + + /* cap by max_parallel_maintenace_workers */ + parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers); + + return parallel_workers; +} + +/* + * Enter parallel mode, allocate and initialize the DSM segment. + */ +static LVParallelState * +begin_parallel_vacuum(LVRelStats *vacrelstats, Oid relid, BlockNumber nblocks, + Relation *Irel, int nindexes, int nrequested) +{ + LVParallelState *lps = (LVParallelState *) palloc0(sizeof(LVParallelState)); + ParallelContext *pcxt; + LVShared *shared; + LVDeadTuples *dead_tuples; + long maxtuples; + char *sharedquery; + Size est_shared; + Size est_deadtuples; + int querylen; + int i; + + Assert(nrequested > 0); + Assert(nindexes > 0); + + lps->leaderparticipates = true; + +#ifdef PARALLEL_VACUUM_DISABLE_LEADER_PARTICIPATION + lps->leaderparticipates = false; +#endif + + EnterParallelMode(); + pcxt = CreateParallelContext("postgres", "heap_parallel_vacuum_main", + nrequested); + lps->pcxt = pcxt; + Assert(pcxt->nworkers > 0); + + /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */ + est_shared = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes))); + for (i = 0; i < nindexes; i++) + { + uint8 vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions; + + /* + * Cleanup option should be either disabled, always performing + * in parallel or conditionally performing in parallel. + */ + Assert(!((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) && + (vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP))); + Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE); + + if (vacoptions != VACUUM_OPTION_NO_PARALLEL) + { + est_shared = add_size(est_shared, + add_size(sizeof(LVSharedIndStats), + index_parallelvacuum_estimate(Irel[i]))); + + /* + * Remember the number of indexes that don't support parallel + * bulk-deletion and parallel cleanup respectively. + */ + if (!VACUUM_OPTION_SUPPORT_PARALLEL_BULKDEL(vacoptions)) + lps->nindexes_nonparallel_bulkdel++; + if (!VACUUM_OPTION_SUPPORT_PARALLEL_CLEANUP(vacoptions)) + lps->nindexes_nonparallel_cleanup++; + } + } + shm_toc_estimate_chunk(&pcxt->estimator, est_shared); + shm_toc_estimate_keys(&pcxt->estimator, 1); + + /* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */ + maxtuples = compute_max_dead_tuples(nblocks, true); + est_deadtuples = MAXALIGN(add_size(SizeOfLVDeadTuples, + mul_size(sizeof(ItemPointerData), maxtuples))); + shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples); + shm_toc_estimate_keys(&pcxt->estimator, 1); + + /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */ + querylen = strlen(debug_query_string); + shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1); + shm_toc_estimate_keys(&pcxt->estimator, 1); + + InitializeParallelDSM(pcxt); + + /* Prepare shared information */ + shared = (LVShared *) shm_toc_allocate(pcxt->toc, est_shared); + MemSet(shared, 0, est_shared); + shared->relid = relid; + shared->elevel = elevel; + shared->offset = add_size(SizeOfLVShared, BITMAPLEN(nindexes)); + prepare_index_statistics(shared, Irel, nindexes, nrequested); + pg_atomic_init_u32(&(shared->idx), 0); + pg_atomic_init_u32(&(shared->nprocessed), 0); + pg_atomic_init_u32(&(shared->cost_balance), 0); + pg_atomic_init_u32(&(shared->active_nworkers), 0); + + shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared); + lps->lvshared = shared; + + /* Prepare the dead tuple space */ + dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples); + dead_tuples->max_tuples = maxtuples; + dead_tuples->num_tuples = 0; + MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples); + shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples); + vacrelstats->dead_tuples = dead_tuples; + + /* Store query string for workers */ + sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1); + memcpy(sharedquery, debug_query_string, querylen + 1); + sharedquery[querylen] = '\0'; + shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery); + + return lps; +} + +/* + * Initialize variables for shared index statistics, set NULL bitmap and + * the struct size of each indexes. Also this function sets the number of + * indexes that do not support parallel index vacuuming and that use + * maintenance_work_mem. Since currently we don't support parallel vacuum + * for autovacuum we don't need to care about autovacuum_work_mem. + */ +static void +prepare_index_statistics(LVShared *lvshared, Relation *Irel, int nindexes, + int nworkers) +{ + char *p = (char *) GetSharedIndStats(lvshared); + int nindexes_mwm = 0; + int i; + + Assert(!IsAutoVacuumWorkerProcess()); + + for (i = 0; i < nindexes; i++) + { + LVSharedIndStats *indstats; + + if (Irel[i]->rd_indam->amparallelvacuumoptions == + VACUUM_OPTION_NO_PARALLEL) + { + /* Set NULL as this index does not support parallel vacuum */ + lvshared->bitmap[i >> 3] |= 0 << (i & 0x07); + continue; + } + + if (Irel[i]->rd_indam->amusemaintenanceworkmem) + nindexes_mwm++; + + /* Set the size for index statistics */ + indstats = (LVSharedIndStats *) p; + lvshared->bitmap[i >> 3] |= 1 << (i & 0x07); + indstats->size = index_parallelvacuum_estimate(Irel[i]); + + p += SizeOfSharedIndStats(indstats); + } + + /* Compute the new maitenance_work_mem value for index vacuuming */ + lvshared->maintenance_work_mem_worker = + (nindexes_mwm > 0) ? + maintenance_work_mem / Min(nworkers, nindexes_mwm) : + maintenance_work_mem; +} + +/* + * Destroy the parallel context, and end parallel mode. + * + * All writes are not allowed during parallel mode and it might not be + * safe to exit from the parallel mode while keeping the parallel context. + * So we copy the updated index statistics to a local memory and then later + * use that to update the index statistics. + */ +static void +end_parallel_vacuum(LVParallelState *lps, Relation *Irel, int nindexes, + IndexBulkDeleteResult **stats) +{ + int i; + + Assert(!IsParallelWorker()); + + /* copy the updated statistics */ + for (i = 0; i < nindexes; i++) + { + LVSharedIndStats *indstats = get_indstats(lps->lvshared, i); + + /* + * Skip unused slot. The statistics of this index are already + * stored in local memory. + */ + if (indstats == NULL) + continue; + + if (indstats->updated) + { + stats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + memcpy(stats[i], + GetIndexBulkDeleteResult(indstats), + sizeof(IndexBulkDeleteResult)); + } + else + stats[i] = NULL; + } + + DestroyParallelContext(lps->pcxt); + ExitParallelMode(); + + /* Deactivate parallel vacuum */ + pfree(lps); + lps = NULL; +} + +/* Return the Nth index statistics or NULL */ +static LVSharedIndStats * +get_indstats(LVShared *lvshared, int n) +{ + int i; + char *p; + + if (IndStatsIsNull(lvshared, n)) + return NULL; + + p = (char *) GetSharedIndStats(lvshared); + for (i = 0; i < (n - 1); i++) + { + if (IndStatsIsNull(lvshared, i)) + continue; + + p += SizeOfSharedIndStats(p); + } + + return (LVSharedIndStats *) p; +} + +/* + * Check if the given index participates parallel index vacuuming + * or parallel index cleanup. for_cleanup indicates whether index + * cleanup or index bulk-deletion. first_time is true if bulk-deletion + * is not performed yet. Return true if the index is skipped. + */ +static bool +skip_parallel_index_vacuum(Relation indrel, bool for_cleanup, + bool first_time) +{ + uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions; + + /* first_time must be true only if for_cleanup is true */ + Assert(for_cleanup || !first_time); + + if (for_cleanup) + { + /* Skip if the index does not support parallel cleanup */ + if (!VACUUM_OPTION_SUPPORT_PARALLEL_CLEANUP(vacoptions)) + return true; + + /* + * Skip if the index support to parallel cleanup only first + * time cleanup but it is not the first time. + */ + if (!first_time && + ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP)) != 0) + return true; + } + else if (!VACUUM_OPTION_SUPPORT_PARALLEL_BULKDEL(vacoptions)) + { + /* Skip if the index does not support parallel bulk deletion */ + return true; + } + + return false; +} + +/* + * Perform work within a launched parallel process. + * + * Since parallel vacuum workers work only within index vacuuming and index + * cleanup, no need to report the progress information. + */ +void +heap_parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) +{ + Relation onerel; + Relation *indrels; + LVShared *lvshared; + LVDeadTuples *dead_tuples; + int nindexes; + char *sharedquery; + IndexBulkDeleteResult **stats; + + lvshared = (LVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED, + false); + elevel = lvshared->elevel; + + ereport(DEBUG1, + (errmsg("starting parallel lazy vacuum worker for %s", + lvshared->for_cleanup ? "cleanup" : "vacuuming"))); + + /* Set debug_query_string for individual workers */ + sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, false); + debug_query_string = sharedquery; + pgstat_report_activity(STATE_RUNNING, debug_query_string); + + /* + * Open table. The lock mode is the same as the leader process. It's + * okay because The lockmode does not conflict among the parallel + * workers. + */ + onerel = table_open(lvshared->relid, ShareUpdateExclusiveLock); + + /* + * Open all indexes. indrels are sorted in order by OID, which should + * be matched to the leader's one. + */ + vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &indrels); + Assert(nindexes > 0); + + /* Set dead tuple space */ + dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc, + PARALLEL_VACUUM_KEY_DEAD_TUPLES, + false); + + /* Set cost-based vacuum delay */ + VacuumCostActive = (VacuumCostDelay > 0); + VacuumCostBalance = 0; + VacuumPageHit = 0; + VacuumPageMiss = 0; + VacuumPageDirty = 0; + VacuumSharedCostBalance = &(lvshared->cost_balance); + VacuumActiveNWorkers = &(lvshared->active_nworkers); + + stats = (IndexBulkDeleteResult **) + palloc0(nindexes * sizeof(IndexBulkDeleteResult *)); + + if (lvshared->maintenance_work_mem_worker > 0) + maintenance_work_mem = lvshared->maintenance_work_mem_worker; + + /* Increment the active worker count. */ + pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1); + + /* Do either vacuuming indexes or cleaning indexes */ + vacuum_or_cleanup_indexes_worker(indrels, nindexes, stats, lvshared, + dead_tuples); + + /* + * We have completed the index vacuum so decrement the active worker + * count. + */ + pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1); + + vac_close_indexes(nindexes, indrels, RowExclusiveLock); + table_close(onerel, ShareUpdateExclusiveLock); +} diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c index d147236429..b42cbd58d8 100644 --- a/src/backend/access/transam/parallel.c +++ b/src/backend/access/transam/parallel.c @@ -14,6 +14,7 @@ #include "postgres.h" +#include "access/heapam.h" #include "access/nbtree.h" #include "access/parallel.h" #include "access/session.h" @@ -139,6 +140,9 @@ static const struct }, { "_bt_parallel_build_main", _bt_parallel_build_main + }, + { + "heap_parallel_vacuum_main", heap_parallel_vacuum_main } }; diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index da1da23400..4b7f480fd6 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -42,6 +42,7 @@ #include "nodes/makefuncs.h" #include "pgstat.h" #include "postmaster/autovacuum.h" +#include "postmaster/bgworker_internals.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "storage/proc.h" @@ -99,6 +100,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) /* Set default value */ params.index_cleanup = VACOPT_TERNARY_DEFAULT; params.truncate = VACOPT_TERNARY_DEFAULT; + params.nworkers = -1; /* Parse options list */ foreach(lc, vacstmt->options) @@ -129,6 +131,28 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) params.index_cleanup = get_vacopt_ternary_value(opt); else if (strcmp(opt->defname, "truncate") == 0) params.truncate = get_vacopt_ternary_value(opt); + else if (strcmp(opt->defname, "parallel") == 0) + { + if (opt->arg == NULL) + { + /* + * Parallel lazy vacuum is requested but user didn't specify + * the parallel degree. The parallel degree will be determined + * at the start of lazy vacuum. + */ + params.nworkers = 0; + } + else + { + params.nworkers = defGetInt32(opt); + if (params.nworkers < 1 || params.nworkers > MAX_PARALLEL_WORKER_LIMIT) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("parallel vacuum degree must be between 1 and %d", + MAX_PARALLEL_WORKER_LIMIT), + parser_errposition(pstate, opt->location))); + } + } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -170,6 +194,11 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) } } + if ((params.options & VACOPT_FULL) && params.nworkers >= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify FULL option with PARALLEL option"))); + /* * All freeze ages are zero if the FREEZE option is given; otherwise pass * them as -1 which means to use the default values. @@ -383,6 +412,7 @@ vacuum(List *relations, VacuumParams *params, VacuumPageHit = 0; VacuumPageMiss = 0; VacuumPageDirty = 0; + VacuumSharedCostBalance = NULL; /* * Loop to process each selected relation. @@ -1738,6 +1768,20 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) return false; } + /* + * Since parallel workers cannot access data in temporary tables, parallel + * vacuum is not allowed for temporary relation. However rather than + * skipping vacuum on the table, just disabling parallel option is better + * option in most cases. + */ + if (RelationUsesLocalBuffers(onerel) && params->nworkers >= 0) + { + ereport(WARNING, + (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel", + RelationGetRelationName(onerel)))); + params->nworkers = 0; + } + /* * Silently ignore partitioned tables as there is no work to be done. The * useful work is on their child partitions, which have been queued up for @@ -1941,16 +1985,73 @@ vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode) void vacuum_delay_point(void) { + double msec = 0; + /* Always check for interrupts */ CHECK_FOR_INTERRUPTS(); - /* Nap if appropriate */ - if (VacuumCostActive && !InterruptPending && - VacuumCostBalance >= VacuumCostLimit) + if (!VacuumCostActive || InterruptPending) + return; + + /* + * If the vacuum cost balance is shared among parallel workers we + * decide whether to sleep based on that. + */ + if (VacuumSharedCostBalance != NULL) { - double msec; + int nworkers = pg_atomic_read_u32(VacuumActiveNWorkers); + + /* At least count itself */ + Assert(nworkers >= 1); + + /* Update the shared cost balance value atomically */ + while (true) + { + uint32 shared_balance; + uint32 new_balance; + uint32 local_balance; + + msec = 0; + + /* compute new balance by adding the local value */ + shared_balance = pg_atomic_read_u32(VacuumSharedCostBalance); + new_balance = shared_balance + VacuumCostBalance; + /* also compute the total local balance */ + local_balance = VacuumCostBalanceLocal + VacuumCostBalance; + + if ((new_balance >= VacuumCostLimit) && + (local_balance > 0.5 * (VacuumCostLimit / nworkers))) + { + /* compute sleep time based on the local cost balance */ + msec = VacuumCostDelay * VacuumCostBalanceLocal / VacuumCostLimit; + new_balance = shared_balance - VacuumCostBalanceLocal; + VacuumCostBalanceLocal = 0; + } + + if (pg_atomic_compare_exchange_u32(VacuumSharedCostBalance, + &shared_balance, + new_balance)) + { + /* Updated successfully, break */ + break; + } + } + + VacuumCostBalanceLocal += VacuumCostBalance; + + /* + * Reset the local balance as we accumulated it into the shared + * value. + */ + VacuumCostBalance = 0; + } + else if (VacuumCostBalance >= VacuumCostLimit) msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit; + + /* Nap if appropriate */ + if (msec > 0) + { if (msec > VacuumCostDelay * 4) msec = VacuumCostDelay * 4; diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index c1dd8168ca..c3690f9c41 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -2891,6 +2891,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, (!wraparound ? VACOPT_SKIP_LOCKED : 0); tab->at_params.index_cleanup = VACOPT_TERNARY_DEFAULT; tab->at_params.truncate = VACOPT_TERNARY_DEFAULT; + /* We don't support parallel vacuum for autovacuum for now */ + tab->at_params.nworkers = -1; tab->at_params.freeze_min_age = freeze_min_age; tab->at_params.freeze_table_age = freeze_table_age; tab->at_params.multixact_freeze_min_age = multixact_freeze_min_age; diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 98c917bf7a..ce35be710f 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -3560,7 +3560,7 @@ psql_completion(const char *text, int start, int end) if (ends_with(prev_wd, '(') || ends_with(prev_wd, ',')) COMPLETE_WITH("FULL", "FREEZE", "ANALYZE", "VERBOSE", "DISABLE_PAGE_SKIPPING", "SKIP_LOCKED", - "INDEX_CLEANUP", "TRUNCATE"); + "INDEX_CLEANUP", "TRUNCATE", "PARALLEL"); else if (TailMatches("FULL|FREEZE|ANALYZE|VERBOSE|DISABLE_PAGE_SKIPPING|SKIP_LOCKED|INDEX_CLEANUP|TRUNCATE")) COMPLETE_WITH("ON", "OFF"); } diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 858bcb6bc9..61725e749f 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -24,6 +24,8 @@ #include "nodes/primnodes.h" #include "storage/bufpage.h" #include "storage/lockdefs.h" +#include "storage/shm_toc.h" +#include "storage/dsm.h" #include "utils/relcache.h" #include "utils/snapshot.h" @@ -190,9 +192,13 @@ extern void SyncScanShmemInit(void); extern Size SyncScanShmemSize(void); /* in heap/vacuumlazy.c */ +extern pg_atomic_uint32 *VacuumSharedCostBalance; +extern pg_atomic_uint32 *VacuumActiveNWorkers; +extern int VacuumCostBalanceLocal; struct VacuumParams; extern void heap_vacuum_rel(Relation onerel, struct VacuumParams *params, BufferAccessStrategy bstrategy); +extern void heap_parallel_vacuum_main(dsm_segment *seg, shm_toc *toc); /* in heap/heapam_visibility.c */ extern bool HeapTupleSatisfiesVisibility(HeapTuple stup, Snapshot snapshot, diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 7b6f269785..295b6a17f0 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -212,6 +212,11 @@ typedef struct VacuumParams * default value depends on reloptions */ VacOptTernaryValue truncate; /* Truncate empty pages at the end, * default value depends on reloptions */ + /* + * The number of parallel vacuum workers. -1 by default for no workers + * and 0 for choosing based on the number of indexes. + */ + int nworkers; } VacuumParams; /* GUC parameters */ diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out index 9996d882d1..cf5e1f0a4e 100644 --- a/src/test/regress/expected/vacuum.out +++ b/src/test/regress/expected/vacuum.out @@ -92,6 +92,32 @@ CONTEXT: SQL function "do_analyze" statement 1 SQL function "wrap_do_analyze" statement 1 VACUUM FULL vactst; VACUUM (DISABLE_PAGE_SKIPPING) vaccluster; +-- PARALLEL option +CREATE TABLE pvactst (i INT, a INT[], p POINT) with (autovacuum_enabled = off); +INSERT INTO pvactst SELECT i, array[1,2,3], point(i, i+1) FROM generate_series(1,100000) i; +CREATE INDEX btree_pvactst ON pvactst USING btree (i); +CREATE INDEX hash_pvactst ON pvactst USING hash (i); +CREATE INDEX brin_pvactst ON pvactst USING brin (i); +CREATE INDEX gin_pvactst ON pvactst USING gin (a); +CREATE INDEX gist_pvactst ON pvactst USING gist (p); +CREATE INDEX spgist_pvactst ON pvactst USING spgist (p); +-- VACUUM invokes parallel index cleanup +VACUUM (PARALLEL 2) pvactst; +-- VACUUM invokes parallel bulk-deletion and parallel index cleanup +UPDATE pvactst SET i = i WHERE i < 1000; +VACUUM (PARALLEL 2) pvactst; +VACUUM (PARALLEL 0) pvactst; -- error +ERROR: parallel vacuum degree must be between 1 and 1024 +LINE 1: VACUUM (PARALLEL 0) pvactst; + ^ +VACUUM (PARALLEL 2, INDEX_CLEANUP FALSE) pvactst; +VACUUM (PARALLEL 2, FULL TRUE) pvactst; -- error, cannot use both PARALLEL and FULL +ERROR: cannot specify FULL option with PARALLEL option +CREATE TEMPORARY TABLE tmp (a int PRIMARY KEY); +CREATE INDEX tmp_idx1 ON tmp (a); +VACUUM (PARALLEL 1) tmp; -- disables parallel vacuum option +WARNING: disabling parallel option of vacuum on "tmp" --- cannot vacuum temporary tables in parallel +DROP TABLE pvactst; -- INDEX_CLEANUP option CREATE TABLE no_index_cleanup (i INT PRIMARY KEY, t TEXT); -- Use uncompressed data stored in toast. diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql index 69987f75e9..0aecf17773 100644 --- a/src/test/regress/sql/vacuum.sql +++ b/src/test/regress/sql/vacuum.sql @@ -75,6 +75,31 @@ VACUUM FULL vactst; VACUUM (DISABLE_PAGE_SKIPPING) vaccluster; +-- PARALLEL option +CREATE TABLE pvactst (i INT, a INT[], p POINT) with (autovacuum_enabled = off); +INSERT INTO pvactst SELECT i, array[1,2,3], point(i, i+1) FROM generate_series(1,100000) i; +CREATE INDEX btree_pvactst ON pvactst USING btree (i); +CREATE INDEX hash_pvactst ON pvactst USING hash (i); +CREATE INDEX brin_pvactst ON pvactst USING brin (i); +CREATE INDEX gin_pvactst ON pvactst USING gin (a); +CREATE INDEX gist_pvactst ON pvactst USING gist (p); +CREATE INDEX spgist_pvactst ON pvactst USING spgist (p); + +-- VACUUM invokes parallel index cleanup +VACUUM (PARALLEL 2) pvactst; + +-- VACUUM invokes parallel bulk-deletion and parallel index cleanup +UPDATE pvactst SET i = i WHERE i < 1000; +VACUUM (PARALLEL 2) pvactst; + +VACUUM (PARALLEL 0) pvactst; -- error +VACUUM (PARALLEL 2, INDEX_CLEANUP FALSE) pvactst; +VACUUM (PARALLEL 2, FULL TRUE) pvactst; -- error, cannot use both PARALLEL and FULL +CREATE TEMPORARY TABLE tmp (a int PRIMARY KEY); +CREATE INDEX tmp_idx1 ON tmp (a); +VACUUM (PARALLEL 1) tmp; -- disables parallel vacuum option +DROP TABLE pvactst; + -- INDEX_CLEANUP option CREATE TABLE no_index_cleanup (i INT PRIMARY KEY, t TEXT); -- Use uncompressed data stored in toast. -- 2.23.0