From b09327137062cd88239c687ff2b3f99833a200a0 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Wed, 9 Apr 2025 23:17:25 +0200 Subject: [PATCH v00] WIP: Optimize VACUUM for tables with only summarizing indexes This should reduce their IO requirements by a nice margin. Even though few such tables exist, they're probably more common than large tables with no indexes at all, so this is probably a net win. --- src/include/commands/vacuum.h | 10 ++++----- src/backend/access/heap/vacuumlazy.c | 31 +++++++++++++++++++-------- src/backend/commands/analyze.c | 2 +- src/backend/commands/vacuum.c | 10 ++++++++- src/backend/commands/vacuumparallel.c | 12 ++++++++--- 5 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index bc37a80dc74..88e5973ad42 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -340,7 +340,8 @@ extern void vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, MemoryContext vac_context, bool isTopLevel); extern void vac_open_indexes(Relation relation, LOCKMODE lockmode, - int *nindexes, Relation **Irel); + int *nindexes, bool *indallsummarizing, + Relation **Irel); extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode); extern double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, @@ -379,10 +380,9 @@ extern void AutoVacuumUpdateCostLimit(void); extern void VacuumUpdateCosts(void); /* in commands/vacuumparallel.c */ -extern ParallelVacuumState *parallel_vacuum_init(Relation rel, Relation *indrels, - int nindexes, int nrequested_workers, - int vac_work_mem, int elevel, - BufferAccessStrategy bstrategy); +extern ParallelVacuumState *parallel_vacuum_init(Relation rel, Relation *indrels, + int nindexes, bool indallsummarizing, int nrequested_workers, + int vac_work_mem, int elevel, BufferAccessStrategy bstrategy); extern void parallel_vacuum_end(ParallelVacuumState *pvs, 
IndexBulkDeleteResult **istats); extern TidStore *parallel_vacuum_get_dead_items(ParallelVacuumState *pvs, VacDeadItemsInfo **dead_items_info_p); diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index f28326bad09..9408f89b197 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -263,6 +263,14 @@ typedef struct LVRelState Relation *indrels; int nindexes; + /* + * indallsummarizing is true if nindexes == 0, or if all indexes are + * summarizing (and thus don't need to be informed about tuple deletions). + * This allows us to apply the single-heapscan vacuum optimization when + * all indexes on the table are summarizing. + */ + bool indallsummarizing; + /* Buffer access strategy and parallel vacuum state */ BufferAccessStrategy bstrategy; ParallelVacuumState *pvs; @@ -680,7 +688,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, /* Set up high level stuff about rel and its indexes */ vacrel->rel = rel; vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes, - &vacrel->indrels); + &vacrel->indallsummarizing, &vacrel->indrels); vacrel->bstrategy = bstrategy; if (instrument && vacrel->nindexes > 0) { @@ -1460,7 +1468,7 @@ lazy_scan_heap(LVRelState *vacrel) * revisit this page. Since updating the FSM is desirable but not * absolutely required, that's OK. */ - if (vacrel->nindexes == 0 + if (vacrel->indallsummarizing || !vacrel->do_index_vacuuming || !has_lpdead_items) { @@ -1475,7 +1483,7 @@ lazy_scan_heap(LVRelState *vacrel) * table has indexes. There will only be newly-freed space if we * held the cleanup lock and lazy_scan_prune() was called. 
*/ - if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items && + if (got_cleanup_lock && vacrel->indallsummarizing && has_lpdead_items && blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES) { FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, @@ -1960,8 +1968,8 @@ lazy_scan_prune(LVRelState *vacrel, /* * Prune all HOT-update chains and potentially freeze tuples on this page. * - * If the relation has no indexes, we can immediately mark would-be dead - * items LP_UNUSED. + * If the relation has no indexes, or only summarizing indexes, we can + * immediately mark would-be dead items LP_UNUSED. * * The number of tuples removed from the page is returned in * presult.ndeleted. It should not be confused with presult.lpdead_items; @@ -1973,7 +1981,7 @@ lazy_scan_prune(LVRelState *vacrel, * all-visible. */ prune_options = HEAP_PAGE_PRUNE_FREEZE; - if (vacrel->nindexes == 0) + if (vacrel->indallsummarizing) prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW; heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options, @@ -2381,7 +2389,7 @@ lazy_scan_noprune(LVRelState *vacrel, vacrel->NewRelminMxid = NoFreezePageRelminMxid; /* Save any LP_DEAD items found on the page in dead_items */ - if (vacrel->nindexes == 0) + if (vacrel->indallsummarizing) { /* Using one-pass strategy (since table has no indexes) */ if (lpdead_items > 0) @@ -2536,8 +2544,12 @@ lazy_vacuum(LVRelState *vacrel) /* * We successfully completed a round of index vacuuming. Do related * heap vacuuming now. + * + * If all valid indexes are summarizing, then the TIDs have already + * been reclaimed, requiring us to skip that last phase. 
*/ - lazy_vacuum_heap_rel(vacrel); + if (!vacrel->indallsummarizing) + lazy_vacuum_heap_rel(vacrel); } else { @@ -3511,7 +3523,8 @@ dead_items_alloc(LVRelState *vacrel, int nworkers) } else vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels, - vacrel->nindexes, nworkers, + vacrel->nindexes, + vacrel->indallsummarizing, nworkers, vac_work_mem, vacrel->verbose ? INFO : DEBUG2, vacrel->bstrategy); diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 4fffb76e557..c846f42a06c 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -435,7 +435,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, } else if (!inh) { - vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel); + vac_open_indexes(onerel, AccessShareLock, &nindexes, NULL, &Irel); hasindex = nindexes > 0; } else diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index db5da3ce826..0c876260d05 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2336,11 +2336,13 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, */ void vac_open_indexes(Relation relation, LOCKMODE lockmode, - int *nindexes, Relation **Irel) + int *nindexes, bool *indallsummarizing, + Relation **Irel) { List *indexoidlist; ListCell *indexoidscan; int i; + bool allsummarizing = true; Assert(lockmode != NoLock); @@ -2363,13 +2365,19 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode, indrel = index_open(indexoid, lockmode); if (indrel->rd_index->indisready) + { (*Irel)[i++] = indrel; + allsummarizing &= indrel->rd_indam->amsummarizing; + } else index_close(indrel, lockmode); } *nindexes = i; + if (indallsummarizing) + *indallsummarizing = allsummarizing; + list_free(indexoidlist); } diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index 2b9d548cdeb..7fe879c68e5 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c 
@@ -170,6 +170,7 @@ struct ParallelVacuumState /* Target indexes */ Relation *indrels; int nindexes; + bool indallsummarizing; /* Shared information among parallel vacuum workers */ PVShared *shared; @@ -241,8 +242,9 @@ static void parallel_vacuum_error_callback(void *arg); */ ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, - int nrequested_workers, int vac_work_mem, - int elevel, BufferAccessStrategy bstrategy) + bool indallsummarizing, int nrequested_workers, + int vac_work_mem, int elevel, + BufferAccessStrategy bstrategy) { ParallelVacuumState *pvs; ParallelContext *pcxt; @@ -282,6 +284,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, pvs = (ParallelVacuumState *) palloc0(sizeof(ParallelVacuumState)); pvs->indrels = indrels; pvs->nindexes = nindexes; + pvs->indallsummarizing = indallsummarizing; pvs->will_parallel_vacuum = will_parallel_vacuum; pvs->bstrategy = bstrategy; pvs->heaprel = rel; @@ -997,6 +1000,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) BufferUsage *buffer_usage; WalUsage *wal_usage; int nindexes; + bool indallsummarizing; char *sharedquery; ErrorContextCallback errcallback; @@ -1029,7 +1033,8 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) * Open all indexes. indrels are sorted in order by OID, which should be * matched to the leader's one. */ - vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels); + vac_open_indexes(rel, RowExclusiveLock, &nindexes, + &indallsummarizing, &indrels); Assert(nindexes > 0); /* @@ -1061,6 +1066,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) /* Set parallel vacuum state */ pvs.indrels = indrels; pvs.nindexes = nindexes; + pvs.indallsummarizing = indallsummarizing; pvs.indstats = indstats; pvs.shared = shared; pvs.dead_items = dead_items; -- 2.45.2