From 8b1bc24566a2e732177fbecae849570554a797e1 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sat, 13 Mar 2021 20:37:32 -0800 Subject: [PATCH v4 1/3] Refactor vacuumlazy.c. Break up lazy_scan_heap() into functions. Aside from being useful cleanup work in its own right, this is also preparation for an upcoming patch that removes the "tupgone" special case from vacuumlazy.c. --- src/backend/access/heap/vacuumlazy.c | 1384 +++++++++++++++---------- contrib/pg_visibility/pg_visibility.c | 8 +- contrib/pgstattuple/pgstatapprox.c | 8 +- 3 files changed, 832 insertions(+), 568 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 8341879d89..6382393516 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -294,8 +294,6 @@ typedef struct LVRelStats { char *relnamespace; char *relname; - /* useindex = true means two-pass strategy; false means one-pass */ - bool useindex; /* Overall statistics about rel */ BlockNumber old_rel_pages; /* previous value of pg_class.relpages */ BlockNumber rel_pages; /* total number of pages */ @@ -330,9 +328,47 @@ typedef struct LVSavedErrInfo VacErrPhase phase; } LVSavedErrInfo; +/* + * Counters maintained by lazy_scan_heap() (and scan_prune_page()): + */ +typedef struct LVTempCounters +{ + double num_tuples; /* total number of nonremovable tuples */ + double live_tuples; /* live tuples (reltuples estimate) */ + double tups_vacuumed; /* tuples cleaned up by current vacuum */ + double nkeep; /* dead-but-not-removable tuples */ + double nunused; /* # existing unused line pointers */ +} LVTempCounters; + +/* + * State output by scan_prune_page(): + */ +typedef struct LVPrunePageState +{ + bool hastup; /* Page is truncatable? */ + bool has_dead_items; /* includes existing LP_DEAD items */ + bool all_visible; /* Every item visible to all? */ + bool all_frozen; /* provided all_visible is also true */ +} LVPrunePageState; + +/* + * State set up and maintained in lazy_scan_heap() (also maintained in + * scan_prune_page()) that represents VM bit status. + * + * Used by scan_setvmbit_page() when we're done pruning. 
+ */ +typedef struct LVVisMapPageState +{ + bool all_visible_according_to_vm; + TransactionId visibility_cutoff_xid; +} LVVisMapPageState; + /* A few variables that don't seem worth passing around as parameters */ static int elevel = -1; +static TransactionId RelFrozenXid; +static MultiXactId RelMinMxid; + static TransactionId OldestXmin; static TransactionId FreezeLimit; static MultiXactId MultiXactCutoff; @@ -344,6 +380,10 @@ static BufferAccessStrategy vac_strategy; static void lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive); +static void two_pass_strategy(Relation onerel, LVRelStats *vacrelstats, + Relation *Irel, IndexBulkDeleteResult **indstats, + int nindexes, LVParallelState *lps, + VacOptTernaryValue index_cleanup); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats); @@ -363,7 +403,8 @@ static bool should_attempt_truncation(VacuumParams *params, static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats); -static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks); +static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks, + bool hasindex); static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr); static bool lazy_tid_reaped(ItemPointer itemptr, void *state); @@ -448,10 +489,6 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, Assert(params->index_cleanup != VACOPT_TERNARY_DEFAULT); Assert(params->truncate != VACOPT_TERNARY_DEFAULT); - /* not every AM requires these to be valid, but heap does */ - Assert(TransactionIdIsNormal(onerel->rd_rel->relfrozenxid)); - Assert(MultiXactIdIsValid(onerel->rd_rel->relminmxid)); - /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0) { @@ -474,6 +511,13 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, vac_strategy = bstrategy; + RelFrozenXid = onerel->rd_rel->relfrozenxid; + RelMinMxid = onerel->rd_rel->relminmxid; + + /* not every AM requires these to be valid, but heap does */ + Assert(TransactionIdIsNormal(RelFrozenXid)); + Assert(MultiXactIdIsValid(RelMinMxid)); + vacuum_set_xid_limits(onerel, params->freeze_min_age, params->freeze_table_age, @@ -509,8 +553,6 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, /* Open all indexes of the relation */ vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); - vacrelstats->useindex = (nindexes > 0 && - params->index_cleanup == VACOPT_TERNARY_ENABLED); /* * Setup error traceback support for ereport(). The idea is to set up an @@ -740,6 +782,555 @@ vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats) (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid); } +/* + * Handle new page during lazy_scan_heap(). + * + * Caller must hold pin and buffer cleanup lock on buf. + * + * All-zeroes pages can be left over if either a backend extends the relation + * by a single page, but crashes before the newly initialized page has been + * written out, or when bulk-extending the relation (which creates a number of + * empty pages at the tail end of the relation, but enters them into the FSM). + * + * Note we do not enter the page into the visibilitymap. 
That has the downside
+ * that we repeatedly visit this page in subsequent vacuums, but otherwise
+ * we'll never discover the space on a promoted standby.  The harm of
+ * repeated checking ought to normally not be too bad - the space usually
+ * should be used at some point, otherwise there wouldn't be any regular
+ * vacuums.
+ *
+ * Make sure these pages are in the FSM, to ensure they can be reused.  Do
+ * that by testing if there's any space recorded for the page.  If not, enter
+ * it.  We do so after releasing the lock on the heap page, the FSM is
+ * approximate, after all.
+ */
+static void
+scan_new_page(Relation onerel, Buffer buf)
+{
+	BlockNumber blkno = BufferGetBlockNumber(buf);
+
+	if (GetRecordedFreeSpace(onerel, blkno) == 0)
+	{
+		Size		freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
+
+		UnlockReleaseBuffer(buf);
+		RecordPageWithFreeSpace(onerel, blkno, freespace);
+		return;
+	}
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Handle empty page during lazy_scan_heap().
+ *
+ * Caller must hold pin and buffer cleanup lock on buf, as well as a pin (but
+ * not a lock) on vmbuffer.
+ */
+static void
+scan_empty_page(Relation onerel, Buffer buf, Buffer vmbuffer,
+				LVRelStats *vacrelstats)
+{
+	Page		page = BufferGetPage(buf);
+	BlockNumber blkno = BufferGetBlockNumber(buf);
+	Size		freespace = PageGetHeapFreeSpace(page);
+
+	/*
+	 * Empty pages are always all-visible and all-frozen (note that the same
+	 * is currently not true for new pages, see scan_new_page()).
+	 */
+	if (!PageIsAllVisible(page))
+	{
+		START_CRIT_SECTION();
+
+		/* mark buffer dirty before writing a WAL record */
+		MarkBufferDirty(buf);
+
+		/*
+		 * It's possible that another backend has extended the heap,
+		 * initialized the page, and then failed to WAL-log the page due to an
+		 * ERROR.  Since heap extension is not WAL-logged, recovery might try
+		 * to replay our record setting the page all-visible and find that the
+		 * page isn't initialized, which will cause a PANIC.  To prevent that,
+		 * check whether the page has been previously WAL-logged, and if not,
+		 * do that now.
+		 */
+		if (RelationNeedsWAL(onerel) &&
+			PageGetLSN(page) == InvalidXLogRecPtr)
+			log_newpage_buffer(buf, true);
+
+		PageSetAllVisible(page);
+		visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
+						  vmbuffer, InvalidTransactionId,
+						  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
+		END_CRIT_SECTION();
+	}
+
+	UnlockReleaseBuffer(buf);
+	RecordPageWithFreeSpace(onerel, blkno, freespace);
+}
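To make the FSM interplay above concrete, here is a minimal standalone sketch (not part of the patch; the toy_* names and constants are hypothetical stand-ins for GetRecordedFreeSpace(), RecordPageWithFreeSpace(), BLCKSZ and SizeOfPageHeaderData):

    #include <stdio.h>

    #define TOY_BLCKSZ 8192          /* stand-in for BLCKSZ */
    #define TOY_PAGE_HEADER 24       /* stand-in for SizeOfPageHeaderData */
    #define TOY_NBLOCKS 16

    /* toy FSM: recorded free space per block; 0 means "nothing recorded" */
    static size_t toy_fsm[TOY_NBLOCKS];

    /* models scan_new_page(): enter an all-zeroes page into the FSM only if
     * it isn't there already */
    static void
    toy_scan_new_page(unsigned blkno)
    {
        if (toy_fsm[blkno] == 0)     /* models GetRecordedFreeSpace() == 0 */
            toy_fsm[blkno] = TOY_BLCKSZ - TOY_PAGE_HEADER;  /* models RecordPageWithFreeSpace() */
    }

    int
    main(void)
    {
        toy_fsm[3] = 4096;           /* block 3 already has recorded space */
        toy_scan_new_page(3);        /* no-op: space already recorded */
        toy_scan_new_page(7);        /* records 8168 bytes */
        printf("block 3: %zu, block 7: %zu\n", toy_fsm[3], toy_fsm[7]);
        return 0;
    }

Because the FSM is only approximate, re-recording already-known space would be wasted work, which is why the real code tests before entering.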
+
+/*
+ * scan_prune_page() -- lazy_scan_heap() pruning and freezing.
+ *
+ * Caller must hold pin and buffer cleanup lock on the buffer.
+ *
+ * There are still very rare cases where lazy_scan_heap() treats tuples that
+ * have storage left behind after pruning as DEAD.  That happens when
+ * heap_page_prune() could not prune tuples that were nevertheless deemed
+ * DEAD by its own HeapTupleSatisfiesVacuum() call.  This creates rare,
+ * hard-to-test cases.  It means that there is no very sharp distinction
+ * between DEAD tuples and tuples that are to be kept and be considered for
+ * freezing inside heap_prepare_freeze_tuple().  It also means that
+ * lazy_vacuum_page() has to be prepared to remove items with storage (tuples
+ * with tuple headers) that didn't get pruned, which creates a special case
+ * to handle recovery conflicts.
+ *
+ * An upcoming patch eliminates all of this complexity by simply restarting
+ * pruning in these very rare cases -- cases where a concurrent abort of an
+ * xact makes our HeapTupleSatisfiesVacuum() call disagree with what
+ * heap_page_prune() thought about the tuple only microseconds earlier.
+ *
+ * To prepare for that, the code is already structured to use a local
+ * per-page copy of the counters that caller accumulates.  We add our
+ * per-page counters to the per-VACUUM totals from caller last of all, to
+ * avoid double counting.
+ */
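The accumulate-locally-then-merge pattern the comment above describes can be sketched in standalone C (not part of the patch; toy_prune_page() is a hypothetical stand-in for a pruning attempt that may have to be retried):

    #include <stdio.h>

    /* models LVTempCounters */
    typedef struct ToyCounters
    {
        double num_tuples;
        double tups_vacuumed;
    } ToyCounters;

    /* stand-in for one pruning attempt; returns 0 on success, -1 on retry */
    static int
    toy_prune_page(ToyCounters *pc, int attempt)
    {
        pc->num_tuples = 0;          /* reset local counters on every attempt */
        pc->tups_vacuumed = 0;

        pc->num_tuples += 10;
        pc->tups_vacuumed += 2;

        return (attempt == 0) ? -1 : 0; /* pretend attempt 0 hits a concurrent abort */
    }

    int
    main(void)
    {
        ToyCounters totals = {0, 0};
        ToyCounters pc;
        int         attempt = 0;

        /* retry pruning until it succeeds */
        while (toy_prune_page(&pc, attempt) != 0)
            attempt++;

        /* merge into the per-VACUUM totals exactly once, after success */
        totals.num_tuples += pc.num_tuples;
        totals.tups_vacuumed += pc.tups_vacuumed;

        printf("%.0f tuples, %.0f vacuumed (counted once)\n",
               totals.num_tuples, totals.tups_vacuumed);
        return 0;
    }

Merging only after the final attempt is what makes a restart safe: counters from an abandoned attempt never reach the totals.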
+static void
+scan_prune_page(Relation onerel, Buffer buf,
+				LVRelStats *vacrelstats,
+				GlobalVisState *vistest, xl_heap_freeze_tuple *frozen,
+				LVTempCounters *c, LVPrunePageState *ps,
+				LVVisMapPageState *vms,
+				VacOptTernaryValue index_cleanup)
+{
+	BlockNumber blkno;
+	Page		page;
+	OffsetNumber offnum,
+				maxoff;
+	int			nfrozen,
+				ndead;
+	LVTempCounters pc;
+	OffsetNumber deaditems[MaxHeapTuplesPerPage];
+	bool		tupgone;
+
+	blkno = BufferGetBlockNumber(buf);
+	page = BufferGetPage(buf);
+
+	/* Initialize (or reset) page-level counters */
+	pc.num_tuples = 0;
+	pc.live_tuples = 0;
+	pc.tups_vacuumed = 0;
+	pc.nkeep = 0;
+	pc.nunused = 0;
+
+	/*
+	 * Prune all HOT-update chains in this page.
+	 *
+	 * We count tuples removed by the pruning step as removed by VACUUM
+	 * (existing LP_DEAD line pointers don't count).
+	 */
+	pc.tups_vacuumed = heap_page_prune(onerel, buf, vistest,
+									   InvalidTransactionId, 0, false,
+									   &vacrelstats->latestRemovedXid,
+									   &vacrelstats->offnum);
+
+	/*
+	 * Now scan the page to collect vacuumable items and check for tuples
+	 * requiring freezing.
+	 */
+	ps->hastup = false;
+	ps->has_dead_items = false;
+	ps->all_visible = true;
+	ps->all_frozen = true;
+	nfrozen = 0;
+	ndead = 0;
+	maxoff = PageGetMaxOffsetNumber(page);
+
+	/*
+	 * Note: If you change anything in the loop below, also look at
+	 * heap_page_is_all_visible to see if that needs to be changed.
+	 */
+	for (offnum = FirstOffsetNumber;
+		 offnum <= maxoff;
+		 offnum = OffsetNumberNext(offnum))
+	{
+		ItemId		itemid;
+		HeapTupleData tuple;
+
+		/*
+		 * Set the offset number so that we can display it along with any
+		 * error that occurred while processing this tuple.
+		 */
+		vacrelstats->offnum = offnum;
+		itemid = PageGetItemId(page, offnum);
+
+		/* Unused items require no processing, but we count 'em */
+		if (!ItemIdIsUsed(itemid))
+		{
+			pc.nunused += 1;
+			continue;
+		}
+
+		/* Redirect items mustn't be touched */
+		if (ItemIdIsRedirected(itemid))
+		{
+			ps->hastup = true;	/* this page won't be truncatable */
+			continue;
+		}
+
+		/*
+		 * LP_DEAD line pointers are to be vacuumed normally; but we don't
+		 * count them in tups_vacuumed, else we'd be double-counting (at least
+		 * in the common case where heap_page_prune() just freed up a non-HOT
+		 * tuple).
+		 *
+		 * Note also that the final tups_vacuumed value might be very low for
+		 * tables where opportunistic page pruning happens to occur very
+		 * frequently (via heap_page_prune_opt() calls that free up non-HOT
+		 * tuples).
+		 */
+		if (ItemIdIsDead(itemid))
+		{
+			deaditems[ndead++] = offnum;
+			ps->all_visible = false;
+			ps->has_dead_items = true;
+			continue;
+		}
+
+		Assert(ItemIdIsNormal(itemid));
+
+		ItemPointerSet(&(tuple.t_self), blkno, offnum);
+		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+		tuple.t_len = ItemIdGetLength(itemid);
+		tuple.t_tableOid = RelationGetRelid(onerel);
+
+		/* reset for this item -- an earlier item may have set it */
+		tupgone = false;
+
+		/*
+		 * The criteria for counting a tuple as live in this block need to
+		 * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
+		 * and ANALYZE may produce wildly different reltuples values, e.g.
+		 * when there are many recently-dead tuples.
+		 *
+		 * The logic here is a bit simpler than acquire_sample_rows(), as
+		 * VACUUM can't run inside a transaction block, which makes some cases
+		 * impossible (e.g. in-progress insert from the same transaction).
+		 */
+		switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
+		{
+			case HEAPTUPLE_DEAD:
+
+				/*
+				 * Ordinarily, DEAD tuples would have been removed by
+				 * heap_page_prune(), but it's possible that the tuple state
+				 * changed since heap_page_prune() looked.  In particular an
+				 * INSERT_IN_PROGRESS tuple could have changed to DEAD if the
+				 * inserter aborted.  So this cannot be considered an error
+				 * condition.
+				 *
+				 * If the tuple is HOT-updated then it must only be removed by
+				 * a prune operation; so we keep it just as if it were
+				 * RECENTLY_DEAD.  Also, if it's a heap-only tuple, we choose
+				 * to keep it, because it'll be a lot cheaper to get rid of it
+				 * in the next pruning pass than to treat it like an indexed
+				 * tuple.  Finally, if index cleanup is disabled, the second
+				 * heap pass will not execute, and the tuple will not get
+				 * removed, so we must treat it like any other dead tuple that
+				 * we choose to keep.
+				 *
+				 * If this were to happen for a tuple that actually needed to
+				 * be deleted, we'd be in trouble, because it'd possibly leave
+				 * a tuple below the relation's xmin horizon alive.
+				 * heap_prepare_freeze_tuple() is prepared to detect that case
+				 * and abort the transaction, preventing corruption.
+				 */
+				if (HeapTupleIsHotUpdated(&tuple) ||
+					HeapTupleIsHeapOnly(&tuple) ||
+					index_cleanup == VACOPT_TERNARY_DISABLED)
+					pc.nkeep += 1;
+				else
+					tupgone = true; /* we can delete the tuple */
+				ps->all_visible = false;
+				break;
+			case HEAPTUPLE_LIVE:
+
+				/*
+				 * Count it as live.  Not only is this natural, but it's also
+				 * what acquire_sample_rows() does.
+				 */
+				pc.live_tuples += 1;
+
+				/*
+				 * Is the tuple definitely visible to all transactions?
+				 *
+				 * NB: Like with per-tuple hint bits, we can't set the
+				 * PD_ALL_VISIBLE flag if the inserter committed
+				 * asynchronously.  See SetHintBits for more info.  Check that
+				 * the tuple is hinted xmin-committed because of that.
+				 */
+				if (ps->all_visible)
+				{
+					TransactionId xmin;
+
+					if (!HeapTupleHeaderXminCommitted(tuple.t_data))
+					{
+						ps->all_visible = false;
+						break;
+					}
+
+					/*
+					 * The inserter definitely committed.  But is it old enough
+					 * that everyone sees it as committed?
+					 */
+					xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+					if (!TransactionIdPrecedes(xmin, OldestXmin))
+					{
+						ps->all_visible = false;
+						break;
+					}
+
+					/* Track newest xmin on page. */
+					if (TransactionIdFollows(xmin, vms->visibility_cutoff_xid))
+						vms->visibility_cutoff_xid = xmin;
+				}
+				break;
+			case HEAPTUPLE_RECENTLY_DEAD:
+
+				/*
+				 * If tuple is recently deleted then we must not remove it
+				 * from relation.
+				 */
+				pc.nkeep += 1;
+				ps->all_visible = false;
+				break;
+			case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+				/*
+				 * This is an expected case during concurrent vacuum.
+				 *
+				 * We do not count these rows as live, because we expect the
+				 * inserting transaction to update the counters at commit, and
+				 * we assume that will happen only after we report our
+				 * results.  This assumption is a bit shaky, but it is what
+				 * acquire_sample_rows() does, so be consistent.
+				 */
+				ps->all_visible = false;
+				break;
+			case HEAPTUPLE_DELETE_IN_PROGRESS:
+				/* This is an expected case during concurrent vacuum */
+				ps->all_visible = false;
+
+				/*
+				 * Count such rows as live.  As above, we assume the deleting
+				 * transaction will commit and update the counters after we
+				 * report.
+				 */
+				pc.live_tuples += 1;
+				break;
+			default:
+				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+				break;
+		}
+
+		if (tupgone)
+		{
+			deaditems[ndead++] = offnum;
+			HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
+												   &vacrelstats->latestRemovedXid);
+			pc.tups_vacuumed += 1;
+			ps->has_dead_items = true;
+		}
+		else
+		{
+			bool		tuple_totally_frozen;
+
+			pc.num_tuples += 1;
+			ps->hastup = true;
+
+			/*
+			 * Each non-removable tuple must be checked to see if it needs
+			 * freezing
+			 */
+			if (heap_prepare_freeze_tuple(tuple.t_data,
+										  RelFrozenXid, RelMinMxid,
+										  FreezeLimit, MultiXactCutoff,
+										  &frozen[nfrozen],
+										  &tuple_totally_frozen))
+				frozen[nfrozen++].offset = offnum;
+
+			if (!tuple_totally_frozen)
+				ps->all_frozen = false;
+		}
+	}
+
+	/*
+	 * Success -- we're done pruning, and have determined which tuples are to
+	 * be recorded as dead in the local array.  We've also prepared the
+	 * details of which remaining tuples are to be frozen.
+	 *
+	 * First, clear the offset information now that we have processed all the
+	 * tuples on the page.
+	 */
+	vacrelstats->offnum = InvalidOffsetNumber;
+
+	/*
+	 * Next, add page-level counters to caller's counts
+	 */
+	c->num_tuples += pc.num_tuples;
+	c->live_tuples += pc.live_tuples;
+	c->tups_vacuumed += pc.tups_vacuumed;
+	c->nkeep += pc.nkeep;
+	c->nunused += pc.nunused;
+
+	/*
+	 * Now save the local dead items array to VACUUM's dead_tuples array.
+	 */
+	for (int i = 0; i < ndead; i++)
+	{
+		ItemPointerData itemptr;
+
+		ItemPointerSet(&itemptr, blkno, deaditems[i]);
+		lazy_record_dead_tuple(vacrelstats->dead_tuples, &itemptr);
+	}
+
+	/*
+	 * Finally, execute tuple freezing as planned.
+	 *
+	 * If we need to freeze any tuples we'll mark the buffer dirty, and write
+	 * a WAL record recording the changes.  We must log the changes to be
+	 * crash-safe against future truncation of CLOG.
+	 */
+	if (nfrozen > 0)
+	{
+		START_CRIT_SECTION();
+
+		MarkBufferDirty(buf);
+
+		/* execute collected freezes */
+		for (int i = 0; i < nfrozen; i++)
+		{
+			ItemId		itemid;
+			HeapTupleHeader htup;
+
+			itemid = PageGetItemId(page, frozen[i].offset);
+			htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+			heap_execute_freeze_tuple(htup, &frozen[i]);
+		}
+
+		/* Now WAL-log freezing if necessary */
+		if (RelationNeedsWAL(onerel))
+		{
+			XLogRecPtr	recptr;
+
+			recptr = log_heap_freeze(onerel, buf, FreezeLimit,
+									 frozen, nfrozen);
+			PageSetLSN(page, recptr);
+		}
+
+		END_CRIT_SECTION();
+	}
+}
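The all-visible/visibility-cutoff bookkeeping in the HEAPTUPLE_LIVE case above amounts to the following standalone sketch (not part of the patch; xid_precedes()/xid_follows() are toy stand-ins for TransactionIdPrecedes()/TransactionIdFollows(), ignoring the circular XID comparison the real functions perform):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool xid_precedes(uint32_t a, uint32_t b) { return a < b; }
    static bool xid_follows(uint32_t a, uint32_t b) { return a > b; }

    int
    main(void)
    {
        uint32_t    oldest_xmin = 1000;         /* models OldestXmin */
        uint32_t    xmins[] = {900, 950, 990};  /* committed xmins of live tuples */
        bool        all_visible = true;
        uint32_t    visibility_cutoff_xid = 0;  /* models InvalidTransactionId */

        for (int i = 0; i < 3; i++)
        {
            /* a tuple is visible to everyone only if xmin precedes OldestXmin */
            if (!xid_precedes(xmins[i], oldest_xmin))
            {
                all_visible = false;
                break;
            }
            /* remember the newest xmin: it becomes the VM snapshot cutoff */
            if (xid_follows(xmins[i], visibility_cutoff_xid))
                visibility_cutoff_xid = xmins[i];
        }

        printf("all_visible=%d cutoff=%u\n", all_visible, visibility_cutoff_xid);
        return 0;
    }

The cutoff XID is what later gets passed to visibilitymap_set(), so that recovery can resolve conflicts against standby snapshots.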
+
+/*
+ * Handle setting VM bit inside lazy_scan_heap(), after pruning and freezing.
+ */ +static void +scan_setvmbit_page(Relation onerel, Buffer buf, Buffer vmbuffer, + LVPrunePageState *ps, LVVisMapPageState *vms) +{ + Page page = BufferGetPage(buf); + BlockNumber blkno = BufferGetBlockNumber(buf); + + /* mark page all-visible, if appropriate */ + if (ps->all_visible && !vms->all_visible_according_to_vm) + { + uint8 flags = VISIBILITYMAP_ALL_VISIBLE; + + if (ps->all_frozen) + flags |= VISIBILITYMAP_ALL_FROZEN; + + /* + * It should never be the case that the visibility map page is set + * while the page-level bit is clear, but the reverse is allowed (if + * checksums are not enabled). Regardless, set both bits so that we + * get back in sync. + * + * NB: If the heap page is all-visible but the VM bit is not set, we + * don't need to dirty the heap page. However, if checksums are + * enabled, we do need to make sure that the heap page is dirtied + * before passing it to visibilitymap_set(), because it may be logged. + * Given that this situation should only happen in rare cases after a + * crash, it is not worth optimizing. + */ + PageSetAllVisible(page); + MarkBufferDirty(buf); + visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, + vmbuffer, vms->visibility_cutoff_xid, flags); + } + + /* + * The visibility map bit should never be set if the page-level bit is + * clear. However, it's possible that the bit got cleared after we + * checked it and before we took the buffer content lock, so we must + * recheck before jumping to the conclusion that something bad has + * happened. + */ + else if (vms->all_visible_according_to_vm && !PageIsAllVisible(page) && + VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) + { + elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", + RelationGetRelationName(onerel), blkno); + visibilitymap_clear(onerel, blkno, vmbuffer, + VISIBILITYMAP_VALID_BITS); + } + + /* + * It's possible for the value returned by + * GetOldestNonRemovableTransactionId() to move backwards, so it's not + * wrong for us to see tuples that appear to not be visible to everyone + * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value + * never moves backwards, but GetOldestNonRemovableTransactionId() is + * conservative and sometimes returns a value that's unnecessarily small, + * so if we see that contradiction it just means that the tuples that we + * think are not visible to everyone yet actually are, and the + * PD_ALL_VISIBLE flag is correct. + * + * There should never be dead tuples on a page with PD_ALL_VISIBLE set, + * however. + */ + else if (PageIsAllVisible(page) && ps->has_dead_items) + { + elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u", + RelationGetRelationName(onerel), blkno); + PageClearAllVisible(page); + MarkBufferDirty(buf); + visibilitymap_clear(onerel, blkno, vmbuffer, + VISIBILITYMAP_VALID_BITS); + } + + /* + * If the all-visible page is all-frozen but not marked as such yet, mark + * it as all-frozen. Note that all_frozen is only valid if all_visible is + * true, so we must check both. + */ + else if (vms->all_visible_according_to_vm && ps->all_visible && + ps->all_frozen && !VM_ALL_FROZEN(onerel, blkno, &vmbuffer)) + { + /* + * We can pass InvalidTransactionId as the cutoff XID here, because + * setting the all-frozen bit doesn't cause recovery conflicts. 
+ */ + visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, + vmbuffer, InvalidTransactionId, + VISIBILITYMAP_ALL_FROZEN); + } +} + /* * lazy_scan_heap() -- scan an open heap relation * @@ -748,9 +1339,9 @@ vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats) * page, and set commit status bits (see heap_page_prune). It also builds * lists of dead tuples and pages with free space, calculates statistics * on the number of live tuples in the heap, and marks pages as - * all-visible if appropriate. When done, or when we run low on space for - * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap - * to reclaim dead line pointers. + * all-visible if appropriate. When done, or when we run low on space + * for dead-tuple TIDs, invoke two_pass_strategy to vacuum indexes and + * mark dead line pointers for reuse via a second heap pass. * * If the table has at least two indexes, we execute both index vacuum * and index cleanup with parallel workers unless parallel vacuum is @@ -775,23 +1366,12 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, LVParallelState *lps = NULL; LVDeadTuples *dead_tuples; BlockNumber nblocks, - blkno; - HeapTupleData tuple; - TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid; - TransactionId relminmxid = onerel->rd_rel->relminmxid; - BlockNumber empty_pages, - vacuumed_pages, + blkno, + next_unskippable_block, next_fsm_block_to_vacuum; - double num_tuples, /* total number of nonremovable tuples */ - live_tuples, /* live tuples (reltuples estimate) */ - tups_vacuumed, /* tuples cleaned up by current vacuum */ - nkeep, /* dead-but-not-removable tuples */ - nunused; /* # existing unused line pointers */ IndexBulkDeleteResult **indstats; - int i; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; - BlockNumber next_unskippable_block; bool skipping_blocks; xl_heap_freeze_tuple *frozen; StringInfoData buf; @@ -802,6 +1382,11 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, }; int64 initprog_val[3]; GlobalVisState *vistest; + LVTempCounters c; + + /* Counters of # blocks in onerel: */ + BlockNumber empty_pages, + vacuumed_pages; pg_rusage_init(&ru0); @@ -817,18 +1402,24 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vacrelstats->relname))); empty_pages = vacuumed_pages = 0; - next_fsm_block_to_vacuum = (BlockNumber) 0; - num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0; + + /* Initialize counters */ + c.num_tuples = 0; + c.live_tuples = 0; + c.tups_vacuumed = 0; + c.nkeep = 0; + c.nunused = 0; indstats = (IndexBulkDeleteResult **) palloc0(nindexes * sizeof(IndexBulkDeleteResult *)); nblocks = RelationGetNumberOfBlocks(onerel); + next_unskippable_block = 0; + next_fsm_block_to_vacuum = 0; vacrelstats->rel_pages = nblocks; vacrelstats->scanned_pages = 0; vacrelstats->tupcount_pages = 0; vacrelstats->nonempty_pages = 0; - vacrelstats->latestRemovedXid = InvalidTransactionId; vistest = GlobalVisTestFor(onerel); @@ -837,7 +1428,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * be used for an index, so we invoke parallelism only if there are at * least two indexes on a table. */ - if (params->nworkers >= 0 && vacrelstats->useindex && nindexes > 1) + if (params->nworkers >= 0 && nindexes > 1) { /* * Since parallel workers cannot access data in temporary tables, we @@ -865,7 +1456,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * initialized. 
*/ if (!ParallelVacuumIsActive(lps)) - lazy_space_alloc(vacrelstats, nblocks); + lazy_space_alloc(vacrelstats, nblocks, nindexes > 0); dead_tuples = vacrelstats->dead_tuples; frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage); @@ -920,7 +1511,6 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * the last page. This is worth avoiding mainly because such a lock must * be replayed on any hot standby, where it can be disruptive. */ - next_unskippable_block = 0; if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0) { while (next_unskippable_block < nblocks) @@ -953,20 +1543,22 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, { Buffer buf; Page page; - OffsetNumber offnum, - maxoff; - bool tupgone, - hastup; - int prev_dead_count; - int nfrozen; + LVVisMapPageState vms; + LVPrunePageState ps; + bool savefreespace; Size freespace; - bool all_visible_according_to_vm = false; - bool all_visible; - bool all_frozen = true; /* provided all_visible is also true */ - bool has_dead_items; /* includes existing LP_DEAD items */ - TransactionId visibility_cutoff_xid = InvalidTransactionId; - /* see note above about forcing scanning of last page */ + /* Initialize vm state for block: */ + vms.all_visible_according_to_vm = false; + vms.visibility_cutoff_xid = InvalidTransactionId; + + /* Note: Can't touch ps until we reach scan_prune_page() */ + + /* + * Step 1 for block: Consider need to skip blocks. + * + * See note above about forcing scanning of last page. + */ #define FORCE_CHECK_PAGE() \ (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats)) @@ -1018,7 +1610,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * that it's not all-frozen, so it might still be all-visible. */ if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) - all_visible_according_to_vm = true; + vms.all_visible_according_to_vm = true; } else { @@ -1045,12 +1637,15 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vacrelstats->frozenskipped_pages++; continue; } - all_visible_according_to_vm = true; + vms.all_visible_according_to_vm = true; } vacuum_delay_point(); /* + * Step 2 for block: Consider if we definitely have enough space to + * process TIDs on page already. + * * If we are close to overrunning the available space for dead-tuple * TIDs, pause and do a cycle of vacuuming before we tackle this page. */ @@ -1069,23 +1664,16 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vmbuffer = InvalidBuffer; } - /* Work on all the indexes, then the heap */ - lazy_vacuum_all_indexes(onerel, Irel, indstats, - vacrelstats, lps, nindexes); - - /* Remove tuples from heap */ - lazy_vacuum_heap(onerel, vacrelstats); - - /* - * Forget the now-vacuumed tuples, and press on, but be careful - * not to reset latestRemovedXid since we want that value to be - * valid. - */ - dead_tuples->num_tuples = 0; + /* Remove the collected garbage tuples from table and indexes */ + two_pass_strategy(onerel, vacrelstats, Irel, indstats, nindexes, + lps, params->index_cleanup); /* * Vacuum the Free Space Map to make newly-freed space visible on * upper-level FSM pages. Note we have not yet processed blkno. + * Even if we skipped heap vacuum, FSM vacuuming could be + * worthwhile since we could have updated the freespace of empty + * pages. 
 */
 			FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
 			next_fsm_block_to_vacuum = blkno;
@@ -1096,22 +1684,29 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		}
 
 		/*
+		 * Step 3 for block: Set up visibility map page as needed.
+		 *
 		 * Pin the visibility map page in case we need to mark the page
 		 * all-visible.  In most cases this will be very cheap, because we'll
 		 * already have the correct page pinned anyway.  However, it's
 		 * possible that (a) next_unskippable_block is covered by a different
 		 * VM page than the current block or (b) we released our pin and did a
 		 * cycle of index vacuuming.
-		 *
 		 */
 		visibilitymap_pin(onerel, blkno, &vmbuffer);
 
 		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
 								 RBM_NORMAL, vac_strategy);
 
-		/* We need buffer cleanup lock so that we can prune HOT chains. */
+		/*
+		 * Step 4 for block: Acquire super-exclusive lock for pruning.
+		 *
+		 * We need buffer cleanup lock so that we can prune HOT chains.
+		 */
 		if (!ConditionalLockBufferForCleanup(buf))
 		{
+			bool		hastup;
+
 			/*
 			 * If we're not performing an aggressive scan to guard against XID
 			 * wraparound, and we don't want to forcibly check the page, then
@@ -1168,6 +1763,12 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			/* drop through to normal processing */
 		}
 
+		/*
+		 * Step 5 for block: Handle empty/new pages.
+		 *
+		 * By here we have a super-exclusive lock, and it's clear that this
+		 * page is one that we consider scanned.
+		 */
 		vacrelstats->scanned_pages++;
 		vacrelstats->tupcount_pages++;
 
@@ -1175,399 +1776,84 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 
 		page = BufferGetPage(buf);
 
 		if (PageIsNew(page))
 		{
-			/*
-			 * All-zeroes pages can be left over if either a backend extends
-			 * the relation by a single page, but crashes before the newly
-			 * initialized page has been written out, or when bulk-extending
-			 * the relation (which creates a number of empty pages at the tail
-			 * end of the relation, but enters them into the FSM).
-			 *
-			 * Note we do not enter the page into the visibilitymap. That has
-			 * the downside that we repeatedly visit this page in subsequent
-			 * vacuums, but otherwise we'll never not discover the space on a
-			 * promoted standby. The harm of repeated checking ought to
-			 * normally not be too bad - the space usually should be used at
-			 * some point, otherwise there wouldn't be any regular vacuums.
-			 *
-			 * Make sure these pages are in the FSM, to ensure they can be
-			 * reused. Do that by testing if there's any space recorded for
-			 * the page. If not, enter it. We do so after releasing the lock
-			 * on the heap page, the FSM is approximate, after all.
-			 */
-			UnlockReleaseBuffer(buf);
-			empty_pages++;
-
-			if (GetRecordedFreeSpace(onerel, blkno) == 0)
-			{
-				Size		freespace;
-
-				freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
-				RecordPageWithFreeSpace(onerel, blkno, freespace);
-			}
+			/* Count new pages in empty_pages, as before this refactoring */
+			empty_pages++;
+
+			/* Releases lock on buf for us: */
+			scan_new_page(onerel, buf);
 			continue;
 		}
-
-		if (PageIsEmpty(page))
+		else if (PageIsEmpty(page))
 		{
 			empty_pages++;
-			freespace = PageGetHeapFreeSpace(page);
-
-			/*
-			 * Empty pages are always all-visible and all-frozen (note that
-			 * the same is currently not true for new pages, see above).
-			 */
-			if (!PageIsAllVisible(page))
-			{
-				START_CRIT_SECTION();
-
-				/* mark buffer dirty before writing a WAL record */
-				MarkBufferDirty(buf);
-
-				/*
-				 * It's possible that another backend has extended the heap,
-				 * initialized the page, and then failed to WAL-log the page
-				 * due to an ERROR.
Since heap extension is not WAL-logged, - * recovery might try to replay our record setting the page - * all-visible and find that the page isn't initialized, which - * will cause a PANIC. To prevent that, check whether the - * page has been previously WAL-logged, and if not, do that - * now. - */ - if (RelationNeedsWAL(onerel) && - PageGetLSN(page) == InvalidXLogRecPtr) - log_newpage_buffer(buf, true); - - PageSetAllVisible(page); - visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, - vmbuffer, InvalidTransactionId, - VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN); - END_CRIT_SECTION(); - } - - UnlockReleaseBuffer(buf); - RecordPageWithFreeSpace(onerel, blkno, freespace); + /* Releases lock on buf for us (though keeps vmbuffer pin): */ + scan_empty_page(onerel, buf, vmbuffer, vacrelstats); continue; } /* - * Prune all HOT-update chains in this page. + * Step 6 for block: Do pruning. * - * We count tuples removed by the pruning step as removed by VACUUM - * (existing LP_DEAD line pointers don't count). + * Also accumulates details of remaining LP_DEAD line pointers on page + * in dead tuple list. This includes LP_DEAD line pointers that we + * ourselves just pruned, as well as existing LP_DEAD line pointers + * pruned earlier. + * + * Also handles tuple freezing -- considers freezing XIDs from all + * tuple headers left behind following pruning. */ - tups_vacuumed += heap_page_prune(onerel, buf, vistest, - InvalidTransactionId, 0, false, - &vacrelstats->latestRemovedXid, - &vacrelstats->offnum); + scan_prune_page(onerel, buf, vacrelstats, vistest, frozen, + &c, &ps, &vms, params->index_cleanup); /* - * Now scan the page to collect vacuumable items and check for tuples - * requiring freezing. + * Step 7 for block: Set up details for saving free space in FSM at + * end of loop. (Also performs extra single pass strategy steps in + * "nindexes == 0" case.) + * + * If we have any LP_DEAD items on this page (i.e. any new dead_tuples + * entries compared to just before scan_prune_page()) then the page + * will be visited again by lazy_vacuum_heap(), which will compute and + * record its post-compaction free space. If not, then we're done + * with this page, so remember its free space as-is. */ - all_visible = true; - has_dead_items = false; - nfrozen = 0; - hastup = false; - prev_dead_count = dead_tuples->num_tuples; - maxoff = PageGetMaxOffsetNumber(page); - - /* - * Note: If you change anything in the loop below, also look at - * heap_page_is_all_visible to see if that needs to be changed. - */ - for (offnum = FirstOffsetNumber; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) + savefreespace = false; + freespace = 0; + if (nindexes > 0 && ps.has_dead_items && + params->index_cleanup != VACOPT_TERNARY_DISABLED) { - ItemId itemid; - - /* - * Set the offset number so that we can display it along with any - * error that occurred while processing this tuple. 
- */ - vacrelstats->offnum = offnum; - itemid = PageGetItemId(page, offnum); - - /* Unused items require no processing, but we count 'em */ - if (!ItemIdIsUsed(itemid)) - { - nunused += 1; - continue; - } - - /* Redirect items mustn't be touched */ - if (ItemIdIsRedirected(itemid)) - { - hastup = true; /* this page won't be truncatable */ - continue; - } - - ItemPointerSet(&(tuple.t_self), blkno, offnum); - - /* - * LP_DEAD line pointers are to be vacuumed normally; but we don't - * count them in tups_vacuumed, else we'd be double-counting (at - * least in the common case where heap_page_prune() just freed up - * a non-HOT tuple). Note also that the final tups_vacuumed value - * might be very low for tables where opportunistic page pruning - * happens to occur very frequently (via heap_page_prune_opt() - * calls that free up non-HOT tuples). - */ - if (ItemIdIsDead(itemid)) - { - lazy_record_dead_tuple(dead_tuples, &(tuple.t_self)); - all_visible = false; - has_dead_items = true; - continue; - } - - Assert(ItemIdIsNormal(itemid)); - - tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); - tuple.t_len = ItemIdGetLength(itemid); - tuple.t_tableOid = RelationGetRelid(onerel); - - tupgone = false; - - /* - * The criteria for counting a tuple as live in this block need to - * match what analyze.c's acquire_sample_rows() does, otherwise - * VACUUM and ANALYZE may produce wildly different reltuples - * values, e.g. when there are many recently-dead tuples. - * - * The logic here is a bit simpler than acquire_sample_rows(), as - * VACUUM can't run inside a transaction block, which makes some - * cases impossible (e.g. in-progress insert from the same - * transaction). - */ - switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf)) - { - case HEAPTUPLE_DEAD: - - /* - * Ordinarily, DEAD tuples would have been removed by - * heap_page_prune(), but it's possible that the tuple - * state changed since heap_page_prune() looked. In - * particular an INSERT_IN_PROGRESS tuple could have - * changed to DEAD if the inserter aborted. So this - * cannot be considered an error condition. - * - * If the tuple is HOT-updated then it must only be - * removed by a prune operation; so we keep it just as if - * it were RECENTLY_DEAD. Also, if it's a heap-only - * tuple, we choose to keep it, because it'll be a lot - * cheaper to get rid of it in the next pruning pass than - * to treat it like an indexed tuple. Finally, if index - * cleanup is disabled, the second heap pass will not - * execute, and the tuple will not get removed, so we must - * treat it like any other dead tuple that we choose to - * keep. - * - * If this were to happen for a tuple that actually needed - * to be deleted, we'd be in trouble, because it'd - * possibly leave a tuple below the relation's xmin - * horizon alive. heap_prepare_freeze_tuple() is prepared - * to detect that case and abort the transaction, - * preventing corruption. - */ - if (HeapTupleIsHotUpdated(&tuple) || - HeapTupleIsHeapOnly(&tuple) || - params->index_cleanup == VACOPT_TERNARY_DISABLED) - nkeep += 1; - else - tupgone = true; /* we can delete the tuple */ - all_visible = false; - break; - case HEAPTUPLE_LIVE: - - /* - * Count it as live. Not only is this natural, but it's - * also what acquire_sample_rows() does. - */ - live_tuples += 1; - - /* - * Is the tuple definitely visible to all transactions? - * - * NB: Like with per-tuple hint bits, we can't set the - * PD_ALL_VISIBLE flag if the inserter committed - * asynchronously. See SetHintBits for more info. 
Check - * that the tuple is hinted xmin-committed because of - * that. - */ - if (all_visible) - { - TransactionId xmin; - - if (!HeapTupleHeaderXminCommitted(tuple.t_data)) - { - all_visible = false; - break; - } - - /* - * The inserter definitely committed. But is it old - * enough that everyone sees it as committed? - */ - xmin = HeapTupleHeaderGetXmin(tuple.t_data); - if (!TransactionIdPrecedes(xmin, OldestXmin)) - { - all_visible = false; - break; - } - - /* Track newest xmin on page. */ - if (TransactionIdFollows(xmin, visibility_cutoff_xid)) - visibility_cutoff_xid = xmin; - } - break; - case HEAPTUPLE_RECENTLY_DEAD: - - /* - * If tuple is recently deleted then we must not remove it - * from relation. - */ - nkeep += 1; - all_visible = false; - break; - case HEAPTUPLE_INSERT_IN_PROGRESS: - - /* - * This is an expected case during concurrent vacuum. - * - * We do not count these rows as live, because we expect - * the inserting transaction to update the counters at - * commit, and we assume that will happen only after we - * report our results. This assumption is a bit shaky, - * but it is what acquire_sample_rows() does, so be - * consistent. - */ - all_visible = false; - break; - case HEAPTUPLE_DELETE_IN_PROGRESS: - /* This is an expected case during concurrent vacuum */ - all_visible = false; - - /* - * Count such rows as live. As above, we assume the - * deleting transaction will commit and update the - * counters after we report. - */ - live_tuples += 1; - break; - default: - elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); - break; - } - - if (tupgone) - { - lazy_record_dead_tuple(dead_tuples, &(tuple.t_self)); - HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data, - &vacrelstats->latestRemovedXid); - tups_vacuumed += 1; - has_dead_items = true; - } - else - { - bool tuple_totally_frozen; - - num_tuples += 1; - hastup = true; - - /* - * Each non-removable tuple must be checked to see if it needs - * freezing. Note we already have exclusive buffer lock. - */ - if (heap_prepare_freeze_tuple(tuple.t_data, - relfrozenxid, relminmxid, - FreezeLimit, MultiXactCutoff, - &frozen[nfrozen], - &tuple_totally_frozen)) - frozen[nfrozen++].offset = offnum; - - if (!tuple_totally_frozen) - all_frozen = false; - } - } /* scan along page */ - - /* - * Clear the offset information once we have processed all the tuples - * on the page. - */ - vacrelstats->offnum = InvalidOffsetNumber; - - /* - * If we froze any tuples, mark the buffer dirty, and write a WAL - * record recording the changes. We must log the changes to be - * crash-safe against future truncation of CLOG. - */ - if (nfrozen > 0) + /* Wait until lazy_vacuum_heap() to save free space */ + } + else { - START_CRIT_SECTION(); - - MarkBufferDirty(buf); - - /* execute collected freezes */ - for (i = 0; i < nfrozen; i++) - { - ItemId itemid; - HeapTupleHeader htup; - - itemid = PageGetItemId(page, frozen[i].offset); - htup = (HeapTupleHeader) PageGetItem(page, itemid); - - heap_execute_freeze_tuple(htup, &frozen[i]); - } - - /* Now WAL-log freezing if necessary */ - if (RelationNeedsWAL(onerel)) - { - XLogRecPtr recptr; - - recptr = log_heap_freeze(onerel, buf, FreezeLimit, - frozen, nfrozen); - PageSetLSN(page, recptr); - } - - END_CRIT_SECTION(); + /* + * Will never reach lazy_vacuum_heap() (or will, but won't reach + * this specific page) + */ + savefreespace = true; + freespace = PageGetHeapFreeSpace(page); } - /* - * If there are no indexes we can vacuum the page right now instead of - * doing a second scan. 
Also we don't do that but forget dead tuples
-		 * when index cleanup is disabled.
-		 */
-		if (!vacrelstats->useindex && dead_tuples->num_tuples > 0)
+		if (nindexes == 0 && ps.has_dead_items)
 		{
-			if (nindexes == 0)
-			{
-				/* Remove tuples from heap if the table has no index */
-				lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
-				vacuumed_pages++;
-				has_dead_items = false;
-			}
-			else
-			{
-				/*
-				 * Here, we have indexes but index cleanup is disabled.
-				 * Instead of vacuuming the dead tuples on the heap, we just
-				 * forget them.
-				 *
-				 * Note that vacrelstats->dead_tuples could have tuples which
-				 * became dead after HOT-pruning but are not marked dead yet.
-				 * We do not process them because it's a very rare condition,
-				 * and the next vacuum will process them anyway.
-				 */
-				Assert(params->index_cleanup == VACOPT_TERNARY_DISABLED);
-			}
+			Assert(dead_tuples->num_tuples > 0);
 
 			/*
-			 * Forget the now-vacuumed tuples, and press on, but be careful
-			 * not to reset latestRemovedXid since we want that value to be
-			 * valid.
+			 * One-pass strategy (no indexes) case.
+			 *
+			 * Mark LP_DEAD item pointers LP_UNUSED now, since there won't be
+			 * a second pass in lazy_vacuum_heap().
 			 */
+			lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
+			vacuumed_pages++;
+
+			/* This won't have changed: */
+			Assert(savefreespace && freespace == PageGetHeapFreeSpace(page));
+
+			/*
+			 * Make sure scan_setvmbit_page() won't skip setting the VM bit
+			 * just because of now-vacuumed LP_DEAD items:
+			 */
+			ps.has_dead_items = false;
+
+			/* Forget the now-vacuumed tuples */
 			dead_tuples->num_tuples = 0;
 
 			/*
@@ -1584,109 +1870,27 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			}
 		}
 
-		freespace = PageGetHeapFreeSpace(page);
-
-		/* mark page all-visible, if appropriate */
-		if (all_visible && !all_visible_according_to_vm)
-		{
-			uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
-
-			if (all_frozen)
-				flags |= VISIBILITYMAP_ALL_FROZEN;
-
-			/*
-			 * It should never be the case that the visibility map page is set
-			 * while the page-level bit is clear, but the reverse is allowed
-			 * (if checksums are not enabled).  Regardless, set both bits so
-			 * that we get back in sync.
-			 *
-			 * NB: If the heap page is all-visible but the VM bit is not set,
-			 * we don't need to dirty the heap page.  However, if checksums
-			 * are enabled, we do need to make sure that the heap page is
-			 * dirtied before passing it to visibilitymap_set(), because it
-			 * may be logged.  Given that this situation should only happen in
-			 * rare cases after a crash, it is not worth optimizing.
-			 */
-			PageSetAllVisible(page);
-			MarkBufferDirty(buf);
-			visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
-							  vmbuffer, visibility_cutoff_xid, flags);
-		}
+		/* One-pass strategy had better have no dead tuples by now: */
+		Assert(nindexes > 0 || dead_tuples->num_tuples == 0);
 
 		/*
-		 * As of PostgreSQL 9.2, the visibility map bit should never be set if
-		 * the page-level bit is clear.  However, it's possible that the bit
-		 * got cleared after we checked it and before we took the buffer
-		 * content lock, so we must recheck before jumping to the conclusion
-		 * that something bad has happened.
+ * Step 8 for block: Handle setting visibility map bit as appropriate */ - else if (all_visible_according_to_vm && !PageIsAllVisible(page) - && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) - { - elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", - vacrelstats->relname, blkno); - visibilitymap_clear(onerel, blkno, vmbuffer, - VISIBILITYMAP_VALID_BITS); - } + scan_setvmbit_page(onerel, buf, vmbuffer, &ps, &vms); /* - * It's possible for the value returned by - * GetOldestNonRemovableTransactionId() to move backwards, so it's not - * wrong for us to see tuples that appear to not be visible to - * everyone yet, while PD_ALL_VISIBLE is already set. The real safe - * xmin value never moves backwards, but - * GetOldestNonRemovableTransactionId() is conservative and sometimes - * returns a value that's unnecessarily small, so if we see that - * contradiction it just means that the tuples that we think are not - * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag - * is correct. - * - * There should never be dead tuples on a page with PD_ALL_VISIBLE - * set, however. + * Step 9 for block: drop super-exclusive lock, finalize page by + * recording its free space in the FSM as appropriate */ - else if (PageIsAllVisible(page) && has_dead_items) - { - elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u", - vacrelstats->relname, blkno); - PageClearAllVisible(page); - MarkBufferDirty(buf); - visibilitymap_clear(onerel, blkno, vmbuffer, - VISIBILITYMAP_VALID_BITS); - } - - /* - * If the all-visible page is all-frozen but not marked as such yet, - * mark it as all-frozen. Note that all_frozen is only valid if - * all_visible is true, so we must check both. - */ - else if (all_visible_according_to_vm && all_visible && all_frozen && - !VM_ALL_FROZEN(onerel, blkno, &vmbuffer)) - { - /* - * We can pass InvalidTransactionId as the cutoff XID here, - * because setting the all-frozen bit doesn't cause recovery - * conflicts. - */ - visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, - vmbuffer, InvalidTransactionId, - VISIBILITYMAP_ALL_FROZEN); - } UnlockReleaseBuffer(buf); - /* Remember the location of the last page with nonremovable tuples */ - if (hastup) + if (ps.hastup) vacrelstats->nonempty_pages = blkno + 1; - - /* - * If we remembered any tuples for deletion, then the page will be - * visited again by lazy_vacuum_heap, which will compute and record - * its post-compaction free space. If not, then we're done with this - * page, so remember its free space as-is. (This path will always be - * taken if there are no indexes.) - */ - if (dead_tuples->num_tuples == prev_dead_count) + if (savefreespace) RecordPageWithFreeSpace(onerel, blkno, freespace); + + /* Finished all steps for block by here (at the latest) */ } /* report that everything is scanned and vacuumed */ @@ -1698,14 +1902,14 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, pfree(frozen); /* save stats for use later */ - vacrelstats->tuples_deleted = tups_vacuumed; - vacrelstats->new_dead_tuples = nkeep; + vacrelstats->tuples_deleted = c.tups_vacuumed; + vacrelstats->new_dead_tuples = c.nkeep; /* now we can compute the new value for pg_class.reltuples */ vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel, nblocks, vacrelstats->tupcount_pages, - live_tuples); + c.live_tuples); /* * Also compute the total number of surviving heap entries. 
In the @@ -1724,20 +1928,14 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, } /* If any tuples need to be deleted, perform final vacuum cycle */ - /* XXX put a threshold on min number of tuples here? */ + Assert(nindexes > 0 || dead_tuples->num_tuples == 0); if (dead_tuples->num_tuples > 0) - { - /* Work on all the indexes, and then the heap */ - lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats, - lps, nindexes); - - /* Remove tuples from heap */ - lazy_vacuum_heap(onerel, vacrelstats); - } + two_pass_strategy(onerel, vacrelstats, Irel, indstats, nindexes, + lps, params->index_cleanup); /* * Vacuum the remainder of the Free Space Map. We must do this whether or - * not there were indexes. + * not there were indexes, and whether or not we skipped index vacuuming. */ if (blkno > next_fsm_block_to_vacuum) FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno); @@ -1745,8 +1943,13 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* report all blocks vacuumed */ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); - /* Do post-vacuum cleanup */ - if (vacrelstats->useindex) + /* + * Do post-vacuum cleanup. + * + * Note that post-vacuum cleanup does not take place with + * INDEX_CLEANUP=OFF. + */ + if (nindexes > 0 && params->index_cleanup != VACOPT_TERNARY_DISABLED) lazy_cleanup_all_indexes(Irel, indstats, vacrelstats, lps, nindexes); /* @@ -1756,23 +1959,29 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, if (ParallelVacuumIsActive(lps)) end_parallel_vacuum(indstats, lps, nindexes); - /* Update index statistics */ - if (vacrelstats->useindex) + /* + * Update index statistics. + * + * Note that updating the statistics does not take place with + * INDEX_CLEANUP=OFF. + */ + if (nindexes > 0 && params->index_cleanup != VACOPT_TERNARY_DISABLED) update_index_statistics(Irel, indstats, nindexes); - /* If no indexes, make log report that lazy_vacuum_heap would've made */ - if (vacuumed_pages) + /* If no indexes, make log report that two_pass_strategy() would've made */ + Assert(nindexes == 0 || vacuumed_pages == 0); + if (nindexes == 0) ereport(elevel, (errmsg("\"%s\": removed %.0f row versions in %u pages", vacrelstats->relname, - tups_vacuumed, vacuumed_pages))); + vacrelstats->tuples_deleted, vacuumed_pages))); initStringInfo(&buf); appendStringInfo(&buf, _("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"), - nkeep, OldestXmin); + c.nkeep, OldestXmin); appendStringInfo(&buf, _("There were %.0f unused item identifiers.\n"), - nunused); + c.nunused); appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ", "Skipped %u pages due to buffer pins, ", vacrelstats->pinskipped_pages), @@ -1788,18 +1997,76 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0)); ereport(elevel, - (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages", + (errmsg("\"%s\": newly pruned %.0f items, found %.0f nonremovable items in %u out of %u pages", vacrelstats->relname, - tups_vacuumed, num_tuples, + c.tups_vacuumed, c.num_tuples, vacrelstats->scanned_pages, nblocks), errdetail_internal("%s", buf.data))); pfree(buf.data); } /* - * lazy_vacuum_all_indexes() -- vacuum all indexes of relation. + * Remove the collected garbage tuples from the table and its indexes. * - * We process the indexes serially unless we are doing parallel vacuum. 
+ * We may be required to skip index vacuuming by the INDEX_CLEANUP reloption.
+ */
+static void
+two_pass_strategy(Relation onerel, LVRelStats *vacrelstats,
+				  Relation *Irel, IndexBulkDeleteResult **indstats, int nindexes,
+				  LVParallelState *lps, VacOptTernaryValue index_cleanup)
+{
+	bool		skipping;
+
+	/* Should not end up here with no indexes */
+	Assert(nindexes > 0);
+	Assert(!IsParallelWorker());
+
+	/* Check whether or not to do index vacuum and heap vacuum */
+	if (index_cleanup == VACOPT_TERNARY_DISABLED)
+		skipping = true;
+	else
+		skipping = false;
+
+	if (!skipping)
+	{
+		/* Okay, we're going to do index vacuuming */
+		lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats, lps,
+								nindexes);
+
+		/* Remove tuples from heap */
+		lazy_vacuum_heap(onerel, vacrelstats);
+	}
+	else
+	{
+		/*
+		 * Skipped index vacuuming.  Make the log report that
+		 * lazy_vacuum_heap() would've made.
+		 *
+		 * Don't report tups_vacuumed here because it will be zero in the
+		 * common case where there are no newly pruned LP_DEAD items for this
+		 * VACUUM.  This is roughly consistent with lazy_vacuum_heap(), and
+		 * with the similar "nindexes == 0" ereport() at the end of
+		 * lazy_scan_heap().  Note, however, that dead_tuples->num_tuples is
+		 * the number of LP_DEAD items collected (which could have been left
+		 * behind by us or by an earlier pruning operation), not the number
+		 * of tuples removed by this VACUUM.
+		 */
+		ereport(elevel,
+				(errmsg("\"%s\": INDEX_CLEANUP off forced VACUUM to leave behind %d pruned items",
+						vacrelstats->relname,
+						vacrelstats->dead_tuples->num_tuples)));
+	}
+
+	/*
+	 * Forget the now-vacuumed tuples, and press on, but be careful not to
+	 * reset latestRemovedXid since we want that value to be valid.
+	 */
+	vacrelstats->dead_tuples->num_tuples = 0;
+}
+
+/*
+ * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
+ *
+ * Should only be called through two_pass_strategy()
+ */
 static void
 lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
@@ -1848,17 +2115,14 @@ lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
 			vacrelstats->num_index_scans);
 }
 
-
 /*
- * lazy_vacuum_heap() -- second pass over the heap
+ * lazy_vacuum_heap() -- second pass over the heap for the two-pass strategy
  *
  * This routine marks dead tuples as unused and compacts out free
  * space on their pages.  Pages not having dead tuples recorded from
  * lazy_scan_heap are not visited at all.
  *
- * Note: the reason for doing this as a second pass is we cannot remove
- * the tuples until we've removed their index entries, and we want to
- * process index entry removal in batches as large as possible.
+ * Should only be called through two_pass_strategy()
  */
 static void
 lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
@@ -2867,14 +3131,14 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
  * Return the maximum number of dead tuples we can record.
  */
 static long
-compute_max_dead_tuples(BlockNumber relblocks, bool useindex)
+compute_max_dead_tuples(BlockNumber relblocks, bool hasindex)
 {
 	long		maxtuples;
 	int			vac_work_mem = IsAutoVacuumWorkerProcess() &&
 	autovacuum_work_mem != -1 ?
 	autovacuum_work_mem : maintenance_work_mem;
 
-	if (useindex)
+	if (hasindex)
 	{
 		maxtuples = MAXDEADTUPLES(vac_work_mem * 1024L);
 		maxtuples = Min(maxtuples, INT_MAX);
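For reference, the memory-based cap that compute_max_dead_tuples() applies works out roughly as in this standalone sketch (not part of the patch; simplified to ignore the INT_MAX clamp and the MaxHeapTuplesPerPage floor that the real function also applies):

    #include <stdio.h>

    /*
     * The real MAXDEADTUPLES macro divides the usable memory by
     * sizeof(ItemPointerData) (6 bytes) after subtracting the array header;
     * 291 is MaxHeapTuplesPerPage for 8kB pages.
     */
    #define TOY_ITEMPOINTER_SIZE 6
    #define TOY_MAX_HEAP_TUPLES_PER_PAGE 291

    static long
    toy_compute_max_dead_tuples(unsigned long relblocks, int hasindex,
                                unsigned long work_mem_kb)
    {
        long        maxtuples;

        if (hasindex)
        {
            /* cap by memory, but no more TIDs than the table can hold */
            maxtuples = (long) (work_mem_kb * 1024 / TOY_ITEMPOINTER_SIZE);
            if (maxtuples > (long) relblocks * TOY_MAX_HEAP_TUPLES_PER_PAGE)
                maxtuples = (long) relblocks * TOY_MAX_HEAP_TUPLES_PER_PAGE;
        }
        else
        {
            /* one-pass strategy only ever holds one page's worth of TIDs */
            maxtuples = TOY_MAX_HEAP_TUPLES_PER_PAGE;
        }
        return maxtuples;
    }

    int
    main(void)
    {
        printf("with index: %ld TIDs\n", toy_compute_max_dead_tuples(1000, 1, 65536));
        printf("no index:   %ld TIDs\n", toy_compute_max_dead_tuples(1000, 0, 65536));
        return 0;
    }

This is why the hasindex flag matters: without indexes there is no batching benefit, so almost no dead-TID memory is needed.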
@@ -2899,12 +3163,12 @@
  * See the comments at the head of this file for rationale.
  */
 static void
-lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
+lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks, bool hasindex)
 {
 	LVDeadTuples *dead_tuples = NULL;
 	long		maxtuples;
 
-	maxtuples = compute_max_dead_tuples(relblocks, vacrelstats->useindex);
+	maxtuples = compute_max_dead_tuples(relblocks, hasindex);
 
 	dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
 	dead_tuples->num_tuples = 0;
@@ -3024,7 +3288,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 
 	/*
 	 * This is a stripped down version of the line pointer scan in
-	 * lazy_scan_heap(). So if you change anything here, also check that code.
+	 * scan_prune_page().  So if you change anything here, also check that code.
 	 */
 	maxoff = PageGetMaxOffsetNumber(page);
 	for (offnum = FirstOffsetNumber;
@@ -3070,7 +3334,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 		{
 			TransactionId xmin;
 
-			/* Check comments in lazy_scan_heap. */
+			/* Check comments in scan_prune_page(). */
 			if (!HeapTupleHeaderXminCommitted(tuple.t_data))
 			{
 				all_visible = false;
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index dd0c124e62..3ac8df7d07 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -756,10 +756,10 @@ tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
 		return false;			/* all-visible implies live */
 
 	/*
-	 * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
-	 * all-visible unless every tuple is hinted committed. However, those hint
-	 * bits could be lost after a crash, so we can't be certain that they'll
-	 * be set here. So just check the xmin.
+	 * Neither lazy_scan_heap/scan_prune_page nor heap_page_is_all_visible
+	 * will mark a page all-visible unless every tuple is hinted committed.
+	 * However, those hint bits could be lost after a crash, so we can't be
+	 * certain that they'll be set here.  So just check the xmin.
 	 */
 	xmin = HeapTupleHeaderGetXmin(tup->t_data);
 
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 1fe193bb25..34670c6264 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -58,8 +58,8 @@ typedef struct output_type
  * and approximate tuple_len on that basis. For the others, we count
  * the exact number of dead tuples etc.
  *
- * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
- * we do not try to avoid skipping single pages.
+ * This scan is loosely based on vacuumlazy.c:lazy_scan_heap/scan_prune_page,
+ * but we do not try to avoid skipping single pages.
  */
 static void
 statapprox_heap(Relation rel, output_type *stat)
@@ -126,8 +126,8 @@ statapprox_heap(Relation rel, output_type *stat)
 
 	/*
 	 * Look at each tuple on the page and decide whether it's live or
-	 * dead, then count it and its size. Unlike lazy_scan_heap, we can
-	 * afford to ignore problems and special cases.
+	 * dead, then count it and its size.  Unlike lazy_scan_heap and
+	 * scan_prune_page, we can afford to ignore problems and special cases.
 	 */
 	maxoff = PageGetMaxOffsetNumber(page);
 
-- 
2.27.0
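As a closing illustration of the two-pass strategy this patch centralizes in two_pass_strategy(): index vacuuming checks each index entry's heap TID against the dead-tuple array built in the first heap pass, via binary search over TIDs kept in heap order. A minimal standalone model (not PostgreSQL code; the Toy* names are hypothetical stand-ins for ItemPointerData and lazy_tid_reaped()):

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* toy TID: (block, offset), kept in the order VACUUM collects them */
    typedef struct ToyTid
    {
        unsigned    blkno;
        unsigned    offnum;
    } ToyTid;

    static int
    toy_tid_cmp(const void *a, const void *b)
    {
        const ToyTid *ta = (const ToyTid *) a;
        const ToyTid *tb = (const ToyTid *) b;

        if (ta->blkno != tb->blkno)
            return (ta->blkno < tb->blkno) ? -1 : 1;
        if (ta->offnum != tb->offnum)
            return (ta->offnum < tb->offnum) ? -1 : 1;
        return 0;
    }

    /* models lazy_tid_reaped(): is this index entry's heap TID dead? */
    static bool
    toy_tid_reaped(const ToyTid *dead, size_t ndead, ToyTid tid)
    {
        return bsearch(&tid, dead, ndead, sizeof(ToyTid), toy_tid_cmp) != NULL;
    }

    int
    main(void)
    {
        /* collected in block order during the first heap pass, so already sorted */
        ToyTid      dead[] = {{1, 3}, {1, 7}, {4, 2}, {9, 5}};
        ToyTid      probe = {4, 2};

        printf("index entry (4,2) reaped? %d\n",
               toy_tid_reaped(dead, 4, probe));
        return 0;
    }

Because the first pass visits blocks in order, the array is sorted for free, which is what makes the membership test cheap enough to run once per index entry.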