From 47af1ca65cf55ca876869b43bff47f9d43f0750e Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Fri, 8 Mar 2024 17:32:19 +0200
Subject: [PATCH v8 2/3] Confine vacuum skip logic to lazy_scan_skip()

Rename lazy_scan_skip() to heap_vac_scan_next_block() and move more
code into the function, so that the caller doesn't need to know about
ranges or skipping anymore. heap_vac_scan_next_block() returns the next
block to process, and the logic for determining that block is all
within the function. This makes the skipping logic easier to
understand, as it's all in the same function, and makes the calling
code easier to understand as it's less cluttered.

The state variables needed to manage the skipping logic are moved to
LVRelState.

heap_vac_scan_next_block() now manages its own VM buffer separately
from the caller's vmbuffer variable. The caller's vmbuffer holds the VM
page for the current block it's processing, while
heap_vac_scan_next_block() keeps a pin on the VM page for the next
unskippable block. Most of the time they are the same, so we hold two
pins on the same buffer, but it's more convenient to manage them
separately.

This refactoring will also help future patches to switch to using a
streaming read interface, and eventually AIO
(https://postgr.es/m/CA%2BhUKGJkOiOCa%2Bmag4BF%2BzHo7qo%3Do9CFheB8%3Dg6uT5TUm2gkvA%40mail.gmail.com)

Author: Melanie Plageman, with some changes by me
Discussion: https://postgr.es/m/CAAKRu_Yf3gvXGcCnqqfoq0Q8LX8UM-e-qbm_B1LeZh60f8WhWA%40mail.gmail.com
---
 src/backend/access/heap/vacuumlazy.c | 256 +++++++++++++++------------
 1 file changed, 141 insertions(+), 115 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index ac55ebd2ae5..0aa08762015 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -204,6 +204,12 @@ typedef struct LVRelState
 	int64		live_tuples;	/* # live tuples remaining */
 	int64		recently_dead_tuples;	/* # dead, but not yet removable */
 	int64		missed_dead_tuples; /* # removable, but not removed */
+
+	/* State maintained by heap_vac_scan_next_block() */
+	BlockNumber current_block;	/* last block returned */
+	BlockNumber next_unskippable_block; /* next unskippable block */
+	bool		next_unskippable_allvis;	/* its visibility status */
+	Buffer		next_unskippable_vmbuffer;	/* buffer containing its VM bit */
 } LVRelState;
 
 /* Struct for saving and restoring vacuum error information. */
@@ -217,10 +223,8 @@ typedef struct LVSavedErrInfo
 
 /* non-export function prototypes */
 static void lazy_scan_heap(LVRelState *vacrel);
-static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer,
-								  BlockNumber next_block,
-								  bool *next_unskippable_allvis,
-								  bool *skipping_current_range);
+static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
+									 bool *all_visible_according_to_vm);
 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
 								   BlockNumber blkno, Page page,
 								   bool sharelock, Buffer vmbuffer);
@@ -803,12 +807,11 @@ lazy_scan_heap(LVRelState *vacrel)
 {
 	BlockNumber rel_pages = vacrel->rel_pages,
 				blkno,
-				next_unskippable_block,
 				next_fsm_block_to_vacuum = 0;
+	bool		all_visible_according_to_vm;
+
 	VacDeadItems *dead_items = vacrel->dead_items;
 	Buffer		vmbuffer = InvalidBuffer;
-	bool		next_unskippable_allvis,
-				skipping_current_range;
 	const int	initprog_index[] = {
 		PROGRESS_VACUUM_PHASE,
 		PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
@@ -822,44 +825,19 @@ lazy_scan_heap(LVRelState *vacrel)
 	initprog_val[2] = dead_items->max_items;
 	pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
 
-	/* Set up an initial range of skippable blocks using the visibility map */
-	next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer, 0,
-											&next_unskippable_allvis,
-											&skipping_current_range);
-	for (blkno = 0; blkno < rel_pages; blkno++)
+	/* Initialize for the first heap_vac_scan_next_block() call */
+	vacrel->current_block = InvalidBlockNumber;
+	vacrel->next_unskippable_block = InvalidBlockNumber;
+	vacrel->next_unskippable_allvis = false;
+	vacrel->next_unskippable_vmbuffer = InvalidBuffer;
+
+	while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
 	{
 		Buffer		buf;
 		Page		page;
-		bool		all_visible_according_to_vm;
 		bool		has_lpdead_items;
 		bool		got_cleanup_lock = false;
 
-		if (blkno == next_unskippable_block)
-		{
-			/*
-			 * Can't skip this page safely.  Must scan the page.  But
-			 * determine the next skippable range after the page first.
-			 */
-			all_visible_according_to_vm = next_unskippable_allvis;
-			next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer,
-													blkno + 1,
-													&next_unskippable_allvis,
-													&skipping_current_range);
-
-			Assert(next_unskippable_block >= blkno + 1);
-		}
-		else
-		{
-			/* Last page always scanned (may need to set nonempty_pages) */
-			Assert(blkno < rel_pages - 1);
-
-			if (skipping_current_range)
-				continue;
-
-			/* Current range is too small to skip -- just scan the page */
-			all_visible_according_to_vm = true;
-		}
-
 		vacrel->scanned_pages++;
 
 		/* Report as block scanned, update error traceback information */
@@ -1077,18 +1055,22 @@ lazy_scan_heap(LVRelState *vacrel)
 }
 
 /*
- * lazy_scan_skip() -- set up range of skippable blocks using visibility map.
+ * heap_vac_scan_next_block() -- get next block for vacuum to process
  *
- * lazy_scan_heap() calls here every time it needs to set up a new range of
- * blocks to skip via the visibility map.  Caller passes the next block in
- * line.  We return a next_unskippable_block for this range.  When there are
- * no skippable blocks we just return caller's next_block.  The all-visible
- * status of the returned block is set in *next_unskippable_allvis for caller,
- * too.  Block usually won't be all-visible (since it's unskippable), but it
- * can be during aggressive VACUUMs (as well as in certain edge cases).
+ * lazy_scan_heap() calls here every time it needs to get the next block to
+ * prune and vacuum.  The function uses the visibility map, vacuum options,
+ * and various thresholds to skip blocks which do not need to be processed and
+ * sets blkno to the next block that actually needs to be processed.
  *
- * Sets *skipping_current_range to indicate if caller should skip this range.
- * Costs and benefits drive our decision.  Very small ranges won't be skipped.
+ * The block number and visibility status of the next block to process are set
+ * in *blkno and *all_visible_according_to_vm.  The return value is false if
+ * there are no further blocks to process.
+ *
+ * vacrel is an in/out parameter here; vacuum options and information about
+ * the relation are read, and vacrel->skippedallvis is set to ensure we don't
+ * advance relfrozenxid when we have skipped vacuuming all-visible blocks.  It
+ * also holds information about the next unskippable block, as bookkeeping for
+ * this function.
  *
  * Note: our opinion of which blocks can be skipped can go stale immediately.
  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
@@ -1098,88 +1080,132 @@
  * older XIDs/MXIDs.  The vacrel->skippedallvis flag will be set here when the
  * choice to skip such a range is actually made, making everything safe.)
  */
-static BlockNumber
-lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
-			   bool *next_unskippable_allvis, bool *skipping_current_range)
+static bool
+heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
+						 bool *all_visible_according_to_vm)
 {
-	BlockNumber rel_pages = vacrel->rel_pages,
-				next_unskippable_block = next_block,
-				nskippable_blocks = 0;
+	BlockNumber next_block;
 	bool		skipsallvis = false;
+	BlockNumber rel_pages = vacrel->rel_pages;
+	BlockNumber next_unskippable_block;
+	bool		next_unskippable_allvis;
+	Buffer		next_unskippable_vmbuffer;
 
-	*next_unskippable_allvis = true;
-	while (next_unskippable_block < rel_pages)
-	{
-		uint8		mapbits = visibilitymap_get_status(vacrel->rel,
-													   next_unskippable_block,
-													   vmbuffer);
+	/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
+	next_block = vacrel->current_block + 1;
 
-		if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
+	/* Have we reached the end of the relation? */
+	if (next_block >= rel_pages)
+	{
+		if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
 		{
-			Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
-			*next_unskippable_allvis = false;
-			break;
+			ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
+			vacrel->next_unskippable_vmbuffer = InvalidBuffer;
		}
+		*blkno = rel_pages;
+		return false;
+	}
 
+	next_unskippable_block = vacrel->next_unskippable_block;
+	next_unskippable_allvis = vacrel->next_unskippable_allvis;
+	if (next_unskippable_block == InvalidBlockNumber ||
+		next_block > next_unskippable_block)
+	{
 		/*
-		 * Caller must scan the last page to determine whether it has tuples
-		 * (caller must have the opportunity to set vacrel->nonempty_pages).
-		 * This rule avoids having lazy_truncate_heap() take access-exclusive
-		 * lock on rel to attempt a truncation that fails anyway, just because
-		 * there are tuples on the last page (it is likely that there will be
-		 * tuples on other nearby pages as well, but those can be skipped).
-		 *
-		 * Implement this by always treating the last block as unsafe to skip.
+		 * Find the next unskippable block using the visibility map.
 		 */
-		if (next_unskippable_block == rel_pages - 1)
-			break;
+		next_unskippable_block = next_block;
+		next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
+		for (;;)
+		{
+			uint8		mapbits = visibilitymap_get_status(vacrel->rel,
+														   next_unskippable_block,
+														   &next_unskippable_vmbuffer);
 
-		/* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
-		if (!vacrel->skipwithvm)
-			break;
+			next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
 
-		/*
-		 * Aggressive VACUUM caller can't skip pages just because they are
-		 * all-visible.  They may still skip all-frozen pages, which can't
-		 * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
-		 */
-		if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
-		{
-			if (vacrel->aggressive)
+			/*
+			 * A block is unskippable if it is not all visible according to
+			 * the visibility map.
+			 */
+			if (!next_unskippable_allvis)
+			{
+				Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
+				break;
+			}
+
+			/*
+			 * Caller must scan the last page to determine whether it has
+			 * tuples (caller must have the opportunity to set
+			 * vacrel->nonempty_pages).  This rule avoids having
+			 * lazy_truncate_heap() take access-exclusive lock on rel to
+			 * attempt a truncation that fails anyway, just because there are
+			 * tuples on the last page (it is likely that there will be tuples
+			 * on other nearby pages as well, but those can be skipped).
+			 *
+			 * Implement this by always treating the last block as unsafe to
+			 * skip.
+			 */
+			if (next_unskippable_block == rel_pages - 1)
+				break;
+
+			/* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
+			if (!vacrel->skipwithvm)
 				break;
 
 			/*
-			 * All-visible block is safe to skip in non-aggressive case.  But
-			 * remember that the final range contains such a block for later.
+			 * Aggressive VACUUM caller can't skip pages just because they are
+			 * all-visible.  They may still skip all-frozen pages, which can't
+			 * contain XIDs < OldestXmin (XIDs that aren't already frozen by
+			 * now).
 			 */
-			skipsallvis = true;
+			if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
+			{
+				if (vacrel->aggressive)
+					break;
+
+				/*
+				 * All-visible block is safe to skip in non-aggressive case.
+				 * But remember that the final range contains such a block for
+				 * later.
+				 */
+				skipsallvis = true;
+			}
+
+			vacuum_delay_point();
+			next_unskippable_block++;
 		}
+
+		/* write the local variables back to vacrel */
+		vacrel->next_unskippable_block = next_unskippable_block;
+		vacrel->next_unskippable_allvis = next_unskippable_allvis;
+		vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
 
-		vacuum_delay_point();
-		next_unskippable_block++;
-		nskippable_blocks++;
+		/*
+		 * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
+		 * pages.  Since we're reading sequentially, the OS should be doing
+		 * readahead for us, so there's no gain in skipping a page now and
+		 * then.  Skipping such a range might even discourage sequential
+		 * detection.
+		 *
+		 * This test also enables more frequent relfrozenxid advancement
+		 * during non-aggressive VACUUMs.  If the range has any all-visible
+		 * pages then skipping makes updating relfrozenxid unsafe, which is a
+		 * real downside.
+		 */
+		if (next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
+		{
+			next_block = next_unskippable_block;
+			if (skipsallvis)
+				vacrel->skippedallvis = true;
+		}
 	}
 
-	/*
-	 * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
-	 * pages.  Since we're reading sequentially, the OS should be doing
-	 * readahead for us, so there's no gain in skipping a page now and then.
-	 * Skipping such a range might even discourage sequential detection.
-	 *
-	 * This test also enables more frequent relfrozenxid advancement during
-	 * non-aggressive VACUUMs.  If the range has any all-visible pages then
-	 * skipping makes updating relfrozenxid unsafe, which is a real downside.
-	 */
-	if (nskippable_blocks < SKIP_PAGES_THRESHOLD)
-		*skipping_current_range = false;
+	if (next_block == next_unskippable_block)
+		*all_visible_according_to_vm = next_unskippable_allvis;
 	else
-	{
-		*skipping_current_range = true;
-		if (skipsallvis)
-			vacrel->skippedallvis = true;
-	}
-
-	return next_unskippable_block;
+		*all_visible_according_to_vm = true;
+
+	*blkno = vacrel->current_block = next_block;
+
+	return true;
 }
 
 /*
@@ -1752,8 +1778,8 @@ lazy_scan_prune(LVRelState *vacrel,
 
 	/*
 	 * Handle setting visibility map bit based on information from the VM (as
-	 * of last lazy_scan_skip() call), and from all_visible and all_frozen
-	 * variables
+	 * of last heap_vac_scan_next_block() call), and from all_visible and
+	 * all_frozen variables
 	 */
 	if (!all_visible_according_to_vm && all_visible)
 	{
@@ -1788,8 +1814,8 @@ lazy_scan_prune(LVRelState *vacrel,
 	/*
 	 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
 	 * page-level bit is clear.  However, it's possible that the bit got
-	 * cleared after lazy_scan_skip() was called, so we must recheck with
-	 * buffer lock before concluding that the VM is corrupt.
+	 * cleared after heap_vac_scan_next_block() was called, so we must recheck
+	 * with buffer lock before concluding that the VM is corrupt.
 	 */
 	else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
 			 visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
-- 
2.39.2
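P.S. For readers following the thread without the tree handy, here is a
minimal standalone sketch (plain C, compilable on its own; the names
ScanState, scan_next_block, INVALID_BLOCK, and the toy visibility-map
array are illustrative inventions, not PostgreSQL code) of the iterator
pattern this patch introduces: all skip bookkeeping lives in the
iterator's state struct, the caller degenerates to a plain while loop,
and all-visible runs shorter than SKIP_PAGES_THRESHOLD are still read so
OS readahead stays effective. The real function additionally manages the
VM buffer pin, the aggressive/skipwithvm options, and the skippedallvis
flag, all omitted here for brevity.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define INVALID_BLOCK         UINT_MAX   /* stand-in for InvalidBlockNumber */
#define SKIP_PAGES_THRESHOLD  32         /* skip only runs at least this long */

typedef struct ScanState
{
    unsigned    rel_pages;              /* total blocks in the "relation" */
    unsigned    current_block;          /* last block returned */
    unsigned    next_unskippable_block; /* cached lookahead position */
    const bool *all_visible;            /* stand-in for the visibility map */
} ScanState;

/*
 * Set *blkno to the next block to process, or return false when the
 * relation is exhausted.  All skipping state lives in ScanState, so the
 * caller needs no knowledge of ranges or thresholds.
 */
static bool
scan_next_block(ScanState *st, unsigned *blkno)
{
    /* relies on INVALID_BLOCK + 1 wrapping to 0 on the first call */
    unsigned    next_block = st->current_block + 1;

    if (next_block >= st->rel_pages)
        return false;           /* no further blocks to process */

    if (st->next_unskippable_block == INVALID_BLOCK ||
        next_block > st->next_unskippable_block)
    {
        unsigned    b = next_block;

        /*
         * Walk forward to the next unskippable block; the last block is
         * always unskippable (the truncation check must see it).
         */
        while (b < st->rel_pages - 1 && st->all_visible[b])
            b++;
        st->next_unskippable_block = b;

        /* Only jump over runs long enough to beat OS readahead. */
        if (b - next_block >= SKIP_PAGES_THRESHOLD)
            next_block = b;
    }

    st->current_block = next_block;
    *blkno = next_block;
    return true;
}

int
main(void)
{
    /* Toy "visibility map": blocks 1-2 and 4-6 are all-visible. */
    bool        vm[8] = {false, true, true, false, true, true, true, false};
    ScanState   st = {
        .rel_pages = 8,
        .current_block = INVALID_BLOCK,
        .next_unskippable_block = INVALID_BLOCK,
        .all_visible = vm,
    };
    unsigned    blkno;

    /*
     * The all-visible runs here are shorter than SKIP_PAGES_THRESHOLD, so
     * every block is returned; with longer runs the loop would jump ahead.
     */
    while (scan_next_block(&st, &blkno))
        printf("processing block %u\n", blkno);
    return 0;
}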