From dde0dfc578137f7c93f9a0e34af38dcdb841b080 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Wed, 8 Oct 2025 15:39:01 -0400 Subject: [PATCH v17 07/15] Eliminate XLOG_HEAP2_VISIBLE from vacuum prune/freeze MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vacuum no longer emits a separate WAL record for each page set all-visible or all-frozen during phase I. Instead, visibility map updates are now included in the XLOG_HEAP2_PRUNE_VACUUM_SCAN record that is already emitted for pruning and freezing. Previously, heap_page_prune_and_freeze() determined whether a page was all-visible, but the corresponding VM bits were only set later in lazy_scan_prune(). Now the VM is updated immediately in heap_page_prune_and_freeze(), at the same time as the heap modifications. This change applies only to vacuum’s prune/freeze work, not to pruning performed during normal page access. Reviewed-by: Robert Haas --- src/backend/access/heap/heapam_xlog.c | 41 ++- src/backend/access/heap/pruneheap.c | 429 ++++++++++++++++++++------ src/backend/access/heap/vacuumlazy.c | 205 +----------- src/include/access/heapam.h | 41 ++- 4 files changed, 414 insertions(+), 302 deletions(-) diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index aaf595e75d6..f6624bc98d0 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -104,6 +104,8 @@ heap_xlog_prune_freeze(XLogReaderState *record) OffsetNumber *frz_offsets; char *dataptr = XLogRecGetBlockData(record, 0, &datalen); bool do_prune; + bool set_lsn = false; + bool mark_buffer_dirty = false; heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags, &nplans, &plans, &frz_offsets, @@ -157,20 +159,37 @@ heap_xlog_prune_freeze(XLogReaderState *record) /* There should be no more data */ Assert((char *) frz_offsets == dataptr + datalen); - if ((vmflags & VISIBILITYMAP_VALID_BITS)) - PageSetAllVisible(page); - - 
MarkBufferDirty(buffer); + if (do_prune || nplans > 0) + mark_buffer_dirty = set_lsn = true; /* - * Always emit a WAL record when setting PD_ALL_VISIBLE but only emit - * an FPI if checksums/wal_log_hints are enabled. Advance the page LSN - * only if the record could include an FPI, since recovery skips - * records <= the stamped LSN. Otherwise it might skip an earlier FPI - * needed to repair a torn page. + * The critical integrity requirement here is that we must never end + up with the visibility map bit set and the page-level + * PD_ALL_VISIBLE bit clear. If that were to occur, a subsequent page + * modification would fail to clear the visibility map bit. + * + * If this record only sets the VM, no need to dirty the heap page. */ - if (do_prune || nplans > 0 || - ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded())) + if ((vmflags & VISIBILITYMAP_VALID_BITS) && !PageIsAllVisible(page)) + { + PageSetAllVisible(page); + mark_buffer_dirty = true; + + /* + * Always emit a WAL record when setting PD_ALL_VISIBLE but only + * emit an FPI if checksums/wal_log_hints are enabled. Advance the + * page LSN only if the record could include an FPI, since + * recovery skips records <= the stamped LSN. Otherwise it might + * skip an earlier FPI needed to repair a torn page. 
+ */ + if (XLogHintBitIsNeeded()) + set_lsn = true; + } + + if (mark_buffer_dirty) + MarkBufferDirty(buffer); + + if (set_lsn) PageSetLSN(page, lsn); /* diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 5892ed5a07e..f70563008e1 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -19,7 +19,7 @@ #include "access/htup_details.h" #include "access/multixact.h" #include "access/transam.h" -#include "access/visibilitymapdefs.h" +#include "access/visibilitymap.h" #include "access/xlog.h" #include "access/xloginsert.h" #include "commands/vacuum.h" @@ -44,6 +44,8 @@ typedef struct bool mark_unused_now; /* whether to attempt freezing tuples */ bool attempt_freeze; + /* whether or not to attempt updating the VM */ + bool attempt_update_vm; struct VacuumCutoffs *cutoffs; /*------------------------------------------------------- @@ -133,17 +135,17 @@ typedef struct * all_visible and all_frozen indicate if the all-visible and all-frozen * bits in the visibility map can be set for this page after pruning. * - * visibility_cutoff_xid is the newest xmin of live tuples on the page. - * The caller can use it as the conflict horizon, when setting the VM - * bits. It is only valid if we froze some tuples, and all_frozen is - * true. + * visibility_cutoff_xid is the newest xmin of live tuples on the page. It + * can be used as the conflict horizon when setting the VM or when + * freezing all the tuples on the page. It is only valid when all the live + * tuples on the page are all-visible. * * NOTE: all_visible and all_frozen initially don't include LP_DEAD items. * That's convenient for heap_page_prune_and_freeze(), to use them to - * decide whether to freeze the page or not. The all_visible and - * all_frozen values returned to the caller are adjusted to include - * LP_DEAD items after we determine whether or not to opportunistically - * freeze. 
+ * decide whether to opportunistically freeze the page or not. The + * all_visible and all_frozen values ultimately used to set the VM are + * adjusted to include LP_DEAD items after we determine whether or not to + * opportunistically freeze. */ bool all_visible; bool all_frozen; @@ -174,6 +176,19 @@ static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetN static void page_verify_redirects(Page page); +static TransactionId get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm, + TransactionId latest_xid_removed, TransactionId frz_conflict_horizon, + TransactionId visibility_cutoff_xid, bool blk_already_av, + bool set_blk_all_frozen); + +static bool heap_page_will_set_vis(Relation relation, + BlockNumber heap_blk, + Buffer heap_buf, + Buffer vmbuffer, + bool blk_known_av, + const PruneState *prstate, + uint8 *vmflags, + bool *do_set_pd_vis); /* * Optionally prune and repair fragmentation in the specified page. @@ -259,6 +274,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer) params.reason = PRUNE_ON_ACCESS; params.vistest = vistest; params.cutoffs = NULL; + params.vmbuffer = InvalidBuffer; + params.blk_known_av = false; /* * For now, pass mark_unused_now as false regardless of whether or @@ -431,10 +448,108 @@ heap_page_will_freeze(Relation relation, Buffer buffer, return do_freeze; } +/* + * Decide whether to set the visibility map bits for heap_blk, using + * information from PruneState and blk_known_av. Some callers may already + * have examined this page’s VM bits (e.g., VACUUM in the previous + * heap_vac_scan_next_block() call) and can pass that along. + * + * Returns true if one or both VM bits should be set, along with the desired + * flags in *vmflags. Also indicates via do_set_pd_vis whether PD_ALL_VISIBLE + * should be set on the heap page. 
+ */ +static bool +heap_page_will_set_vis(Relation relation, + BlockNumber heap_blk, + Buffer heap_buf, + Buffer vmbuffer, + bool blk_known_av, + const PruneState *prstate, + uint8 *vmflags, + bool *do_set_pd_vis) +{ + Page heap_page = BufferGetPage(heap_buf); + bool do_set_vm = false; + + *do_set_pd_vis = false; + + if (!prstate->attempt_update_vm) + { + Assert(!prstate->all_visible && !prstate->all_frozen); + Assert(*vmflags == 0); + return false; + } + + if (prstate->all_visible && !PageIsAllVisible(heap_page)) + *do_set_pd_vis = true; + + if ((prstate->all_visible && !blk_known_av) || + (prstate->all_frozen && !VM_ALL_FROZEN(relation, heap_blk, &vmbuffer))) + { + *vmflags = VISIBILITYMAP_ALL_VISIBLE; + if (prstate->all_frozen) + *vmflags |= VISIBILITYMAP_ALL_FROZEN; + + do_set_vm = true; + } + + /* + * Now handle two potential corruption cases: + * + * These do not need to happen in a critical section and are not + * WAL-logged. + * + * As of PostgreSQL 9.2, the visibility map bit should never be set if the + * page-level bit is clear. However, it's possible that in vacuum the bit + * got cleared after heap_vac_scan_next_block() was called, so we must + * recheck with buffer lock before concluding that the VM is corrupt. + */ + else if (blk_known_av && !PageIsAllVisible(heap_page) && + visibilitymap_get_status(relation, heap_blk, &vmbuffer) != 0) + { + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", + RelationGetRelationName(relation), heap_blk))); + + visibilitymap_clear(relation, heap_blk, vmbuffer, + VISIBILITYMAP_VALID_BITS); + } + + /* + * It's possible for the value returned by + * GetOldestNonRemovableTransactionId() to move backwards, so it's not + * wrong for us to see tuples that appear to not be visible to everyone + * yet, while PD_ALL_VISIBLE is already set. 
The real safe xmin value + * never moves backwards, but GetOldestNonRemovableTransactionId() is + * conservative and sometimes returns a value that's unnecessarily small, + * so if we see that contradiction it just means that the tuples that we + * think are not visible to everyone yet actually are, and the + * PD_ALL_VISIBLE flag is correct. + * + * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set, + * however. + */ + else if (prstate->lpdead_items > 0 && PageIsAllVisible(heap_page)) + { + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u", + RelationGetRelationName(relation), heap_blk))); + + PageClearAllVisible(heap_page); + MarkBufferDirty(heap_buf); + visibilitymap_clear(relation, heap_blk, vmbuffer, + VISIBILITYMAP_VALID_BITS); + } + + return do_set_vm; +} /* * Prune and repair fragmentation and potentially freeze tuples on the - * specified page. + * specified page. If the page's visibility status has changed, update it in + * the VM. * * Caller must have pin and buffer cleanup lock on the page. Note that we * don't update the FSM information for page on caller's behalf. Caller might @@ -449,12 +564,13 @@ heap_page_will_freeze(Relation relation, Buffer buffer, * it's required in order to advance relfrozenxid / relminmxid, or if it's * considered advantageous for overall system performance to do so now. The * 'params.cutoffs', 'presult', 'new_relfrozen_xid' and 'new_relmin_mxid' - * arguments are required when freezing. When HEAP_PRUNE_FREEZE option is - * passed, we also set presult->all_visible and presult->all_frozen after - * determining whether or not to opporunistically freeze, to indicate if the - * VM bits can be set. They are always set to false when the - * HEAP_PRUNE_FREEZE option is not passed, because at the moment only callers - * that also freeze need that information. + * arguments are required when freezing. 
+ * + * If HEAP_PAGE_PRUNE_UPDATE_VIS is set in params and the visibility status of + * the page has changed, we will update the VM at the same time as pruning and + * freezing the heap page. We will also update presult->old_vmbits and + * presult->new_vmbits with the state of the VM before and after updating it + * for the caller to use in bookkeeping. * * presult contains output parameters needed by callers, such as the number of * tuples removed and the offsets of dead items on the page after pruning. @@ -479,6 +595,7 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, MultiXactId *new_relmin_mxid) { Buffer buffer = params->buffer; + Buffer vmbuffer = params->vmbuffer; Page page = BufferGetPage(buffer); BlockNumber blockno = BufferGetBlockNumber(buffer); OffsetNumber offnum, @@ -488,15 +605,22 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, bool do_freeze; bool do_prune; bool do_hint_prune; + bool do_set_vm; + bool do_set_pd_vis; bool did_tuple_hint_fpi; int64 fpi_before = pgWalUsage.wal_fpi; TransactionId frz_conflict_horizon = InvalidTransactionId; + TransactionId conflict_xid = InvalidTransactionId; + uint8 new_vmbits = 0; + uint8 old_vmbits = 0; /* Copy parameters to prstate */ prstate.vistest = params->vistest; prstate.mark_unused_now = (params->options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0; prstate.attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0; + prstate.attempt_update_vm = + (params->options & HEAP_PAGE_PRUNE_UPDATE_VIS) != 0; prstate.cutoffs = params->cutoffs; /* @@ -543,50 +667,54 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, prstate.deadoffsets = presult->deadoffsets; /* - * Caller may update the VM after we're done. We can keep track of - * whether the page will be all-visible and all-frozen after pruning and - * freezing to help the caller to do that. + * Track whether the page could be marked all-visible and/or all-frozen. 
+ * This information is used for opportunistic freezing and for updating + * the visibility map (VM) if requested by the caller. + * + * Currently, only VACUUM performs freezing, but other callers may in the + * future. Visibility bookkeeping is required not just for setting the VM + * bits, but also for opportunistic freezing: we only consider freezing if + * the page would become all-frozen, or if it would be all-frozen except + * for dead tuples that VACUUM will remove. If attempt_update_vm is false, + * we will not set the VM bit even if the page is found to be all-visible. + * + * If HEAP_PAGE_PRUNE_UPDATE_VIS is passed without HEAP_PAGE_PRUNE_FREEZE, + * prstate.all_frozen must be initialized to false, since we will not call + * heap_prepare_freeze_tuple() for each tuple. * - * Currently, only VACUUM sets the VM bits. To save the effort, only do - * the bookkeeping if the caller needs it. Currently, that's tied to - * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted - * to update the VM bits without also freezing or freeze without also - * setting the VM bits. + * Dead tuples that will be removed by the end of vacuum should not + * prevent opportunistic freezing. Therefore, we do not clear all_visible + * when we encounter LP_DEAD items. Instead, we correct all_visible after + * deciding whether to freeze, but before updating the VM, to avoid + * setting the VM bit incorrectly. * - * In addition to telling the caller whether it can set the VM bit, we - * also use 'all_visible' and 'all_frozen' for our own decision-making. If - * the whole page would become frozen, we consider opportunistically - * freezing tuples. We will not be able to freeze the whole page if there - * are tuples present that are not visible to everyone or if there are - * dead tuples which are not yet removable. However, dead tuples which - * will be removed by the end of vacuuming should not preclude us from - * opportunistically freezing. 
Because of that, we do not immediately - * clear all_visible when we see LP_DEAD items. We fix that after - * scanning the line pointers, before we return the value to the caller, - * so that the caller doesn't set the VM bit incorrectly. + * If neither freezing nor VM updates are requested, we skip the extra + * bookkeeping. In this case, initializing all_visible to false allows + * heap_prune_record_unchanged_lp_normal() to bypass unnecessary work. */ if (prstate.attempt_freeze) { prstate.all_visible = true; prstate.all_frozen = true; } + else if (prstate.attempt_update_vm) + { + prstate.all_visible = true; + prstate.all_frozen = false; + } else { - /* - * Initializing to false allows skipping the work to update them in - * heap_prune_record_unchanged_lp_normal(). - */ prstate.all_visible = false; prstate.all_frozen = false; } /* - * The visibility cutoff xid is the newest xmin of live tuples on the - * page. In the common case, this will be set as the conflict horizon the - * caller can use for updating the VM. If, at the end of freezing and - * pruning, the page is all-frozen, there is no possibility that any - * running transaction on the standby does not see tuples on the page as - * all-visible, so the conflict horizon remains InvalidTransactionId. + * The visibility cutoff xid is the newest xmin of live, committed tuples + * older than OldestXmin on the page. This field is only kept up-to-date + * if the page is all-visible. As soon as a tuple is encountered that is + * not visible to all, this field is unmaintained. As long as it is + * maintained, it can be used to calculate the snapshot conflict horizon + * when updating the VM and/or freezing all the tuples on the page. 
*/ prstate.visibility_cutoff_xid = InvalidTransactionId; @@ -818,6 +946,35 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, prstate.all_visible = prstate.all_frozen = false; Assert(!prstate.all_frozen || prstate.all_visible); + Assert(!prstate.all_visible || (prstate.lpdead_items == 0)); + + /* + * Decide whether to set the page-level PD_ALL_VISIBLE bit and the VM bits + * based on information from the VM and the all_visible/all_frozen flags. + * + * While it is valid for PD_ALL_VISIBLE to be set when the corresponding + * VM bit is clear, we strongly prefer to keep them in sync. + * + * Accordingly, we also allow updating only the VM when PD_ALL_VISIBLE has + * already been set. Setting only the VM is most common when setting an + * already all-visible page all-frozen. + */ + do_set_vm = heap_page_will_set_vis(params->relation, + blockno, buffer, vmbuffer, params->blk_known_av, + &prstate, &new_vmbits, &do_set_pd_vis); + + /* We should only set the VM if PD_ALL_VISIBLE is set or will be */ + Assert(!do_set_vm || do_set_pd_vis || PageIsAllVisible(page)); + + conflict_xid = get_conflict_xid(do_prune, do_freeze, do_set_vm, + prstate.latest_xid_removed, frz_conflict_horizon, + prstate.visibility_cutoff_xid, params->blk_known_av, + (do_set_vm && (new_vmbits & VISIBILITYMAP_ALL_FROZEN))); + + /* Lock vmbuffer before entering a critical section */ + if (do_set_vm) + LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE); + /* Any error while applying the changes is critical */ START_CRIT_SECTION(); @@ -838,14 +995,17 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, /* * If that's all we had to do to the page, this is a non-WAL-logged - * hint. If we are going to freeze or prune the page, we will mark - * the buffer dirty below. + * hint. If we are going to freeze or prune the page or set + * PD_ALL_VISIBLE, we will mark the buffer dirty below. 
+ * + * Setting PD_ALL_VISIBLE is fully WAL-logged because it is forbidden + * for the VM to be set and PD_ALL_VISIBLE to be clear. */ - if (!do_freeze && !do_prune) + if (!do_freeze && !do_prune && !do_set_pd_vis) MarkBufferDirtyHint(buffer, true); } - if (do_prune || do_freeze) + if (do_prune || do_freeze || do_set_vm) { /* Apply the planned item changes and repair page fragmentation. */ if (do_prune) @@ -859,66 +1019,91 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, if (do_freeze) heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen); - MarkBufferDirty(buffer); + if (do_set_pd_vis) + PageSetAllVisible(page); - /* - * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did - */ - if (RelationNeedsWAL(params->relation)) + if (do_prune || do_freeze || do_set_pd_vis) + MarkBufferDirty(buffer); + + if (do_set_vm) { - /* - * The snapshotConflictHorizon for the whole record should be the - * most conservative of all the horizons calculated for any of the - * possible modifications. If this record will prune tuples, any - * transactions on the standby older than the youngest xmax of the - * most recently removed tuple this record will prune will - * conflict. If this record will freeze tuples, any transactions - * on the standby with xids older than the youngest tuple this - * record will freeze will conflict. - */ - TransactionId conflict_xid; + Assert(PageIsAllVisible(page)); - if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed)) - conflict_xid = frz_conflict_horizon; - else - conflict_xid = prstate.latest_xid_removed; + old_vmbits = visibilitymap_set_vmbits(blockno, + vmbuffer, new_vmbits, + RelationGetRelationName(params->relation)); + if (old_vmbits == new_vmbits) + { + LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK); + /* Unset so we don't emit WAL since no change occurred */ + do_set_vm = false; + } + } + /* + * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did. 
If we were + * only updating the VM and it turns out it was already set, we will + * have unset do_set_vm earlier. As such, check it again before + * emitting the record. + */ + if (RelationNeedsWAL(params->relation) && + (do_prune || do_freeze || do_set_vm)) log_heap_prune_and_freeze(params->relation, buffer, - InvalidBuffer, /* vmbuffer */ - 0, /* vmflags */ + do_set_vm ? vmbuffer : InvalidBuffer, + do_set_vm ? new_vmbits : 0, conflict_xid, - true, params->reason, + true, /* cleanup lock */ + do_set_pd_vis, + params->reason, prstate.frozen, prstate.nfrozen, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, prstate.nowunused, prstate.nunused); - } } END_CRIT_SECTION(); + if (do_set_vm) + LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK); + + /* + * During its second pass over the heap, VACUUM calls + * heap_page_would_be_all_visible() to determine whether a page is + * all-visible and all-frozen. The logic here is similar. After completing + * pruning and freezing, use an assertion to verify that our results + * remain consistent with heap_page_would_be_all_visible(). 
+ */ +#ifdef USE_ASSERT_CHECKING + if (prstate.all_visible) + { + TransactionId debug_cutoff; + bool debug_all_frozen; + + Assert(prstate.lpdead_items == 0); + Assert(prstate.cutoffs); + + if (!heap_page_is_all_visible(params->relation, buffer, + prstate.cutoffs->OldestXmin, + &debug_all_frozen, + &debug_cutoff, off_loc)) + Assert(false); + + Assert(prstate.all_frozen == debug_all_frozen); + + Assert(!TransactionIdIsValid(debug_cutoff) || + debug_cutoff == prstate.visibility_cutoff_xid); + } +#endif + /* Copy information back for caller */ presult->ndeleted = prstate.ndeleted; presult->nnewlpdead = prstate.ndead; presult->nfrozen = prstate.nfrozen; presult->live_tuples = prstate.live_tuples; presult->recently_dead_tuples = prstate.recently_dead_tuples; - presult->all_visible = prstate.all_visible; - presult->all_frozen = prstate.all_frozen; presult->hastup = prstate.hastup; - - /* - * For callers planning to update the visibility map, the conflict horizon - * for that record must be the newest xmin on the page. However, if the - * page is completely frozen, there can be no conflict and the - * vm_conflict_horizon should remain InvalidTransactionId. This includes - * the case that we just froze all the tuples; the prune-freeze record - * included the conflict XID already so the caller doesn't need it. - */ - if (presult->all_frozen) - presult->vm_conflict_horizon = InvalidTransactionId; - else - presult->vm_conflict_horizon = prstate.visibility_cutoff_xid; + presult->new_vmbits = new_vmbits; + presult->old_vmbits = old_vmbits; presult->lpdead_items = prstate.lpdead_items; /* the presult->deadoffsets array was already filled in */ @@ -2060,6 +2245,64 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, return nplans; } +/* + * Calculate the conflict horizon for the whole XLOG_HEAP2_PRUNE_VACUUM_SCAN + * record. 
+ */ +static TransactionId +get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm, + TransactionId latest_xid_removed, TransactionId frz_conflict_horizon, + TransactionId visibility_cutoff_xid, bool blk_already_av, + bool set_blk_all_frozen) +{ + + /* + * The snapshotConflictHorizon for the whole record should be the most + * conservative of all the horizons calculated for any of the possible + * modifications. If this record will prune tuples, any transactions on + * the standby older than the youngest xmax of the most recently removed + * tuple this record will prune will conflict. If this record will freeze + * tuples, any transactions on the standby with xids older than the + * youngest tuple this record will freeze will conflict. + */ + TransactionId conflict_xid = InvalidTransactionId; + + /* + * If we are updating the VM, the conflict horizon is almost always the + * visibility cutoff XID. + * + * Separately, if we are freezing any tuples, as an optimization, we can + * use the visibility_cutoff_xid as the conflict horizon if the page will + * be all-frozen. This is true even if there are LP_DEAD line pointers + * because we ignored those when maintaining the visibility_cutoff_xid. + * This will have been calculated earlier as the frz_conflict_horizon when + * we determined we would freeze. + */ + if (do_set_vm) + conflict_xid = visibility_cutoff_xid; + else if (do_freeze) + conflict_xid = frz_conflict_horizon; + + /* + * If we are removing tuples with a younger xmax than our so far + * calculated conflict_xid, we must use this as our horizon. + */ + if (TransactionIdFollows(latest_xid_removed, conflict_xid)) + conflict_xid = latest_xid_removed; + + /* + * We can omit the snapshot conflict horizon if we are not pruning or + * freezing any tuples and are setting an already all-visible page + * all-frozen in the VM. In this case, all of the tuples on the page must + * already be visible to all MVCC snapshots on the standby. 
+ */ + if (!do_prune && !do_freeze && + do_set_vm && blk_already_av && set_blk_all_frozen) + conflict_xid = InvalidTransactionId; + + return conflict_xid; +} + /* * Write an XLOG_HEAP2_PRUNE* WAL record * @@ -2084,6 +2327,10 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, * case, vmbuffer should already have been updated and marked dirty and should * still be pinned and locked. * + * set_pd_all_vis indicates that we set PD_ALL_VISIBLE and thus should update + * the page LSN when checksums/wal_log_hints are enabled even if we did not + * prune or freeze tuples on the page. + * * Note: This function scribbles on the 'frozen' array. * * Note: This is called in a critical section, so careful what you do here. @@ -2093,6 +2340,7 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, + bool set_pd_all_vis, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, @@ -2127,7 +2375,7 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, */ if (!do_prune && nfrozen == 0 && - (!do_set_vm || !XLogHintBitIsNeeded())) + (!set_pd_all_vis || !XLogHintBitIsNeeded())) regbuf_flags |= REGBUF_NO_IMAGE; /* @@ -2248,7 +2496,8 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, * update PD_ALL_VISIBLE without bumping the LSN, but this is deemed okay * for page hint updates. 
*/ - if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded())) + if (do_prune || nfrozen > 0 || + (set_pd_all_vis && XLogHintBitIsNeeded())) { Assert(BufferIsDirty(buffer)); PageSetLSN(BufferGetPage(buffer), recptr); diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index c2618c6449c..2f719108ad2 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -464,11 +464,6 @@ static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber * static void dead_items_reset(LVRelState *vacrel); static void dead_items_cleanup(LVRelState *vacrel); -static bool heap_page_is_all_visible(Relation rel, Buffer buf, - TransactionId OldestXmin, - bool *all_frozen, - TransactionId *visibility_cutoff_xid, - OffsetNumber *logging_offnum); static bool heap_page_would_be_all_visible(Relation rel, Buffer buf, TransactionId OldestXmin, OffsetNumber *deadoffsets, @@ -1971,6 +1966,8 @@ lazy_scan_prune(LVRelState *vacrel, params.reason = PRUNE_VACUUM_SCAN; params.cutoffs = &vacrel->cutoffs; params.vistest = vacrel->vistest; + params.vmbuffer = vmbuffer; + params.blk_known_av = all_visible_according_to_vm; /* * Prune all HOT-update chains and potentially freeze tuples on this page. @@ -1987,7 +1984,7 @@ lazy_scan_prune(LVRelState *vacrel, * tuples. Pruning will have determined whether or not the page is * all-visible. */ - params.options = HEAP_PAGE_PRUNE_FREEZE; + params.options = HEAP_PAGE_PRUNE_FREEZE | HEAP_PAGE_PRUNE_UPDATE_VIS; if (vacrel->nindexes == 0) params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW; @@ -2010,33 +2007,6 @@ lazy_scan_prune(LVRelState *vacrel, vacrel->new_frozen_tuple_pages++; } - /* - * VACUUM will call heap_page_is_all_visible() during the second pass over - * the heap to determine all_visible and all_frozen for the page -- this - * is a specialized version of the logic from this function. 
Now that - * we've finished pruning and freezing, make sure that we're in total - * agreement with heap_page_is_all_visible() using an assertion. - */ -#ifdef USE_ASSERT_CHECKING - if (presult.all_visible) - { - TransactionId debug_cutoff; - bool debug_all_frozen; - - Assert(presult.lpdead_items == 0); - - if (!heap_page_is_all_visible(vacrel->rel, buf, - vacrel->cutoffs.OldestXmin, &debug_all_frozen, - &debug_cutoff, &vacrel->offnum)) - Assert(false); - - Assert(presult.all_frozen == debug_all_frozen); - - Assert(!TransactionIdIsValid(debug_cutoff) || - debug_cutoff == presult.vm_conflict_horizon); - } -#endif - /* * Now save details of the LP_DEAD items from the page in vacrel */ @@ -2070,168 +2040,26 @@ lazy_scan_prune(LVRelState *vacrel, /* Did we find LP_DEAD items? */ *has_lpdead_items = (presult.lpdead_items > 0); - Assert(!presult.all_visible || !(*has_lpdead_items)); - Assert(!presult.all_frozen || presult.all_visible); - /* - * Handle setting visibility map bit based on information from the VM (as - * of last heap_vac_scan_next_block() call), and from all_visible and - * all_frozen variables + * For the purposes of logging, count whether or not the page was newly + * set all-visible and, potentially, all-frozen. */ - if (!all_visible_according_to_vm && presult.all_visible) + if ((presult.old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0 && + (presult.new_vmbits & VISIBILITYMAP_ALL_VISIBLE) != 0) { - uint8 old_vmbits; - uint8 flags = VISIBILITYMAP_ALL_VISIBLE; - - if (presult.all_frozen) - { - Assert(!TransactionIdIsValid(presult.vm_conflict_horizon)); - flags |= VISIBILITYMAP_ALL_FROZEN; - } - - /* - * It should never be the case that the visibility map page is set - * while the page-level bit is clear, but the reverse is allowed (if - * checksums are not enabled). Regardless, set both bits so that we - * get back in sync. - * - * NB: If the heap page is all-visible but the VM bit is not set, we - * don't need to dirty the heap page. 
However, if checksums are - * enabled, we do need to make sure that the heap page is dirtied - * before passing it to visibilitymap_set(), because it may be logged. - * Given that this situation should only happen in rare cases after a - * crash, it is not worth optimizing. - */ - PageSetAllVisible(page); - MarkBufferDirty(buf); - old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf, - InvalidXLogRecPtr, - vmbuffer, presult.vm_conflict_horizon, - flags); - - /* - * If the page wasn't already set all-visible and/or all-frozen in the - * VM, count it as newly set for logging. - */ - if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) - { - vacrel->vm_new_visible_pages++; - if (presult.all_frozen) - { - vacrel->vm_new_visible_frozen_pages++; - *vm_page_frozen = true; - } - } - else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 && - presult.all_frozen) + vacrel->vm_new_visible_pages++; + if ((presult.new_vmbits & VISIBILITYMAP_ALL_FROZEN) != 0) { - vacrel->vm_new_frozen_pages++; + vacrel->vm_new_visible_frozen_pages++; *vm_page_frozen = true; } } - - /* - * As of PostgreSQL 9.2, the visibility map bit should never be set if the - * page-level bit is clear. However, it's possible that the bit got - * cleared after heap_vac_scan_next_block() was called, so we must recheck - * with buffer lock before concluding that the VM is corrupt. 
- */ - else if (all_visible_according_to_vm && !PageIsAllVisible(page) && - visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0) + else if ((presult.old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 && + (presult.new_vmbits & VISIBILITYMAP_ALL_FROZEN) != 0) { - ereport(WARNING, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", - vacrel->relname, blkno))); - - visibilitymap_clear(vacrel->rel, blkno, vmbuffer, - VISIBILITYMAP_VALID_BITS); - } - - /* - * It's possible for the value returned by - * GetOldestNonRemovableTransactionId() to move backwards, so it's not - * wrong for us to see tuples that appear to not be visible to everyone - * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value - * never moves backwards, but GetOldestNonRemovableTransactionId() is - * conservative and sometimes returns a value that's unnecessarily small, - * so if we see that contradiction it just means that the tuples that we - * think are not visible to everyone yet actually are, and the - * PD_ALL_VISIBLE flag is correct. - * - * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set, - * however. - */ - else if (presult.lpdead_items > 0 && PageIsAllVisible(page)) - { - ereport(WARNING, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u", - vacrel->relname, blkno))); - - PageClearAllVisible(page); - MarkBufferDirty(buf); - visibilitymap_clear(vacrel->rel, blkno, vmbuffer, - VISIBILITYMAP_VALID_BITS); - } - - /* - * If the all-visible page is all-frozen but not marked as such yet, mark - * it as all-frozen. 
- */ - else if (all_visible_according_to_vm && presult.all_frozen && - !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer)) - { - uint8 old_vmbits; - - /* - * Avoid relying on all_visible_according_to_vm as a proxy for the - * page-level PD_ALL_VISIBLE bit being set, since it might have become - * stale -- even when all_visible is set - */ - if (!PageIsAllVisible(page)) - { - PageSetAllVisible(page); - MarkBufferDirty(buf); - } - - /* - * Set the page all-frozen (and all-visible) in the VM. - * - * We can pass InvalidTransactionId as our cutoff_xid, since a - * snapshotConflictHorizon sufficient to make everything safe for REDO - * was logged when the page's tuples were frozen. - */ - Assert(!TransactionIdIsValid(presult.vm_conflict_horizon)); - old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf, - InvalidXLogRecPtr, - vmbuffer, InvalidTransactionId, - VISIBILITYMAP_ALL_VISIBLE | - VISIBILITYMAP_ALL_FROZEN); - - /* - * The page was likely already set all-visible in the VM. However, - * there is a small chance that it was modified sometime between - * setting all_visible_according_to_vm and checking the visibility - * during pruning. Check the return value of old_vmbits anyway to - * ensure the visibility map counters used for logging are accurate. - */ - if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) - { - vacrel->vm_new_visible_pages++; - vacrel->vm_new_visible_frozen_pages++; - *vm_page_frozen = true; - } - - /* - * We already checked that the page was not set all-frozen in the VM - * above, so we don't need to test the value of old_vmbits. 
- */
-	else
-	{
-		vacrel->vm_new_frozen_pages++;
-		*vm_page_frozen = true;
-	}
+		Assert((presult.new_vmbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
+		vacrel->vm_new_frozen_pages++;
+		*vm_page_frozen = true;
 	}
 
 	return presult.ndeleted;
@@ -2950,6 +2778,7 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 							vmbuffer, vmflags, conflict_xid,
 							false,	/* no cleanup lock required */
+							(vmflags & VISIBILITYMAP_VALID_BITS) != 0,
 							PRUNE_VACUUM_CLEANUP,
 							NULL, 0,	/* frozen */
 							NULL, 0,	/* redirected */
@@ -3634,7 +3463,7 @@ dead_items_cleanup(LVRelState *vacrel)
  * Wrapper for heap_page_would_be_all_visible() which can be used for
  * callers that expect no LP_DEAD on the page.
  */
-static bool
+bool
 heap_page_is_all_visible(Relation rel, Buffer buf,
 						 TransactionId OldestXmin,
 						 bool *all_frozen,
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 74a5c24002b..2de39ba0cd1 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -42,6 +42,7 @@
 /* "options" flag bits for heap_page_prune_and_freeze */
 #define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW (1 << 0)
 #define HEAP_PAGE_PRUNE_FREEZE (1 << 1)
+#define HEAP_PAGE_PRUNE_UPDATE_VIS (1 << 2)
 
 typedef struct BulkInsertStateData *BulkInsertState;
 typedef struct GlobalVisState GlobalVisState;
@@ -238,6 +239,16 @@ typedef struct PruneFreezeParams
 	Relation	relation;		/* relation containing buffer to be pruned */
 	Buffer		buffer;			/* buffer to be pruned */
 
+	/*
+	 *
+	 * vmbuffer is the buffer that must already contain the required
+	 * block of the visibility map if we are to update it. blk_known_av is the
+	 * visibility status of the heap block as of the last call to
+	 * find_next_unskippable_block().
+	 */
+	Buffer		vmbuffer;
+	bool		blk_known_av;
+
 	/*
 	 * The reason pruning was performed. It is used to set the WAL record
 	 * opcode which is used for debugging and analysis purposes.
@@ -250,8 +261,9 @@ typedef struct PruneFreezeParams * MARK_UNUSED_NOW indicates that dead items can be set LP_UNUSED during * pruning. * - * FREEZE indicates that we will also freeze tuples, and will return - * 'all_visible', 'all_frozen' flags to the caller. + * FREEZE indicates that we will also freeze tuples + * + * UPDATE_VIS indicates that we will set the page's status in the VM. */ int options; @@ -284,19 +296,15 @@ typedef struct PruneFreezeResult int recently_dead_tuples; /* - * all_visible and all_frozen indicate if the all-visible and all-frozen - * bits in the visibility map can be set for this page, after pruning. - * - * vm_conflict_horizon is the newest xmin of live tuples on the page. The - * caller can use it as the conflict horizon when setting the VM bits. It - * is only valid if we froze some tuples (nfrozen > 0), and all_frozen is - * true. + * old_vmbits are the state of the all-visible and all-frozen bits in the + * visibility map before updating it during phase I of vacuuming. + * new_vmbits are the state of those bits after phase I of vacuuming. * - * These are only set if the HEAP_PRUNE_FREEZE option is set. + * These are only set if the HEAP_PAGE_PRUNE_UPDATE_VIS option is set and + * we have attempted to update the VM. */ - bool all_visible; - bool all_frozen; - TransactionId vm_conflict_horizon; + uint8 new_vmbits; + uint8 old_vmbits; /* * Whether or not the page makes rel truncation unsafe. 
This is set to @@ -423,6 +431,7 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, + bool set_pd_all_vis, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, @@ -433,6 +442,12 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer, extern void heap_vacuum_rel(Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy); +extern bool heap_page_is_all_visible(Relation rel, Buffer buf, + TransactionId OldestXmin, + bool *all_frozen, + TransactionId *visibility_cutoff_xid, + OffsetNumber *logging_offnum); + /* in heap/heapam_visibility.c */ extern bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer); -- 2.43.0