From 0141c10d30bd7ea620d16d24201ba22e5337a4dc Mon Sep 17 00:00:00 2001
From: Melanie Plageman
Date: Sat, 27 Sep 2025 11:52:08 -0400
Subject: [PATCH v16 06/14] Eliminate XLOG_HEAP2_VISIBLE from vacuum
 prune/freeze
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Vacuum no longer emits a separate WAL record for each page set
all-visible or all-frozen during phase I. Instead, visibility map
updates are now included in the XLOG_HEAP2_PRUNE_VACUUM_SCAN record
that is already emitted for pruning and freezing.

Previously, heap_page_prune_and_freeze() determined whether a page was
all-visible, but the corresponding VM bits were only set later in
lazy_scan_prune(). Now the VM is updated immediately in
heap_page_prune_and_freeze(), at the same time as the heap
modifications.

This change applies only to vacuum’s prune/freeze work, not to pruning
performed during normal page access.
---
 src/backend/access/heap/heapam_xlog.c  | 158 +++++++--
 src/backend/access/heap/pruneheap.c    | 474 ++++++++++++++++++++-----
 src/backend/access/heap/vacuumlazy.c   | 202 +----------
 src/backend/access/rmgrdesc/heapdesc.c |  11 +-
 src/include/access/heapam.h            |  36 +-
 src/include/access/heapam_xlog.h       |  17 +-
 6 files changed, 584 insertions(+), 314 deletions(-)

diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index c2c7e6ab086..911416bbc56 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -35,7 +35,9 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	Buffer		buffer;
 	RelFileLocator rlocator;
 	BlockNumber blkno;
-	XLogRedoAction action;
+	Buffer		vmbuffer = InvalidBuffer;
+	uint8		vmflags = 0;
+	Size		freespace = 0;
 
 	XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
 	memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
@@ -50,11 +52,22 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
 		   (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);
 
+	if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
+	{
+		vmflags = VISIBILITYMAP_ALL_VISIBLE;
+		if (xlrec.flags & XLHP_VM_ALL_FROZEN)
+			vmflags |= VISIBILITYMAP_ALL_FROZEN;
+	}
+
 	/*
-	 * We are about to remove and/or freeze tuples. In Hot Standby mode,
-	 * ensure that there are no queries running for which the removed tuples
-	 * are still visible or which still consider the frozen xids as running.
-	 * The conflict horizon XID comes after xl_heap_prune.
+	 * The optional snapshot conflict horizon follows xl_heap_prune.
+	 *
+	 * In Hot Standby mode, we must ensure that there are no running queries
+	 * that would conflict with the changes in this record. That means we
+	 * can't replay this record if it removes tuples that are still visible
+	 * to transactions on the standby, freezes tuples with xids that are
+	 * still considered running on the standby, or sets a page as all-visible
+	 * in the VM if it isn't all-visible to all transactions on the standby.
 	 */
 	if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
 	{
@@ -71,12 +84,12 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 	}
 
 	/*
-	 * If we have a full-page image, restore it and we're done.
+	 * If we have a full-page image of the heap block, restore it and we're
+	 * done with the heap block.
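+	 * Any visibility map change carried by this record is replayed
+	 * separately below: the VM page is registered as block 1 and is not
+	 * covered by the heap block's full-page image.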
 	 */
-	action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
-										   (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
-										   &buffer);
-	if (action == BLK_NEEDS_REDO)
+	if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
+									  (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
+									  &buffer) == BLK_NEEDS_REDO)
 	{
 		Page		page = BufferGetPage(buffer);
 		OffsetNumber *redirected;
@@ -90,6 +103,9 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		xlhp_freeze_plan *plans;
 		OffsetNumber *frz_offsets;
 		char	   *dataptr = XLogRecGetBlockData(record, 0, &datalen);
+		bool		do_prune;
+		bool		mark_buffer_dirty = false;
+		bool		set_lsn = false;
 
 		heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
 											   &nplans, &plans, &frz_offsets,
@@ -97,11 +113,16 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 											   &ndead, &nowdead,
 											   &nunused, &nowunused);
 
+		do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
+
+		/* Ensure the record does something */
+		Assert(do_prune || nplans > 0 || (vmflags & VISIBILITYMAP_VALID_BITS));
+
 		/*
 		 * Update all line pointers per the record, and repair fragmentation
 		 * if needed.
 		 */
-		if (nredirected > 0 || ndead > 0 || nunused > 0)
+		if (do_prune)
 			heap_page_prune_execute(buffer,
 									(xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
 									redirected, nredirected,
@@ -138,36 +159,121 @@ heap_xlog_prune_freeze(XLogReaderState *record)
 		/* There should be no more data */
 		Assert((char *) frz_offsets == dataptr + datalen);
 
+		if (do_prune || nplans > 0)
+			mark_buffer_dirty = set_lsn = true;
+
+		/*
+		 * The critical integrity requirement here is that we must never end
+		 * up with the visibility map bit set and the page-level
+		 * PD_ALL_VISIBLE bit clear. If that were to occur, a subsequent page
+		 * modification would fail to clear the visibility map bit.
+		 *
+		 * If this record only sets the VM, no need to dirty the heap page.
+		 */
+		if ((vmflags & VISIBILITYMAP_VALID_BITS) && !PageIsAllVisible(page))
+		{
+			PageSetAllVisible(page);
+			mark_buffer_dirty = true;
+
+			/*
+			 * Always emit a WAL record when setting PD_ALL_VISIBLE but only
+			 * emit an FPI if checksums/wal_log_hints are enabled. Advance the
+			 * page LSN only if the record could include an FPI, since
+			 * recovery skips records <= the stamped LSN. Otherwise it might
+			 * skip an earlier FPI needed to repair a torn page.
+			 */
+			if (XLogHintBitIsNeeded())
+				set_lsn = true;
+		}
+
+		if (mark_buffer_dirty)
+			MarkBufferDirty(buffer);
+
+		if (set_lsn)
+			PageSetLSN(page, lsn);
+
 		/*
 		 * Note: we don't worry about updating the page's prunability hints.
 		 * At worst this will cause an extra prune cycle to occur soon.
 		 */
-
-		PageSetLSN(page, lsn);
-		MarkBufferDirty(buffer);
 	}
 
 	/*
-	 * If we released any space or line pointers, update the free space map.
+	 * If we released any space or line pointers or set PD_ALL_VISIBLE or the
+	 * VM, update the free space map.
+	 *
+	 * Even when no actual space is freed (e.g., when only marking the page
+	 * all-visible or frozen), we still update the FSM. Because the FSM is
+	 * unlogged and maintained heuristically, it often becomes stale on
+	 * standbys. If such a standby is later promoted and runs VACUUM, it will
+	 * skip recalculating free space for pages that were marked all-visible
+	 * (or all-frozen, depending on the mode). FreeSpaceMapVacuum can then
+	 * propagate overly optimistic free space values upward, causing future
+	 * insertions to select pages that turn out to be unusable. In bulk, this
+	 * can lead to long stalls.
+	 *
+	 * To prevent this, always refresh the FSM’s view when a page becomes
+	 * all-visible or all-frozen.
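+	 * The FSM itself is still not WAL-logged; we simply recompute the free
+	 * space here during replay and record it.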
+	 *
+	 * Do this regardless of whether a full-page image is logged, since FSM
+	 * data is not part of the page itself.
 	 *
-	 * Do this regardless of a full-page image being applied, since the FSM
-	 * data is not in the page anyway.
 	 */
 	if (BufferIsValid(buffer))
 	{
-		if (xlrec.flags & (XLHP_HAS_REDIRECTIONS |
-						   XLHP_HAS_DEAD_ITEMS |
-						   XLHP_HAS_NOW_UNUSED_ITEMS))
-		{
-			Size		freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
+		if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
+							XLHP_HAS_DEAD_ITEMS |
+							XLHP_HAS_NOW_UNUSED_ITEMS)) ||
+			(vmflags & VISIBILITYMAP_VALID_BITS))
+			freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));
 
-			UnlockReleaseBuffer(buffer);
+		/*
+		 * We want to avoid holding an exclusive lock on the heap buffer while
+		 * doing I/O (on either the FSM or the VM), so we'll release the lock
+		 * on the heap buffer before doing either.
+		 */
+		UnlockReleaseBuffer(buffer);
+	}
 
-			XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
+	/*
+	 * Now read and update the VM block.
+	 *
+	 * We must redo changes to the VM even if the heap page was skipped due
+	 * to the LSN interlock. See the comment in heap_xlog_multi_insert() for
+	 * more details on replaying changes to the VM.
+	 */
+	if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
+		XLogReadBufferForRedoExtended(record, 1,
+									  RBM_ZERO_ON_ERROR,
+									  false,
+									  &vmbuffer) == BLK_NEEDS_REDO)
+	{
+		Page		vmpage = BufferGetPage(vmbuffer);
+		char	   *relname;
+		uint8		old_vmbits = 0;
+
+		/* initialize the page if it was read as zeros */
+		if (PageIsNew(vmpage))
+			PageInit(vmpage, BLCKSZ, 0);
+
+		/* We don't have the relation name during recovery, so use the relfilenode */
+		relname = psprintf("%u", rlocator.relNumber);
+		old_vmbits = visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, relname);
+
+		/* Only set the VM page LSN if we modified the page */
+		if (old_vmbits != vmflags)
+		{
+			Assert(BufferIsDirty(vmbuffer));
+			PageSetLSN(BufferGetPage(vmbuffer), lsn);
 		}
-		else
-			UnlockReleaseBuffer(buffer);
+		pfree(relname);
 	}
+
+	if (BufferIsValid(vmbuffer))
+		UnlockReleaseBuffer(vmbuffer);
+
+	if (freespace > 0)
+		XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
 }
 
 /*
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index c23a6a21a7f..f384d74416a 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -19,6 +19,7 @@
 #include "access/htup_details.h"
 #include "access/multixact.h"
 #include "access/transam.h"
+#include "access/visibilitymap.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
 #include "commands/vacuum.h"
@@ -43,6 +44,8 @@ typedef struct
 	bool		mark_unused_now;
 	/* whether to attempt freezing tuples */
 	bool		attempt_freeze;
+	/* whether or not to attempt updating the VM */
+	bool		attempt_update_vm;
 	const struct VacuumCutoffs *cutoffs;
 
 	/*-------------------------------------------------------
@@ -132,17 +135,17 @@ typedef struct
 	 * all_visible and all_frozen indicate if the all-visible and all-frozen
 	 * bits in the visibility map can be set for this page after pruning.
 	 *
-	 * visibility_cutoff_xid is the newest xmin of live tuples on the page.
-	 * The caller can use it as the conflict horizon, when setting the VM
-	 * bits. It is only valid if we froze some tuples, and all_frozen is
-	 * true.
+	 * visibility_cutoff_xid is the newest xmin of live tuples on the page. It
+	 * can be used as the conflict horizon when setting the VM or when
+	 * freezing all the tuples on the page. It is only valid when all the live
+	 * tuples on the page are visible to all.
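+	 * get_conflict_xid() consults it when computing the snapshot conflict
+	 * horizon for the prune/freeze WAL record.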
* * NOTE: all_visible and all_frozen initially don't include LP_DEAD items. * That's convenient for heap_page_prune_and_freeze(), to use them to - * decide whether to freeze the page or not. The all_visible and - * all_frozen values returned to the caller are adjusted to include - * LP_DEAD items after we determine whether or not to opportunistically - * freeze. + * decide whether to opportunistically freeze the page or not. The + * all_visible and all_frozen values ultimately used to set the VM are + * adjusted to include LP_DEAD items after we determine whether or not to + * opportunistically freeze. */ bool all_visible; bool all_frozen; @@ -173,6 +176,19 @@ static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetN static void page_verify_redirects(Page page); +static TransactionId get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm, + TransactionId latest_xid_removed, TransactionId frz_conflict_horizon, + TransactionId visibility_cutoff_xid, bool blk_already_av, + bool set_blk_all_frozen); + +static bool heap_page_will_set_vis(Relation relation, + BlockNumber heap_blk, + Buffer heap_buf, + Buffer vmbuffer, + bool blk_known_av, + const PruneState *prstate, + uint8 *vmflags, + bool *do_set_pd_vis); /* * Optionally prune and repair fragmentation in the specified page. @@ -258,6 +274,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer) params.reason = PRUNE_ON_ACCESS; params.vistest = vistest; params.cutoffs = NULL; + params.vmbuffer = InvalidBuffer; + params.blk_known_av = false; /* * For now, pass mark_unused_now as false regardless of whether or @@ -431,10 +449,108 @@ heap_page_will_freeze(Relation relation, Buffer buffer, return do_freeze; } +/* + * Decide whether to set the visibility map bits for heap_blk, using + * information from PruneState and blk_known_av. Some callers may already + * have examined this page’s VM bits (e.g., VACUUM in the previous + * heap_vac_scan_next_block() call) and can pass that along. + * + * Returns true if one or both VM bits should be set, along with the desired + * flags in *vmflags. Also indicates via do_set_pd_vis whether PD_ALL_VISIBLE + * should be set on the heap page. + */ +static bool +heap_page_will_set_vis(Relation relation, + BlockNumber heap_blk, + Buffer heap_buf, + Buffer vmbuffer, + bool blk_known_av, + const PruneState *prstate, + uint8 *vmflags, + bool *do_set_pd_vis) +{ + Page heap_page = BufferGetPage(heap_buf); + bool do_set_vm = false; + + *do_set_pd_vis = false; + + if (!prstate->attempt_update_vm) + { + Assert(!prstate->all_visible && !prstate->all_frozen); + Assert(*vmflags == 0); + return false; + } + + if (prstate->all_visible && !PageIsAllVisible(heap_page)) + *do_set_pd_vis = true; + + if ((prstate->all_visible && !blk_known_av) || + (prstate->all_frozen && !VM_ALL_FROZEN(relation, heap_blk, &vmbuffer))) + { + *vmflags = VISIBILITYMAP_ALL_VISIBLE; + if (prstate->all_frozen) + *vmflags |= VISIBILITYMAP_ALL_FROZEN; + + do_set_vm = true; + } + + /* + * Now handle two potential corruption cases: + * + * These do not need to happen in a critical section and are not + * WAL-logged. + * + * As of PostgreSQL 9.2, the visibility map bit should never be set if the + * page-level bit is clear. However, it's possible that in vacuum the bit + * got cleared after heap_vac_scan_next_block() was called, so we must + * recheck with buffer lock before concluding that the VM is corrupt. 
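+	 *
+	 * If the contradiction persists while we hold the buffer lock, the VM
+	 * really is corrupt, so we warn and clear the bogus bits below.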
+ */ + else if (blk_known_av && !PageIsAllVisible(heap_page) && + visibilitymap_get_status(relation, heap_blk, &vmbuffer) != 0) + { + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", + RelationGetRelationName(relation), heap_blk))); + + visibilitymap_clear(relation, heap_blk, vmbuffer, + VISIBILITYMAP_VALID_BITS); + } + + /* + * It's possible for the value returned by + * GetOldestNonRemovableTransactionId() to move backwards, so it's not + * wrong for us to see tuples that appear to not be visible to everyone + * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value + * never moves backwards, but GetOldestNonRemovableTransactionId() is + * conservative and sometimes returns a value that's unnecessarily small, + * so if we see that contradiction it just means that the tuples that we + * think are not visible to everyone yet actually are, and the + * PD_ALL_VISIBLE flag is correct. + * + * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set, + * however. + */ + else if (prstate->lpdead_items > 0 && PageIsAllVisible(heap_page)) + { + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u", + RelationGetRelationName(relation), heap_blk))); + + PageClearAllVisible(heap_page); + MarkBufferDirty(heap_buf); + visibilitymap_clear(relation, heap_blk, vmbuffer, + VISIBILITYMAP_VALID_BITS); + } + + return do_set_vm; +} /* * Prune and repair fragmentation and potentially freeze tuples on the - * specified page. + * specified page. If the page's visibility status has changed, update it in + * the VM. * * Caller must have pin and buffer cleanup lock on the page. Note that we * don't update the FSM information for page on caller's behalf. Caller might @@ -449,12 +565,13 @@ heap_page_will_freeze(Relation relation, Buffer buffer, * it's required in order to advance relfrozenxid / relminmxid, or if it's * considered advantageous for overall system performance to do so now. The * 'params.cutoffs', 'presult', 'new_relfrozen_xid' and 'new_relmin_mxid' - * arguments are required when freezing. When HEAP_PRUNE_FREEZE option is - * passed, we also set presult->all_visible and presult->all_frozen after - * determining whether or not to opporunistically freeze, to indicate if the - * VM bits can be set. They are always set to false when the - * HEAP_PRUNE_FREEZE option is not passed, because at the moment only callers - * that also freeze need that information. + * arguments are required when freezing. + * + * If HEAP_PAGE_PRUNE_UPDATE_VIS is set in params and the visibility status of + * the page has changed, we will update the VM at the same time as pruning and + * freezing the heap page. We will also update presult->old_vmbits and + * presult->new_vmbits with the state of the VM before and after updating it + * for the caller to use in bookkeeping. * * presult contains output parameters needed by callers, such as the number of * tuples removed and the offsets of dead items on the page after pruning. 
@@ -479,6 +596,7 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, MultiXactId *new_relmin_mxid) { Buffer buffer = params->buffer; + Buffer vmbuffer = params->vmbuffer; Page page = BufferGetPage(buffer); BlockNumber blockno = BufferGetBlockNumber(buffer); OffsetNumber offnum, @@ -488,15 +606,22 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, bool do_freeze; bool do_prune; bool do_hint_prune; + bool do_set_vm; + bool do_set_pd_vis; bool did_tuple_hint_fpi; int64 fpi_before = pgWalUsage.wal_fpi; TransactionId frz_conflict_horizon = InvalidTransactionId; + TransactionId conflict_xid = InvalidTransactionId; + uint8 new_vmbits = 0; + uint8 old_vmbits = 0; /* Copy parameters to prstate */ prstate.vistest = params->vistest; prstate.mark_unused_now = (params->options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0; prstate.attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0; + prstate.attempt_update_vm = + (params->options & HEAP_PAGE_PRUNE_UPDATE_VIS) != 0; prstate.cutoffs = params->cutoffs; /* @@ -543,50 +668,54 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, prstate.deadoffsets = presult->deadoffsets; /* - * Caller may update the VM after we're done. We can keep track of - * whether the page will be all-visible and all-frozen after pruning and - * freezing to help the caller to do that. + * Track whether the page could be marked all-visible and/or all-frozen. + * This information is used for opportunistic freezing and for updating + * the visibility map (VM) if requested by the caller. + * + * Currently, only VACUUM performs freezing, but other callers may in the + * future. Visibility bookkeeping is required not just for setting the VM + * bits, but also for opportunistic freezing: we only consider freezing if + * the page would become all-frozen, or if it would be all-frozen except + * for dead tuples that VACUUM will remove. If attempt_update_vm is false, + * we will not set the VM bit even if the page is found to be all-visible. * - * Currently, only VACUUM sets the VM bits. To save the effort, only do - * the bookkeeping if the caller needs it. Currently, that's tied to - * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted - * to update the VM bits without also freezing or freeze without also - * setting the VM bits. + * If HEAP_PAGE_PRUNE_UPDATE_VIS is passed without HEAP_PAGE_PRUNE_FREEZE, + * prstate.all_frozen must be initialized to false, since we will not call + * heap_prepare_freeze_tuple() for each tuple. * - * In addition to telling the caller whether it can set the VM bit, we - * also use 'all_visible' and 'all_frozen' for our own decision-making. If - * the whole page would become frozen, we consider opportunistically - * freezing tuples. We will not be able to freeze the whole page if there - * are tuples present that are not visible to everyone or if there are - * dead tuples which are not yet removable. However, dead tuples which - * will be removed by the end of vacuuming should not preclude us from - * opportunistically freezing. Because of that, we do not immediately - * clear all_visible when we see LP_DEAD items. We fix that after - * scanning the line pointers, before we return the value to the caller, - * so that the caller doesn't set the VM bit incorrectly. + * Dead tuples that will be removed by the end of vacuum should not + * prevent opportunistic freezing. Therefore, we do not clear all_visible + * when we encounter LP_DEAD items. 
Instead, we correct all_visible after + * deciding whether to freeze, but before updating the VM, to avoid + * setting the VM bit incorrectly. + * + * If neither freezing nor VM updates are requested, we skip the extra + * bookkeeping. In this case, initializing all_visible to false allows + * heap_prune_record_unchanged_lp_normal() to bypass unnecessary work. */ if (prstate.attempt_freeze) { prstate.all_visible = true; prstate.all_frozen = true; } + else if (prstate.attempt_update_vm) + { + prstate.all_visible = true; + prstate.all_frozen = false; + } else { - /* - * Initializing to false allows skipping the work to update them in - * heap_prune_record_unchanged_lp_normal(). - */ prstate.all_visible = false; prstate.all_frozen = false; } /* - * The visibility cutoff xid is the newest xmin of live tuples on the - * page. In the common case, this will be set as the conflict horizon the - * caller can use for updating the VM. If, at the end of freezing and - * pruning, the page is all-frozen, there is no possibility that any - * running transaction on the standby does not see tuples on the page as - * all-visible, so the conflict horizon remains InvalidTransactionId. + * The visibility cutoff xid is the newest xmin of live, committed tuples + * older than OldestXmin on the page. This field is only kept up-to-date + * if the page is all-visible. As soon as a tuple is encountered that is + * not visible to all, this field is unmaintained. As long as it is + * maintained, it can be used to calculate the snapshot conflict horizon + * when updating the VM and/or freezing all the tuples on the page. */ prstate.visibility_cutoff_xid = InvalidTransactionId; @@ -818,6 +947,35 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, prstate.all_visible = prstate.all_frozen = false; Assert(!prstate.all_frozen || prstate.all_visible); + Assert(!prstate.all_visible || (prstate.lpdead_items == 0)); + + /* + * Decide whether to set the page-level PD_ALL_VISIBLE bit and the VM bits + * based on information from the VM and the all_visible/all_frozen flags. + * + * While it is valid for PD_ALL_VISIBLE to be set when the corresponding + * VM bit is clear, we strongly prefer to keep them in sync. + * + * Accordingly, we also allow updating only the VM when PD_ALL_VISIBLE has + * already been set. Setting only the VM is most common when setting an + * already all-visible page all-frozen. + */ + do_set_vm = heap_page_will_set_vis(params->relation, + blockno, buffer, vmbuffer, params->blk_known_av, + &prstate, &new_vmbits, &do_set_pd_vis); + + /* We should only set the VM if PD_ALL_VISIBLE is set or will be */ + Assert(!do_set_vm || do_set_pd_vis || PageIsAllVisible(page)); + + conflict_xid = get_conflict_xid(do_prune, do_freeze, do_set_vm, + prstate.latest_xid_removed, frz_conflict_horizon, + prstate.visibility_cutoff_xid, params->blk_known_av, + (do_set_vm && (new_vmbits & VISIBILITYMAP_ALL_FROZEN))); + + /* Lock vmbuffer before entering a critical section */ + if (do_set_vm) + LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE); + /* Any error while applying the changes is critical */ START_CRIT_SECTION(); @@ -838,14 +996,17 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, /* * If that's all we had to do to the page, this is a non-WAL-logged - * hint. If we are going to freeze or prune the page, we will mark - * the buffer dirty below. + * hint. If we are going to freeze or prune the page or set + * PD_ALL_VISIBLE, we will mark the buffer dirty below. 
+		 *
+		 * Setting PD_ALL_VISIBLE is fully WAL-logged because it is forbidden
+		 * for the VM to be set and PD_ALL_VISIBLE to be clear.
 		 */
-		if (!do_freeze && !do_prune)
+		if (!do_freeze && !do_prune && !do_set_pd_vis)
 			MarkBufferDirtyHint(buffer, true);
 	}
 
-	if (do_prune || do_freeze)
+	if (do_prune || do_freeze || do_set_vm)
 	{
 		/* Apply the planned item changes and repair page fragmentation. */
 		if (do_prune)
@@ -859,64 +1020,91 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 		if (do_freeze)
 			heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
 
-		MarkBufferDirty(buffer);
+		if (do_set_pd_vis)
+			PageSetAllVisible(page);
 
-		/*
-		 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
-		 */
-		if (RelationNeedsWAL(params->relation))
+		if (do_prune || do_freeze || do_set_pd_vis)
+			MarkBufferDirty(buffer);
+
+		if (do_set_vm)
 		{
-			/*
-			 * The snapshotConflictHorizon for the whole record should be the
-			 * most conservative of all the horizons calculated for any of the
-			 * possible modifications. If this record will prune tuples, any
-			 * transactions on the standby older than the youngest xmax of the
-			 * most recently removed tuple this record will prune will
-			 * conflict. If this record will freeze tuples, any transactions
-			 * on the standby with xids older than the youngest tuple this
-			 * record will freeze will conflict.
-			 */
-			TransactionId conflict_xid;
+			Assert(PageIsAllVisible(page));
 
-			if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
-				conflict_xid = frz_conflict_horizon;
-			else
-				conflict_xid = prstate.latest_xid_removed;
+			old_vmbits = visibilitymap_set_vmbits(blockno,
												  vmbuffer, new_vmbits,
												  RelationGetRelationName(params->relation));
 
+			if (old_vmbits == new_vmbits)
+			{
+				LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+				/* Unset so we don't emit WAL since no change occurred */
+				do_set_vm = false;
+			}
+		}
+
+		/*
+		 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did. If we
+		 * were only updating the VM and it turns out it was already set, we
+		 * will have unset do_set_vm earlier. As such, check it again before
+		 * emitting the record.
+		 */
+		if (RelationNeedsWAL(params->relation) &&
+			(do_prune || do_freeze || do_set_vm))
 			log_heap_prune_and_freeze(params->relation, buffer,
+									  do_set_vm ? vmbuffer : InvalidBuffer,
+									  do_set_vm ? new_vmbits : 0,
 									  conflict_xid,
-									  true, params->reason,
+									  true,	/* cleanup lock */
+									  do_set_pd_vis,
+									  params->reason,
 									  prstate.frozen, prstate.nfrozen,
 									  prstate.redirected, prstate.nredirected,
 									  prstate.nowdead, prstate.ndead,
 									  prstate.nowunused, prstate.nunused);
-		}
 	}
 
 	END_CRIT_SECTION();
 
+	if (do_set_vm)
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+
+	/*
+	 * During its second pass over the heap, VACUUM calls
+	 * heap_page_is_all_visible() to determine whether a page is all-visible
+	 * and all-frozen. The logic here is similar. After completing pruning
+	 * and freezing, use an assertion to verify that our results remain
+	 * consistent with heap_page_is_all_visible().
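+	 *
+	 * This cross-check is compiled only when USE_ASSERT_CHECKING is defined,
+	 * so it costs nothing in production builds.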
+	 */
+#ifdef USE_ASSERT_CHECKING
+	if (prstate.all_visible)
+	{
+		TransactionId debug_cutoff;
+		bool		debug_all_frozen;
+
+		Assert(prstate.lpdead_items == 0);
+		Assert(prstate.cutoffs);
+
+		if (!heap_page_is_all_visible(params->relation, buffer,
+									  prstate.cutoffs->OldestXmin,
+									  &debug_all_frozen,
+									  &debug_cutoff, off_loc))
+			Assert(false);
+
+		Assert(prstate.all_frozen == debug_all_frozen);
+
+		Assert(!TransactionIdIsValid(debug_cutoff) ||
+			   debug_cutoff == prstate.visibility_cutoff_xid);
+	}
+#endif
+
 	/* Copy information back for caller */
 	presult->ndeleted = prstate.ndeleted;
 	presult->nnewlpdead = prstate.ndead;
 	presult->nfrozen = prstate.nfrozen;
 	presult->live_tuples = prstate.live_tuples;
 	presult->recently_dead_tuples = prstate.recently_dead_tuples;
-	presult->all_visible = prstate.all_visible;
-	presult->all_frozen = prstate.all_frozen;
 	presult->hastup = prstate.hastup;
-
-	/*
-	 * For callers planning to update the visibility map, the conflict horizon
-	 * for that record must be the newest xmin on the page. However, if the
-	 * page is completely frozen, there can be no conflict and the
-	 * vm_conflict_horizon should remain InvalidTransactionId. This includes
-	 * the case that we just froze all the tuples; the prune-freeze record
-	 * included the conflict XID already so the caller doesn't need it.
-	 */
-	if (presult->all_frozen)
-		presult->vm_conflict_horizon = InvalidTransactionId;
-	else
-		presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
+	presult->new_vmbits = new_vmbits;
+	presult->old_vmbits = old_vmbits;
 
 	presult->lpdead_items = prstate.lpdead_items;
 	/* the presult->deadoffsets array was already filled in */
@@ -2058,6 +2246,64 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
 	return nplans;
 }
 
+/*
+ * Calculate the conflict horizon for the whole XLOG_HEAP2_PRUNE_VACUUM_SCAN
+ * record.
+ */
+static TransactionId
+get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm,
+				 TransactionId latest_xid_removed, TransactionId frz_conflict_horizon,
+				 TransactionId visibility_cutoff_xid, bool blk_already_av,
+				 bool set_blk_all_frozen)
+{
+	/*
+	 * The snapshotConflictHorizon for the whole record should be the most
+	 * conservative of all the horizons calculated for any of the possible
+	 * modifications. If this record will prune tuples, any transactions on
+	 * the standby older than the youngest xmax of the most recently removed
+	 * tuple this record will prune will conflict. If this record will freeze
+	 * tuples, any transactions on the standby with xids older than the
+	 * youngest tuple this record will freeze will conflict.
+	 */
+	TransactionId conflict_xid = InvalidTransactionId;
+
+	/*
+	 * If we are updating the VM, the conflict horizon is almost always the
+	 * visibility cutoff XID.
+	 *
+	 * Separately, if we are freezing any tuples, as an optimization, we can
+	 * use the visibility_cutoff_xid as the conflict horizon if the page will
+	 * be all-frozen. This is true even if there are LP_DEAD line pointers
+	 * because we ignored those when maintaining the visibility_cutoff_xid.
+	 * In that case, it will already have been saved as frz_conflict_horizon
+	 * when we determined we would freeze.
+	 */
+	if (do_set_vm)
+		conflict_xid = visibility_cutoff_xid;
+	else if (do_freeze)
+		conflict_xid = frz_conflict_horizon;
+
+	/*
+	 * If we are removing tuples whose xmax is younger than the conflict_xid
+	 * calculated so far, we must use that xmax as our horizon.
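+	 * Note that TransactionIdFollows() treats InvalidTransactionId as older
+	 * than any valid xid, so this also covers the case where no horizon has
+	 * been chosen yet.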
+ */ + if (TransactionIdFollows(latest_xid_removed, conflict_xid)) + conflict_xid = latest_xid_removed; + + /* + * We can omit the snapshot conflict horizon if we are not pruning or + * freezing any tuples and are setting an already all-visible page + * all-frozen in the VM. In this case, all of the tuples on the page must + * already be visible to all MVCC snapshots on the standby. + */ + if (!do_prune && !do_freeze && + do_set_vm && blk_already_av && set_blk_all_frozen) + conflict_xid = InvalidTransactionId; + + return conflict_xid; +} + /* * Write an XLOG_HEAP2_PRUNE* WAL record * @@ -2078,14 +2324,24 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, * replaying 'unused' items depends on whether they were all previously marked * as dead. * + * If the VM is being updated, vmflags will contain the bits to set. In this + * case, vmbuffer should already have been updated and marked dirty and should + * still be pinned and locked. + * + * set_pd_all_vis indicates that we set PD_ALL_VISIBLE and thus should update + * the page LSN when checksums/wal_log_hints are enabled even if we did not + * prune or freeze tuples on the page. + * * Note: This function scribbles on the 'frozen' array. * * Note: This is called in a critical section, so careful what you do here. */ void log_heap_prune_and_freeze(Relation relation, Buffer buffer, + Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, + bool set_pd_all_vis, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, @@ -2095,6 +2351,7 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, xl_heap_prune xlrec; XLogRecPtr recptr; uint8 info; + uint8 regbuf_flags; /* The following local variables hold data registered in the WAL record: */ xlhp_freeze_plan plans[MaxHeapTuplesPerPage]; @@ -2103,8 +2360,23 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, xlhp_prune_items dead_items; xlhp_prune_items unused_items; OffsetNumber frz_offsets[MaxHeapTuplesPerPage]; + bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0; xlrec.flags = 0; + regbuf_flags = REGBUF_STANDARD; + + Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags); + + /* + * We can avoid an FPI if the only modification we are making to the heap + * page is to set PD_ALL_VISIBLE and checksums/wal_log_hints are disabled. + * Note that if we explicitly skip an FPI, we must not set the heap page + * LSN later. + */ + if (!do_prune && + nfrozen == 0 && + (!set_pd_all_vis || !XLogHintBitIsNeeded())) + regbuf_flags |= REGBUF_NO_IMAGE; /* * Prepare data for the buffer. The arrays are not actually in the @@ -2112,7 +2384,11 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, * page image, the arrays can be omitted. */ XLogBeginInsert(); - XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + XLogRegisterBuffer(0, buffer, regbuf_flags); + + if (vmflags & VISIBILITYMAP_VALID_BITS) + XLogRegisterBuffer(1, vmbuffer, 0); + if (nfrozen > 0) { int nplans; @@ -2169,6 +2445,12 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_* * flag above. 
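+	 * Any VM bits to set travel in this same flags word, as the XLHP_VM_*
+	 * bits.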
*/ + if (vmflags & VISIBILITYMAP_ALL_VISIBLE) + { + xlrec.flags |= XLHP_VM_ALL_VISIBLE; + if (vmflags & VISIBILITYMAP_ALL_FROZEN) + xlrec.flags |= XLHP_VM_ALL_FROZEN; + } if (RelationIsAccessibleInLogicalDecoding(relation)) xlrec.flags |= XLHP_IS_CATALOG_REL; if (TransactionIdIsValid(conflict_xid)) @@ -2201,5 +2483,23 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, } recptr = XLogInsert(RM_HEAP2_ID, info); - PageSetLSN(BufferGetPage(buffer), recptr); + if (vmflags & VISIBILITYMAP_VALID_BITS) + { + Assert(BufferIsDirty(vmbuffer)); + PageSetLSN(BufferGetPage(vmbuffer), recptr); + } + + /* + * We must bump the page LSN if pruning or freezing. If we are only + * updating PD_ALL_VISIBLE, though, we can skip doing this unless + * wal_log_hints/checksums are enabled. Torn pages are possible if we + * update PD_ALL_VISIBLE without bumping the LSN, but this is deemed okay + * for page hint updates. + */ + if (do_prune || nfrozen > 0 || + (set_pd_all_vis && XLogHintBitIsNeeded())) + { + Assert(BufferIsDirty(buffer)); + PageSetLSN(BufferGetPage(buffer), recptr); + } } diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index aed1f8e1139..39526bf608f 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1958,6 +1958,8 @@ lazy_scan_prune(LVRelState *vacrel, params.reason = PRUNE_VACUUM_SCAN; params.cutoffs = &vacrel->cutoffs; params.vistest = vacrel->vistest; + params.vmbuffer = vmbuffer; + params.blk_known_av = all_visible_according_to_vm; /* * Prune all HOT-update chains and potentially freeze tuples on this page. @@ -1974,7 +1976,7 @@ lazy_scan_prune(LVRelState *vacrel, * tuples. Pruning will have determined whether or not the page is * all-visible. */ - params.options = HEAP_PAGE_PRUNE_FREEZE; + params.options = HEAP_PAGE_PRUNE_FREEZE | HEAP_PAGE_PRUNE_UPDATE_VIS; if (vacrel->nindexes == 0) params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW; @@ -1997,33 +1999,6 @@ lazy_scan_prune(LVRelState *vacrel, vacrel->new_frozen_tuple_pages++; } - /* - * VACUUM will call heap_page_is_all_visible() during the second pass over - * the heap to determine all_visible and all_frozen for the page -- this - * is a specialized version of the logic from this function. Now that - * we've finished pruning and freezing, make sure that we're in total - * agreement with heap_page_is_all_visible() using an assertion. - */ -#ifdef USE_ASSERT_CHECKING - if (presult.all_visible) - { - TransactionId debug_cutoff; - bool debug_all_frozen; - - Assert(presult.lpdead_items == 0); - - if (!heap_page_is_all_visible(vacrel->rel, buf, - vacrel->cutoffs.OldestXmin, &debug_all_frozen, - &debug_cutoff, &vacrel->offnum)) - Assert(false); - - Assert(presult.all_frozen == debug_all_frozen); - - Assert(!TransactionIdIsValid(debug_cutoff) || - debug_cutoff == presult.vm_conflict_horizon); - } -#endif - /* * Now save details of the LP_DEAD items from the page in vacrel */ @@ -2057,168 +2032,26 @@ lazy_scan_prune(LVRelState *vacrel, /* Did we find LP_DEAD items? */ *has_lpdead_items = (presult.lpdead_items > 0); - Assert(!presult.all_visible || !(*has_lpdead_items)); - Assert(!presult.all_frozen || presult.all_visible); - /* - * Handle setting visibility map bit based on information from the VM (as - * of last heap_vac_scan_next_block() call), and from all_visible and - * all_frozen variables + * For the purposes of logging, count whether or not the page was newly + * set all-visible and, potentially, all-frozen. 
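+	 * The VM itself has already been updated, if needed, inside
+	 * heap_page_prune_and_freeze().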
*/ - if (!all_visible_according_to_vm && presult.all_visible) + if ((presult.old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0 && + (presult.new_vmbits & VISIBILITYMAP_ALL_VISIBLE) != 0) { - uint8 old_vmbits; - uint8 flags = VISIBILITYMAP_ALL_VISIBLE; - - if (presult.all_frozen) - { - Assert(!TransactionIdIsValid(presult.vm_conflict_horizon)); - flags |= VISIBILITYMAP_ALL_FROZEN; - } - - /* - * It should never be the case that the visibility map page is set - * while the page-level bit is clear, but the reverse is allowed (if - * checksums are not enabled). Regardless, set both bits so that we - * get back in sync. - * - * NB: If the heap page is all-visible but the VM bit is not set, we - * don't need to dirty the heap page. However, if checksums are - * enabled, we do need to make sure that the heap page is dirtied - * before passing it to visibilitymap_set(), because it may be logged. - * Given that this situation should only happen in rare cases after a - * crash, it is not worth optimizing. - */ - PageSetAllVisible(page); - MarkBufferDirty(buf); - old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf, - InvalidXLogRecPtr, - vmbuffer, presult.vm_conflict_horizon, - flags); - - /* - * If the page wasn't already set all-visible and/or all-frozen in the - * VM, count it as newly set for logging. - */ - if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) - { - vacrel->vm_new_visible_pages++; - if (presult.all_frozen) - { - vacrel->vm_new_visible_frozen_pages++; - *vm_page_frozen = true; - } - } - else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 && - presult.all_frozen) + vacrel->vm_new_visible_pages++; + if ((presult.new_vmbits & VISIBILITYMAP_ALL_FROZEN) != 0) { - vacrel->vm_new_frozen_pages++; + vacrel->vm_new_visible_frozen_pages++; *vm_page_frozen = true; } } - - /* - * As of PostgreSQL 9.2, the visibility map bit should never be set if the - * page-level bit is clear. However, it's possible that the bit got - * cleared after heap_vac_scan_next_block() was called, so we must recheck - * with buffer lock before concluding that the VM is corrupt. - */ - else if (all_visible_according_to_vm && !PageIsAllVisible(page) && - visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0) - { - ereport(WARNING, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", - vacrel->relname, blkno))); - - visibilitymap_clear(vacrel->rel, blkno, vmbuffer, - VISIBILITYMAP_VALID_BITS); - } - - /* - * It's possible for the value returned by - * GetOldestNonRemovableTransactionId() to move backwards, so it's not - * wrong for us to see tuples that appear to not be visible to everyone - * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value - * never moves backwards, but GetOldestNonRemovableTransactionId() is - * conservative and sometimes returns a value that's unnecessarily small, - * so if we see that contradiction it just means that the tuples that we - * think are not visible to everyone yet actually are, and the - * PD_ALL_VISIBLE flag is correct. - * - * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set, - * however. 
- */ - else if (presult.lpdead_items > 0 && PageIsAllVisible(page)) - { - ereport(WARNING, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u", - vacrel->relname, blkno))); - - PageClearAllVisible(page); - MarkBufferDirty(buf); - visibilitymap_clear(vacrel->rel, blkno, vmbuffer, - VISIBILITYMAP_VALID_BITS); - } - - /* - * If the all-visible page is all-frozen but not marked as such yet, mark - * it as all-frozen. - */ - else if (all_visible_according_to_vm && presult.all_frozen && - !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer)) + else if ((presult.old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 && + (presult.new_vmbits & VISIBILITYMAP_ALL_FROZEN) != 0) { - uint8 old_vmbits; - - /* - * Avoid relying on all_visible_according_to_vm as a proxy for the - * page-level PD_ALL_VISIBLE bit being set, since it might have become - * stale -- even when all_visible is set - */ - if (!PageIsAllVisible(page)) - { - PageSetAllVisible(page); - MarkBufferDirty(buf); - } - - /* - * Set the page all-frozen (and all-visible) in the VM. - * - * We can pass InvalidTransactionId as our cutoff_xid, since a - * snapshotConflictHorizon sufficient to make everything safe for REDO - * was logged when the page's tuples were frozen. - */ - Assert(!TransactionIdIsValid(presult.vm_conflict_horizon)); - old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf, - InvalidXLogRecPtr, - vmbuffer, InvalidTransactionId, - VISIBILITYMAP_ALL_VISIBLE | - VISIBILITYMAP_ALL_FROZEN); - - /* - * The page was likely already set all-visible in the VM. However, - * there is a small chance that it was modified sometime between - * setting all_visible_according_to_vm and checking the visibility - * during pruning. Check the return value of old_vmbits anyway to - * ensure the visibility map counters used for logging are accurate. - */ - if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) - { - vacrel->vm_new_visible_pages++; - vacrel->vm_new_visible_frozen_pages++; - *vm_page_frozen = true; - } - - /* - * We already checked that the page was not set all-frozen in the VM - * above, so we don't need to test the value of old_vmbits. - */ - else - { - vacrel->vm_new_frozen_pages++; - *vm_page_frozen = true; - } + Assert((presult.new_vmbits & VISIBILITYMAP_ALL_VISIBLE) != 0); + vacrel->vm_new_frozen_pages++; + *vm_page_frozen = true; } return presult.ndeleted; @@ -2892,8 +2725,11 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, if (RelationNeedsWAL(vacrel->rel)) { log_heap_prune_and_freeze(vacrel->rel, buffer, - InvalidTransactionId, + InvalidBuffer, /* vmbuffer */ + 0, /* vmflags */ + InvalidTransactionId, /* conflict_xid */ false, /* no cleanup lock required */ + false, /* set_pd_all_vis */ PRUNE_VACUUM_CLEANUP, NULL, 0, /* frozen */ NULL, 0, /* redirected */ diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index b48d7dc1d24..1cb44ca32d3 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -103,7 +103,7 @@ plan_elem_desc(StringInfo buf, void *plan, void *data) * code, the latter of which is used in frontend (pg_waldump) code. 
 */
 void
-heap_xlog_deserialize_prune_and_freeze(char *cursor, uint8 flags,
+heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags,
 									   int *nplans, xlhp_freeze_plan **plans,
 									   OffsetNumber **frz_offsets,
 									   int *nredirected, OffsetNumber **redirected,
@@ -287,6 +287,15 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 		appendStringInfo(buf, ", isCatalogRel: %c",
 						 xlrec->flags & XLHP_IS_CATALOG_REL ? 'T' : 'F');
 
+		if (xlrec->flags & XLHP_VM_ALL_VISIBLE)
+		{
+			uint8		vmflags = VISIBILITYMAP_ALL_VISIBLE;
+
+			if (xlrec->flags & XLHP_VM_ALL_FROZEN)
+				vmflags |= VISIBILITYMAP_ALL_FROZEN;
+			appendStringInfo(buf, ", vm_flags: 0x%02X", vmflags);
+		}
+
 		if (XLogRecHasBlockData(record, 0))
 		{
 			Size		datalen;
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index ea67fb83fbe..2de39ba0cd1 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -42,6 +42,7 @@
 /* "options" flag bits for heap_page_prune_and_freeze */
 #define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW (1 << 0)
 #define HEAP_PAGE_PRUNE_FREEZE (1 << 1)
+#define HEAP_PAGE_PRUNE_UPDATE_VIS (1 << 2)
 
 typedef struct BulkInsertStateData *BulkInsertState;
 typedef struct GlobalVisState GlobalVisState;
@@ -238,6 +239,16 @@ typedef struct PruneFreezeParams
 	Relation	relation;		/* relation containing buffer to be pruned */
 	Buffer		buffer;			/* buffer to be pruned */
 
+	/*
+	 * vmbuffer is the buffer that must already contain the required block
+	 * of the visibility map if we are to update it.
+	 *
+	 * blk_known_av is the visibility status of the heap block as of the
+	 * last call to find_next_unskippable_block().
+	 */
+	Buffer		vmbuffer;
+	bool		blk_known_av;
+
 	/*
 	 * The reason pruning was performed. It is used to set the WAL record
 	 * opcode which is used for debugging and analysis purposes.
@@ -250,8 +261,9 @@ typedef struct PruneFreezeParams
 	 * MARK_UNUSED_NOW indicates that dead items can be set LP_UNUSED during
 	 * pruning.
 	 *
-	 * FREEZE indicates that we will also freeze tuples, and will return
-	 * 'all_visible', 'all_frozen' flags to the caller.
+	 * FREEZE indicates that we will also freeze tuples.
+	 *
+	 * UPDATE_VIS indicates that we will set the page's status in the VM.
 	 */
 	int			options;
 
@@ -284,19 +296,15 @@ typedef struct PruneFreezeResult
 	int			recently_dead_tuples;
 
 	/*
-	 * all_visible and all_frozen indicate if the all-visible and all-frozen
-	 * bits in the visibility map can be set for this page, after pruning.
-	 *
-	 * vm_conflict_horizon is the newest xmin of live tuples on the page. The
-	 * caller can use it as the conflict horizon when setting the VM bits. It
-	 * is only valid if we froze some tuples (nfrozen > 0), and all_frozen is
-	 * true.
+	 * old_vmbits is the state of the all-visible and all-frozen bits in the
+	 * visibility map before updating it during phase I of vacuuming.
+	 * new_vmbits is the state of those bits after phase I of vacuuming.
 	 *
-	 * These are only set if the HEAP_PRUNE_FREEZE option is set.
+	 * These are only set if the HEAP_PAGE_PRUNE_UPDATE_VIS option is set and
+	 * we have attempted to update the VM.
 	 */
-	bool		all_visible;
-	bool		all_frozen;
-	TransactionId vm_conflict_horizon;
+	uint8		new_vmbits;
+	uint8		old_vmbits;
 
 	/*
 	 * Whether or not the page makes rel truncation unsafe. 
This is set to @@ -420,8 +428,10 @@ extern void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *nowunused, int nunused); extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer, + Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, + bool set_pd_all_vis, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index d4c0625b632..16c2b2e3c9c 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -249,7 +249,7 @@ typedef struct xl_heap_update * Main data section: * * xl_heap_prune - * uint8 flags + * uint16 flags * TransactionId snapshot_conflict_horizon * * Block 0 data section: @@ -284,7 +284,7 @@ typedef struct xl_heap_update */ typedef struct xl_heap_prune { - uint8 flags; + uint16 flags; /* * If XLHP_HAS_CONFLICT_HORIZON is set, the conflict horizon XID follows, @@ -292,7 +292,7 @@ typedef struct xl_heap_prune */ } xl_heap_prune; -#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8)) +#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint16)) /* to handle recovery conflict during logical decoding on standby */ #define XLHP_IS_CATALOG_REL (1 << 1) @@ -330,6 +330,15 @@ typedef struct xl_heap_prune #define XLHP_HAS_DEAD_ITEMS (1 << 6) #define XLHP_HAS_NOW_UNUSED_ITEMS (1 << 7) +/* + * The xl_heap_prune record's flags may also contain which VM bits to set. + * xl_heap_prune should always use the XLHP_VM_ALL_VISIBLE and + * XLHP_VM_ALL_FROZEN flags and translate them to their visibilitymapdefs.h + * equivalents, VISIBILITYMAP_ALL_VISIBLE and VISIBILITYMAP_ALL_FROZEN. + */ +#define XLHP_VM_ALL_VISIBLE (1 << 8) +#define XLHP_VM_ALL_FROZEN (1 << 9) + /* * xlhp_freeze_plan describes how to freeze a group of one or more heap tuples * (appears in xl_heap_prune's xlhp_freeze_plans sub-record) @@ -497,7 +506,7 @@ extern XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer, uint8 vmflags); /* in heapdesc.c, so it can be shared between frontend/backend code */ -extern void heap_xlog_deserialize_prune_and_freeze(char *cursor, uint8 flags, +extern void heap_xlog_deserialize_prune_and_freeze(char *cursor, uint16 flags, int *nplans, xlhp_freeze_plan **plans, OffsetNumber **frz_offsets, int *nredirected, OffsetNumber **redirected, -- 2.43.0