From f9356564f651fa9f91e99adbbac50914601e3551 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 22 Jan 2024 17:25:56 -0500 Subject: [PATCH v4 16/19] Count tuples for vacuum logging in heap_page_prune lazy_scan_prune() loops through all of the tuple visibility information that was recorded in heap_page_prune() and then counts live and recently dead tuples. That information is available in heap_page_prune(), so just record it there. Add live and recently dead tuple counters to the PruneResult. Doing this counting in heap_page_prune() eliminates the need for saving the tuple visibility status information in the PruneResult. Instead, save it in the PruneState where it can be referenced by heap_prune_chain(). --- src/backend/access/heap/pruneheap.c | 99 ++++++++++++++++++++++++---- src/backend/access/heap/vacuumlazy.c | 93 +------------------------- src/include/access/heapam.h | 29 +------- 3 files changed, 93 insertions(+), 128 deletions(-) diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index d183912a402..f59b03222b0 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -55,6 +55,18 @@ typedef struct */ bool marked[MaxHeapTuplesPerPage + 1]; + /* + * Tuple visibility is only computed once for each tuple, for correctness + * and efficiency reasons; see comment in heap_page_prune_and_freeze() for + * details. This is of type int8[], instead of HTSV_Result[], so we can + * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD + * items. + * + * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is + * 1. Otherwise every access would need to subtract 1. + */ + int8 htsv[MaxHeapTuplesPerPage + 1]; + /* * One entry for every tuple that we may freeze. */ @@ -69,6 +81,7 @@ static int heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate, PruneFreezeResult *presult); +static inline HTSV_Result htsv_get_valid_status(int status); static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid); static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum, @@ -269,7 +282,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, prstate.nredirected = prstate.ndead = prstate.nunused = 0; /* - * presult->htsv is not initialized here because all ntuple spots in the + * prstate.htsv is not initialized here because all ntuple spots in the * array will be set either to a valid HTSV_Result value or -1. */ memset(prstate.marked, 0, sizeof(prstate.marked)); @@ -280,6 +293,9 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, presult->hastup = false; + presult->live_tuples = 0; + presult->recently_dead_tuples = 0; + /* * Caller will update the VM after pruning, collecting LP_DEAD items, and * freezing tuples. Keep track of whether or not the page is all_visible @@ -329,7 +345,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, /* Nothing to do if slot doesn't contain a tuple */ if (!ItemIdIsNormal(itemid)) { - presult->htsv[offnum] = -1; + prstate.htsv[offnum] = -1; continue; } @@ -345,9 +361,29 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, if (off_loc) *off_loc = offnum; - presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup, - buffer); - switch (presult->htsv[offnum]) + prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup, + buffer); + + /* + * The criteria for counting a tuple as live in this block need to + * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM + * and ANALYZE may produce wildly different reltuples values, e.g. + * when there are many recently-dead tuples. + * + * The logic here is a bit simpler than acquire_sample_rows(), as + * VACUUM can't run inside a transaction block, which makes some cases + * impossible (e.g. in-progress insert from the same transaction). + * + * We treat LP_DEAD items (which are the closest thing to DEAD tuples + * that might be seen here) differently, too: we assume that they'll + * become LP_UNUSED before VACUUM finishes. This difference is only + * superficial. VACUUM effectively agrees with ANALYZE about DEAD + * items, in the end. VACUUM won't remember LP_DEAD items, but only + * because they're not supposed to be left behind when it is done. + * (Cases where we bypass index vacuuming will violate this optimistic + * assumption, but the overall impact of that should be negligible.) + */ + switch (prstate.htsv[offnum]) { case HEAPTUPLE_DEAD: @@ -367,6 +403,12 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, break; case HEAPTUPLE_LIVE: + /* + * Count it as live. Not only is this natural, but it's also + * what acquire_sample_rows() does. + */ + presult->live_tuples++; + /* * Is the tuple definitely visible to all transactions? * @@ -408,13 +450,34 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, } break; case HEAPTUPLE_RECENTLY_DEAD: + + /* + * If tuple is recently dead then we must not remove it from + * the relation. (We only remove items that are LP_DEAD from + * pruning.) + */ + presult->recently_dead_tuples++; presult->all_visible = false; break; case HEAPTUPLE_INSERT_IN_PROGRESS: + + /* + * We do not count these rows as live, because we expect the + * inserting transaction to update the counters at commit, and + * we assume that will happen only after we report our + * results. This assumption is a bit shaky, but it is what + * acquire_sample_rows() does, so be consistent. + */ presult->all_visible = false; break; case HEAPTUPLE_DELETE_IN_PROGRESS: - /* This is an expected case during concurrent vacuum */ + + /* + * This an expected case during concurrent vacuum. Count such + * rows as live. As above, we assume the deleting transaction + * will commit and update the counters after we report. + */ + presult->live_tuples++; presult->all_visible = false; break; default: @@ -422,7 +485,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, break; } - if (presult->htsv[offnum] != HEAPTUPLE_DEAD) + if (prstate.htsv[offnum] != HEAPTUPLE_DEAD) { /* * Deliberately don't set hastup for LP_DEAD items. We make the @@ -772,10 +835,24 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer) } +/* + * Pruning calculates tuple visibility once and saves the results in an array + * of int8. See PruneState.htsv for details. This helper function is meant to + * guard against examining visibility status array members which have not yet + * been computed. + */ +static inline HTSV_Result +htsv_get_valid_status(int status) +{ + Assert(status >= HEAPTUPLE_DEAD && + status <= HEAPTUPLE_DELETE_IN_PROGRESS); + return (HTSV_Result) status; +} + /* * Prune specified line pointer or a HOT chain originating at line pointer. * - * Tuple visibility information is provided in presult->htsv. + * Tuple visibility information is provided in prstate->htsv. * * If the item is an index-referenced tuple (i.e. not a heap-only tuple), * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT @@ -826,7 +903,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, */ if (ItemIdIsNormal(rootlp)) { - Assert(presult->htsv[rootoffnum] != -1); + Assert(prstate->htsv[rootoffnum] != -1); htup = (HeapTupleHeader) PageGetItem(dp, rootlp); if (HeapTupleHeaderIsHeapOnly(htup)) @@ -849,7 +926,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, * either here or while following a chain below. Whichever path * gets there first will mark the tuple unused. */ - if (presult->htsv[rootoffnum] == HEAPTUPLE_DEAD && + if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup)) { heap_prune_record_unused(prstate, rootoffnum); @@ -950,7 +1027,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, */ tupdead = recent_dead = false; - switch (htsv_get_valid_status(presult->htsv[offnum])) + switch (htsv_get_valid_status(prstate->htsv[offnum])) { case HEAPTUPLE_DEAD: tupdead = true; diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 370721a619a..a3c971cd26d 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1378,22 +1378,6 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, * * Caller must hold pin and buffer cleanup lock on the buffer. * - * Prior to PostgreSQL 14 there were very rare cases where - * heap_page_prune_and_freeze() was allowed to disagree with our - * HeapTupleSatisfiesVacuum() call about whether or not a tuple should be - * considered DEAD. This happened when an inserting transaction concurrently - * aborted (after our heap_page_prune_and_freeze() call, before our - * HeapTupleSatisfiesVacuum() call). There was rather a lot of complexity just - * so we could deal with tuples that were DEAD to VACUUM, but nevertheless were - * left with storage after pruning. - * - * As of Postgres 17, we circumvent this problem altogether by reusing the - * result of heap_page_prune_and_freeze()'s visibility check. Without the - * second call to HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and - * there can be no disagreement. We'll just handle such tuples as if they had - * become fully dead right after this operation completes instead of in the - * middle of it. - * * vmbuffer is the buffer containing the VM block with visibility information * for the heap block, blkno. all_visible_according_to_vm is the saved * visibility status of the heap block looked up earlier by the caller. We @@ -1415,10 +1399,8 @@ lazy_scan_prune(LVRelState *vacrel, OffsetNumber offnum, maxoff; ItemId itemid; + int lpdead_items = 0; PruneFreezeResult presult; - int lpdead_items, - live_tuples, - recently_dead_tuples; HeapPageFreeze pagefrz; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; @@ -1438,9 +1420,6 @@ lazy_scan_prune(LVRelState *vacrel, pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid; pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid; pagefrz.cutoffs = &vacrel->cutoffs; - lpdead_items = 0; - live_tuples = 0; - recently_dead_tuples = 0; /* * Prune all HOT-update chains and potentially freeze tuples on this page. @@ -1476,9 +1455,6 @@ lazy_scan_prune(LVRelState *vacrel, vacrel->offnum = offnum; itemid = PageGetItemId(page, offnum); - /* Redirect items mustn't be touched */ - if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid)) - continue; if (ItemIdIsDead(itemid)) { @@ -1486,69 +1462,6 @@ lazy_scan_prune(LVRelState *vacrel, continue; } - Assert(ItemIdIsNormal(itemid)); - - /* - * The criteria for counting a tuple as live in this block need to - * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM - * and ANALYZE may produce wildly different reltuples values, e.g. - * when there are many recently-dead tuples. - * - * The logic here is a bit simpler than acquire_sample_rows(), as - * VACUUM can't run inside a transaction block, which makes some cases - * impossible (e.g. in-progress insert from the same transaction). - * - * We treat LP_DEAD items (which are the closest thing to DEAD tuples - * that might be seen here) differently, too: we assume that they'll - * become LP_UNUSED before VACUUM finishes. This difference is only - * superficial. VACUUM effectively agrees with ANALYZE about DEAD - * items, in the end. VACUUM won't remember LP_DEAD items, but only - * because they're not supposed to be left behind when it is done. - * (Cases where we bypass index vacuuming will violate this optimistic - * assumption, but the overall impact of that should be negligible.) - */ - switch (htsv_get_valid_status(presult.htsv[offnum])) - { - case HEAPTUPLE_LIVE: - - /* - * Count it as live. Not only is this natural, but it's also - * what acquire_sample_rows() does. - */ - live_tuples++; - break; - case HEAPTUPLE_RECENTLY_DEAD: - - /* - * If tuple is recently dead then we must not remove it from - * the relation. (We only remove items that are LP_DEAD from - * pruning.) - */ - recently_dead_tuples++; - break; - case HEAPTUPLE_INSERT_IN_PROGRESS: - - /* - * We do not count these rows as live, because we expect the - * inserting transaction to update the counters at commit, and - * we assume that will happen only after we report our - * results. This assumption is a bit shaky, but it is what - * acquire_sample_rows() does, so be consistent. - */ - break; - case HEAPTUPLE_DELETE_IN_PROGRESS: - - /* - * This an expected case during concurrent vacuum. Count such - * rows as live. As above, we assume the deleting transaction - * will commit and update the counters after we report. - */ - live_tuples++; - break; - default: - elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); - break; - } } vacrel->offnum = InvalidOffsetNumber; @@ -1626,8 +1539,8 @@ lazy_scan_prune(LVRelState *vacrel, vacrel->tuples_deleted += presult.ndeleted; vacrel->tuples_frozen += presult.nfrozen; vacrel->lpdead_items += lpdead_items; - vacrel->live_tuples += live_tuples; - vacrel->recently_dead_tuples += recently_dead_tuples; + vacrel->live_tuples += presult.live_tuples; + vacrel->recently_dead_tuples += presult.recently_dead_tuples; /* Can't truncate this page */ if (presult.hastup) diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 25dbae8139e..23ba23b5b01 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -198,6 +198,8 @@ typedef struct HeapPageFreeze */ typedef struct PruneFreezeResult { + int live_tuples; + int recently_dead_tuples; int ndeleted; /* Number of tuples deleted from the page */ int nnewlpdead; /* Number of newly LP_DEAD items */ bool all_visible; /* Whether or not the page is all visible */ @@ -211,19 +213,6 @@ typedef struct PruneFreezeResult int nfrozen; TransactionId frz_conflict_horizon; /* Newest xmin on the page */ - /* - * Tuple visibility is only computed once for each tuple, for correctness - * and efficiency reasons; see comment in heap_page_prune_and_freeze() for - * details. This is of type int8[], instead of HTSV_Result[], so we can - * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD - * items. - * - * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is - * 1. Otherwise every access would need to subtract 1. - */ - int8 htsv[MaxHeapTuplesPerPage + 1]; - - /* New value of relfrozenxid found by heap_page_prune_and_freeze() */ TransactionId new_relfrozenxid; @@ -231,20 +220,6 @@ typedef struct PruneFreezeResult MultiXactId new_relminmxid; } PruneFreezeResult; -/* - * Pruning calculates tuple visibility once and saves the results in an array - * of int8. See PruneFreezeResult.htsv for details. This helper function is - * meant to guard against examining visibility status array members which have - * not yet been computed. - */ -static inline HTSV_Result -htsv_get_valid_status(int status) -{ - Assert(status >= HEAPTUPLE_DEAD && - status <= HEAPTUPLE_DELETE_IN_PROGRESS); - return (HTSV_Result) status; -} - /* ---------------- * function prototypes for heap access method * -- 2.40.1