From 1564aebed44f891b86d93b0edacb08deaf8937c6 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Fri, 29 Mar 2024 16:05:41 -0400 Subject: [PATCH v10 04/10] Handle non-chain tuples outside of heap_prune_chain() Dead branches of aborted HOT chains or leftover LP_DEAD and LP_REDIRECT line pointers can be handled outside of heap_prune_chain(). This simplifies the logic in heap_prune_chain() as well as allowing us to clean up more RECENTLY_DEAD -> DEAD chains. To accomplish this efficiently, partition tuples into HOT and non-HOT while first collecting visibility information for each tuple in heap_page_prune(). Then call heap_prune_chain() only on potential chain members. Then mop up the leftover HOT tuples afterwards. --- src/backend/access/heap/pruneheap.c | 270 ++++++++++++++++------------ 1 file changed, 154 insertions(+), 116 deletions(-) diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 6188c5b2f3..9e6cfbf9f9 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -44,6 +44,18 @@ typedef struct OffsetNumber nowdead[MaxHeapTuplesPerPage]; OffsetNumber nowunused[MaxHeapTuplesPerPage]; + /* + * Chain candidates contains indexes of all LP_NORMAL and LP_REDIRECT + * items. The first partition are the indexes of the LP_NORMAL and + * LP_REDIRECT items we know to be part of a chain. The second partition + * are the indexes of HOT tuples that may or may not be part of a HOT + * chain. Those which are part of a HOT chain will be visited and marked + * by heap_prune_chain() and the others will be processed afterward. + */ + int nchain_members; + int nchain_candidates; + OffsetNumber chain_candidates[MaxHeapTuplesPerPage]; + /* * marked[i] is true if item i is entered in one of the above arrays. * @@ -250,6 +262,8 @@ heap_page_prune(Relation relation, Buffer buffer, prstate.nredirected = prstate.ndead = prstate.nunused = 0; memset(prstate.marked, 0, sizeof(prstate.marked)); prstate.ndeleted = 0; + prstate.nchain_members = 0; + prstate.nchain_candidates = 0; /* * presult->htsv is not initialized here because all ntuple spots in the @@ -288,17 +302,11 @@ heap_page_prune(Relation relation, Buffer buffer, ItemId itemid = PageGetItemId(page, offnum); HeapTupleHeader htup; + presult->htsv[offnum] = -1; + /* Nothing to do if slot doesn't contain a tuple */ - if (!ItemIdIsNormal(itemid)) - { - presult->htsv[offnum] = -1; + if (!ItemIdIsUsed(itemid)) continue; - } - - htup = (HeapTupleHeader) PageGetItem(page, itemid); - tup.t_data = htup; - tup.t_len = ItemIdGetLength(itemid); - ItemPointerSet(&(tup.t_self), blockno, offnum); /* * Set the offset number so that we can display it along with any @@ -307,18 +315,66 @@ heap_page_prune(Relation relation, Buffer buffer, if (off_loc) *off_loc = offnum; + if (ItemIdIsDead(itemid)) + { + /* + * If the caller set mark_unused_now true, we can set dead line + * pointers LP_UNUSED now. + */ + if (unlikely(prstate.mark_unused_now)) + heap_prune_record_unused(&prstate, offnum, false); + else + heap_prune_record_unchanged_lp_dead(&prstate, offnum); + + continue; + } + + if (ItemIdIsRedirected(itemid)) + { + /* This is a chain member, so it goes in partition 1 */ + OffsetNumber swap = prstate.chain_candidates[prstate.nchain_members]; + + prstate.chain_candidates[prstate.nchain_candidates++] = swap; + prstate.chain_candidates[prstate.nchain_members++] = offnum; + + continue; + } + + Assert(ItemIdIsNormal(itemid)); + + /* + * Given that we have an LP_NORMAL item, let's get its visibility + * status and then examine the tuple to decide whether to put it in + * partition 1 or 2 of chain_candidates. + */ + htup = (HeapTupleHeader) PageGetItem(page, itemid); + tup.t_data = htup; + tup.t_len = ItemIdGetLength(itemid); + ItemPointerSet(&tup.t_self, blockno, offnum); + presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup, buffer); + + if (!HeapTupleHeaderIsHeapOnly(htup)) + { + /* All non-HOT tuples go in partition 1 */ + OffsetNumber swap = prstate.chain_candidates[prstate.nchain_members]; + + prstate.chain_candidates[prstate.nchain_candidates++] = swap; + prstate.chain_candidates[prstate.nchain_members++] = offnum; + + continue; + } + + /* All LP_NORMAL HOT tuples go in partition 2 */ + prstate.chain_candidates[prstate.nchain_candidates++] = offnum; } - /* Scan the page */ - for (offnum = FirstOffsetNumber; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) + /* Process HOT chains */ + for (int i = 0; i < prstate.nchain_members; i++) { - ItemId itemid; + offnum = prstate.chain_candidates[i]; - /* Ignore items already processed as part of an earlier chain */ if (prstate.marked[offnum]) continue; @@ -326,23 +382,78 @@ heap_page_prune(Relation relation, Buffer buffer, if (off_loc) *off_loc = offnum; - /* Nothing to do if slot is empty */ - itemid = PageGetItemId(page, offnum); - if (!ItemIdIsUsed(itemid)) - continue; - /* Process this item or chain of items */ heap_prune_chain(buffer, offnum, presult->htsv, &prstate); } - for (offnum = FirstOffsetNumber; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) + /* + * Check all HOT tuples to see if they have already been marked by + * heap_prune_chain() or if they still need to be processed. They will + * either be marked for removal or marked as unchanged. + */ + for (int i = prstate.nchain_members; i < prstate.nchain_candidates; i++) { - ItemId itemid = PageGetItemId(page, offnum); + offnum = prstate.chain_candidates[i]; - if (ItemIdIsUsed(itemid) && !prstate.marked[offnum]) - heap_prune_record_unchanged(page, &prstate, offnum); + if (prstate.marked[offnum]) + continue; + + /* see preceding loop */ + if (off_loc) + *off_loc = offnum; + + if (presult->htsv[offnum] == HEAPTUPLE_DEAD) + { + ItemId itemid = PageGetItemId(page, offnum); + HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid); + + /* + * If the tuple is DEAD and doesn't chain to anything else, mark + * it unused immediately. (If it does chain, we can only remove + * it as part of pruning its chain.) + * + * We need this primarily to handle aborted HOT updates, that is, + * XMIN_INVALID heap-only tuples. Those might not be linked to by + * any chain, since the parent tuple might be re-updated before + * any pruning occurs. So we have to be able to reap them + * separately from chain-pruning. (Note that + * HeapTupleHeaderIsHotUpdated will never return true for an + * XMIN_INVALID tuple, so this code will work even when there were + * sequential updates within the aborted transaction.) + * + * Note that we might first arrive at a dead heap-only tuple + * either above while following a chain or here. Whichever path + * gets there first will mark the tuple unused. + * + * Whether we arrive at the dead HOT tuple first here or while + * following a chain above affects whether preceding RECENTLY_DEAD + * tuples in the chain can be removed or not. Imagine that you + * have a chain with two tuples: RECENTLY_DEAD -> DEAD. If we + * reach the RECENTLY_DEAD tuple first, the chain-following logic + * will find the DEAD tuple and conclude that both tuples are in + * fact dead and can be removed. But if we reach the DEAD tuple + * at the end of the chain first, when we reach the RECENTLY_DEAD + * tuple later, we will not follow the chain because the DEAD + * TUPLE is already 'marked', and will not remove the + * RECENTLY_DEAD tuple. This is not a correctness issue, and the + * RECENTLY_DEAD tuple will be removed by a later VACUUM. + */ + if (!HeapTupleHeaderIsHotUpdated(htup)) + { + HeapTupleHeaderAdvanceConflictHorizon(htup, + &prstate.snapshotConflictHorizon); + heap_prune_record_unused(&prstate, offnum, true); + continue; + } + } + + /* + * HOT tuple is not DEAD or has been HOT-updated. If it is a DEAD, + * HOT-updated member of a chain, it should have been already been + * marked by heap_prune_chain() and heap_prune_record_unchanged() will + * return immediately. + */ + heap_prune_record_unchanged(page, &prstate, offnum); } /* We should now have processed every tuple exactly once */ @@ -351,12 +462,10 @@ heap_page_prune(Relation relation, Buffer buffer, offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { - ItemId itemid; - if (off_loc) *off_loc = offnum; - itemid = PageGetItemId(page, offnum); - if (ItemIdIsUsed(itemid)) + + if (ItemIdIsUsed(PageGetItemId(page, offnum))) Assert(prstate.marked[offnum]); else Assert(!prstate.marked[offnum]); @@ -490,11 +599,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate) { Page page = (Page) BufferGetPage(buffer); - TransactionId priorXmax = InvalidTransactionId; - ItemId rootlp; - HeapTupleHeader htup; - OffsetNumber maxoff = PageGetMaxOffsetNumber(page), - offnum; + ItemId rootlp = PageGetItemId(page, rootoffnum); OffsetNumber chainitems[MaxHeapTuplesPerPage]; /* @@ -504,67 +609,14 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, int ndeadchain = 0, nchain = 0; - rootlp = PageGetItemId(page, rootoffnum); - - /* - * If it's a heap-only tuple, then it is not the start of a HOT chain. - */ - if (ItemIdIsNormal(rootlp)) - { - Assert(htsv[rootoffnum] != -1); - htup = (HeapTupleHeader) PageGetItem(page, rootlp); - - if (HeapTupleHeaderIsHeapOnly(htup)) - { - /* - * If the tuple is DEAD and doesn't chain to anything else, mark - * it unused immediately. (If it does chain, we can only remove - * it as part of pruning its chain.) - * - * We need this primarily to handle aborted HOT updates, that is, - * XMIN_INVALID heap-only tuples. Those might not be linked to by - * any chain, since the parent tuple might be re-updated before - * any pruning occurs. So we have to be able to reap them - * separately from chain-pruning. (Note that - * HeapTupleHeaderIsHotUpdated will never return true for an - * XMIN_INVALID tuple, so this code will work even when there were - * sequential updates within the aborted transaction.) - * - * Note that we might first arrive at a dead heap-only tuple - * either here or while following a chain below. Whichever path - * gets there first will mark the tuple unused. - * - * Whether we arrive at the dead HOT tuple first here or while - * following a chain below affects whether preceding RECENTLY_DEAD - * tuples in the chain can be removed or not. Imagine that you - * have a chain with two tuples: RECENTLY_DEAD -> DEAD. If we - * reach the RECENTLY_DEAD tuple first, the chain-following logic - * will find the DEAD tuple and conclude that both tuples are in - * fact dead and can be removed. But if we reach the DEAD tuple - * at the end of the chain first, when we reach the RECENTLY_DEAD - * tuple later, we will not follow the chain because the DEAD - * TUPLE is already 'marked', and will not remove the - * RECENTLY_DEAD tuple. This is not a correctness issue, and the - * RECENTLY_DEAD tuple will be removed by a later VACUUM. - */ - if (htsv[rootoffnum] == HEAPTUPLE_DEAD && - !HeapTupleHeaderIsHotUpdated(htup)) - { - heap_prune_record_unused(prstate, rootoffnum, true); - HeapTupleHeaderAdvanceConflictHorizon(htup, - &prstate->snapshotConflictHorizon); - } - - return; - } - } + OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + TransactionId priorXmax = InvalidTransactionId; /* Start from the root tuple */ - offnum = rootoffnum; - /* while not end of the chain */ - for (;;) + for (OffsetNumber offnum = rootoffnum;;) { + HeapTupleHeader htup; ItemId lp; /* Sanity check (pure paranoia) */ @@ -584,8 +636,13 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, lp = PageGetItemId(page, offnum); - /* Unused item obviously isn't part of the chain */ - if (!ItemIdIsUsed(lp)) + /* + * Unused item obviously isn't part of the chain. Likewise, a dead + * line pointer can't be part of the chain. (We already eliminated the + * case of dead root tuple outside this function.). MFIXME should dead + * item check be an assert? + */ + if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp)) break; /* @@ -602,27 +659,8 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, continue; } - /* - * Likewise, a dead line pointer can't be part of the chain. (We - * already eliminated the case of dead root tuple outside this - * function.) - */ - if (ItemIdIsDead(lp)) - { - /* - * If the caller set mark_unused_now true, we can set dead line - * pointers LP_UNUSED now. We don't increment ndeleted here since - * the LP was already marked dead. - */ - if (unlikely(prstate->mark_unused_now)) - heap_prune_record_unused(prstate, offnum, false); - else - heap_prune_record_unchanged_lp_dead(prstate, offnum); - - break; - } - Assert(ItemIdIsNormal(lp)); + htup = (HeapTupleHeader) PageGetItem(page, lp); /* @@ -748,7 +786,7 @@ process_chains: /* the rest of tuples in the chain are normal, unchanged tuples */ for (; i < nchain; i++) - heap_prune_record_unchanged(dp, prstate, chainitems[i]); + heap_prune_record_unchanged(page, prstate, chainitems[i]); } else if (ndeadchain == nchain) { @@ -780,7 +818,7 @@ process_chains: /* the rest of tuples in the chain are normal, unchanged tuples */ for (int i = ndeadchain; i < nchain; i++) - heap_prune_record_unchanged(dp, prstate, chainitems[i]); + heap_prune_record_unchanged(page, prstate, chainitems[i]); } } -- 2.40.1