From c6996e3743d75df11d3d06508c4f532e61f9f8e6 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 8 Nov 2021 22:58:14 -0800 Subject: [PATCH v1] Fix aborted HOT update bug in heap pruning. --- src/backend/access/heap/pruneheap.c | 201 +++++++++++++++++++--------- 1 file changed, 135 insertions(+), 66 deletions(-) diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index c7331d810..7cad94705 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -58,12 +58,16 @@ typedef struct OffsetNumber nowunused[MaxHeapTuplesPerPage]; /* marked[i] is true if item i is entered in one of the above arrays */ bool marked[MaxHeapTuplesPerPage + 1]; + /* inhotchain[i] is true if item i is in valid HOT chain */ + bool inhotchain[MaxHeapTuplesPerPage + 1]; } PruneState; /* Local functions */ static int heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate); +static int heap_prune_heaponly(Buffer buffer, OffsetNumber heaponlyoffnum, + PruneState *prstate); static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid); static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum); @@ -249,6 +253,7 @@ heap_page_prune(Relation relation, Buffer buffer, prstate.latestRemovedXid = InvalidTransactionId; prstate.nredirected = prstate.ndead = prstate.nunused = 0; memset(prstate.marked, 0, sizeof(prstate.marked)); + memset(prstate.inhotchain, 0, sizeof(prstate.inhotchain)); /* Scan the page */ maxoff = PageGetMaxOffsetNumber(page); @@ -278,6 +283,35 @@ heap_page_prune(Relation relation, Buffer buffer, ndeleted += heap_prune_chain(buffer, offnum, &prstate); } + /* + * Scan the page again, to pick up any heap-only tuples that were not + * recognized as part of some HOT chain during our first pass + */ + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + + /* Ignore items already 
processed as part of an earlier chain */ + if (prstate.inhotchain[offnum]) + continue; + Assert(!prstate.marked[offnum]); + itemid = PageGetItemId(page, offnum); + if (!ItemIdIsNormal(itemid)) + continue; + + /* + * Set the offset number so that we can display it along with any + * error that occurred while processing this tuple. + */ + if (off_loc) + *off_loc = offnum; + + /* Process this item */ + ndeleted += heap_prune_heaponly(buffer, offnum, &prstate); + } + /* Clear the offset information once we have processed the given page. */ if (off_loc) *off_loc = InvalidOffsetNumber; @@ -521,73 +555,48 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) Page dp = (Page) BufferGetPage(buffer); TransactionId priorXmax = InvalidTransactionId; ItemId rootlp; - HeapTupleHeader htup; OffsetNumber latestdead = InvalidOffsetNumber, maxoff = PageGetMaxOffsetNumber(dp), offnum; OffsetNumber chainitems[MaxHeapTuplesPerPage]; - int nchain = 0, - i; - HeapTupleData tup; - - tup.t_tableOid = RelationGetRelid(prstate->rel); + int nchain = 0; + bool startswithredirect PG_USED_FOR_ASSERTS_ONLY = false; rootlp = PageGetItemId(dp, rootoffnum); - /* - * If it's a heap-only tuple, then it is not the start of a HOT chain. - */ if (ItemIdIsNormal(rootlp)) { + HeapTupleHeader htup; + htup = (HeapTupleHeader) PageGetItem(dp, rootlp); - tup.t_data = htup; - tup.t_len = ItemIdGetLength(rootlp); - ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum); - + /* + * If it's a heap-only tuple, then it is not the start of a HOT chain. + * We should process it by finding it through its HOT chain (by a + * later call that starts at the HOT chain's root item). + * + * Sometimes that'll never happen, because the heap-only tuple will be + * disconnected from its HOT chain following a HOT update whose xact + * aborts. If that turns out to be the case for this heap-only tuple, + * we'll process it in heap_prune_heaponly() during our second pass + * over the page. 
+ */ if (HeapTupleHeaderIsHeapOnly(htup)) - { - /* - * If the tuple is DEAD and doesn't chain to anything else, mark - * it unused immediately. (If it does chain, we can only remove - * it as part of pruning its chain.) - * - * We need this primarily to handle aborted HOT updates, that is, - * XMIN_INVALID heap-only tuples. Those might not be linked to by - * any chain, since the parent tuple might be re-updated before - * any pruning occurs. So we have to be able to reap them - * separately from chain-pruning. (Note that - * HeapTupleHeaderIsHotUpdated will never return true for an - * XMIN_INVALID tuple, so this code will work even when there were - * sequential updates within the aborted transaction.) - * - * Note that we might first arrive at a dead heap-only tuple - * either here or while following a chain below. Whichever path - * gets there first will mark the tuple unused. - */ - if (heap_prune_satisfies_vacuum(prstate, &tup, buffer) - == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup)) - { - heap_prune_record_unused(prstate, rootoffnum); - HeapTupleHeaderAdvanceLatestRemovedXid(htup, - &prstate->latestRemovedXid); - ndeleted++; - } - - /* Nothing more to do */ - return ndeleted; - } + return 0; } /* Start from the root tuple */ + Assert(!prstate->inhotchain[rootoffnum]); + prstate->inhotchain[rootoffnum] = true; offnum = rootoffnum; /* while not end of the chain */ for (;;) { ItemId lp; - bool tupdead, - recent_dead; + HeapTupleHeader htup; + HeapTupleData tup; + bool tupdead; /* Sanity check (pure paranoia) */ if (offnum < FirstOffsetNumber) @@ -606,10 +615,6 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) lp = PageGetItemId(dp, offnum); - /* Unused item obviously isn't part of the chain */ - if (!ItemIdIsUsed(lp)) - break; - /* * If we are looking at the redirected root line pointer, jump to the * first normal tuple in the chain. 
If we find a redirect somewhere @@ -619,24 +624,26 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) { if (nchain > 0) break; /* not at start of chain */ + startswithredirect = true; chainitems[nchain++] = offnum; offnum = ItemIdGetRedirect(rootlp); continue; } - /* - * Likewise, a dead line pointer can't be part of the chain. (We - * already eliminated the case of dead root tuple outside this - * function.) - */ - if (ItemIdIsDead(lp)) + /* LP_UNUSED or LP_DEAD items obviously not part of the chain */ + if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp)) + { + /* An LP_REDIRECT cannot point directly to an LP_UNUSED */ + Assert(!startswithredirect || nchain > 1); break; + } Assert(ItemIdIsNormal(lp)); htup = (HeapTupleHeader) PageGetItem(dp, lp); - tup.t_data = htup; tup.t_len = ItemIdGetLength(lp); + tup.t_tableOid = RelationGetRelid(prstate->rel); + tup.t_data = htup; ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum); /* @@ -644,17 +651,24 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) */ if (TransactionIdIsValid(priorXmax) && !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + { + Assert(!startswithredirect || nchain > 1); break; + } + + Assert(nchain == 0 || HeapTupleHeaderIsHeapOnly(htup)); + Assert(nchain == 0 || !prstate->inhotchain[offnum]); /* * OK, this tuple is indeed a member of the chain. */ chainitems[nchain++] = offnum; + prstate->inhotchain[offnum] = true; /* * Check tuple's visibility status. */ - tupdead = recent_dead = false; + tupdead = false; switch (heap_prune_satisfies_vacuum(prstate, &tup, buffer)) { @@ -663,7 +677,6 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) break; case HEAPTUPLE_RECENTLY_DEAD: - recent_dead = true; /* * This tuple may soon become DEAD. Update the hint field so @@ -679,6 +692,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) * This tuple may soon become DEAD. 
Update the hint field so * that the page is reconsidered for pruning in future. */ + Assert(!tupdead); heap_prune_record_prunable(prstate, HeapTupleHeaderGetUpdateXid(htup)); break; @@ -692,6 +706,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) * But we don't. See related decisions about when to mark the * page prunable in heapam.c. */ + Assert(!tupdead); break; default: @@ -700,11 +715,14 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) } /* - * Remember the last DEAD tuple seen. We will advance past - * RECENTLY_DEAD tuples just in case there's a DEAD one after them; - * but we can't advance past anything else. We have to make sure that - * we don't miss any DEAD tuples, since DEAD tuples that still have - * tuple storage after pruning will confuse VACUUM. + * Remember the last DEAD tuple seen. We will advance past all other + * tuples (even LIVE tuples) so that we have a clear picture of which + * heap-only tuples are known to be part of a valid HOT chain. + * + * Note that we expect to sometimes find a DEAD tuple after a + * RECENTLY_DEAD tuple. When this happens, the RECENTLY_DEAD tuple + * will be treated as DEAD. But there has to be a later DEAD tuple + * for that to happen. */ if (tupdead) { @@ -712,8 +730,6 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) HeapTupleHeaderAdvanceLatestRemovedXid(htup, &prstate->latestRemovedXid); } - else if (!recent_dead) - break; /* * If the tuple is not HOT-updated, then we are at the end of this @@ -741,6 +757,8 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) */ if (OffsetNumberIsValid(latestdead)) { + int i; + /* * Mark as unused each intermediate item that we are able to remove * from the chain. @@ -787,6 +805,57 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) return ndeleted; } +/* + * Handle disconnected heap-only tuples during second pass over page. 
We + * always expect these tuples to be dead; returns 1 if recorded unused, else 0. + * + * We need this to handle aborted HOT updates, that is, XMIN_INVALID heap-only + * tuples. Even then, we only do this for heap-only tuples that are not + * locatable in any HOT chain. This happens when the parent tuple was + * re-updated before any pruning took place. We have to be able to reap them + * separately from chain-pruning. + * + * We have to make sure that we don't miss any DEAD tuples, since DEAD tuples + * that still have tuple storage after pruning will confuse VACUUM. + */ +static int +heap_prune_heaponly(Buffer buffer, OffsetNumber heaponlyoffnum, + PruneState *prstate) +{ + Page dp = (Page) BufferGetPage(buffer); + ItemId lp; + HeapTupleHeader htup; + HeapTupleData tup; + HTSV_Result res; + + lp = PageGetItemId(dp, heaponlyoffnum); + Assert(ItemIdIsNormal(lp)); + htup = (HeapTupleHeader) PageGetItem(dp, lp); + Assert(HeapTupleHeaderIsHeapOnly(htup)); + tup.t_len = ItemIdGetLength(lp); + tup.t_tableOid = RelationGetRelid(prstate->rel); + tup.t_data = htup; + ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), heaponlyoffnum); + + /* + * We expect that disconnected heap-only tuples must be from aborted + * transactions. Any RECENTLY_DEAD tuples we see here are really DEAD, + * but the heap_prune_satisfies_vacuum test is too coarse to detect it. + */ + res = heap_prune_satisfies_vacuum(prstate, &tup, buffer); + if (res == HEAPTUPLE_DEAD || res == HEAPTUPLE_RECENTLY_DEAD) + { + heap_prune_record_unused(prstate, heaponlyoffnum); + HeapTupleHeaderAdvanceLatestRemovedXid(htup, + &prstate->latestRemovedXid); + return 1; + } + else + Assert(false); + + return 0; +} + /* Record lowest soon-prunable XID */ static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid) -- 2.30.2