From 6a6a78640d69b5309d8d1fd14bde3b38eee1cb79 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 13 Nov 2023 14:10:05 -0500 Subject: [PATCH v4 1/4] Set would-be dead items LP_UNUSED while pruning If there are no indexes on a relation, items can be marked LP_UNUSED instead of LP_DEAD during lazy_scan_prune(). This avoids a separate invocation of lazy_vacuum_heap_page() and saves a vacuum WAL record. To accomplish this, pass heap_page_prune() a new parameter, no_indexes, which indicates that dead line pointers should be set to LP_UNUSED during pruning, allowing earlier reaping of tuples. Because we don't update the freespace map until after dropping the lock on the buffer and we need the lock while we update the visibility map, save our intent to update the freespace map in lazy_scan_prune()'s new output parameter recordfreespace. This is not added to the LVPagePruneState because future commits will eliminate the LVPagePruneState. Discussion: https://postgr.es/m/CAAKRu_bgvb_k0gKOXWzNKWHt560R0smrGe3E8zewKPs8fiMKkw%40mail.gmail.com --- src/backend/access/heap/heapam.c | 9 +- src/backend/access/heap/pruneheap.c | 71 +++++++++--- src/backend/access/heap/vacuumlazy.c | 156 +++++++++++---------------- src/include/access/heapam.h | 3 +- 4 files changed, 126 insertions(+), 113 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 707460a5364..27d87b619f9 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8810,8 +8810,13 @@ heap_xlog_prune(XLogReaderState *record) nunused = (end - nowunused); Assert(nunused >= 0); - /* Update all line pointers per the record, and repair fragmentation */ - heap_page_prune_execute(buffer, + /* + * Update all line pointers per the record, and repair fragmentation. + * We always pass no_indexes as true, because we don't know whether or + * not this option was used when pruning. This reduces the validation + * done on replay in an assert build. 
+ */ + heap_page_prune_execute(buffer, true, redirected, nredirected, nowdead, ndead, nowunused, nunused); diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 3e0a1a260e6..80e3db873e4 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -35,6 +35,8 @@ typedef struct /* tuple visibility test, initialized for the relation */ GlobalVisState *vistest; + /* whether or not dead items can be set LP_UNUSED during pruning */ + bool no_indexes; TransactionId new_prune_xid; /* new prune hint value for page */ TransactionId snapshotConflictHorizon; /* latest xid removed */ @@ -148,7 +150,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer) { PruneResult presult; - heap_page_prune(relation, buffer, vistest, &presult, NULL); + heap_page_prune(relation, buffer, vistest, false, + &presult, NULL); /* * Report the number of tuples reclaimed to pgstats. This is @@ -193,6 +196,9 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * (see heap_prune_satisfies_vacuum and * HeapTupleSatisfiesVacuum). * + * no_indexes indicates whether or not dead items can be set LP_UNUSED during + * pruning. + * * off_loc is the offset location required by the caller to use in error * callback. 
* @@ -203,6 +209,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) void heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, + bool no_indexes, PruneResult *presult, OffsetNumber *off_loc) { @@ -227,6 +234,7 @@ heap_page_prune(Relation relation, Buffer buffer, prstate.new_prune_xid = InvalidTransactionId; prstate.rel = relation; prstate.vistest = vistest; + prstate.no_indexes = no_indexes; prstate.snapshotConflictHorizon = InvalidTransactionId; prstate.nredirected = prstate.ndead = prstate.nunused = 0; memset(prstate.marked, 0, sizeof(prstate.marked)); @@ -306,9 +314,9 @@ heap_page_prune(Relation relation, Buffer buffer, if (off_loc) *off_loc = offnum; - /* Nothing to do if slot is empty or already dead */ + /* Nothing to do if slot is empty */ itemid = PageGetItemId(page, offnum); - if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid)) + if (!ItemIdIsUsed(itemid)) continue; /* Process this item or chain of items */ @@ -330,7 +338,7 @@ heap_page_prune(Relation relation, Buffer buffer, * Apply the planned item changes, then repair page fragmentation, and * update the page's hint bit about whether it has free line pointers. */ - heap_page_prune_execute(buffer, + heap_page_prune_execute(buffer, prstate.no_indexes, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, prstate.nowunused, prstate.nunused); @@ -581,7 +589,17 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, * function.) */ if (ItemIdIsDead(lp)) + { + /* + * If the relation has no indexes, we can set dead line pointers + * LP_UNUSED now. We don't increment ndeleted here since the LP + * was already marked dead. + */ + if (unlikely(prstate->no_indexes)) + heap_prune_record_unused(prstate, offnum); + break; + } Assert(ItemIdIsNormal(lp)); htup = (HeapTupleHeader) PageGetItem(dp, lp); @@ -726,7 +744,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, * item. 
This can happen if the loop in heap_page_prune caused us to * visit the dead successor of a redirect item before visiting the * redirect item. We can clean up by setting the redirect item to - * DEAD state. + * DEAD state or LP_UNUSED if the table has no indexes. */ heap_prune_record_dead(prstate, rootoffnum); } @@ -767,6 +785,17 @@ heap_prune_record_redirect(PruneState *prstate, static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum) { + /* + * If the relation has no indexes, we can remove dead tuples during + * pruning instead of marking their line pointers dead. Set this tuple's + * line pointer LP_UNUSED. We hint that tables with indexes are more + * likely. + */ + if (unlikely(prstate->no_indexes)) + { + heap_prune_record_unused(prstate, offnum); + return; + } Assert(prstate->ndead < MaxHeapTuplesPerPage); prstate->nowdead[prstate->ndead] = offnum; prstate->ndead++; @@ -792,7 +821,7 @@ heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum) * buffer. */ void -heap_page_prune_execute(Buffer buffer, +heap_page_prune_execute(Buffer buffer, bool no_indexes, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused) @@ -902,14 +931,28 @@ heap_page_prune_execute(Buffer buffer, #ifdef USE_ASSERT_CHECKING - /* - * Only heap-only tuples can become LP_UNUSED during pruning. They - * don't need to be left in place as LP_DEAD items until VACUUM gets - * around to doing index vacuuming. - */ - Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp)); - htup = (HeapTupleHeader) PageGetItem(page, lp); - Assert(HeapTupleHeaderIsHeapOnly(htup)); + if (no_indexes) + { + /* + * If the relation has no indexes, we may set any of LP_NORMAL, + * LP_REDIRECT, or LP_DEAD items to LP_UNUSED during pruning. We + * can't check much here except that, if the item is LP_NORMAL, it + * should have storage before it is set LP_UNUSED. 
+ */ + Assert(!ItemIdIsNormal(lp) || ItemIdHasStorage(lp)); + } + else + { + /* + * If the relation has indexes, only heap-only tuples can become + * LP_UNUSED during pruning. They don't need to be left in place + * as LP_DEAD items until VACUUM gets around to doing index + * vacuuming. + */ + Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp)); + htup = (HeapTupleHeader) PageGetItem(page, lp); + Assert(HeapTupleHeaderIsHeapOnly(htup)); + } #endif ItemIdSetUnused(lp); diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index abbba8947fa..1b26d63d3d6 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -250,7 +250,8 @@ static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, bool sharelock, Buffer vmbuffer); static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, - LVPagePruneState *prunestate); + LVPagePruneState *prunestate, + bool *recordfreespace); static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *hastup, bool *recordfreespace); @@ -830,8 +831,10 @@ lazy_scan_heap(LVRelState *vacrel) next_fsm_block_to_vacuum = 0; VacDeadItems *dead_items = vacrel->dead_items; Buffer vmbuffer = InvalidBuffer; + int tuples_already_deleted; bool next_unskippable_allvis, skipping_current_range; + bool recordfreespace; const int initprog_index[] = { PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_TOTAL_HEAP_BLKS, @@ -959,8 +962,7 @@ lazy_scan_heap(LVRelState *vacrel) page = BufferGetPage(buf); if (!ConditionalLockBufferForCleanup(buf)) { - bool hastup, - recordfreespace; + bool hastup; LockBuffer(buf, BUFFER_LOCK_SHARE); @@ -1010,6 +1012,8 @@ lazy_scan_heap(LVRelState *vacrel) continue; } + tuples_already_deleted = vacrel->tuples_deleted; + /* * Prune, freeze, and count tuples. * @@ -1019,7 +1023,7 @@ lazy_scan_heap(LVRelState *vacrel) * were pruned some time earlier. 
Also considers freezing XIDs in the * tuple headers of remaining items with storage. */ - lazy_scan_prune(vacrel, buf, blkno, page, &prunestate); + lazy_scan_prune(vacrel, buf, blkno, page, &prunestate, &recordfreespace); Assert(!prunestate.all_visible || !prunestate.has_lpdead_items); @@ -1027,69 +1031,6 @@ lazy_scan_heap(LVRelState *vacrel) if (prunestate.hastup) vacrel->nonempty_pages = blkno + 1; - if (vacrel->nindexes == 0) - { - /* - * Consider the need to do page-at-a-time heap vacuuming when - * using the one-pass strategy now. - * - * The one-pass strategy will never call lazy_vacuum(). The steps - * performed here can be thought of as the one-pass equivalent of - * a call to lazy_vacuum(). - */ - if (prunestate.has_lpdead_items) - { - Size freespace; - - lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer); - - /* Forget the LP_DEAD items that we just vacuumed */ - dead_items->num_items = 0; - - /* - * Now perform FSM processing for blkno, and move on to next - * page. - * - * Our call to lazy_vacuum_heap_page() will have considered if - * it's possible to set all_visible/all_frozen independently - * of lazy_scan_prune(). Note that prunestate was invalidated - * by lazy_vacuum_heap_page() call. - */ - freespace = PageGetHeapFreeSpace(page); - - UnlockReleaseBuffer(buf); - RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); - - /* - * Periodically perform FSM vacuuming to make newly-freed - * space visible on upper FSM pages. FreeSpaceMapVacuumRange() - * vacuums the portion of the freespace map covering heap - * pages from start to end - 1. Include the block we just - * vacuumed by passing it blkno + 1. Overflow isn't an issue - * because MaxBlockNumber + 1 is InvalidBlockNumber which - * causes FreeSpaceMapVacuumRange() to vacuum freespace map - * pages covering the remainder of the relation. 
- */ - if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES) - { - FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, - blkno + 1); - next_fsm_block_to_vacuum = blkno + 1; - } - - continue; - } - - /* - * There was no call to lazy_vacuum_heap_page() because pruning - * didn't encounter/create any LP_DEAD items that needed to be - * vacuumed. Prune state has not been invalidated, so proceed - * with prunestate-driven visibility map and FSM steps (just like - * the two-pass strategy). - */ - Assert(dead_items->num_items == 0); - } - /* * Handle setting visibility map bit based on information from the VM * (as of last lazy_scan_skip() call), and from prunestate @@ -1200,38 +1141,45 @@ lazy_scan_heap(LVRelState *vacrel) /* * Final steps for block: drop cleanup lock, record free space in the - * FSM + * FSM. + * + * If we will likely do index vacuuming, wait until + * lazy_vacuum_heap_rel() to save free space. This doesn't just save + * us some cycles; it also allows us to record any additional free + * space that lazy_vacuum_heap_page() will make available in cases + * where it's possible to truncate the page's line pointer array. + * + * Note: It's not in fact 100% certain that we really will call + * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip index + * vacuuming (and so must skip heap vacuuming). This is deemed okay + * because it only happens in emergencies, or when there is very + * little free space anyway. (Besides, we start recording free space + * in the FSM once index vacuuming has been abandoned.) */ - if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming) - { - /* - * Wait until lazy_vacuum_heap_rel() to save free space. This - * doesn't just save us some cycles; it also allows us to record - * any additional free space that lazy_vacuum_heap_page() will - * make available in cases where it's possible to truncate the - * page's line pointer array. 
- * - * Note: It's not in fact 100% certain that we really will call - * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip - * index vacuuming (and so must skip heap vacuuming). This is - * deemed okay because it only happens in emergencies, or when - * there is very little free space anyway. (Besides, we start - * recording free space in the FSM once index vacuuming has been - * abandoned.) - * - * Note: The one-pass (no indexes) case is only supposed to make - * it this far when there were no LP_DEAD items during pruning. - */ - Assert(vacrel->nindexes > 0); - UnlockReleaseBuffer(buf); - } - else + if (recordfreespace) { Size freespace = PageGetHeapFreeSpace(page); UnlockReleaseBuffer(buf); RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); } + else + UnlockReleaseBuffer(buf); + + /* + * Periodically perform FSM vacuuming to make newly-freed space + * visible on upper FSM pages. This is done after vacuuming if the + * table has indexes. + */ + if (vacrel->nindexes == 0 && + vacrel->tuples_deleted > tuples_already_deleted && + (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)) + { + FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, + blkno); + next_fsm_block_to_vacuum = blkno; + } + } vacrel->blkno = InvalidBlockNumber; @@ -1543,7 +1491,8 @@ lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, - LVPagePruneState *prunestate) + LVPagePruneState *prunestate, + bool *recordfreespace) { Relation rel = vacrel->rel; OffsetNumber offnum, @@ -1555,6 +1504,7 @@ lazy_scan_prune(LVRelState *vacrel, live_tuples, recently_dead_tuples; HeapPageFreeze pagefrz; + bool no_indexes; int64 fpi_before = pgWalUsage.wal_fpi; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; HeapTupleFreeze frozen[MaxHeapTuplesPerPage]; @@ -1579,6 +1529,8 @@ lazy_scan_prune(LVRelState *vacrel, live_tuples = 0; recently_dead_tuples = 0; + no_indexes = vacrel->nindexes == 0; + /* * Prune all HOT-update chains in this page. 
* @@ -1587,7 +1539,8 @@ lazy_scan_prune(LVRelState *vacrel, * lpdead_items's final value can be thought of as the number of tuples * that were deleted from indexes. */ - heap_page_prune(rel, buf, vacrel->vistest, &presult, &vacrel->offnum); + heap_page_prune(rel, buf, vacrel->vistest, no_indexes, + &presult, &vacrel->offnum); /* * Now scan the page to collect LP_DEAD items and check for tuples @@ -1598,6 +1551,7 @@ lazy_scan_prune(LVRelState *vacrel, prunestate->all_visible = true; prunestate->all_frozen = true; prunestate->visibility_cutoff_xid = InvalidTransactionId; + *recordfreespace = false; for (offnum = FirstOffsetNumber; offnum <= maxoff; @@ -1918,6 +1872,15 @@ lazy_scan_prune(LVRelState *vacrel, vacrel->lpdead_items += lpdead_items; vacrel->live_tuples += live_tuples; vacrel->recently_dead_tuples += recently_dead_tuples; + + /* + * If we will not do index vacuuming, either because we have no indexes, + * because there is nothing to vacuum, or because do_index_vacuuming is + * false, make sure we update the freespace map. + */ + if (vacrel->nindexes == 0 || + !vacrel->do_index_vacuuming || lpdead_items == 0) + *recordfreespace = true; } /* @@ -1937,7 +1900,8 @@ lazy_scan_prune(LVRelState *vacrel, * * See lazy_scan_prune for an explanation of hastup return flag. * recordfreespace flag instructs caller on whether or not it should do - * generic FSM processing for page. + * generic FSM processing for page. vacrel is updated with page-level counts + * and to indicate whether or not rel truncation is safe. 
*/ static bool lazy_scan_noprune(LVRelState *vacrel, @@ -2516,7 +2480,7 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, bool all_frozen; LVSavedErrInfo saved_err_info; - Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming); + Assert(vacrel->do_index_vacuuming); pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 932ec0d6f2b..867ec36cb64 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -320,9 +320,10 @@ struct GlobalVisState; extern void heap_page_prune_opt(Relation relation, Buffer buffer); extern void heap_page_prune(Relation relation, Buffer buffer, struct GlobalVisState *vistest, + bool no_indexes, PruneResult *presult, OffsetNumber *off_loc); -extern void heap_page_prune_execute(Buffer buffer, +extern void heap_page_prune_execute(Buffer buffer, bool no_indexes, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused); -- 2.37.2