From 9d6d6c2529700e4fe381dbc55ef172ba13882fab Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Tue, 29 Jul 2025 16:12:56 -0400 Subject: [PATCH v46 5/5] Set pd_prune_xid on insert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that visibility map (VM) updates can occur during read-only queries, it makes sense to also set the page’s pd_prune_xid hint during inserts and on the new page during updates. This enables heap_page_prune_and_freeze() to set the page all-visible in the VM the first time the page is read after it has been filled with newly inserted tuples. This means the page is set all-visible while it is still in shared buffers, avoiding potential I/O amplification when vacuum later has to scan the page and set it all-visible. It also enables index-only scans of newly inserted data much sooner. This change also addresses a long-standing note in heap_insert() and heap_multi_insert(), which observed that setting pd_prune_xid would help clean up aborted insertions sooner. Without it, such tuples might linger until VACUUM, whereas now they can be pruned earlier. 
Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Chao Li Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com --- src/backend/access/heap/heapam.c | 39 +++++++++++++++++---------- src/backend/access/heap/heapam_xlog.c | 19 ++++++++++++- src/backend/access/heap/pruneheap.c | 18 ++++++------- 3 files changed, 51 insertions(+), 25 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index dbdf6521c42..cdaf57e3f12 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2156,6 +2156,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, TransactionId xid = GetCurrentTransactionId(); HeapTuple heaptup; Buffer buffer; + Page page; Buffer vmbuffer = InvalidBuffer; bool all_visible_cleared = false; @@ -2182,6 +2183,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, &vmbuffer, NULL, 0); + page = BufferGetPage(buffer); + /* * We're about to do the actual insert -- but check for conflict first, to * avoid possibly having to roll back work we've just done. @@ -2205,25 +2208,30 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, RelationPutHeapTuple(relation, buffer, heaptup, (options & HEAP_INSERT_SPECULATIVE) != 0); - if (PageIsAllVisible(BufferGetPage(buffer))) + if (PageIsAllVisible(page)) { all_visible_cleared = true; - PageClearAllVisible(BufferGetPage(buffer)); + PageClearAllVisible(page); visibilitymap_clear(relation, ItemPointerGetBlockNumber(&(heaptup->t_self)), vmbuffer, VISIBILITYMAP_VALID_BITS); } /* - * XXX Should we set PageSetPrunable on this page ? + * Set pd_prune_xid to trigger heap_page_prune_and_freeze() once the page + * is full so that we can set the page all-visible in the VM on the next + * page access. * - * The inserting transaction may eventually abort thus making this tuple - * DEAD and hence available for pruning. 
Though we don't want to optimize - * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the - * aborted tuple will never be pruned until next vacuum is triggered. + * Setting pd_prune_xid is also handy if the inserting transaction + * eventually aborts making this tuple DEAD and hence available for + * pruning. If no other tuple in this page is UPDATEd/DELETEd, the aborted + * tuple would never otherwise be pruned until next vacuum is triggered. * - * If you do add PageSetPrunable here, add it in heap_xlog_insert too. + * Don't set it if we are in bootstrap mode or we are inserting a frozen + * tuple, as there is no further pruning/freezing needed in those cases. */ + if (TransactionIdIsNormal(xid) && !(options & HEAP_INSERT_FROZEN)) + PageSetPrunable(page, xid); MarkBufferDirty(buffer); @@ -2233,7 +2241,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, xl_heap_insert xlrec; xl_heap_header xlhdr; XLogRecPtr recptr; - Page page = BufferGetPage(buffer); uint8 info = XLOG_HEAP_INSERT; int bufflags = 0; @@ -2598,8 +2605,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, } /* - * XXX Should we set PageSetPrunable on this page ? See heap_insert() + * Set pd_prune_xid. See heap_insert() for more on why we do this when + * inserting tuples. This only makes sense if we aren't already + * setting the page frozen in the VM and we're not in bootstrap mode. */ + if (!all_frozen_set && TransactionIdIsNormal(xid)) + PageSetPrunable(page, xid); MarkBufferDirty(buffer); @@ -4141,12 +4152,12 @@ l2: * the subsequent page pruning will be a no-op and the hint will be * cleared. * - * XXX Should we set hint on newbuf as well? If the transaction aborts, - * there would be a prunable tuple in the newbuf; but for now we choose - * not to optimize for aborts. Note that heap_xlog_update must be kept in - * sync if this decision changes. + * We set the new page prunable as well. 
See heap_insert() for more on why + * we do this when inserting tuples. */ PageSetPrunable(page, xid); + if (newbuf != buffer) + PageSetPrunable(newpage, xid); if (use_hot_update) { diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index 1302bb13e18..f3f419d3dc1 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -450,6 +450,14 @@ heap_xlog_insert(XLogReaderState *record) freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ + /* + * Set the page prunable to trigger on-access pruning later, which may + * set the page all-visible in the VM. See comments in heap_insert(). + */ + if (TransactionIdIsNormal(XLogRecGetXid(record)) && + !HeapTupleHeaderXminFrozen(htup)) + PageSetPrunable(page, XLogRecGetXid(record)); + PageSetLSN(page, lsn); if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) @@ -599,12 +607,19 @@ heap_xlog_multi_insert(XLogReaderState *record) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); - /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */ + /* + * XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible. If + * we are not setting the page frozen, then set the page's prunable + * hint so that we trigger on-access pruning later which may set the + * page all-visible in the VM. 
+ */ if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) { PageSetAllVisible(page); PageClearPrunable(page); } + else + PageSetPrunable(page, XLogRecGetXid(record)); MarkBufferDirty(buffer); } @@ -921,6 +936,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) freespace = PageGetHeapFreeSpace(npage); PageSetLSN(npage, lsn); + /* See heap_insert() for why we set pd_prune_xid on insert */ + PageSetPrunable(npage, XLogRecGetXid(record)); MarkBufferDirty(nbuffer); } diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index d83fd26b274..bb364f53a44 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -275,7 +275,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer, /* * First check whether there's any chance there's something to prune, * determining the appropriate horizon is a waste if there's no prune_xid - * (i.e. no updates/deletes left potentially dead tuples around). + * (i.e. no updates/deletes left potentially dead tuples around and no + * inserts inserted new tuples that may be visible to all). */ prune_xid = PageGetPruneXid(page); if (!TransactionIdIsValid(prune_xid)) @@ -1918,17 +1919,14 @@ heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum) prstate->set_all_visible = false; prstate->set_all_frozen = false; - /* The page should not be marked all-visible */ - if (PageIsAllVisible(page)) - heap_page_fix_vm_corruption(prstate, offnum, - VM_CORRUPT_TUPLE_VISIBILITY); - /* - * If we wanted to optimize for aborts, we might consider marking - * the page prunable when we see INSERT_IN_PROGRESS. But we - * don't. See related decisions about when to mark the page - * prunable in heapam.c. + * Though there is nothing "prunable" on the page, we maintain + * pd_prune_xid for inserts so that we have the opportunity to + * mark them all-visible during the next round of pruning. 
*/ + heap_prune_record_prunable(prstate, + HeapTupleHeaderGetXmin(htup), + offnum); break; case HEAPTUPLE_DELETE_IN_PROGRESS: -- 2.43.0