From 4e0febe03cd305e81cb73235d750901e9ef379f0 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Tue, 29 Jul 2025 16:12:56 -0400 Subject: [PATCH v21 12/12] Set pd_prune_xid on insert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that visibility map (VM) updates can occur during read-only queries, it makes sense to also set the page’s pd_prune_xid hint during inserts. This enables heap_page_prune_and_freeze() to run after a page is filled with newly inserted tuples the first time it is read. This change also addresses a long-standing note in heap_insert() and heap_multi_insert(), which observed that setting pd_prune_xid would help clean up aborted insertions sooner. Without it, such tuples might linger until VACUUM, whereas now they can be pruned earlier. Setting pd_prune_xid on insert can cause a page to be dirtied and written out when it previously would not have been, affecting the reported number of hits in the index-killtuples isolation test. It is unclear if this is a bug in the way hits are tracked, a faulty test expectation, or if simply updating the test's expected output is sufficient remediation. --- src/backend/access/heap/heapam.c | 25 +++++++++++++------ src/backend/access/heap/heapam_xlog.c | 15 ++++++++++- .../isolation/expected/index-killtuples.out | 6 ++--- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index ae53e311ce1..f329f497480 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2104,6 +2104,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, TransactionId xid = GetCurrentTransactionId(); HeapTuple heaptup; Buffer buffer; + Page page; Buffer vmbuffer = InvalidBuffer; bool all_visible_cleared = false; @@ -2163,15 +2164,19 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, } /* - * XXX Should we set PageSetPrunable on this page ? 
+ * Set pd_prune_xid to trigger heap_page_prune_and_freeze() once the page + * is full so that we can set the page all-visible in the VM. * - * The inserting transaction may eventually abort thus making this tuple - * DEAD and hence available for pruning. Though we don't want to optimize - * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the - * aborted tuple will never be pruned until next vacuum is triggered. + * Setting pd_prune_xid is also handy if the inserting transaction + * eventually aborts making this tuple DEAD and hence available for + * pruning. If no other tuple in this page is UPDATEd/DELETEd, the aborted + * tuple would never otherwise be pruned until next vacuum is triggered. * - * If you do add PageSetPrunable here, add it in heap_xlog_insert too. + * Don't set it if we are in bootstrap mode, though. */ + page = BufferGetPage(buffer); + if (TransactionIdIsNormal(xid)) + PageSetPrunable(page, xid); MarkBufferDirty(buffer); @@ -2181,7 +2186,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, xl_heap_insert xlrec; xl_heap_header xlhdr; XLogRecPtr recptr; - Page page = BufferGetPage(buffer); uint8 info = XLOG_HEAP_INSERT; int bufflags = 0; @@ -2545,8 +2549,13 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, } /* - * XXX Should we set PageSetPrunable on this page ? See heap_insert() + * Set pd_prune_xid. See heap_insert() for more on why we do this when + * inserting tuples. This only makes sense if we aren't already + * setting the page frozen in the VM. We also don't set it in + * bootstrap mode. 
*/ + if (!all_frozen_set && TransactionIdIsNormal(xid)) + PageSetPrunable(page, xid); MarkBufferDirty(buffer); diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index 5ab46e8bf8f..dac640f5c9d 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -462,6 +462,12 @@ heap_xlog_insert(XLogReaderState *record) freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ + /* + * Set the page prunable to trigger on-access pruning later which may + * set the page all-visible in the VM. + */ + PageSetPrunable(page, XLogRecGetXid(record)); + PageSetLSN(page, lsn); if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) @@ -611,9 +617,16 @@ heap_xlog_multi_insert(XLogReaderState *record) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); - /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */ + /* + * XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible. If + * we are not setting the page frozen, then set the page's prunable + * hint so that we trigger on-access pruning later which may set the + * page all-visible in the VM. 
+ */ if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) PageSetAllVisible(page); + else + PageSetPrunable(page, XLogRecGetXid(record)); MarkBufferDirty(buffer); } diff --git a/src/test/isolation/expected/index-killtuples.out b/src/test/isolation/expected/index-killtuples.out index be7ddd756ef..b29f2434b00 100644 --- a/src/test/isolation/expected/index-killtuples.out +++ b/src/test/isolation/expected/index-killtuples.out @@ -54,7 +54,7 @@ step flush: SELECT FROM pg_stat_force_next_flush(); step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; new_heap_accesses ----------------- - 1 + 2 (1 row) step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); @@ -130,7 +130,7 @@ step flush: SELECT FROM pg_stat_force_next_flush(); step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; new_heap_accesses ----------------- - 1 + 2 (1 row) step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); @@ -283,7 +283,7 @@ step flush: SELECT FROM pg_stat_force_next_flush(); step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; new_heap_accesses ----------------- - 1 + 2 (1 row) step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); -- 2.43.0