From d9f0a411af0ae592cd7a30b6e0f3fac74c115b6a Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sun, 18 Jan 2026 11:32:52 -0500 Subject: [PATCH v14 15/16] Add fake LSN support to hash index AM. This is preparation for an upcoming patch that will add the amgetbatch interface and switch hash over to it (from amgettuple). We need fake LSNs so that behavior equivalent to nbtree's previous dropPin behavior can be applied safely during scans of unlogged hash indexes. The commit that will add hashgetbatch will replace _hash_kill_items with a new hashkillitemsbatch routine. The new routine will be very similar to the btkillitemsbatch routine added by commit XXXXX. In particular, it will use the same "did the index page's LSN change since the page was first read?" trick. Author: Peter Geoghegan Discussion: https://postgr.es/m/CAH2-WzkehuhxyuA8quc7rRN3EtNXpiKsjPfO8mhb+0Dr2K0Dtg@mail.gmail.com --- src/backend/access/hash/hash.c | 21 +++-- src/backend/access/hash/hashinsert.c | 20 +++-- src/backend/access/hash/hashovfl.c | 111 ++++++++++++++++----------- src/backend/access/hash/hashpage.c | 22 +++--- 4 files changed, 105 insertions(+), 69 deletions(-) diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 217c7f5e1..25044fe8f 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -477,6 +477,7 @@ hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, Buffer metabuf = InvalidBuffer; HashMetaPage metap; HashMetaPage cachedmetap; + XLogRecPtr recptr; tuples_removed = 0; num_index_tuples = 0; @@ -616,7 +617,6 @@ loop_top: if (RelationNeedsWAL(rel)) { xl_hash_update_meta_page xlrec; - XLogRecPtr recptr; xlrec.ntuples = metap->hashm_ntuples; @@ -626,8 +626,11 @@ loop_top: XLogRegisterBuffer(0, metabuf, REGBUF_STANDARD); recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE); - PageSetLSN(BufferGetPage(metabuf), recptr); } + else + recptr = XLogGetFakeLSN(rel); + + PageSetLSN(BufferGetPage(metabuf), recptr); 
END_CRIT_SECTION(); @@ -700,6 +703,7 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, Buffer buf; Bucket new_bucket PG_USED_FOR_ASSERTS_ONLY = InvalidBucket; bool bucket_dirty = false; + XLogRecPtr recptr; blkno = bucket_blkno; buf = bucket_buf; @@ -822,7 +826,6 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, if (RelationNeedsWAL(rel)) { xl_hash_delete xlrec; - XLogRecPtr recptr; xlrec.clear_dead_marking = clear_dead_marking; xlrec.is_primary_bucket_page = (buf == bucket_buf); @@ -847,8 +850,11 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, ndeletable * sizeof(OffsetNumber)); recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE); - PageSetLSN(BufferGetPage(buf), recptr); } + else + recptr = XLogGetFakeLSN(rel); + + PageSetLSN(BufferGetPage(buf), recptr); END_CRIT_SECTION(); } @@ -907,14 +913,15 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, /* XLOG stuff */ if (RelationNeedsWAL(rel)) { - XLogRecPtr recptr; - XLogBeginInsert(); XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD); recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP); - PageSetLSN(page, recptr); } + else + recptr = XLogGetFakeLSN(rel); + + PageSetLSN(page, recptr); END_CRIT_SECTION(); } diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index 0cefbacc9..3395bbc13 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -50,6 +50,7 @@ _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted) uint32 hashkey; Bucket bucket; OffsetNumber itup_off; + XLogRecPtr recptr; /* * Get the hash key for the item (it's stored in the index tuple itself). 
@@ -216,7 +217,6 @@ restart_insert: if (RelationNeedsWAL(rel)) { xl_hash_insert xlrec; - XLogRecPtr recptr; xlrec.offnum = itup_off; @@ -229,10 +229,12 @@ restart_insert: XLogRegisterBufData(0, itup, IndexTupleSize(itup)); recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT); - - PageSetLSN(BufferGetPage(buf), recptr); - PageSetLSN(BufferGetPage(metabuf), recptr); } + else + recptr = XLogGetFakeLSN(rel); + + PageSetLSN(BufferGetPage(buf), recptr); + PageSetLSN(BufferGetPage(metabuf), recptr); END_CRIT_SECTION(); @@ -372,6 +374,7 @@ _hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf) Page page = BufferGetPage(buf); HashPageOpaque pageopaque; HashMetaPage metap; + XLogRecPtr recptr; /* Scan each tuple in page to see if it is marked as LP_DEAD */ maxoff = PageGetMaxOffsetNumber(page); @@ -424,7 +427,6 @@ _hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf) if (RelationNeedsWAL(rel)) { xl_hash_vacuum_one_page xlrec; - XLogRecPtr recptr; xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(hrel); xlrec.snapshotConflictHorizon = snapshotConflictHorizon; @@ -445,10 +447,12 @@ _hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf) XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD); recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE); - - PageSetLSN(BufferGetPage(buf), recptr); - PageSetLSN(BufferGetPage(metabuf), recptr); } + else + recptr = XLogGetFakeLSN(rel); + + PageSetLSN(BufferGetPage(buf), recptr); + PageSetLSN(BufferGetPage(metabuf), recptr); END_CRIT_SECTION(); diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index 8cfb6ce75..abd1f91fa 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -132,6 +132,7 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin) uint32 i, j; bool page_found = false; + XLogRecPtr recptr; /* * Write-lock the tail page. 
Here, we need to maintain locking order such @@ -381,7 +382,6 @@ found: /* XLOG stuff */ if (RelationNeedsWAL(rel)) { - XLogRecPtr recptr; xl_hash_add_ovfl_page xlrec; xlrec.bmpage_found = page_found; @@ -408,18 +408,20 @@ found: XLogRegisterBufData(4, &metap->hashm_firstfree, sizeof(uint32)); recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE); - - PageSetLSN(BufferGetPage(ovflbuf), recptr); - PageSetLSN(BufferGetPage(buf), recptr); - - if (BufferIsValid(mapbuf)) - PageSetLSN(BufferGetPage(mapbuf), recptr); - - if (BufferIsValid(newmapbuf)) - PageSetLSN(BufferGetPage(newmapbuf), recptr); - - PageSetLSN(BufferGetPage(metabuf), recptr); } + else + recptr = XLogGetFakeLSN(rel); + + PageSetLSN(BufferGetPage(ovflbuf), recptr); + PageSetLSN(BufferGetPage(buf), recptr); + + if (BufferIsValid(mapbuf)) + PageSetLSN(BufferGetPage(mapbuf), recptr); + + if (BufferIsValid(newmapbuf)) + PageSetLSN(BufferGetPage(newmapbuf), recptr); + + PageSetLSN(BufferGetPage(metabuf), recptr); END_CRIT_SECTION(); @@ -510,7 +512,11 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, Bucket bucket PG_USED_FOR_ASSERTS_ONLY; Buffer prevbuf = InvalidBuffer; Buffer nextbuf = InvalidBuffer; - bool update_metap = false; + bool update_metap = false, + mod_wbuf, + is_prim_bucket_same_wrt, + is_prev_bucket_same_wrt; + XLogRecPtr recptr; /* Get information from the doomed page */ _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE); @@ -641,19 +647,21 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, MarkBufferDirty(metabuf); } + /* Determine which pages WAL record modifies */ + mod_wbuf = false; + is_prim_bucket_same_wrt = (wbuf == bucketbuf); + is_prev_bucket_same_wrt = (wbuf == prevbuf); + /* XLOG stuff */ if (RelationNeedsWAL(rel)) { xl_hash_squeeze_page xlrec; - XLogRecPtr recptr; - int i; - bool mod_wbuf = false; xlrec.prevblkno = prevblkno; xlrec.nextblkno = nextblkno; xlrec.ntups = nitups; - xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf); - 
xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf); + xlrec.is_prim_bucket_same_wrt = is_prim_bucket_same_wrt; + xlrec.is_prev_bucket_same_wrt = is_prev_bucket_same_wrt; XLogBeginInsert(); XLogRegisterData(&xlrec, SizeOfHashSqueezePage); @@ -662,14 +670,14 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, * bucket buffer was not changed, but still needs to be registered to * ensure that we can acquire a cleanup lock on it during replay. */ - if (!xlrec.is_prim_bucket_same_wrt) + if (!is_prim_bucket_same_wrt) { uint8 flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE; XLogRegisterBuffer(0, bucketbuf, flags); } - if (xlrec.ntups > 0) + if (nitups > 0) { XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD); @@ -678,10 +686,10 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, XLogRegisterBufData(1, itup_offsets, nitups * sizeof(OffsetNumber)); - for (i = 0; i < nitups; i++) + for (int i = 0; i < nitups; i++) XLogRegisterBufData(1, itups[i], tups_size[i]); } - else if (xlrec.is_prim_bucket_same_wrt || xlrec.is_prev_bucket_same_wrt) + else if (is_prim_bucket_same_wrt || is_prev_bucket_same_wrt) { uint8 wbuf_flags; @@ -691,10 +699,10 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, * if it is the same as primary bucket buffer or update the * nextblkno if it is same as the previous bucket buffer. */ - Assert(xlrec.ntups == 0); + Assert(nitups == 0); wbuf_flags = REGBUF_STANDARD; - if (!xlrec.is_prev_bucket_same_wrt) + if (!is_prev_bucket_same_wrt) { wbuf_flags |= REGBUF_NO_CHANGE; } @@ -714,7 +722,7 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, * prevpage. During replay, we can directly update the nextblock in * writepage. 
*/ - if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt) + if (BufferIsValid(prevbuf) && !is_prev_bucket_same_wrt) XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD); if (BufferIsValid(nextbuf)) @@ -730,23 +738,33 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, } recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE); - - /* Set LSN iff wbuf is modified. */ - if (mod_wbuf) - PageSetLSN(BufferGetPage(wbuf), recptr); - - PageSetLSN(BufferGetPage(ovflbuf), recptr); - - if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt) - PageSetLSN(BufferGetPage(prevbuf), recptr); - if (BufferIsValid(nextbuf)) - PageSetLSN(BufferGetPage(nextbuf), recptr); - - PageSetLSN(BufferGetPage(mapbuf), recptr); - - if (update_metap) - PageSetLSN(BufferGetPage(metabuf), recptr); } + else /* !RelationNeedsWAL(rel) */ + { + recptr = XLogGetFakeLSN(rel); + + /* Determine if wbuf is modified */ + if (nitups > 0) + mod_wbuf = true; + else if (is_prev_bucket_same_wrt) + mod_wbuf = true; + } + + /* Set LSN iff wbuf is modified. 
*/ + if (mod_wbuf) + PageSetLSN(BufferGetPage(wbuf), recptr); + + PageSetLSN(BufferGetPage(ovflbuf), recptr); + + if (BufferIsValid(prevbuf) && !is_prev_bucket_same_wrt) + PageSetLSN(BufferGetPage(prevbuf), recptr); + if (BufferIsValid(nextbuf)) + PageSetLSN(BufferGetPage(nextbuf), recptr); + + PageSetLSN(BufferGetPage(mapbuf), recptr); + + if (update_metap) + PageSetLSN(BufferGetPage(metabuf), recptr); END_CRIT_SECTION(); @@ -959,6 +977,8 @@ readpage: if (nitups > 0) { + XLogRecPtr recptr; + Assert(nitups == ndeletable); /* @@ -986,7 +1006,6 @@ readpage: /* XLOG stuff */ if (RelationNeedsWAL(rel)) { - XLogRecPtr recptr; xl_hash_move_page_contents xlrec; xlrec.ntups = nitups; @@ -1018,10 +1037,12 @@ readpage: ndeletable * sizeof(OffsetNumber)); recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS); - - PageSetLSN(BufferGetPage(wbuf), recptr); - PageSetLSN(BufferGetPage(rbuf), recptr); } + else + recptr = XLogGetFakeLSN(rel); + + PageSetLSN(BufferGetPage(wbuf), recptr); + PageSetLSN(BufferGetPage(rbuf), recptr); END_CRIT_SECTION(); diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 8e220a3ae..263bc73f1 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -630,6 +630,7 @@ _hash_expandtable(Relation rel, Buffer metabuf) uint32 lowmask; bool metap_update_masks = false; bool metap_update_splitpoint = false; + XLogRecPtr recptr; restart_expand: @@ -900,7 +901,6 @@ restart_expand: if (RelationNeedsWAL(rel)) { xl_hash_split_allocate_page xlrec; - XLogRecPtr recptr; xlrec.new_bucket = maxbucket; xlrec.old_bucket_flag = oopaque->hasho_flag; @@ -933,11 +933,13 @@ restart_expand: XLogRegisterData(&xlrec, SizeOfHashSplitAllocPage); recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_ALLOCATE_PAGE); - - PageSetLSN(BufferGetPage(buf_oblkno), recptr); - PageSetLSN(BufferGetPage(buf_nblkno), recptr); - PageSetLSN(BufferGetPage(metabuf), recptr); } + else + recptr = XLogGetFakeLSN(rel); + + 
PageSetLSN(BufferGetPage(buf_oblkno), recptr); + PageSetLSN(BufferGetPage(buf_nblkno), recptr); + PageSetLSN(BufferGetPage(metabuf), recptr); END_CRIT_SECTION(); @@ -1092,6 +1094,7 @@ _hash_splitbucket(Relation rel, Size all_tups_size = 0; int i; uint16 nitups = 0; + XLogRecPtr recptr; bucket_obuf = obuf; opage = BufferGetPage(obuf); @@ -1296,7 +1299,6 @@ _hash_splitbucket(Relation rel, if (RelationNeedsWAL(rel)) { - XLogRecPtr recptr; xl_hash_split_complete xlrec; xlrec.old_bucket_flag = oopaque->hasho_flag; @@ -1310,10 +1312,12 @@ _hash_splitbucket(Relation rel, XLogRegisterBuffer(1, bucket_nbuf, REGBUF_STANDARD); recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_COMPLETE); - - PageSetLSN(BufferGetPage(bucket_obuf), recptr); - PageSetLSN(BufferGetPage(bucket_nbuf), recptr); } + else + recptr = XLogGetFakeLSN(rel); + + PageSetLSN(BufferGetPage(bucket_obuf), recptr); + PageSetLSN(BufferGetPage(bucket_nbuf), recptr); END_CRIT_SECTION(); -- 2.53.0