From 586b8007cff40671f7039d67b77f8e0154e8e782 Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Sun, 28 Dec 2025 18:29:28 +0800 Subject: [PATCH v4 6/6] Streamify hash index VACUUM primary bucket page reads Refactor hashbulkdelete() to use the Read Stream for primary bucket pages. This enables prefetching of upcoming buckets while the current one is being processed, improving I/O efficiency during hash index vacuum operations. --- src/backend/access/hash/hash.c | 80 ++++++++++++++++++++++++++++++-- src/tools/pgindent/typedefs.list | 1 + 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index e388252afdc..01219c0015e 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -30,6 +30,7 @@ #include "nodes/execnodes.h" #include "optimizer/plancat.h" #include "pgstat.h" +#include "storage/read_stream.h" #include "utils/fmgrprotos.h" #include "utils/index_selfuncs.h" #include "utils/rel.h" @@ -42,12 +43,23 @@ typedef struct Relation heapRel; /* heap relation descriptor */ } HashBuildState; +/* Working state for streaming reads in hashbulkdelete */ +typedef struct +{ + HashMetaPage metap; /* cached metapage for BUCKET_TO_BLKNO */ + Bucket next_bucket; /* next bucket to prefetch */ + Bucket max_bucket; /* stop when next_bucket > max_bucket */ +} HashBulkDeleteStreamPrivate; + static void hashbuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state); +static BlockNumber hash_bulkdelete_read_stream_cb(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data); /* @@ -450,6 +462,27 @@ hashendscan(IndexScanDesc scan) scan->opaque = NULL; } +/* + * Read stream callback for hashbulkdelete. + * + * Returns the block number of the primary page for the next bucket to + * vacuum, using the BUCKET_TO_BLKNO mapping from the cached metapage. + */ +static BlockNumber +hash_bulkdelete_read_stream_cb(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) +{ + HashBulkDeleteStreamPrivate *p = callback_private_data; + Bucket bucket; + + if (p->next_bucket > p->max_bucket) + return InvalidBlockNumber; + + bucket = p->next_bucket++; + return BUCKET_TO_BLKNO(p->metap, bucket); +} + /* * Bulk deletion of all index entries pointing to a set of heap tuples. * The set of target tuples is specified via a callback routine that tells @@ -474,6 +507,8 @@ hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, Buffer metabuf = InvalidBuffer; HashMetaPage metap; HashMetaPage cachedmetap; + HashBulkDeleteStreamPrivate stream_private; + ReadStream *stream = NULL; tuples_removed = 0; num_index_tuples = 0; @@ -494,7 +529,25 @@ hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, cur_bucket = 0; cur_maxbucket = orig_maxbucket; -loop_top: + /* Set up streaming read for primary bucket pages */ + stream_private.metap = cachedmetap; + stream_private.next_bucket = cur_bucket; + stream_private.max_bucket = cur_maxbucket; + + /* + * It is safe to use batchmode as hash_bulkdelete_read_stream_cb takes no + * locks. + */ + stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE | + READ_STREAM_USE_BATCHING, + info->strategy, + rel, + MAIN_FORKNUM, + hash_bulkdelete_read_stream_cb, + &stream_private, + 0); + +bucket_loop: while (cur_bucket <= cur_maxbucket) { BlockNumber bucket_blkno; @@ -514,7 +567,8 @@ loop_top: * We need to acquire a cleanup lock on the primary bucket page to out * wait concurrent scans before deleting the dead tuples. */ - buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); + buf = read_stream_next_buffer(stream, NULL); + Assert(BufferIsValid(buf)); LockBufferForCleanup(buf); _hash_checkpage(rel, buf, LH_BUCKET_PAGE); @@ -545,6 +599,16 @@ loop_top: { cachedmetap = _hash_getcachedmetap(rel, &metabuf, true); Assert(cachedmetap != NULL); + + /* + * Reset stream with updated metadata for remaining buckets. + * The BUCKET_TO_BLKNO mapping depends on hashm_spares[], + * which may have changed. + */ + stream_private.metap = cachedmetap; + stream_private.next_bucket = cur_bucket + 1; + stream_private.max_bucket = cur_maxbucket; + read_stream_reset(stream); } } @@ -577,9 +641,19 @@ loop_top: cachedmetap = _hash_getcachedmetap(rel, &metabuf, true); Assert(cachedmetap != NULL); cur_maxbucket = cachedmetap->hashm_maxbucket; - goto loop_top; + + /* Reset stream to process additional buckets from split */ + stream_private.metap = cachedmetap; + stream_private.next_bucket = cur_bucket; + stream_private.max_bucket = cur_maxbucket; + read_stream_reset(stream); + goto bucket_loop; } + /* Stream should be exhausted since we processed all buckets */ + Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer); + read_stream_end(stream); + /* Okay, we're really done. Update tuple count in metapage. */ START_CRIT_SECTION(); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index dc6fc28fcab..572be5598f2 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1176,6 +1176,7 @@ HashAggBatch HashAggSpill HashAllocFunc HashBuildState +HashBulkDeleteStreamPrivate HashCompareFunc HashCopyFunc HashIndexStat -- 2.51.0