From 2adfcd6c16f94e7dadb38ffc6cfed3457b363bf5 Mon Sep 17 00:00:00 2001
From: alterego655 <824662526@qq.com>
Date: Sun, 28 Dec 2025 18:29:28 +0800
Subject: [PATCH v6 3/5] Streamify hash index VACUUM primary bucket page reads

Refactor hashbulkdelete() to use the read stream API to read primary
bucket pages.  This allows upcoming bucket pages to be prefetched while
the current bucket is still being processed, improving I/O efficiency
during hash index vacuum.
---
 src/backend/access/hash/hash.c   | 80 ++++++++++++++++++++++++++++++--
 src/tools/pgindent/typedefs.list |  1 +
 2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index e88ddb32a05..6df5e7ccbd1 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -30,6 +30,7 @@
 #include "nodes/execnodes.h"
 #include "optimizer/plancat.h"
 #include "pgstat.h"
+#include "storage/read_stream.h"
 #include "utils/fmgrprotos.h"
 #include "utils/index_selfuncs.h"
 #include "utils/rel.h"
@@ -42,12 +43,23 @@ typedef struct
     Relation    heapRel;        /* heap relation descriptor */
 } HashBuildState;
 
+/* Working state for streaming reads in hashbulkdelete */
+typedef struct
+{
+    HashMetaPage metap;         /* cached metapage for BUCKET_TO_BLKNO */
+    Bucket      next_bucket;    /* next bucket to prefetch */
+    Bucket      max_bucket;     /* stop when next_bucket > max_bucket */
+} HashBulkDeleteStreamPrivate;
+
 static void hashbuildCallback(Relation index,
                               ItemPointer tid,
                               Datum *values,
                               bool *isnull,
                               bool tupleIsAlive,
                               void *state);
+static BlockNumber hash_bulkdelete_read_stream_cb(ReadStream *stream,
+                                                  void *callback_private_data,
+                                                  void *per_buffer_data);
 
 
 /*
@@ -451,6 +463,27 @@ hashendscan(IndexScanDesc scan)
     scan->opaque = NULL;
 }
 
+/*
+ * Read stream callback for hashbulkdelete.
+ *
+ * Returns the block number of the primary page for the next bucket to
+ * vacuum, using the BUCKET_TO_BLKNO mapping from the cached metapage.
+ */
+static BlockNumber
+hash_bulkdelete_read_stream_cb(ReadStream *stream,
+                               void *callback_private_data,
+                               void *per_buffer_data)
+{
+    HashBulkDeleteStreamPrivate *p = callback_private_data;
+    Bucket      bucket;
+
+    if (p->next_bucket > p->max_bucket)
+        return InvalidBlockNumber;
+
+    bucket = p->next_bucket++;
+    return BUCKET_TO_BLKNO(p->metap, bucket);
+}
+
 /*
  * Bulk deletion of all index entries pointing to a set of heap tuples.
  * The set of target tuples is specified via a callback routine that tells
@@ -475,6 +508,8 @@ hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     Buffer      metabuf = InvalidBuffer;
     HashMetaPage metap;
     HashMetaPage cachedmetap;
+    HashBulkDeleteStreamPrivate stream_private;
+    ReadStream *stream = NULL;
 
     tuples_removed = 0;
     num_index_tuples = 0;
@@ -495,7 +530,25 @@ hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     cur_bucket = 0;
     cur_maxbucket = orig_maxbucket;
 
-loop_top:
+    /* Set up streaming read for primary bucket pages */
+    stream_private.metap = cachedmetap;
+    stream_private.next_bucket = cur_bucket;
+    stream_private.max_bucket = cur_maxbucket;
+
+    /*
+     * It is safe to use batchmode as hash_bulkdelete_read_stream_cb takes no
+     * locks.
+     */
+    stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
+                                        READ_STREAM_USE_BATCHING,
+                                        info->strategy,
+                                        rel,
+                                        MAIN_FORKNUM,
+                                        hash_bulkdelete_read_stream_cb,
+                                        &stream_private,
+                                        0);
+
+bucket_loop:
     while (cur_bucket <= cur_maxbucket)
     {
         BlockNumber bucket_blkno;
@@ -515,7 +568,8 @@ loop_top:
          * We need to acquire a cleanup lock on the primary bucket page to out
          * wait concurrent scans before deleting the dead tuples.
          */
-        buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy);
+        buf = read_stream_next_buffer(stream, NULL);
+        Assert(BufferIsValid(buf));
         LockBufferForCleanup(buf);
         _hash_checkpage(rel, buf, LH_BUCKET_PAGE);
 
@@ -546,6 +600,16 @@ loop_top:
         {
             cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
             Assert(cachedmetap != NULL);
+
+            /*
+             * Reset stream with updated metadata for remaining buckets.
+             * The BUCKET_TO_BLKNO mapping depends on hashm_spares[],
+             * which may have changed.
+             */
+            stream_private.metap = cachedmetap;
+            stream_private.next_bucket = cur_bucket + 1;
+            stream_private.max_bucket = cur_maxbucket;
+            read_stream_reset(stream);
         }
     }
 
@@ -578,9 +642,19 @@ loop_top:
         cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
         Assert(cachedmetap != NULL);
         cur_maxbucket = cachedmetap->hashm_maxbucket;
-        goto loop_top;
+
+        /* Reset stream to process additional buckets from split */
+        stream_private.metap = cachedmetap;
+        stream_private.next_bucket = cur_bucket;
+        stream_private.max_bucket = cur_maxbucket;
+        read_stream_reset(stream);
+        goto bucket_loop;
     }
 
+    /* Stream should be exhausted since we processed all buckets */
+    Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+    read_stream_end(stream);
+
     /* Okay, we're really done.  Update tuple count in metapage. */
     START_CRIT_SECTION();
 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index a67246138eb..0d60a17bc2c 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1185,6 +1185,7 @@ HashAggBatch
 HashAggSpill
 HashAllocFunc
 HashBuildState
+HashBulkDeleteStreamPrivate
 HashCompareFunc
 HashCopyFunc
 HashIndexStat
-- 
2.51.0
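
Reviewer note (illustration only, not part of the patch): the streaming
logic above reduces to a callback that hands out one block number per
bucket until it runs dry, plus a reset that re-arms the stream with
refreshed private state once the cached metapage changes. The standalone
C sketch below models that control flow with a toy stream; StreamPrivate,
next_block_cb(), and the spares field are hypothetical stand-ins for
illustration, not PostgreSQL APIs, and the block arithmetic is a toy
version of BUCKET_TO_BLKNO.

#include <stdio.h>
#include <stdint.h>

#define INVALID_BLOCK ((uint32_t) 0xFFFFFFFF)

/* Toy analogue of HashBulkDeleteStreamPrivate. */
typedef struct
{
    uint32_t    next_bucket;    /* next bucket to hand out */
    uint32_t    max_bucket;     /* stop when next_bucket > max_bucket */
    uint32_t    spares;         /* stand-in for hashm_spares[] effects */
} StreamPrivate;

/* Same shape as hash_bulkdelete_read_stream_cb: one block per bucket. */
static uint32_t
next_block_cb(StreamPrivate *p)
{
    uint32_t    bucket;

    if (p->next_bucket > p->max_bucket)
        return INVALID_BLOCK;

    bucket = p->next_bucket++;
    /* toy mapping: skip the metapage, then account for overflow pages */
    return bucket + p->spares + 1;
}

int
main(void)
{
    StreamPrivate p = {0, 3, 0};
    uint32_t    blkno;

    /* Drain the stream, as bucket_loop does for buckets 0..max_bucket. */
    while ((blkno = next_block_cb(&p)) != INVALID_BLOCK)
        printf("vacuum bucket page at block %u\n", blkno);

    /*
     * A split added buckets: refresh the private state and go around
     * again, mirroring read_stream_reset() + goto bucket_loop.
     */
    p.max_bucket = 5;
    p.spares = 2;
    while ((blkno = next_block_cb(&p)) != INVALID_BLOCK)
        printf("vacuum new bucket page at block %u\n", blkno);

    return 0;
}

The shape matters more than the arithmetic: the callback only reads its
private struct and takes no locks, which is what the patch's own comment
gives as the justification for READ_STREAM_USE_BATCHING.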