From bf146598cd94110da255c6fb45b4a5c58002a91d Mon Sep 17 00:00:00 2001 From: Alexandre Felipe Date: Wed, 11 Mar 2026 12:07:54 +0000 Subject: [PATCH 3/5] Using simplehash This patch replaces the HTAB implementation with a simplehash as suggested by Andres Freund [1]. The simplehash implements a templated open addressing hash, which have entry size information at compile time. The access strategy of the simplehash is very close to the plain array that was seen to be very efficient compared to the HTAB implementation, with the additional advantage of using the key value to initialize the search closer to where the key actually is, instead of always scanning the entire array. Adds one macro to the simplehash implementation enable overriding the empty position detection, to enable using buffer == InvalidBuffer instead of requiring a separate status field. [1] https://www.postgresql.org/message-id/s5p7iou7pdhxhvmv4rohmskwqmr36dc4rybvwlep5yvwrjs4pa%406oxsemms5mw4 --- src/backend/storage/buffer/bufmgr_refcnt.c | 89 +++++++++++----------- src/include/lib/simplehash.h | 59 +++++++++----- 2 files changed, 84 insertions(+), 64 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr_refcnt.c b/src/backend/storage/buffer/bufmgr_refcnt.c index fef8f98037..dd95d953f6 100644 --- a/src/backend/storage/buffer/bufmgr_refcnt.c +++ b/src/backend/storage/buffer/bufmgr_refcnt.c @@ -42,7 +42,10 @@ *------------------------------------------------------------------------- */ -#include "utils/hsearch.h" +#include "common/hashfn.h" +#include "storage/buf_internals.h" +#include "bufmgr_refcnt.h" +#include "storage/proc.h" /* Structure definitions - internal to this file */ typedef struct PrivateRefCountData @@ -53,15 +56,36 @@ typedef struct PrivateRefCountData struct PrivateRefCountEntry { - Buffer buffer; + Buffer buffer; /* hash key - InvalidBuffer means empty */ PrivateRefCountData data; }; +/* + * Define simplehash parameters for the overflow hash table. + * We use buffer == InvalidBuffer as the "empty" marker to avoid needing + * a separate status field. + */ +#define SH_PREFIX refcount +#define SH_ELEMENT_TYPE PrivateRefCountEntry +#define SH_KEY_TYPE Buffer +#define SH_KEY buffer +#define SH_HASH_KEY(tb, key) murmurhash32(key) +#define SH_EQUAL(tb, a, b) ((a) == (b)) +#define SH_SCOPE static inline +#define SH_DEFINE +#define SH_DECLARE +/* Use buffer field as empty marker - no separate status needed */ +#define SH_ENTRY_EMPTY(entry) ((entry)->buffer == InvalidBuffer) +#define SH_MAKE_EMPTY(entry) ((entry)->buffer = InvalidBuffer) +#define SH_MAKE_IN_USE(entry) ((void)0) /* key assignment handles this */ +#include "lib/simplehash.h" + struct PrivateRefCountIterator { int array_index; bool in_hash; - HASH_SEQ_STATUS *hash_status; + refcount_iterator hash_iter; + bool hash_iter_valid; }; /* Private refcount array and keys */ @@ -70,7 +94,7 @@ static Buffer PrivateRefCountArrayKeys[REFCOUNT_ARRAY_ENTRIES]; static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]; /* Overflow hash table for when array is full */ -static HTAB *PrivateRefCountHash = NULL; +static refcount_hash *PrivateRefCountHash = NULL; /* Count of entries that have overflowed into the hash table */ static int32 PrivateRefCountOverflowed = 0; @@ -98,26 +122,18 @@ static pg_noinline PrivateRefCountEntry *GetPrivateRefCountEntrySlow(Buffer buff void InitPrivateRefCount(void) { - HASHCTL hash_ctl; - - /* * An advisory limit on the number of pins each backend should hold, based * on shared_buffers and the maximum number of connections possible. * That's very pessimistic, but outside toy-sized shared_buffers it should * allow plenty of pins. LimitAdditionalPins() and * GetAdditionalPinLimit() can be used to check the remaining balance. - */ - MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS); - + */ + MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS); memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray)); memset(&PrivateRefCountArrayKeys, 0, sizeof(PrivateRefCountArrayKeys)); - hash_ctl.keysize = sizeof(Buffer); - hash_ctl.entrysize = sizeof(PrivateRefCountEntry); - - PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl, - HASH_ELEM | HASH_BLOBS); + PrivateRefCountHash = refcount_create(CurrentMemoryContext, 16, NULL); } /* @@ -171,10 +187,9 @@ ReservePrivateRefCountEntry(void) Assert(PrivateRefCountArrayKeys[victim_slot] == PrivateRefCountArray[victim_slot].buffer); /* enter victim array entry into hashtable */ - hashent = hash_search(PrivateRefCountHash, - &PrivateRefCountArrayKeys[victim_slot], - HASH_ENTER, - &found); + hashent = refcount_insert(PrivateRefCountHash, + PrivateRefCountArrayKeys[victim_slot], + &found); Assert(!found); hashent->data = victim_entry->data; @@ -251,7 +266,7 @@ GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move) if (PrivateRefCountOverflowed == 0) return NULL; - res = hash_search(PrivateRefCountHash, &buffer, HASH_FIND, NULL); + res = refcount_lookup(PrivateRefCountHash, buffer); if (res == NULL) return NULL; @@ -262,7 +277,6 @@ GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move) else { /* move buffer from hashtable into the free array slot */ - bool found; PrivateRefCountEntry *free; ReservePrivateRefCountEntry(); @@ -278,8 +292,7 @@ GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move) ReservedRefCountSlot = -1; - hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found); - Assert(found); + refcount_delete_item(PrivateRefCountHash, res); Assert(PrivateRefCountOverflowed > 0); PrivateRefCountOverflowed--; @@ -366,11 +379,7 @@ SharedBufferUnref(PrivateRefCountEntry *ref) } else { - bool found; - Buffer buffer = ref->buffer; - - hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found); - Assert(found); + refcount_delete_item(PrivateRefCountHash, ref); Assert(PrivateRefCountOverflowed > 0); PrivateRefCountOverflowed--; } @@ -441,10 +450,10 @@ CheckPrivateRefCountLeaks(void) /* if necessary search the hash */ if (PrivateRefCountOverflowed) { - HASH_SEQ_STATUS hstat; + refcount_iterator iter; - hash_seq_init(&hstat, PrivateRefCountHash); - while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL) + refcount_start_iterate(PrivateRefCountHash, &iter); + while ((res = refcount_iterate(PrivateRefCountHash, &iter)) != NULL) { s = DebugPrintBufferRefcount(res->buffer); elog(WARNING, "buffer refcount leak: %s", s); @@ -495,7 +504,7 @@ InitPrivateRefCountIterator(void) iter->array_index = 0; iter->in_hash = false; - iter->hash_status = NULL; + iter->hash_iter_valid = false; return iter; } @@ -521,21 +530,20 @@ GetNextPrivateRefCountEntry(PrivateRefCountIterator *iter) iter->in_hash = true; if (PrivateRefCountOverflowed > 0) { - iter->hash_status = palloc(sizeof(HASH_SEQ_STATUS)); - hash_seq_init(iter->hash_status, PrivateRefCountHash); + refcount_start_iterate(PrivateRefCountHash, &iter->hash_iter); + iter->hash_iter_valid = true; } } - if (iter->hash_status != NULL) + if (iter->hash_iter_valid) { PrivateRefCountEntry *res; - res = (PrivateRefCountEntry *) hash_seq_search(iter->hash_status); + res = refcount_iterate(PrivateRefCountHash, &iter->hash_iter); if (res != NULL) return res; - pfree(iter->hash_status); - iter->hash_status = NULL; + iter->hash_iter_valid = false; } return NULL; @@ -547,11 +555,6 @@ GetNextPrivateRefCountEntry(PrivateRefCountIterator *iter) void FreePrivateRefCountIterator(PrivateRefCountIterator *iter) { - if (iter->hash_status != NULL) - { - hash_seq_term(iter->hash_status); - pfree(iter->hash_status); - } pfree(iter); } diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h index 848719232a..3c03a7e9c9 100644 --- a/src/include/lib/simplehash.h +++ b/src/include/lib/simplehash.h @@ -287,6 +287,20 @@ SH_SCOPE void SH_STAT(SH_TYPE * tb); #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey)) #endif +/* + * Macros to check/set entry status. Users can override these to avoid + * needing a separate status field if their key type has an "invalid" value. + */ +#ifndef SH_ENTRY_EMPTY +#define SH_ENTRY_EMPTY(entry) ((entry)->status == SH_STATUS_EMPTY) +#endif +#ifndef SH_MAKE_EMPTY +#define SH_MAKE_EMPTY(entry) ((entry)->status = SH_STATUS_EMPTY) +#endif +#ifndef SH_MAKE_IN_USE +#define SH_MAKE_IN_USE(entry) ((entry)->status = SH_STATUS_IN_USE) +#endif + /* * Wrap the following definitions in include guards, to avoid multiple * definition errors if this header is included more than once. The rest of @@ -544,7 +558,7 @@ SH_GROW(SH_TYPE * tb, uint64 newsize) uint32 hash; uint32 optimal; - if (oldentry->status != SH_STATUS_IN_USE) + if (SH_ENTRY_EMPTY(oldentry)) { startelem = i; break; @@ -566,7 +580,7 @@ SH_GROW(SH_TYPE * tb, uint64 newsize) { SH_ELEMENT_TYPE *oldentry = &olddata[copyelem]; - if (oldentry->status == SH_STATUS_IN_USE) + if (!SH_ENTRY_EMPTY(oldentry)) { uint32 hash; uint32 startelem2; @@ -582,7 +596,7 @@ SH_GROW(SH_TYPE * tb, uint64 newsize) { newentry = &newdata[curelem]; - if (newentry->status == SH_STATUS_EMPTY) + if (SH_ENTRY_EMPTY(newentry)) { break; } @@ -653,14 +667,14 @@ restart: SH_ELEMENT_TYPE *entry = &data[curelem]; /* any empty bucket can directly be used */ - if (entry->status == SH_STATUS_EMPTY) + if (SH_ENTRY_EMPTY(entry)) { tb->members++; entry->SH_KEY = key; #ifdef SH_STORE_HASH SH_GET_HASH(tb, entry) = hash; #endif - entry->status = SH_STATUS_IN_USE; + SH_MAKE_IN_USE(entry); *found = false; return entry; } @@ -675,7 +689,7 @@ restart: if (SH_COMPARE_KEYS(tb, hash, key, entry)) { - Assert(entry->status == SH_STATUS_IN_USE); + Assert(!SH_ENTRY_EMPTY(entry)); *found = true; return entry; } @@ -699,7 +713,7 @@ restart: emptyelem = SH_NEXT(tb, emptyelem, startelem); emptyentry = &data[emptyelem]; - if (emptyentry->status == SH_STATUS_EMPTY) + if (SH_ENTRY_EMPTY(emptyentry)) { lastentry = emptyentry; break; @@ -748,7 +762,7 @@ restart: #ifdef SH_STORE_HASH SH_GET_HASH(tb, entry) = hash; #endif - entry->status = SH_STATUS_IN_USE; + SH_MAKE_IN_USE(entry); *found = false; return entry; } @@ -810,12 +824,12 @@ SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash) { SH_ELEMENT_TYPE *entry = &tb->data[curelem]; - if (entry->status == SH_STATUS_EMPTY) + if (SH_ENTRY_EMPTY(entry)) { return NULL; } - Assert(entry->status == SH_STATUS_IN_USE); + Assert(!SH_ENTRY_EMPTY(entry)); if (SH_COMPARE_KEYS(tb, hash, key, entry)) return entry; @@ -868,10 +882,10 @@ SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key) { SH_ELEMENT_TYPE *entry = &tb->data[curelem]; - if (entry->status == SH_STATUS_EMPTY) + if (SH_ENTRY_EMPTY(entry)) return false; - if (entry->status == SH_STATUS_IN_USE && + if (!SH_ENTRY_EMPTY(entry) && SH_COMPARE_KEYS(tb, hash, key, entry)) { SH_ELEMENT_TYPE *lastentry = entry; @@ -894,9 +908,9 @@ SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key) curelem = SH_NEXT(tb, curelem, startelem); curentry = &tb->data[curelem]; - if (curentry->status != SH_STATUS_IN_USE) + if (SH_ENTRY_EMPTY(curentry)) { - lastentry->status = SH_STATUS_EMPTY; + SH_MAKE_EMPTY(lastentry); break; } @@ -906,7 +920,7 @@ SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key) /* current is at optimal position, done */ if (curoptimal == curelem) { - lastentry->status = SH_STATUS_EMPTY; + SH_MAKE_EMPTY(lastentry); break; } @@ -957,9 +971,9 @@ SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry) curelem = SH_NEXT(tb, curelem, startelem); curentry = &tb->data[curelem]; - if (curentry->status != SH_STATUS_IN_USE) + if (SH_ENTRY_EMPTY(curentry)) { - lastentry->status = SH_STATUS_EMPTY; + SH_MAKE_EMPTY(lastentry); break; } @@ -969,7 +983,7 @@ SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry) /* current is at optimal position, done */ if (curoptimal == curelem) { - lastentry->status = SH_STATUS_EMPTY; + SH_MAKE_EMPTY(lastentry); break; } @@ -997,7 +1011,7 @@ SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter) { SH_ELEMENT_TYPE *entry = &tb->data[i]; - if (entry->status != SH_STATUS_IN_USE) + if (SH_ENTRY_EMPTY(entry)) { startelem = i; break; @@ -1063,7 +1077,7 @@ SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter) if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask)) iter->done = true; - if (elem->status == SH_STATUS_IN_USE) + if (!SH_ENTRY_EMPTY(elem)) { return elem; } @@ -1140,7 +1154,7 @@ SH_STAT(SH_TYPE * tb) elem = &tb->data[i]; - if (elem->status != SH_STATUS_IN_USE) + if (SH_ENTRY_EMPTY(elem)) continue; hash = SH_ENTRY_HASH(tb, elem); @@ -1205,6 +1219,9 @@ SH_STAT(SH_TYPE * tb) #undef SH_STORE_HASH #undef SH_USE_NONDEFAULT_ALLOCATOR #undef SH_EQUAL +#undef SH_ENTRY_EMPTY +#undef SH_MAKE_EMPTY +#undef SH_MAKE_IN_USE /* undefine locally declared macros */ #undef SH_MAKE_PREFIX -- 2.34.1