From c8b90725fd033465c68688f4663892ce1196a48e Mon Sep 17 00:00:00 2001 From: Alexandre Felipe Date: Fri, 6 Mar 2026 16:31:00 +0000 Subject: [PATCH 2/5] Refactoring reference counting This patch refactors the reference counting mechanism, moving the implementation details away from bufmgr. Unfortunately, this comes with additional call overhead, but I think that the ease of maintenance will pay off. And with the next optimisations, we will end up better than before. --- src/backend/storage/buffer/Makefile | 1 + src/backend/storage/buffer/buf_refcount.c | 602 ++++++++++++++++++++ src/backend/storage/buffer/bufmgr.c | 661 +++------------------- src/include/storage/buf_refcount.h | 58 ++ 4 files changed, 727 insertions(+), 595 deletions(-) create mode 100644 src/backend/storage/buffer/buf_refcount.c create mode 100644 src/include/storage/buf_refcount.h diff --git a/src/backend/storage/buffer/Makefile b/src/backend/storage/buffer/Makefile index fd7c40dcb08..c81271aabf6 100644 --- a/src/backend/storage/buffer/Makefile +++ b/src/backend/storage/buffer/Makefile @@ -14,6 +14,7 @@ include $(top_builddir)/src/Makefile.global OBJS = \ buf_init.o \ + buf_refcount.o \ buf_table.o \ bufmgr.o \ freelist.o \ diff --git a/src/backend/storage/buffer/buf_refcount.c b/src/backend/storage/buffer/buf_refcount.c new file mode 100644 index 00000000000..1c0bec29c93 --- /dev/null +++ b/src/backend/storage/buffer/buf_refcount.c @@ -0,0 +1,602 @@ +/*------------------------------------------------------------------------- + * + * buf_refcount.c + * Backend-private buffer refcount tracking + * + * Each buffer has a private refcount that keeps track of the number of + * times the buffer is pinned in the current process. This is so that the + * shared refcount needs to be modified only once if a buffer is pinned more + * than once by an individual backend. This mechanism is also used to track + * whether this backend has a buffer locked, and, if so, in what mode. 
+ * + * To avoid - as we used to - requiring an array with NBuffers entries to keep + * track of local buffers, we use a small sequentially searched array + * (PrivateRefCountArrayKeys, with the corresponding data stored in + * PrivateRefCountArray) and an overflow hash table (PrivateRefCountHash) to + * keep track of backend-local pins. + * + * As long as no more than REFCOUNT_ARRAY_ENTRIES buffers are pinned at once, + * all refcounts are tracked in the array; after that, new array entries + * displace old ones into the hash table. That way a frequently used entry + * can't get "stuck" in the hash table while infrequent ones clog the array. + * + * This was initially designed to optimize for the case where the + * number of pinned buffers is expected to not exceed REFCOUNT_ARRAY_ENTRIES. + * However, this might not be the case with the introduction of prefetching. + * + * To enter a buffer into the refcount tracking mechanism first reserve a free + * entry using ReservePrivateRefCountEntry() and then later, if necessary, + * fill it with NewPrivateRefCountEntry(). That split lets us avoid doing + * memory allocations in NewPrivateRefCountEntry(), which can be important + * because in some scenarios it's called with a spinlock held... 
+ * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/buffer/buf_refcount.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/buf_internals.h" +#include "storage/buf_refcount.h" +#include "storage/proc.h" +#include "utils/hsearch.h" + + + +typedef struct PrivateRefCountData +{ + int32 refcount; + BufferLockMode lockmode; +} PrivateRefCountData; + +struct PrivateRefCountEntry +{ + Buffer buffer; + PrivateRefCountData data; +}; + +struct PrivateRefCountIterator +{ + int array_index; + bool in_hash; + HASH_SEQ_STATUS *hash_status; +}; + +/* Private refcount array and keys */ +#define REFCOUNT_ARRAY_ENTRIES 8 +static Buffer PrivateRefCountArrayKeys[REFCOUNT_ARRAY_ENTRIES]; +static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]; + +/* Overflow hash table for when array is full */ +static HTAB *PrivateRefCountHash = NULL; + +/* Count of entries that have overflowed into the hash table */ +static int32 PrivateRefCountOverflowed = 0; + +/* Clock hand for selecting victim when array is full */ +static uint32 PrivateRefCountClock = 0; + +/* Reserved slot index, or -1 if none reserved */ +static int ReservedRefCountSlot = -1; + +/* Cache for last accessed entry */ +static int PrivateRefCountEntryLast = -1; + +/* Advisory limit on the number of pins each backend should hold */ +static uint32 MaxProportionalPins = 0; + +/* Forward declarations */ +static void ReservePrivateRefCountEntry(void); +static PrivateRefCountEntry *NewPrivateRefCountEntry(Buffer buffer); +static pg_noinline PrivateRefCountEntry *GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move); + +/* + * Initialize private refcount tracking for this backend. 
+ */ +void +InitPrivateRefCount(void) +{ + HASHCTL hash_ctl; + + + /* + * An advisory limit on the number of pins each backend should hold, based + * on shared_buffers and the maximum number of connections possible. + * That's very pessimistic, but outside toy-sized shared_buffers it should + * allow plenty of pins. LimitAdditionalPins() and + * GetAdditionalPinLimit() can be used to check the remaining balance. + */ + MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS); + + memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray)); + memset(&PrivateRefCountArrayKeys, 0, sizeof(PrivateRefCountArrayKeys)); + + hash_ctl.keysize = sizeof(Buffer); + hash_ctl.entrysize = sizeof(PrivateRefCountEntry); + + PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl, + HASH_ELEM | HASH_BLOBS); +} + +/* + * Ensure that the PrivateRefCountArray has sufficient space to store one more + * entry. + */ +static void +ReservePrivateRefCountEntry(void) +{ + /* Already reserved (or freed), nothing to do */ + if (ReservedRefCountSlot != -1) + return; + + /* + * First search for a free entry the array, that'll be sufficient in the + * majority of cases. + */ + { + int i; + + for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) + { + if (PrivateRefCountArrayKeys[i] == InvalidBuffer) + { + ReservedRefCountSlot = i; + } + } + + if (ReservedRefCountSlot != -1) + return; + } + + /* + * No luck. All array entries are full. Move one array entry into the hash + * table. + */ + { + int victim_slot; + PrivateRefCountEntry *victim_entry; + PrivateRefCountEntry *hashent; + bool found; + + /* select victim slot */ + victim_slot = PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES; + victim_entry = &PrivateRefCountArray[victim_slot]; + ReservedRefCountSlot = victim_slot; + + /* Better be used, otherwise we shouldn't get here. 
*/ + Assert(PrivateRefCountArrayKeys[victim_slot] != InvalidBuffer); + Assert(PrivateRefCountArray[victim_slot].buffer != InvalidBuffer); + Assert(PrivateRefCountArrayKeys[victim_slot] == PrivateRefCountArray[victim_slot].buffer); + + /* enter victim array entry into hashtable */ + hashent = hash_search(PrivateRefCountHash, + &PrivateRefCountArrayKeys[victim_slot], + HASH_ENTER, + &found); + Assert(!found); + hashent->data = victim_entry->data; + + /* clear the now free array slot */ + PrivateRefCountArrayKeys[victim_slot] = InvalidBuffer; + victim_entry->buffer = InvalidBuffer; + + memset(&victim_entry->data, 0, sizeof(victim_entry->data)); + victim_entry->data.refcount = 0; + victim_entry->data.lockmode = BUFFER_LOCK_UNLOCK; + + PrivateRefCountOverflowed++; + } +} + +/* + * Create a new refcount entry for the given buffer. + */ +static PrivateRefCountEntry * +NewPrivateRefCountEntry(Buffer buffer) +{ + PrivateRefCountEntry *res; + + /* only allowed to be called when a reservation has been made */ + Assert(ReservedRefCountSlot != -1); + + /* use up the reserved entry */ + res = &PrivateRefCountArray[ReservedRefCountSlot]; + + /* and fill it */ + PrivateRefCountArrayKeys[ReservedRefCountSlot] = buffer; + res->buffer = buffer; + res->data.refcount = 0; + res->data.lockmode = BUFFER_LOCK_UNLOCK; + + /* update cache for the next lookup */ + PrivateRefCountEntryLast = ReservedRefCountSlot; + + ReservedRefCountSlot = -1; + + return res; +} + +/* + * Slow-path for GetSharedBufferEntry(). + */ +static pg_noinline PrivateRefCountEntry * +GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move) +{ + PrivateRefCountEntry *res; + int match = -1; + int i; + + /* + * First search for references in the array. 
+ */ + for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) + { + if (PrivateRefCountArrayKeys[i] == buffer) + { + match = i; + } + } + + if (likely(match != -1)) + { + PrivateRefCountEntryLast = match; + return &PrivateRefCountArray[match]; + } + + /* + * Only look up the buffer in the hashtable if we've previously overflowed. + */ + if (PrivateRefCountOverflowed == 0) + return NULL; + + res = hash_search(PrivateRefCountHash, &buffer, HASH_FIND, NULL); + + if (res == NULL) + return NULL; + else if (!do_move) + { + return res; + } + else + { + /* move buffer from hashtable into the free array slot */ + bool found; + PrivateRefCountEntry *free; + + ReservePrivateRefCountEntry(); + + Assert(ReservedRefCountSlot != -1); + free = &PrivateRefCountArray[ReservedRefCountSlot]; + Assert(free->buffer == InvalidBuffer); + + free->buffer = buffer; + free->data = res->data; + PrivateRefCountArrayKeys[ReservedRefCountSlot] = buffer; + PrivateRefCountEntryLast = ReservedRefCountSlot; + + ReservedRefCountSlot = -1; + + hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found); + Assert(found); + Assert(PrivateRefCountOverflowed > 0); + PrivateRefCountOverflowed--; + + return free; + } +} + +/* + * Return the PrivateRefCountEntry for the passed buffer. + * Returns NULL if the buffer is not currently pinned. + */ +PrivateRefCountEntry * +GetSharedBufferEntry(Buffer buffer) +{ + Assert(BufferIsValid(buffer)); + Assert(!BufferIsLocal(buffer)); + + /* Fast path: check one-entry cache */ + if (likely(PrivateRefCountEntryLast != -1) && + likely(PrivateRefCountArray[PrivateRefCountEntryLast].buffer == buffer)) + { + return &PrivateRefCountArray[PrivateRefCountEntryLast]; + } + + return GetPrivateRefCountEntrySlow(buffer, false); +} + +/* + * Increment the private refcount for a shared buffer. + * Creates a new entry if one doesn't exist. + * Returns the entry pointer. 
+ */ +PrivateRefCountEntry * +SharedBufferRef(Buffer buffer) +{ + PrivateRefCountEntry *ref; + + Assert(BufferIsValid(buffer)); + Assert(!BufferIsLocal(buffer)); + + /* Check cache first, then slow path */ + if (likely(PrivateRefCountEntryLast != -1) && + likely(PrivateRefCountArray[PrivateRefCountEntryLast].buffer == buffer)) + { + ref = &PrivateRefCountArray[PrivateRefCountEntryLast]; + } + else + { + ref = GetPrivateRefCountEntrySlow(buffer, true); + } + + if (ref == NULL) + { + /* New pin - create entry */ + ReservePrivateRefCountEntry(); + ref = NewPrivateRefCountEntry(buffer); + } + + ref->data.refcount++; + + return ref; +} + +/* + * Increment the private refcount for an existing entry. + * Use when you already have the entry from a previous lookup. + */ +void +SharedBufferRefExisting(PrivateRefCountEntry *ref) +{ + Assert(ref != NULL); + Assert(ref->data.refcount > 0); + ref->data.refcount++; +} + +/* + * Decrement the private refcount for a buffer. + * If the refcount reaches zero, removes the entry and returns true. + * Returns false if the buffer still has references. + */ +bool +SharedBufferUnref(PrivateRefCountEntry *ref) +{ + Assert(ref != NULL); + Assert(ref->data.refcount > 0); + + ref->data.refcount--; + + if (ref->data.refcount == 0) + { + /* No more references - clean up the entry */ + Assert(ref->data.lockmode == BUFFER_LOCK_UNLOCK); + + if (ref >= &PrivateRefCountArray[0] && + ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]) + { + ref->buffer = InvalidBuffer; + PrivateRefCountArrayKeys[ref - PrivateRefCountArray] = InvalidBuffer; + ReservedRefCountSlot = ref - PrivateRefCountArray; + } + else + { + bool found; + Buffer buffer = ref->buffer; + + hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found); + Assert(found); + Assert(PrivateRefCountOverflowed > 0); + PrivateRefCountOverflowed--; + } + + return true; + } + + return false; +} + +/* + * Accessors for refcount entry fields. 
+ */ +int32 +SharedBufferRefCount(PrivateRefCountEntry *ref) +{ + return ref->data.refcount; +} + +BufferLockMode +SharedBufferGetLockMode(PrivateRefCountEntry *ref) +{ + return ref->data.lockmode; +} + +void +SharedBufferSetLockMode(PrivateRefCountEntry *ref, BufferLockMode mode) +{ + ref->data.lockmode = mode; +} + +Buffer +SharedBufferGetBuffer(PrivateRefCountEntry *ref) +{ + return ref->buffer; +} + +/* + * Check for buffer refcount leaks. + */ +void +CheckPrivateRefCountLeaks(void) +{ +#ifdef USE_ASSERT_CHECKING + int RefCountErrors = 0; + PrivateRefCountEntry *res; + int i; + char *s; + + /* check the array */ + for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) + { + if (PrivateRefCountArrayKeys[i] != InvalidBuffer) + { + res = &PrivateRefCountArray[i]; + + s = DebugPrintBufferRefcount(res->buffer); + elog(WARNING, "buffer refcount leak: %s", s); + pfree(s); + + RefCountErrors++; + } + } + + /* if necessary search the hash */ + if (PrivateRefCountOverflowed) + { + HASH_SEQ_STATUS hstat; + + hash_seq_init(&hstat, PrivateRefCountHash); + while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL) + { + s = DebugPrintBufferRefcount(res->buffer); + elog(WARNING, "buffer refcount leak: %s", s); + pfree(s); + RefCountErrors++; + } + } + + Assert(RefCountErrors == 0); +#endif +} + +/* + * Initialize an iterator for walking all private refcount entries. + */ +PrivateRefCountIterator * +InitPrivateRefCountIterator(void) +{ + PrivateRefCountIterator *iter = palloc(sizeof(PrivateRefCountIterator)); + + iter->array_index = 0; + iter->in_hash = false; + iter->hash_status = NULL; + return iter; +} + +/* + * Get the next private refcount entry. + * Returns NULL when iteration is complete. 
+ */ +PrivateRefCountEntry * +GetNextPrivateRefCountEntry(PrivateRefCountIterator *iter) +{ + /* First iterate through the array */ + while (!iter->in_hash && iter->array_index < REFCOUNT_ARRAY_ENTRIES) + { + int idx = iter->array_index++; + + if (PrivateRefCountArrayKeys[idx] != InvalidBuffer) + return &PrivateRefCountArray[idx]; + } + + /* Then iterate through the hash if there are overflowed entries */ + if (!iter->in_hash) + { + iter->in_hash = true; + if (PrivateRefCountOverflowed > 0) + { + iter->hash_status = palloc(sizeof(HASH_SEQ_STATUS)); + hash_seq_init(iter->hash_status, PrivateRefCountHash); + } + } + + if (iter->hash_status != NULL) + { + PrivateRefCountEntry *res; + + res = (PrivateRefCountEntry *) hash_seq_search(iter->hash_status); + if (res != NULL) + return res; + + pfree(iter->hash_status); + iter->hash_status = NULL; + } + + return NULL; +} + +/* + * Free an iterator from InitPrivateRefCountIterator. + */ +void +FreePrivateRefCountIterator(PrivateRefCountIterator *iter) +{ + if (iter->hash_status != NULL) + { + hash_seq_term(iter->hash_status); + pfree(iter->hash_status); + } + pfree(iter); +} + + +/* + * Return the maximum number of buffers that a backend should try to pin once, + * to avoid exceeding its fair share. This is the highest value that + * GetAdditionalPinLimit() could ever return. Note that it may be zero on a + * system with a very small buffer pool relative to max_connections. + */ + uint32 + GetPinLimit(void) + { + return MaxProportionalPins; + } + + /* + * Return the maximum number of additional buffers that this backend should + * pin if it wants to stay under the per-backend limit, considering the number + * of buffers it has already pinned. Unlike LimitAdditionalPins(), the limit + * return by this function can be zero. + */ + uint32 + GetAdditionalPinLimit(void) + { + uint32 estimated_pins_held; + + /* + * We get the number of "overflowed" pins for free, but don't know the + * number of pins in PrivateRefCountArray. 
The cost of calculating that + * exactly doesn't seem worth it, so just assume the max. + */ + estimated_pins_held = PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES; + + /* Is this backend already holding more than its fair share? */ + if (estimated_pins_held > MaxProportionalPins) + return 0; + + return MaxProportionalPins - estimated_pins_held; + } + + /* + * Limit the number of pins a batch operation may additionally acquire, to + * avoid running out of pinnable buffers. + * + * One additional pin is always allowed, on the assumption that the operation + * requires at least one to make progress. + */ + void + LimitAdditionalPins(uint32 *additional_pins) + { + uint32 limit; + + if (*additional_pins <= 1) + return; + + limit = GetAdditionalPinLimit(); + limit = Max(limit, 1); + if (limit < *additional_pins) + *additional_pins = limit; + } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 5f3d083e938..aa99e97e286 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -54,6 +54,7 @@ #include "storage/aio.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" +#include "storage/buf_refcount.h" #include "storage/fd.h" #include "storage/ipc.h" #include "storage/lmgr.h" @@ -93,43 +94,6 @@ */ #define BUF_DROP_FULL_SCAN_THRESHOLD (uint64) (NBuffers / 32) -/* - * This is separated out from PrivateRefCountEntry to allow for copying all - * the data members via struct assignment. - */ -typedef struct PrivateRefCountData -{ - /* - * How many times has the buffer been pinned by this backend. - */ - int32 refcount; - - /* - * Is the buffer locked by this backend? BUFFER_LOCK_UNLOCK indicates that - * the buffer is not locked. - */ - BufferLockMode lockmode; -} PrivateRefCountData; - -typedef struct PrivateRefCountEntry -{ - /* - * Note that this needs to be same as the entry's corresponding - * PrivateRefCountArrayKeys[i], if the entry is stored in the array. 
We - * store it in both places as this is used for the hashtable key and - * because it is more convenient (passing around a PrivateRefCountEntry - * suffices to identify the buffer) and faster (checking the keys array is - * faster when checking many entries, checking the entry is faster if just - * checking a single entry). - */ - Buffer buffer; - - PrivateRefCountData data; -} PrivateRefCountEntry; - -/* 64 bytes, about the size of a cache line on common systems */ -#define REFCOUNT_ARRAY_ENTRIES 8 - /* * Status of buffers to checkpoint for a particular tablespace, used * internally in BufferSync. @@ -213,55 +177,6 @@ int backend_flush_after = DEFAULT_BACKEND_FLUSH_AFTER; /* local state for LockBufferForCleanup */ static BufferDesc *PinCountWaitBuf = NULL; -/* - * Backend-Private refcount management: - * - * Each buffer also has a private refcount that keeps track of the number of - * times the buffer is pinned in the current process. This is so that the - * shared refcount needs to be modified only once if a buffer is pinned more - * than once by an individual backend. It's also used to check that no - * buffers are still pinned at the end of transactions and when exiting. We - * also use this mechanism to track whether this backend has a buffer locked, - * and, if so, in what mode. - * - * - * To avoid - as we used to - requiring an array with NBuffers entries to keep - * track of local buffers, we use a small sequentially searched array - * (PrivateRefCountArrayKeys, with the corresponding data stored in - * PrivateRefCountArray) and an overflow hash table (PrivateRefCountHash) to - * keep track of backend local pins. - * - * Until no more than REFCOUNT_ARRAY_ENTRIES buffers are pinned at once, all - * refcounts are kept track of in the array; after that, new array entries - * displace old ones into the hash table. That way a frequently used entry - * can't get "stuck" in the hashtable while infrequent ones clog the array. 
- * - * Note that in most scenarios the number of pinned buffers will not exceed - * REFCOUNT_ARRAY_ENTRIES. - * - * - * To enter a buffer into the refcount tracking mechanism first reserve a free - * entry using ReservePrivateRefCountEntry() and then later, if necessary, - * fill it with NewPrivateRefCountEntry(). That split lets us avoid doing - * memory allocations in NewPrivateRefCountEntry() which can be important - * because in some scenarios it's called with a spinlock held... - */ -static Buffer PrivateRefCountArrayKeys[REFCOUNT_ARRAY_ENTRIES]; -static struct PrivateRefCountEntry PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]; -static HTAB *PrivateRefCountHash = NULL; -static int32 PrivateRefCountOverflowed = 0; -static uint32 PrivateRefCountClock = 0; -static int ReservedRefCountSlot = -1; -static int PrivateRefCountEntryLast = -1; - -static uint32 MaxProportionalPins; - -static void ReservePrivateRefCountEntry(void); -static PrivateRefCountEntry *NewPrivateRefCountEntry(Buffer buffer); -static PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool do_move); -static inline int32 GetPrivateRefCount(Buffer buffer); -static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref); - /* ResourceOwner callbacks to hold in-progress I/Os and buffer pins */ static void ResOwnerReleaseBufferIO(Datum res); static char *ResOwnerPrintBufferIO(Datum res); @@ -286,301 +201,6 @@ const ResourceOwnerDesc buffer_resowner_desc = .DebugPrint = ResOwnerPrintBuffer }; -/* - * Ensure that the PrivateRefCountArray has sufficient space to store one more - * entry. This has to be called before using NewPrivateRefCountEntry() to fill - * a new entry - but it's perfectly fine to not use a reserved entry. - */ -static void -ReservePrivateRefCountEntry(void) -{ - /* Already reserved (or freed), nothing to do */ - if (ReservedRefCountSlot != -1) - return; - - /* - * First search for a free entry the array, that'll be sufficient in the - * majority of cases. 
- */ - { - int i; - - for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) - { - if (PrivateRefCountArrayKeys[i] == InvalidBuffer) - { - ReservedRefCountSlot = i; - - /* - * We could return immediately, but iterating till the end of - * the array allows compiler-autovectorization. - */ - } - } - - if (ReservedRefCountSlot != -1) - return; - } - - /* - * No luck. All array entries are full. Move one array entry into the hash - * table. - */ - { - /* - * Move entry from the current clock position in the array into the - * hashtable. Use that slot. - */ - int victim_slot; - PrivateRefCountEntry *victim_entry; - PrivateRefCountEntry *hashent; - bool found; - - /* select victim slot */ - victim_slot = PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES; - victim_entry = &PrivateRefCountArray[victim_slot]; - ReservedRefCountSlot = victim_slot; - - /* Better be used, otherwise we shouldn't get here. */ - Assert(PrivateRefCountArrayKeys[victim_slot] != InvalidBuffer); - Assert(PrivateRefCountArray[victim_slot].buffer != InvalidBuffer); - Assert(PrivateRefCountArrayKeys[victim_slot] == PrivateRefCountArray[victim_slot].buffer); - - /* enter victim array entry into hashtable */ - hashent = hash_search(PrivateRefCountHash, - &PrivateRefCountArrayKeys[victim_slot], - HASH_ENTER, - &found); - Assert(!found); - /* move data from the entry in the array to the hash entry */ - hashent->data = victim_entry->data; - - /* clear the now free array slot */ - PrivateRefCountArrayKeys[victim_slot] = InvalidBuffer; - victim_entry->buffer = InvalidBuffer; - - /* clear the whole data member, just for future proofing */ - memset(&victim_entry->data, 0, sizeof(victim_entry->data)); - victim_entry->data.refcount = 0; - victim_entry->data.lockmode = BUFFER_LOCK_UNLOCK; - - PrivateRefCountOverflowed++; - } -} - -/* - * Fill a previously reserved refcount entry. 
- */ -static PrivateRefCountEntry * -NewPrivateRefCountEntry(Buffer buffer) -{ - PrivateRefCountEntry *res; - - /* only allowed to be called when a reservation has been made */ - Assert(ReservedRefCountSlot != -1); - - /* use up the reserved entry */ - res = &PrivateRefCountArray[ReservedRefCountSlot]; - - /* and fill it */ - PrivateRefCountArrayKeys[ReservedRefCountSlot] = buffer; - res->buffer = buffer; - res->data.refcount = 0; - res->data.lockmode = BUFFER_LOCK_UNLOCK; - - /* update cache for the next lookup */ - PrivateRefCountEntryLast = ReservedRefCountSlot; - - ReservedRefCountSlot = -1; - - return res; -} - -/* - * Slow-path for GetPrivateRefCountEntry(). This is big enough to not be worth - * inlining. This particularly seems to be true if the compiler is capable of - * auto-vectorizing the code, as that imposes additional stack-alignment - * requirements etc. - */ -static pg_noinline PrivateRefCountEntry * -GetPrivateRefCountEntrySlow(Buffer buffer, bool do_move) -{ - PrivateRefCountEntry *res; - int match = -1; - int i; - - /* - * First search for references in the array, that'll be sufficient in the - * majority of cases. - */ - for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) - { - if (PrivateRefCountArrayKeys[i] == buffer) - { - match = i; - /* see ReservePrivateRefCountEntry() for why we don't return */ - } - } - - if (likely(match != -1)) - { - /* update cache for the next lookup */ - PrivateRefCountEntryLast = match; - - return &PrivateRefCountArray[match]; - } - - /* - * By here we know that the buffer, if already pinned, isn't residing in - * the array. - * - * Only look up the buffer in the hashtable if we've previously overflowed - * into it. 
- */ - if (PrivateRefCountOverflowed == 0) - return NULL; - - res = hash_search(PrivateRefCountHash, &buffer, HASH_FIND, NULL); - - if (res == NULL) - return NULL; - else if (!do_move) - { - /* caller doesn't want us to move the hash entry into the array */ - return res; - } - else - { - /* move buffer from hashtable into the free array slot */ - bool found; - PrivateRefCountEntry *free; - - /* Ensure there's a free array slot */ - ReservePrivateRefCountEntry(); - - /* Use up the reserved slot */ - Assert(ReservedRefCountSlot != -1); - free = &PrivateRefCountArray[ReservedRefCountSlot]; - Assert(PrivateRefCountArrayKeys[ReservedRefCountSlot] == free->buffer); - Assert(free->buffer == InvalidBuffer); - - /* and fill it */ - free->buffer = buffer; - free->data = res->data; - PrivateRefCountArrayKeys[ReservedRefCountSlot] = buffer; - /* update cache for the next lookup */ - PrivateRefCountEntryLast = match; - - ReservedRefCountSlot = -1; - - - /* delete from hashtable */ - hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found); - Assert(found); - Assert(PrivateRefCountOverflowed > 0); - PrivateRefCountOverflowed--; - - return free; - } -} - -/* - * Return the PrivateRefCount entry for the passed buffer. - * - * Returns NULL if a buffer doesn't have a refcount entry. Otherwise, if - * do_move is true, and the entry resides in the hashtable the entry is - * optimized for frequent access by moving it to the array. - */ -static inline PrivateRefCountEntry * -GetPrivateRefCountEntry(Buffer buffer, bool do_move) -{ - Assert(BufferIsValid(buffer)); - Assert(!BufferIsLocal(buffer)); - - /* - * It's very common to look up the same buffer repeatedly. To make that - * fast, we have a one-entry cache. - * - * In contrast to the loop in GetPrivateRefCountEntrySlow(), here it - * faster to check PrivateRefCountArray[].buffer, as in the case of a hit - * fewer addresses are computed and fewer cachelines are accessed. 
Whereas - * in GetPrivateRefCountEntrySlow()'s case, checking - * PrivateRefCountArrayKeys saves a lot of memory accesses. - */ - if (likely(PrivateRefCountEntryLast != -1) && - likely(PrivateRefCountArray[PrivateRefCountEntryLast].buffer == buffer)) - { - return &PrivateRefCountArray[PrivateRefCountEntryLast]; - } - - /* - * The code for the cached lookup is small enough to be worth inlining - * into the caller. In the miss case however, that empirically doesn't - * seem worth it. - */ - return GetPrivateRefCountEntrySlow(buffer, do_move); -} - -/* - * Returns how many times the passed buffer is pinned by this backend. - * - * Only works for shared memory buffers! - */ -static inline int32 -GetPrivateRefCount(Buffer buffer) -{ - PrivateRefCountEntry *ref; - - Assert(BufferIsValid(buffer)); - Assert(!BufferIsLocal(buffer)); - - /* - * Not moving the entry - that's ok for the current users, but we might - * want to change this one day. - */ - ref = GetPrivateRefCountEntry(buffer, false); - - if (ref == NULL) - return 0; - return ref->data.refcount; -} - -/* - * Release resources used to track the reference count of a buffer which we no - * longer have pinned and don't want to pin again immediately. - */ -static void -ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref) -{ - Assert(ref->data.refcount == 0); - Assert(ref->data.lockmode == BUFFER_LOCK_UNLOCK); - - if (ref >= &PrivateRefCountArray[0] && - ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]) - { - ref->buffer = InvalidBuffer; - PrivateRefCountArrayKeys[ref - PrivateRefCountArray] = InvalidBuffer; - - - /* - * Mark the just used entry as reserved - in many scenarios that - * allows us to avoid ever having to search the array/hash for free - * entries. 
- */ - ReservedRefCountSlot = ref - PrivateRefCountArray; - } - else - { - bool found; - Buffer buffer = ref->buffer; - - hash_search(PrivateRefCountHash, &buffer, HASH_REMOVE, &found); - Assert(found); - Assert(PrivateRefCountOverflowed > 0); - PrivateRefCountOverflowed--; - } -} - /* * BufferIsPinned * True iff the buffer is pinned (also checks for valid buffer number). @@ -596,7 +216,7 @@ ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref) BufferIsLocal(bufnum) ? \ (LocalRefCount[-(bufnum) - 1] > 0) \ : \ - (GetPrivateRefCount(bufnum) > 0) \ + (GetSharedBufferEntry(bufnum) != NULL) \ ) @@ -653,7 +273,6 @@ static void RelationCopyStorageUsingBuffer(RelFileLocator srclocator, RelFileLocator dstlocator, ForkNumber forkNum, bool permanent); static void AtProcExit_Buffers(int code, Datum arg); -static void CheckForBufferLeaks(void); #ifdef USE_ASSERT_CHECKING static void AssertNotCatalogBufferLock(Buffer buffer, BufferLockMode mode); #endif @@ -812,7 +431,6 @@ ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockN Assert(BufferIsValid(recent_buffer)); ResourceOwnerEnlarge(CurrentResourceOwner); - ReservePrivateRefCountEntry(); InitBufferTag(&tag, &rlocator, forkNum, blockNum); if (BufferIsLocal(recent_buffer)) @@ -2115,7 +1733,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* Make sure we will have room to remember the buffer pin */ ResourceOwnerEnlarge(CurrentResourceOwner); - ReservePrivateRefCountEntry(); /* create a tag so we can lookup the buffer */ InitBufferTag(&newTag, &smgr->smgr_rlocator.locator, forkNum, blockNum); @@ -2327,7 +1944,7 @@ retry: UnlockBufHdr(buf); LWLockRelease(oldPartitionLock); /* safety check: should definitely not be our *own* pin */ - if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0) + if (GetSharedBufferEntry(BufferDescriptorGetBuffer(buf)) != NULL) elog(ERROR, "buffer is pinned in InvalidateBuffer"); WaitIO(buf); goto retry; @@ -2380,7 +1997,7 @@ 
InvalidateVictimBuffer(BufferDesc *buf_hdr) LWLock *partition_lock; BufferTag tag; - Assert(GetPrivateRefCount(BufferDescriptorGetBuffer(buf_hdr)) == 1); + Assert(GetSharedBufferEntry(BufferDescriptorGetBuffer(buf_hdr)) != NULL); /* have buffer pinned, so it's safe to read tag without lock */ tag = buf_hdr->tag; @@ -2461,7 +2078,6 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context) * Ensure, before we pin a victim buffer, that there's a free refcount * entry and resource owner slot for the pin. */ - ReservePrivateRefCountEntry(); ResourceOwnerEnlarge(CurrentResourceOwner); /* we return here if a prospective victim buffer gets used concurrently */ @@ -2595,64 +2211,6 @@ again: return buf; } -/* - * Return the maximum number of buffers that a backend should try to pin once, - * to avoid exceeding its fair share. This is the highest value that - * GetAdditionalPinLimit() could ever return. Note that it may be zero on a - * system with a very small buffer pool relative to max_connections. - */ -uint32 -GetPinLimit(void) -{ - return MaxProportionalPins; -} - -/* - * Return the maximum number of additional buffers that this backend should - * pin if it wants to stay under the per-backend limit, considering the number - * of buffers it has already pinned. Unlike LimitAdditionalPins(), the limit - * return by this function can be zero. - */ -uint32 -GetAdditionalPinLimit(void) -{ - uint32 estimated_pins_held; - - /* - * We get the number of "overflowed" pins for free, but don't know the - * number of pins in PrivateRefCountArray. The cost of calculating that - * exactly doesn't seem worth it, so just assume the max. - */ - estimated_pins_held = PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES; - - /* Is this backend already holding more than its fair share? 
*/ - if (estimated_pins_held > MaxProportionalPins) - return 0; - - return MaxProportionalPins - estimated_pins_held; -} - -/* - * Limit the number of pins a batch operation may additionally acquire, to - * avoid running out of pinnable buffers. - * - * One additional pin is always allowed, on the assumption that the operation - * requires at least one to make progress. - */ -void -LimitAdditionalPins(uint32 *additional_pins) -{ - uint32 limit; - - if (*additional_pins <= 1) - return; - - limit = GetAdditionalPinLimit(); - limit = Max(limit, 1); - if (limit < *additional_pins) - *additional_pins = limit; -} - /* * Logic shared between ExtendBufferedRelBy(), ExtendBufferedRelTo(). Just to * avoid duplicating the tracing and relpersistence related logic. @@ -2816,7 +2374,6 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, /* in case we need to pin an existing buffer below */ ResourceOwnerEnlarge(CurrentResourceOwner); - ReservePrivateRefCountEntry(); InitBufferTag(&tag, &BMR_GET_SMGR(bmr)->smgr_rlocator.locator, fork, first_block + i); @@ -3188,9 +2745,8 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, PrivateRefCountEntry *ref; Assert(!BufferIsLocal(b)); - Assert(ReservedRefCountSlot != -1); - ref = GetPrivateRefCountEntry(b, true); + ref = GetSharedBufferEntry(b); if (ref == NULL) { @@ -3260,8 +2816,7 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, */ result = (pg_atomic_read_u64(&buf->state) & BM_VALID) != 0; - Assert(ref->data.refcount > 0); - ref->data.refcount++; + SharedBufferRefExisting(ref); ResourceOwnerRememberBuffer(CurrentResourceOwner, b); } @@ -3299,7 +2854,7 @@ PinBuffer_Locked(BufferDesc *buf) * As explained, We don't expect any preexisting pins. 
That allows us to * manipulate the PrivateRefCount after releasing the spinlock */ - Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL); + Assert(GetSharedBufferEntry(BufferDescriptorGetBuffer(buf)) == NULL); /* * Since we hold the buffer spinlock, we can update the buffer state and @@ -3376,11 +2931,10 @@ UnpinBufferNoOwner(BufferDesc *buf) Assert(!BufferIsLocal(b)); /* not moving as we're likely deleting it soon anyway */ - ref = GetPrivateRefCountEntry(b, false); + ref = GetSharedBufferEntry(b); Assert(ref != NULL); - Assert(ref->data.refcount > 0); - ref->data.refcount--; - if (ref->data.refcount == 0) + + if (SharedBufferUnref(ref)) { uint64 old_buf_state; @@ -3405,8 +2959,6 @@ UnpinBufferNoOwner(BufferDesc *buf) /* Support LockBufferForCleanup() */ if (old_buf_state & BM_PIN_COUNT_WAITER) WakePinCountWaiter(buf); - - ForgetPrivateRefCountEntry(ref); } } @@ -3417,10 +2969,7 @@ UnpinBufferNoOwner(BufferDesc *buf) inline void TrackNewBufferPin(Buffer buf) { - PrivateRefCountEntry *ref; - - ref = NewPrivateRefCountEntry(buf); - ref->data.refcount++; + SharedBufferRef(buf); ResourceOwnerRememberBuffer(CurrentResourceOwner, buf); @@ -4040,7 +3589,6 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context) BufferTag tag; /* Make sure we can handle the pin */ - ReservePrivateRefCountEntry(); ResourceOwnerEnlarge(CurrentResourceOwner); /* @@ -4104,11 +3652,9 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context) void AtEOXact_Buffers(bool isCommit) { - CheckForBufferLeaks(); + CheckPrivateRefCountLeaks(); AtEOXact_LocalBuffers(isCommit); - - Assert(PrivateRefCountOverflowed == 0); } /* @@ -4121,25 +3667,8 @@ AtEOXact_Buffers(bool isCommit) void InitBufferManagerAccess(void) { - HASHCTL hash_ctl; - - /* - * An advisory limit on the number of pins each backend should hold, based - * on shared_buffers and the maximum number of connections possible. 
- * That's very pessimistic, but outside toy-sized shared_buffers it should - * allow plenty of pins. LimitAdditionalPins() and - * GetAdditionalPinLimit() can be used to check the remaining balance. - */ - MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS); - - memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray)); - memset(&PrivateRefCountArrayKeys, 0, sizeof(PrivateRefCountArrayKeys)); - - hash_ctl.keysize = sizeof(Buffer); - hash_ctl.entrysize = sizeof(PrivateRefCountEntry); - PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl, - HASH_ELEM | HASH_BLOBS); + InitPrivateRefCount(); /* * AtProcExit_Buffers needs LWLock access, and thereby has to be called at @@ -4158,62 +3687,12 @@ AtProcExit_Buffers(int code, Datum arg) { UnlockBuffers(); - CheckForBufferLeaks(); + CheckPrivateRefCountLeaks(); /* localbuf.c needs a chance too */ AtProcExit_LocalBuffers(); } -/* - * CheckForBufferLeaks - ensure this backend holds no buffer pins - * - * As of PostgreSQL 8.0, buffer pins should get released by the - * ResourceOwner mechanism. This routine is just a debugging - * cross-check that no pins remain. 
- */ -static void -CheckForBufferLeaks(void) -{ -#ifdef USE_ASSERT_CHECKING - int RefCountErrors = 0; - PrivateRefCountEntry *res; - int i; - char *s; - - /* check the array */ - for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) - { - if (PrivateRefCountArrayKeys[i] != InvalidBuffer) - { - res = &PrivateRefCountArray[i]; - - s = DebugPrintBufferRefcount(res->buffer); - elog(WARNING, "buffer refcount leak: %s", s); - pfree(s); - - RefCountErrors++; - } - } - - /* if necessary search the hash */ - if (PrivateRefCountOverflowed) - { - HASH_SEQ_STATUS hstat; - - hash_seq_init(&hstat, PrivateRefCountHash); - while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL) - { - s = DebugPrintBufferRefcount(res->buffer); - elog(WARNING, "buffer refcount leak: %s", s); - pfree(s); - RefCountErrors++; - } - } - - Assert(RefCountErrors == 0); -#endif -} - #ifdef USE_ASSERT_CHECKING /* * Check for exclusive-locked catalog buffers. This is the core of @@ -4235,33 +3714,20 @@ CheckForBufferLeaks(void) void AssertBufferLocksPermitCatalogRead(void) { + PrivateRefCountIterator *iter; PrivateRefCountEntry *res; - /* check the array */ - for (int i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) + iter = InitPrivateRefCountIterator(); + while ((res = GetNextPrivateRefCountEntry(iter)) != NULL) { - if (PrivateRefCountArrayKeys[i] != InvalidBuffer) - { - res = &PrivateRefCountArray[i]; - - if (res->buffer == InvalidBuffer) - continue; - - AssertNotCatalogBufferLock(res->buffer, res->data.lockmode); - } - } + Buffer buf = SharedBufferGetBuffer(res); - /* if necessary search the hash */ - if (PrivateRefCountOverflowed) - { - HASH_SEQ_STATUS hstat; + if (buf == InvalidBuffer) + continue; - hash_seq_init(&hstat, PrivateRefCountHash); - while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL) - { - AssertNotCatalogBufferLock(res->buffer, res->data.lockmode); - } + AssertNotCatalogBufferLock(buf, SharedBufferGetLockMode(res)); } + FreePrivateRefCountIterator(iter); } static void 
@@ -4315,8 +3781,10 @@ DebugPrintBufferRefcount(Buffer buffer) } else { + PrivateRefCountEntry *ref = GetSharedBufferEntry(buffer); + buf = GetBufferDescriptor(buffer - 1); - loccount = GetPrivateRefCount(buffer); + loccount = ref ? SharedBufferRefCount(ref) : 0; backend = INVALID_PROC_NUMBER; } @@ -5102,7 +4570,6 @@ FlushRelationBuffers(Relation rel) error_context_stack = &errcallback; /* Make sure we can handle the pin */ - ReservePrivateRefCountEntry(); ResourceOwnerEnlarge(CurrentResourceOwner); /* @@ -5138,7 +4605,6 @@ FlushRelationBuffers(Relation rel) continue; /* Make sure we can handle the pin */ - ReservePrivateRefCountEntry(); ResourceOwnerEnlarge(CurrentResourceOwner); buf_state = LockBufHdr(bufHdr); @@ -5233,7 +4699,6 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels) continue; /* Make sure we can handle the pin */ - ReservePrivateRefCountEntry(); ResourceOwnerEnlarge(CurrentResourceOwner); buf_state = LockBufHdr(bufHdr); @@ -5459,7 +4924,6 @@ FlushDatabaseBuffers(Oid dbid) continue; /* Make sure we can handle the pin */ - ReservePrivateRefCountEntry(); ResourceOwnerEnlarge(CurrentResourceOwner); buf_state = LockBufHdr(bufHdr); @@ -5534,17 +4998,18 @@ UnlockReleaseBuffer(Buffer buffer) void IncrBufferRefCount(Buffer buffer) { - Assert(BufferIsPinned(buffer)); ResourceOwnerEnlarge(CurrentResourceOwner); if (BufferIsLocal(buffer)) + { + Assert(LocalRefCount[-buffer - 1] > 0); LocalRefCount[-buffer - 1]++; + } else { - PrivateRefCountEntry *ref; + PrivateRefCountEntry *ref = GetSharedBufferEntry(buffer); - ref = GetPrivateRefCountEntry(buffer, true); Assert(ref != NULL); - ref->data.refcount++; + SharedBufferRefExisting(ref); } ResourceOwnerRememberBuffer(CurrentResourceOwner, buffer); } @@ -5580,7 +5045,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) bufHdr = GetBufferDescriptor(buffer - 1); - Assert(GetPrivateRefCount(buffer) > 0); + Assert(GetSharedBufferEntry(buffer) != NULL); /* here, either share or exclusive lock is OK */ 
Assert(BufferIsLockedByMe(buffer)); @@ -5763,12 +5228,12 @@ BufferLockAcquire(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode) * Get reference to the refcount entry before we hold the lock, it seems * better to do before holding the lock. */ - entry = GetPrivateRefCountEntry(buffer, true); + entry = GetSharedBufferEntry(buffer); /* * We better not already hold a lock on the buffer. */ - Assert(entry->data.lockmode == BUFFER_LOCK_UNLOCK); + Assert(SharedBufferGetLockMode(entry) == BUFFER_LOCK_UNLOCK); /* * Lock out cancel/die interrupts until we exit the code section protected @@ -5857,7 +5322,7 @@ BufferLockAcquire(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode) } /* Remember that we now hold this lock */ - entry->data.lockmode = mode; + SharedBufferSetLockMode(entry, mode); /* * Fix the process wait semaphore's count for any absorbed wakeups. @@ -5908,7 +5373,7 @@ BufferLockUnlock(Buffer buffer, BufferDesc *buf_hdr) static bool BufferLockConditional(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode) { - PrivateRefCountEntry *entry = GetPrivateRefCountEntry(buffer, true); + PrivateRefCountEntry *entry = GetSharedBufferEntry(buffer); bool mustwait; /* @@ -5916,7 +5381,7 @@ BufferLockConditional(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode) * already has locked, return false, independent of the existing and * desired lock level. 
*/ - if (entry->data.lockmode != BUFFER_LOCK_UNLOCK) + if (SharedBufferGetLockMode(entry) != BUFFER_LOCK_UNLOCK) return false; /* @@ -5936,7 +5401,7 @@ BufferLockConditional(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode) } else { - entry->data.lockmode = mode; + SharedBufferSetLockMode(entry, mode); } return !mustwait; @@ -6146,11 +5611,11 @@ BufferLockDisownInternal(Buffer buffer, BufferDesc *buf_hdr) BufferLockMode mode; PrivateRefCountEntry *ref; - ref = GetPrivateRefCountEntry(buffer, false); + ref = GetSharedBufferEntry(buffer); if (ref == NULL) elog(ERROR, "lock %d is not held", buffer); - mode = ref->data.lockmode; - ref->data.lockmode = BUFFER_LOCK_UNLOCK; + mode = SharedBufferGetLockMode(ref); + SharedBufferSetLockMode(ref, BUFFER_LOCK_UNLOCK); return mode; } @@ -6384,12 +5849,12 @@ static bool BufferLockHeldByMeInMode(BufferDesc *buf_hdr, BufferLockMode mode) { PrivateRefCountEntry *entry = - GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf_hdr), false); + GetSharedBufferEntry(BufferDescriptorGetBuffer(buf_hdr)); if (!entry) return false; else - return entry->data.lockmode == mode; + return SharedBufferGetLockMode(entry) == mode; } /* @@ -6402,12 +5867,12 @@ static bool BufferLockHeldByMe(BufferDesc *buf_hdr) { PrivateRefCountEntry *entry = - GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf_hdr), false); + GetSharedBufferEntry(BufferDescriptorGetBuffer(buf_hdr)); if (!entry) return false; else - return entry->data.lockmode != BUFFER_LOCK_UNLOCK; + return SharedBufferGetLockMode(entry) != BUFFER_LOCK_UNLOCK; } /* @@ -6503,9 +5968,13 @@ CheckBufferIsPinnedOnce(Buffer buffer) } else { - if (GetPrivateRefCount(buffer) != 1) - elog(ERROR, "incorrect local pin count: %d", - GetPrivateRefCount(buffer)); + { + PrivateRefCountEntry *ref = GetSharedBufferEntry(buffer); + int32 refcount = ref ? 
SharedBufferRefCount(ref) : 0; + + if (refcount != 1) + elog(ERROR, "incorrect local pin count: %d", refcount); + } } } @@ -6686,7 +6155,7 @@ HoldingBufferPinThatDelaysRecovery(void) if (bufid < 0) return false; - if (GetPrivateRefCount(bufid + 1) > 0) + if (GetSharedBufferEntry(bufid + 1) != NULL) return true; return false; @@ -6721,8 +6190,12 @@ ConditionalLockBufferForCleanup(Buffer buffer) } /* There should be exactly one local pin */ - refcount = GetPrivateRefCount(buffer); - Assert(refcount); + { + PrivateRefCountEntry *ref = GetSharedBufferEntry(buffer); + + refcount = ref ? SharedBufferRefCount(ref) : 0; + Assert(refcount); + } if (refcount != 1) return false; @@ -6776,8 +6249,12 @@ IsBufferCleanupOK(Buffer buffer) } /* There should be exactly one local pin */ - if (GetPrivateRefCount(buffer) != 1) - return false; + { + PrivateRefCountEntry *ref = GetSharedBufferEntry(buffer); + + if (!ref || SharedBufferRefCount(ref) != 1) + return false; + } bufHdr = GetBufferDescriptor(buffer - 1); @@ -7447,7 +6924,7 @@ ResOwnerReleaseBuffer(Datum res) { PrivateRefCountEntry *ref; - ref = GetPrivateRefCountEntry(buffer, false); + ref = GetSharedBufferEntry(buffer); /* not having a private refcount would imply resowner corruption */ Assert(ref != NULL); @@ -7456,7 +6933,7 @@ ResOwnerReleaseBuffer(Datum res) * If the buffer was locked at the time of the resowner release, * release the lock now. This should only happen after errors. */ - if (ref->data.lockmode != BUFFER_LOCK_UNLOCK) + if (SharedBufferGetLockMode(ref) != BUFFER_LOCK_UNLOCK) { BufferDesc *buf = GetBufferDescriptor(buffer - 1); @@ -7549,7 +7026,6 @@ EvictUnpinnedBuffer(Buffer buf, bool *buffer_flushed) /* Make sure we can pin the buffer. 
*/ ResourceOwnerEnlarge(CurrentResourceOwner); - ReservePrivateRefCountEntry(); desc = GetBufferDescriptor(buf - 1); LockBufHdr(desc); @@ -7590,7 +7066,6 @@ EvictAllUnpinnedBuffers(int32 *buffers_evicted, int32 *buffers_flushed, continue; ResourceOwnerEnlarge(CurrentResourceOwner); - ReservePrivateRefCountEntry(); LockBufHdr(desc); @@ -7644,7 +7119,6 @@ EvictRelUnpinnedBuffers(Relation rel, int32 *buffers_evicted, /* Make sure we can pin the buffer. */ ResourceOwnerEnlarge(CurrentResourceOwner); - ReservePrivateRefCountEntry(); buf_state = LockBufHdr(desc); @@ -7736,7 +7210,6 @@ MarkDirtyUnpinnedBuffer(Buffer buf, bool *buffer_already_dirty) /* Make sure we can pin the buffer. */ ResourceOwnerEnlarge(CurrentResourceOwner); - ReservePrivateRefCountEntry(); desc = GetBufferDescriptor(buf - 1); LockBufHdr(desc); @@ -7789,7 +7262,6 @@ MarkDirtyRelUnpinnedBuffers(Relation rel, /* Make sure we can pin the buffer. */ ResourceOwnerEnlarge(CurrentResourceOwner); - ReservePrivateRefCountEntry(); buf_state = LockBufHdr(desc); @@ -7841,7 +7313,6 @@ MarkDirtyAllUnpinnedBuffers(int32 *buffers_dirtied, continue; ResourceOwnerEnlarge(CurrentResourceOwner); - ReservePrivateRefCountEntry(); LockBufHdr(desc); diff --git a/src/include/storage/buf_refcount.h b/src/include/storage/buf_refcount.h new file mode 100644 index 00000000000..842760ad2ee --- /dev/null +++ b/src/include/storage/buf_refcount.h @@ -0,0 +1,58 @@ +/*------------------------------------------------------------------------- + * + * buf_refcount.h + * Backend-private buffer refcount tracking + * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/buf_refcount.h + * + *------------------------------------------------------------------------- + */ +#ifndef BUF_REFCOUNT_H +#define BUF_REFCOUNT_H + +#include "storage/buf.h" +#include "storage/bufmgr.h" + +/* Opaque handle to a private refcount entry */ 
+typedef struct PrivateRefCountEntry PrivateRefCountEntry;
+
+/* Initialization; bufmgr.c calls this from InitBufferManagerAccess() */
+extern void InitPrivateRefCount(void);
+
+/* Pure lookup; bufmgr.c treats NULL as "not pinned by this backend" */
+extern PrivateRefCountEntry *GetSharedBufferEntry(Buffer buffer);
+
+/* Reference counting; a true result from Unref gates bufmgr.c's shared unpin */
+extern PrivateRefCountEntry *SharedBufferRef(Buffer buffer);
+extern void SharedBufferRefExisting(PrivateRefCountEntry *ref);
+extern bool SharedBufferUnref(PrivateRefCountEntry *ref);
+
+/* Accessors for an existing entry; TODO confirm whether a NULL ref is accepted */
+extern int32 SharedBufferRefCount(PrivateRefCountEntry *ref);
+extern BufferLockMode SharedBufferGetLockMode(PrivateRefCountEntry *ref);
+extern void SharedBufferSetLockMode(PrivateRefCountEntry *ref, BufferLockMode mode);
+extern Buffer SharedBufferGetBuffer(PrivateRefCountEntry *ref);
+
+/* Pin limiting */
+extern uint32 GetPinLimit(void);
+extern uint32 GetAdditionalPinLimit(void);
+extern void LimitAdditionalPins(uint32 *additional_pins);
+
+/* Leak checking; bufmgr.c calls this at end of transaction and at process exit */
+extern void CheckPrivateRefCountLeaks(void);
+
+/*
+ * Iterator for walking all private refcount entries.
+ * Used by AssertBufferLocksPermitCatalogRead() in bufmgr.c.
+ */
+typedef struct PrivateRefCountIterator PrivateRefCountIterator;
+
+extern PrivateRefCountIterator *InitPrivateRefCountIterator(void);
+extern PrivateRefCountEntry *GetNextPrivateRefCountEntry(PrivateRefCountIterator *iter);
+extern void FreePrivateRefCountIterator(PrivateRefCountIterator *iter);
+
+
+#endif /* BUF_REFCOUNT_H */
-- 
2.53.0