From 8debe072c3123ddbe065d677cf63c091414c03f9 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 28 Mar 2026 00:32:29 +0200 Subject: [PATCH v1 6/8] Allocate all parts of shmem hash table from a single contiguous area Previously, the shared header (HASHHDR) and the directory were allocated by the caller, and passed to hash_create(), while the actual elements were allocated separately with ShmemAlloc(). After this commit, all the memory needed by the header, the directory, and all the elements is allocated using a single ShmemInitStruct() call, and the different parts are carved out of that allocation. This way the ShmemIndex entries (and thus pg_shmem_allocations) reflect the size of the whole hash table. Commit f5930f9a98 attempted this earlier, but it had to be reverted. The new strategy is to let dynahash perform all the allocations with the alloc function, but have the alloc function carve out the parts from the one larger allocation. The shared header and the directory are now also allocated with alloc calls, instead of passing the area for those directly from the caller. --- src/backend/storage/ipc/shmem.c | 71 +++++++++++++++++++++------- src/backend/utils/hash/dynahash.c | 78 +++++++++++++------------------ src/include/utils/hsearch.h | 5 +- src/tools/pgindent/typedefs.list | 1 + 4 files changed, 91 insertions(+), 64 deletions(-) diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index 47065bb3603..c8171125871 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -112,6 +112,16 @@ static bool firstNumaTouch = true; Datum pg_numa_available(PG_FUNCTION_ARGS); +/* + * A very simple allocator used to carve out different parts of a hash table, + * from a previously allocated contiguous shared memory area. 
+ */ +typedef struct shmem_hash_allocator +{ + char *next; /* start of free space in the area */ + char *end; /* end of the shmem area */ +} shmem_hash_allocator; + /* * InitShmemAllocator() --- set up basic pointers to shared memory. * @@ -126,7 +136,7 @@ InitShmemAllocator(PGShmemHeader *seghdr) Size offset; HASHCTL info; int hash_flags; - size_t size; + shmem_hash_allocator allocator; #ifndef EXEC_BACKEND Assert(!IsUnderPostmaster); @@ -182,15 +192,27 @@ InitShmemAllocator(PGShmemHeader *seghdr) info.dsize = info.max_dsize = hash_select_dirsize(SHMEM_INDEX_SIZE); info.alloc = ShmemHashAlloc; info.alloc_arg = NULL; - hash_flags = HASH_ELEM | HASH_STRINGS | HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE | HASH_FIXED_SIZE; + hash_flags = HASH_ELEM | HASH_STRINGS + | HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE | HASH_FIXED_SIZE; + if (!IsUnderPostmaster) { - size = hash_get_shared_size(&info, hash_flags); - ShmemAllocator->index = (HASHHDR *) ShmemAlloc(size); + size_t size = hash_estimate_size(SHMEM_INDEX_SIZE, info.entrysize); + char *location = ShmemAlloc(size); + + allocator.next = location; + allocator.end = location + size; + info.alloc_arg = &allocator; + + info.hctl = NULL; + hash_flags |= HASH_ALLOC | HASH_FIXED_SIZE; + ShmemAllocator->index = (HASHHDR *) location; } else + { + info.hctl = ShmemAllocator->index; hash_flags |= HASH_ATTACH; - info.hctl = ShmemAllocator->index; + } ShmemIndex = hash_create("ShmemIndex", SHMEM_INDEX_SIZE, &info, hash_flags); Assert(ShmemIndex != NULL); } @@ -233,9 +255,17 @@ ShmemAllocNoError(Size size) static void * ShmemHashAlloc(Size size, void *alloc_arg) { - Size allocated_size; + shmem_hash_allocator *allocator = (shmem_hash_allocator *) alloc_arg; + void *result; - return ShmemAllocRaw(size, &allocated_size); + size = MAXALIGN(size); + + if (allocator->end - allocator->next < size) + return NULL; + result = allocator->next; + allocator->next += size; + + return result; } /* @@ -321,12 +351,14 @@ ShmemAddrIsValid(const void 
*addr) */ HTAB * ShmemInitHash(const char *name, /* table string name for shmem index */ - int64 nelems, /* size of the table */ + int64 nelems, /* size of the table */ HASHCTL *infoP, /* info about key and bucket size */ int hash_flags) /* info about infoP */ { bool found; + size_t size; void *location; + shmem_hash_allocator allocator; /* * Hash tables allocated in shared memory have a fixed directory; it can't @@ -341,20 +373,27 @@ ShmemInitHash(const char *name, /* table string name for shmem index */ infoP->alloc_arg = NULL; hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE | HASH_FIXED_SIZE; - /* look it up in the shmem index */ - location = ShmemInitStruct(name, - hash_get_shared_size(infoP, hash_flags), - &found); + size = hash_estimate_size(nelems, infoP->entrysize); + + /* look it up in the shmem index or allocate */ + location = ShmemInitStruct(name, size, &found); /* * if it already exists, attach to it rather than allocate and initialize * new space */ - if (found) + if (!found) + { + allocator.next = (char *) location; + allocator.end = (char *) location + size; + infoP->alloc_arg = &allocator; + } + else + { + /* Pass location of hashtable header to hash_create */ + infoP->hctl = (HASHHDR *) location; hash_flags |= HASH_ATTACH; - - /* Pass location of hashtable header to hash_create */ - infoP->hctl = (HASHHDR *) location; + } return hash_create(name, nelems, infoP, hash_flags); } diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index d3dd16a4300..1173304ef0f 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -195,6 +195,9 @@ struct HASHHDR int nelem_alloc; /* number of entries to allocate at once */ bool isfixed; /* if true, don't enlarge */ + /* Current directory. 
In shared tables, this doesn't change */ + HASHSEGMENT *dir; + #ifdef HASH_STATISTICS /* @@ -224,7 +227,7 @@ struct HTAB HashCompareFunc match; /* key comparison function */ HashCopyFunc keycopy; /* key copying function */ HashAllocFunc alloc; /* memory allocator */ - void *alloc_arg; /* opaque argument to pass to allocator function */ + void *alloc_arg; /* opaque argument to pass to alloc function */ MemoryContext hcxt; /* memory context if default allocator used */ char *tabname; /* table name (for error messages) */ bool isshared; /* true if table is in shared memory */ @@ -294,7 +297,7 @@ DynaHashAlloc(Size size, void *alloc_arg) MemoryContext cxt = (MemoryContext) alloc_arg; Assert(MemoryContextIsValid(cxt)); - return MemoryContextAllocExtended(cxt, size, MCXT_ALLOC_NO_OOM); + return MemoryContextAllocExtended(cxt, size, MCXT_ALLOC_NO_OOM); } @@ -374,6 +377,8 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) * hash_destroy very simple. The memory context is made a child of either * a context specified by the caller, or TopMemoryContext if nothing is * specified. + * + * Note that HASH_DIRSIZE and HASH_ALLOC had better be set as well. */ if (flags & HASH_SHARED_MEM) { @@ -486,21 +491,18 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) if (flags & HASH_SHARED_MEM) { - /* - * ctl structure and directory are preallocated for shared memory - * tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set as - * well. 
- */ - hashp->hctl = info->hctl; - hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR)); hashp->hcxt = NULL; hashp->isshared = true; /* hash table already exists, we're just attaching to it */ if (flags & HASH_ATTACH) { + hctl = info->hctl; + + hashp->hctl = hctl; + hashp->dir = hctl->dir; + /* make local copies of some heavily-used values */ - hctl = hashp->hctl; hashp->keysize = hctl->keysize; return hashp; @@ -515,14 +517,20 @@ hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) hashp->isshared = false; } + /* + * Allocate the header structure. + * + * XXX: In case of a shared memory hash table, other processes need the + * pointer to the header to re-find the hash table. There is currently no + * explicit way to pass it back from here; the caller relies on the fact + * that this is the first allocation made with the alloc function. That's + * a little ugly, but works for now. + */ + hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR), hashp->alloc_arg); if (!hashp->hctl) - { - hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR), hashp->alloc_arg); - if (!hashp->hctl) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); hashp->frozen = false; @@ -725,25 +733,17 @@ init_htab(HTAB *hashp, int64 nelem) nsegs = next_pow2_int(nsegs); /* - * Make sure directory is big enough. If pre-allocated directory is too - * small, choke (caller screwed up). 
*/ if (nsegs > hctl->dsize) - { - if (!(hashp->dir)) - hctl->dsize = nsegs; - else - return false; - } + hctl->dsize = nsegs; /* Allocate a directory */ - if (!(hashp->dir)) - { - hashp->dir = (HASHSEGMENT *) - hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT), hashp->alloc_arg); - if (!hashp->dir) - return false; - } + hctl->dir = (HASHSEGMENT *) + hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT), hashp->alloc_arg); + if (!hctl->dir) + return false; + hashp->dir = hctl->dir; /* Allocate initial segments */ for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++) @@ -832,19 +832,6 @@ hash_select_dirsize(int64 num_entries) return nDirEntries; } -/* - * Compute the required initial memory allocation for a shared-memory - * hashtable with the given parameters. We need space for the HASHHDR - * and for the (non expansible) directory. - */ -Size -hash_get_shared_size(HASHCTL *info, int flags) -{ - Assert(flags & HASH_DIRSIZE); - Assert(info->dsize == info->max_dsize); - return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT); -} - /********************** DESTROY ROUTINES ************************/ @@ -1648,6 +1635,7 @@ dir_realloc(HTAB *hashp) { memcpy(p, old_p, old_dirsize); MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize); + hashp->hctl->dir = p; hashp->dir = p; hashp->hctl->dsize = new_dsize; diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h index 03bc1c171cd..b60ae20acc7 100644 --- a/src/include/utils/hsearch.h +++ b/src/include/utils/hsearch.h @@ -80,10 +80,10 @@ typedef struct HASHCTL HashCopyFunc keycopy; /* key copying function */ /* Used if HASH_ALLOC flag is set: */ HashAllocFunc alloc; /* memory allocator */ - void *alloc_arg; /* opaque argument to pass to allocator function */ + void *alloc_arg; /* opaque argument to pass to alloc */ /* Used if HASH_CONTEXT flag is set: */ MemoryContext hcxt; /* memory context to use for allocations */ - /* Used if HASH_SHARED_MEM flag is set: */ + /* Used if HASH_ATTACH flag is 
set: */ HASHHDR *hctl; /* location of header in shared mem */ } HASHCTL; @@ -150,7 +150,6 @@ extern void hash_seq_term(HASH_SEQ_STATUS *status); extern void hash_freeze(HTAB *hashp); extern Size hash_estimate_size(int64 num_entries, Size entrysize); extern int64 hash_select_dirsize(int64 num_entries); -extern Size hash_get_shared_size(HASHCTL *info, int flags); extern void AtEOXact_HashTables(bool isCommit); extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 712d84128ca..5f6502be030 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -4209,6 +4209,7 @@ shm_mq_result shm_toc shm_toc_entry shm_toc_estimator +shmem_hash_allocator shmem_request_hook_type shmem_startup_hook_type sig_atomic_t -- 2.47.3