From 54ef850a0bb909b242ec553b3ea84853611ec233 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sun, 21 Jan 2024 16:04:16 +0700 Subject: [PATCH v15 2/4] Use fasthash for dynahash's default string hash This avoids strlen calls. string_hash is kept around in case extensions are using it. --- src/backend/utils/hash/dynahash.c | 52 +++++++++++++++++++++++++++---- src/common/hashfn.c | 3 +- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index a4152080b5..92c7989575 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -98,6 +98,7 @@ #include "access/xact.h" #include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" @@ -307,6 +308,45 @@ string_compare(const char *key1, const char *key2, Size keysize) return strncmp(key1, key2, keysize - 1); } +/* + * default_string_hash: hash function for keys that are NUL-terminated strings. + * + * NOTE: this is the default hash function if none is specified. + */ +static uint32 +default_string_hash(const void *key, Size keysize) +{ + const char *k = (const char *) key; + Size s_len = 0; + fasthash_state hs; + + /* + * If the string exceeds keysize-1 bytes, we want to hash only that many, + * because when it is copied into the hash table it will be truncated at + * that length. 
+ */ + + fasthash_init(&hs, 0); + + while (*k && s_len < keysize - 1) + { + int chunk_len = 0; + + while (k[chunk_len] != '\0' && + s_len < keysize - 1 && + chunk_len < FH_SIZEOF_ACCUM) + { + chunk_len++; + s_len++; + } + + fasthash_accum(&hs, k, chunk_len); + k += chunk_len; + } + + return fasthash_final32(&hs, s_len); +} + /************************** CREATE ROUTINES **********************/ @@ -418,8 +458,8 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) else { /* - * string_hash used to be considered the default hash method, and in a - * non-assert build it effectively still is. But we now consider it + * string_hash used to be considered the default hash method, and + * it effectively still was until version 17. Since version 14 we consider it * an assertion error to not say HASH_STRINGS explicitly. To help * catch mistaken usage of HASH_STRINGS, we also insist on a * reasonably long string length: if the keysize is only 4 or 8 bytes, @@ -428,12 +468,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) Assert(flags & HASH_STRINGS); Assert(info->keysize > 8); - hashp->hash = string_hash; + hashp->hash = default_string_hash; } /* * If you don't specify a match function, it defaults to string_compare if - * you used string_hash, and to memcmp otherwise. + * you used default_string_hash, and to memcmp otherwise. 
* * Note: explicitly specifying string_hash is deprecated, because this * might not work for callers in loadable modules on some platforms due to @@ -442,7 +482,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) */ if (flags & HASH_COMPARE) hashp->match = info->match; - else if (hashp->hash == string_hash) + else if (hashp->hash == default_string_hash) hashp->match = (HashCompareFunc) string_compare; else hashp->match = memcmp; @@ -452,7 +492,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) */ if (flags & HASH_KEYCOPY) hashp->keycopy = info->keycopy; - else if (hashp->hash == string_hash) + else if (hashp->hash == default_string_hash) { /* * The signature of keycopy is meant for memcpy(), which returns diff --git a/src/common/hashfn.c b/src/common/hashfn.c index 4db468cf85..3090b3cbd9 100644 --- a/src/common/hashfn.c +++ b/src/common/hashfn.c @@ -654,7 +654,8 @@ hash_bytes_uint32_extended(uint32 k, uint64 seed) /* * string_hash: hash function for keys that are NUL-terminated strings. * - * NOTE: this is the default hash function if none is specified. + * NOTE: this was the default string hash for dynahash until version 17, + * and is now here only for backward compatibility. */ uint32 string_hash(const void *key, Size keysize) -- 2.43.0