From 2e41e683b2fe2bc76b808e58ca2fea9067bff4e1 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 5 Apr 2024 13:59:13 +0700 Subject: [PATCH v21] Use fasthash for string keys in dynahash and dshash This avoids strlen calls. string_hash is kept around in case extensions are using it. --- src/backend/lib/dshash.c | 5 +++-- src/backend/utils/hash/dynahash.c | 25 ++++++++++++++++----- src/common/hashfn.c | 4 +++- src/include/common/hashfn_unstable.h | 33 ++++++++++++++++++++++++++++ src/include/lib/dshash.h | 2 +- 5 files changed, 59 insertions(+), 10 deletions(-) diff --git a/src/backend/lib/dshash.c b/src/backend/lib/dshash.c index 93a9e21ddd..8bebf92cb8 100644 --- a/src/backend/lib/dshash.c +++ b/src/backend/lib/dshash.c @@ -32,6 +32,7 @@ #include "postgres.h" #include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "lib/dshash.h" #include "storage/lwlock.h" #include "utils/dsa.h" @@ -605,14 +606,14 @@ dshash_strcmp(const void *a, const void *b, size_t size, void *arg) } /* - * A hash function that forwards to string_hash. + * A hash function that forwards to hash_string_with_len. */ dshash_hash dshash_strhash(const void *v, size_t size, void *arg) { Assert(strlen((const char *) v) < size); - return string_hash((const char *) v, size); + return hash_string_with_len((const char *) v, size - 1); } /* diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 145e058fe6..9b85af2743 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -98,6 +98,7 @@ #include "access/xact.h" #include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" @@ -309,6 +310,18 @@ string_compare(const char *key1, const char *key2, Size keysize) return strncmp(key1, key2, keysize - 1); } +/* + * hash function used when HASH_STRINGS is set + * + * If the string exceeds keysize-1 bytes, we want to hash only that many, + * because when it is copied into the hash table it will be truncated at + * that length. + */ +static uint32 +default_string_hash(const void *key, Size keysize) +{ + return hash_string_with_len((const char *) key, keysize - 1); +} /************************** CREATE ROUTINES **********************/ @@ -420,8 +433,8 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) else { /* - * string_hash used to be considered the default hash method, and in a - * non-assert build it effectively still is. But we now consider it + * string_hash used to be considered the default hash method, and + * it effectively still was until version 17. Since version 14 we consider it * an assertion error to not say HASH_STRINGS explicitly. To help * catch mistaken usage of HASH_STRINGS, we also insist on a * reasonably long string length: if the keysize is only 4 or 8 bytes, @@ -430,12 +443,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) Assert(flags & HASH_STRINGS); Assert(info->keysize > 8); - hashp->hash = string_hash; + hashp->hash = default_string_hash; } /* * If you don't specify a match function, it defaults to string_compare if - * you used string_hash, and to memcmp otherwise. + * you used default_string_hash, and to memcmp otherwise. * * Note: explicitly specifying string_hash is deprecated, because this * might not work for callers in loadable modules on some platforms due to @@ -444,7 +457,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) */ if (flags & HASH_COMPARE) hashp->match = info->match; - else if (hashp->hash == string_hash) + else if (hashp->hash == default_string_hash) hashp->match = (HashCompareFunc) string_compare; else hashp->match = memcmp; @@ -454,7 +467,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) */ if (flags & HASH_KEYCOPY) hashp->keycopy = info->keycopy; - else if (hashp->hash == string_hash) + else if (hashp->hash == default_string_hash) { /* * The signature of keycopy is meant for memcpy(), which returns diff --git a/src/common/hashfn.c b/src/common/hashfn.c index 4db468cf85..5cfa4815a1 100644 --- a/src/common/hashfn.c +++ b/src/common/hashfn.c @@ -654,7 +654,9 @@ hash_bytes_uint32_extended(uint32 k, uint64 seed) /* * string_hash: hash function for keys that are NUL-terminated strings. * - * NOTE: this is the default hash function if none is specified. + * NOTE: this was the default string hash for dynahash until version 17, + * and is now here only for backward compatibility. It's more efficient + * to use hash_string_with_len instead. */ uint32 string_hash(const void *key, Size keysize) diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h index 7b647470ab..38abf690a0 100644 --- a/src/include/common/hashfn_unstable.h +++ b/src/include/common/hashfn_unstable.h @@ -450,4 +450,37 @@ hash_string(const char *s) return fasthash_final32(&hs, s_len); } +/* + * hash_string_with_len: Like hash_string but only hashes + * 'len' bytes. Callers will likely want to 'len' to be + * one less than the space they have in mind, to leave + * room for the NUL terminator. + */ +static inline uint32 +hash_string_with_len(const char *s, Size len) +{ + fasthash_state hs; + size_t s_len = 0; + + fasthash_init(&hs, 0); + + while (*s && s_len < len) + { + int chunk_len = 0; + + while (s[chunk_len] != '\0' && + s_len < len && + chunk_len < FH_SIZEOF_ACCUM) + { + chunk_len++; + s_len++; + } + + fasthash_accum(&hs, s, chunk_len); + s += chunk_len; + } + + return fasthash_final32(&hs, s_len); +} + #endif /* HASHFN_UNSTABLE_H */ diff --git a/src/include/lib/dshash.h b/src/include/lib/dshash.h index 7dda269b75..b72903c02e 100644 --- a/src/include/lib/dshash.h +++ b/src/include/lib/dshash.h @@ -117,7 +117,7 @@ extern dshash_hash dshash_memhash(const void *v, size_t size, void *arg); extern void dshash_memcpy(void *dest, const void *src, size_t size, void *arg); /* - * Convenience hash, compare, and copy functions wrapping strcmp, string_hash, + * Convenience hash, compare, and copy functions wrapping strcmp, hash_string_with_len, * and strcpy. */ extern int dshash_strcmp(const void *a, const void *b, size_t size, void *arg); -- 2.44.0