From 97c2996c4e081c5612c06da6c80ae87a1d273090 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Tue, 5 Mar 2024 16:59:39 +0700 Subject: [PATCH v20 3/3] Convert some frontend hash functions to fasthash Also go one step further and remove duplication of function definitions by creating a new function, hash_string(), in a new file hashfn_unstable.c. It's not clear how many of these are perfomance- sensitive enough to benefit from removing strlen() calls, but the simplification is worth it on its own. WIP: hash_string_with_len() for dynahash/dshash --- src/bin/pg_combinebackup/load_manifest.c | 16 ++--------- src/bin/pg_dump/pg_dumpall.c | 17 ++---------- src/bin/pg_rewind/filemap.c | 17 ++---------- src/bin/pg_verifybackup/pg_verifybackup.c | 16 ++--------- src/common/Makefile | 1 + src/common/blkreftable.c | 4 +-- src/common/hashfn_unstable.c | 34 +++++++++++++++++++++++ src/common/meson.build | 1 + src/include/common/hashfn_unstable.h | 3 ++ 9 files changed, 49 insertions(+), 60 deletions(-) create mode 100644 src/common/hashfn_unstable.c diff --git a/src/bin/pg_combinebackup/load_manifest.c b/src/bin/pg_combinebackup/load_manifest.c index 7bc10fbe10..d9e7f9e1b4 100644 --- a/src/bin/pg_combinebackup/load_manifest.c +++ b/src/bin/pg_combinebackup/load_manifest.c @@ -15,7 +15,7 @@ #include #include -#include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "common/logging.h" #include "common/parse_manifest.h" #include "load_manifest.h" @@ -38,12 +38,11 @@ * Define a hash table which we can use to store information about the files * mentioned in the backup manifest. */ -static uint32 hash_string_pointer(char *s); #define SH_PREFIX manifest_files #define SH_ELEMENT_TYPE manifest_file #define SH_KEY_TYPE char * #define SH_KEY pathname -#define SH_HASH_KEY(tb, key) hash_string_pointer(key) +#define SH_HASH_KEY(tb, key) hash_string(key) #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) #define SH_SCOPE extern #define SH_RAW_ALLOCATOR pg_malloc0 @@ -263,14 +262,3 @@ combinebackup_per_wal_range_cb(JsonManifestParseContext *context, manifest->last_wal_range->next = range; manifest->last_wal_range = range; } - -/* - * Helper function for manifest_files hash table. - */ -static uint32 -hash_string_pointer(char *s) -{ - unsigned char *ss = (unsigned char *) s; - - return hash_bytes(ss, strlen(s)); -} diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 046c0dc3b3..73337f3392 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -21,7 +21,7 @@ #include "catalog/pg_authid_d.h" #include "common/connect.h" #include "common/file_utils.h" -#include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "common/logging.h" #include "common/string.h" #include "dumputils.h" @@ -33,8 +33,6 @@ /* version string we expect back from pg_dump */ #define PGDUMP_VERSIONSTR "pg_dump (PostgreSQL) " PG_VERSION "\n" -static uint32 hash_string_pointer(char *s); - typedef struct { uint32 status; @@ -46,7 +44,7 @@ typedef struct #define SH_ELEMENT_TYPE RoleNameEntry #define SH_KEY_TYPE char * #define SH_KEY rolename -#define SH_HASH_KEY(tb, key) hash_string_pointer(key) +#define SH_HASH_KEY(tb, key) hash_string(key) #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) #define SH_STORE_HASH #define SH_GET_HASH(tb, a) (a)->hashval @@ -1938,17 +1936,6 @@ dumpTimestamp(const char *msg) fprintf(OPF, "-- %s %s\n\n", msg, buf); } -/* - * Helper function for rolename_hash hash table. - */ -static uint32 -hash_string_pointer(char *s) -{ - unsigned char *ss = (unsigned char *) s; - - return hash_bytes(ss, strlen(s)); -} - /* * read_dumpall_filters - retrieve database identifier patterns from file * diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index 255ddf2ffa..4458324c9d 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -28,7 +28,7 @@ #include "catalog/pg_tablespace_d.h" #include "common/file_utils.h" -#include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "common/string.h" #include "datapagemap.h" #include "filemap.h" @@ -38,12 +38,11 @@ * Define a hash table which we can use to store information about the files * appearing in source and target systems. */ -static uint32 hash_string_pointer(const char *s); #define SH_PREFIX filehash #define SH_ELEMENT_TYPE file_entry_t #define SH_KEY_TYPE const char * #define SH_KEY path -#define SH_HASH_KEY(tb, key) hash_string_pointer(key) +#define SH_HASH_KEY(tb, key) hash_string(key) #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) #define SH_SCOPE static inline #define SH_RAW_ALLOCATOR pg_malloc0 @@ -821,15 +820,3 @@ decide_file_actions(void) return filemap; } - - -/* - * Helper function for filemap hash table. - */ -static uint32 -hash_string_pointer(const char *s) -{ - unsigned char *ss = (unsigned char *) s; - - return hash_bytes(ss, strlen(s)); -} diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index 0e9b59f2a8..229c642633 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -19,7 +19,7 @@ #include #include "common/controldata_utils.h" -#include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "common/logging.h" #include "common/parse_manifest.h" #include "fe_utils/simple_list.h" @@ -68,12 +68,11 @@ typedef struct manifest_file * Define a hash table which we can use to store information about the files * mentioned in the backup manifest. */ -static uint32 hash_string_pointer(char *s); #define SH_PREFIX manifest_files #define SH_ELEMENT_TYPE manifest_file #define SH_KEY_TYPE char * #define SH_KEY pathname -#define SH_HASH_KEY(tb, key) hash_string_pointer(key) +#define SH_HASH_KEY(tb, key) hash_string(key) #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) #define SH_SCOPE static inline #define SH_RAW_ALLOCATOR pg_malloc0 @@ -986,17 +985,6 @@ should_ignore_relpath(verifier_context *context, char *relpath) return false; } -/* - * Helper function for manifest_files hash table. - */ -static uint32 -hash_string_pointer(char *s) -{ - unsigned char *ss = (unsigned char *) s; - - return hash_bytes(ss, strlen(s)); -} - /* * Print a progress report based on the global variables. * diff --git a/src/common/Makefile b/src/common/Makefile index 3d83299432..43a53e6d9d 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -59,6 +59,7 @@ OBJS_COMMON = \ file_perm.o \ file_utils.o \ hashfn.o \ + hashfn_unstable.o \ ip.o \ jsonapi.o \ keywords.o \ diff --git a/src/common/blkreftable.c b/src/common/blkreftable.c index bfa6f7ab5d..980f44c9df 100644 --- a/src/common/blkreftable.c +++ b/src/common/blkreftable.c @@ -37,7 +37,7 @@ #endif #include "common/blkreftable.h" -#include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "port/pg_crc32c.h" /* @@ -124,7 +124,7 @@ struct BlockRefTableEntry #define SH_KEY_TYPE BlockRefTableKey #define SH_KEY key #define SH_HASH_KEY(tb, key) \ - hash_bytes((const unsigned char *) &key, sizeof(BlockRefTableKey)) + fasthash32((const char *) &key, sizeof(BlockRefTableKey), 0) #define SH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(BlockRefTableKey)) == 0) #define SH_SCOPE static inline #ifdef FRONTEND diff --git a/src/common/hashfn_unstable.c b/src/common/hashfn_unstable.c new file mode 100644 index 0000000000..8a2fbd0c3e --- /dev/null +++ b/src/common/hashfn_unstable.c @@ -0,0 +1,34 @@ +/*------------------------------------------------------------------------- + * + * hashfn_unstable.c + * Convenience hashing functions based on hashfn_unstable.h. + * As described in that header, they must not be used in indexes + * or other on-disk structures. + * + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/hashfn_unstable.c + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "common/hashfn_unstable.h" + + +uint32 +hash_string(const char *s) +{ + fasthash_state hs; + size_t s_len; + + fasthash_init(&hs, 0); + + /* Hash string and save the length for tweaking the final mix. */ + s_len = fasthash_accum_cstring(&hs, s); + + return fasthash_final32(&hs, s_len); +} diff --git a/src/common/meson.build b/src/common/meson.build index de68e408fa..f5bf755b89 100644 --- a/src/common/meson.build +++ b/src/common/meson.build @@ -13,6 +13,7 @@ common_sources = files( 'file_perm.c', 'file_utils.c', 'hashfn.c', + 'hashfn_unstable.c', 'ip.c', 'jsonapi.c', 'keywords.c', diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h index 8998475ccf..3b1e6bf2b5 100644 --- a/src/include/common/hashfn_unstable.h +++ b/src/include/common/hashfn_unstable.h @@ -394,4 +394,7 @@ fasthash32(const char *k, size_t len, uint64 seed) return fasthash_reduce32(fasthash64(k, len, seed)); } + +extern uint32 hash_string(const char *s); + #endif /* HASHFN_UNSTABLE_H */ -- 2.44.0