From b413ee15d17f933039ad4eff1bd4aedea0f37d20 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sat, 9 Dec 2023 16:32:05 +0700 Subject: [PATCH v7 06/13] Add bytewise interface This is useful for hashing values with unknown length, like NUL-terminated strings. It should be faster than calling strlen() first and passing the length, which most hash functions require. Note: This method can't give the same answer as regular fasthash, so it will need to be evaluated. It's possible we need to mix in the length at the finalization step (at which time we can know the length), in order to safeguard against collisions. --- src/include/common/hashfn_unstable.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h index 7ed1e5335a..a798c42ba7 100644 --- a/src/include/common/hashfn_unstable.h +++ b/src/include/common/hashfn_unstable.h @@ -49,6 +49,7 @@ typedef struct fasthash_state { uint64 accum; #define FH_SIZEOF_ACCUM sizeof(uint64) + int8 accum_len; uint64 hash; } fasthash_state; @@ -69,6 +70,7 @@ fasthash_combine(fasthash_state* hs) /* reset hash state for next input */ hs->accum = 0; + hs->accum_len = 0; } static inline void @@ -82,6 +84,18 @@ fasthash_init(fasthash_state *hs, int len, uint64 seed) hs->hash = seed ^ (len * UINT64CONST(0x880355f21e6d1965)); } +static inline void +fasthash_accum_byte(fasthash_state *hs, const unsigned char ch) +{ + hs->accum <<= BITS_PER_BYTE; + hs->accum |= ch; + hs->accum_len++; + + // wip: is there a better way to get sizeof struct member? 
+ if (hs->accum_len == sizeof(((fasthash_state *) 0)->accum)) + fasthash_combine(hs); +} + static inline void fasthash_accum(fasthash_state *hs, const unsigned char *k, int len) { @@ -117,6 +131,11 @@ fasthash_accum(fasthash_state *hs, const unsigned char *k, int len) static inline uint64 fasthash_final64(fasthash_state *hs) { + // check for remaining bytes to combine into hash + // should only be used by the bytewise interface + if (hs->accum_len > 0) + fasthash_combine(hs); + return fasthash_mix(hs->hash); } -- 2.43.0