From 02875939fa45246140b34554c23eedccc66ba972 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sun, 24 Dec 2023 15:14:46 +0700 Subject: [PATCH v11 6/8] Try simply byte-swapping on BE machines and then handling like LE --- src/include/common/hashfn_unstable.h | 36 +++++++--------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h index 4fc9edba6e..5bc1fc88ec 100644 --- a/src/include/common/hashfn_unstable.h +++ b/src/include/common/hashfn_unstable.h @@ -14,6 +14,7 @@ the same hashes between versions. #define HASHFN_UNSTABLE_H #include "port/pg_bitutils.h" +#include "port/pg_bswap.h" /* * fasthash is a modification of code taken from @@ -152,9 +153,6 @@ fasthash_accum(fasthash_state *hs, const char *k, int len) #define haszero64(v) \ (((v) - 0x0101010101010101UL) & ~(v) & 0x8080808080808080UL) -#define SIM_BE 1 -#include "port/pg_bswap.h" - /* * With an aligned pointer, we consume the string a word at a time. Loading * the word containing the NUL terminator cannot segfault since page boundaries @@ -170,39 +168,27 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str) uint64 zero_bytes; Assert(PointerIsAligned(start, uint64)); - while (true) + for (;;) { uint64 chunk = *(uint64 *)buf; -#ifdef SIM_BE - uint64 low_bits = 0x7F7F7F7F7F7F7F7F; - chunk = pg_bswap64(chunk); /* simulate BE */ +#ifdef WORDS_BIGENDIAN + /* switch to little endian, to make later calculations easier */ + chunk = pg_bswap64(chunk); +#endif /* - * This expression evaluates has the useful property that all bytes in the result word - * that correspond to non-zero bytes in the original word have - * the value 0x00, while all bytes corresponding to zero bytes have - * the value 0x80. 
- */ zero_bytes = ~(((chunk & low_bits) + low_bits) | chunk | low_bits); -#else - /* - * On little endian machines, we can use a slightly faster calculation, + * With little-endian representation, we can use this calculation, * which sets bits in the first byte in the result word * that corresponds to a zero byte in the original word. * The rest of the bytes are indeterminate, so cannot be used - * on big-endian machines unless we resort to a bytewise check. + * on big-endian machines without either byte-swapping or a bytewise check. */ zero_bytes = haszero64(chunk); -#endif if (zero_bytes) break; -#ifdef SIM_BE - hs->accum = pg_bswap64(chunk); /* not needed with real BE, because we won't need the same answer */ -#else hs->accum = chunk; -#endif fasthash_combine(hs); buf += FH_SIZEOF_ACCUM; } @@ -210,14 +196,10 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str) /* * Bytes with set bits will be 0x80, so * calculate the first occurrence of a zero byte within the input word - * by counting the number of leading (on BE) or trailing (on LE) + * (always little-endian at this point) by counting the number of trailing * zeros and dividing the result by 8. */ -#ifdef SIM_BE - remainder = (63 - pg_leftmost_one_pos64(zero_bytes)) / BITS_PER_BYTE; -#else remainder = pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE; -#endif fasthash_accum(hs, buf, remainder); buf += remainder; -- 2.43.0