From ec447cc9a9718421883d9619e9dde1b5df3ada9c Mon Sep 17 00:00:00 2001
From: John Naylor
Date: Wed, 20 Dec 2023 13:08:46 +0700
Subject: [PATCH v11 5/5] Optimize tail with inspiration from OpenBSD

This only works on little endian, so add guard for that and
for 64-bit. Word-at-a-time NUL checks are not worth the
extra complexity for 32-bit platforms. There is an algorithm
that works for big-endian, but this is all just demonstration
anyway.
---
 src/backend/catalog/namespace.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index cb840ce9dd..2046d6788d 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -253,20 +253,36 @@ cstring_hash_aligned(const char *str, uint64 seed)
 {
 	const char *const start = str;
 	const char *buf = start;
+	// todo: this is now really "remainder"
 	int			chunk_len = 0;
+	uint64		zero_bytes, chunk;
 	fasthash_state hs;
 
 	fasthash_init(&hs, FH_UNKNOWN_LENGTH, seed);
 
+	// WIP: if this is the common case, we could have an "unlikely" bytewise preamble
 	Assert(PointerIsAligned(start, uint64));
-	while (!haszero64(*(uint64 *)buf))
+	while (true)
 	{
+		chunk = *(uint64 *)buf;
+		zero_bytes = (chunk - 0x0101010101010101UL) & 0x8080808080808080UL;
+
+		// WIP: this is from OpenBSD strlen -- the extra branch is probably not worth it for short strings
+		if (zero_bytes)
+		{
+			// only needed if the input can have the high bit set
+			zero_bytes &= ~chunk;
+			if (zero_bytes)
+				break;
+		}
+		// WIP: since we have the chunk already, maybe just combine it directly?
 		fasthash_accum64(&hs, buf);
 		buf += sizeof(uint64);
 	}
 
-	while (buf[chunk_len] != '\0')
-		chunk_len++;
+	// XXX little-endian only: the lowest set bit marks the first NUL byte. See
+	// https://github.com/openbsd/src/blob/master/lib/libc/arch/amd64/string/strlen.S
+	chunk_len = pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE;
 	fasthash_accum(&hs, buf, chunk_len);
 	buf += chunk_len;
 
@@ -300,9 +316,11 @@ cstring_hash_unaligned(const char *str, uint64 seed)
 static inline uint32
 spcachekey_hash(SearchPathCacheKey key)
 {
+#if ((SIZEOF_VOIDP == 8) && !defined(WORDS_BIGENDIAN))
 	if (PointerIsAligned(key.searchPath, uint64))
 		return cstring_hash_aligned(key.searchPath, key.roleid);
 	else
+#endif
 		return cstring_hash_unaligned(key.searchPath, key.roleid);
 }
 
-- 
2.43.0