From 225073b1741fd89ec7728e28978e057e9dc11116 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Wed, 11 Aug 2021 12:22:24 -0400 Subject: [PATCH v1 2/2] Replace intrinsics in pg_popcount*_slow with pure C code Intrinsics are used in the hope that the compiler will access some fast hardware implementation where available. However, on x86 at least, __builtin_popcount() didn't emit a POPCNT instruction since -mpopcnt wasn't passed to the compiler. Instead, the compiler emitted bitwise operations where the intrinsic was supported. Where not supported, we used a byte-at-a-time loop using a lookup table. Since the *slow functions are fallback implementations, replace the intrinsics and the associated #ifdef maze with the bitwise operations written in C so all platforms can benefit from them. If we ever get configure support for x86-64-v2, we could use these intrinsics to emit the POPCNT instruction without a runtime check. To allow for that possibility, let's keep the configure checks around. --- src/port/pg_bitutils.c | 47 ++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index 3e90de5249..500372db10 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -207,6 +207,11 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc"); #endif /* TRY_POPCNT_FAST */ +/* + * The *_slow implementations are based on + * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + */ + /* * pg_popcount32_slow * Return the number of 1 bits set in word @@ -214,19 +219,11 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc"); int pg_popcount32_slow(uint32 word) { -#ifdef HAVE__BUILTIN_POPCOUNT - return __builtin_popcount(word); -#else /* !HAVE__BUILTIN_POPCOUNT */ - int result = 0; - - while (word != 0) - { - result += pg_number_of_ones[word & 255]; - word >>= 8; - } - - return result; -#endif /* HAVE__BUILTIN_POPCOUNT */ + word = word - ((word >> 1) & 0x55555555); + word = (word & 0x33333333) + + ((word >> 2) & 0x33333333); + word = (word + (word >> 4)) & 0xF0F0F0F; + return (int) ((word * 0x1010101) >> 24); } /* @@ -236,25 +233,11 @@ pg_popcount32_slow(uint32 word) int pg_popcount64_slow(uint64 word) { -#ifdef HAVE__BUILTIN_POPCOUNT -#if defined(HAVE_LONG_INT_64) - return __builtin_popcountl(word); -#elif defined(HAVE_LONG_LONG_INT_64) - return __builtin_popcountll(word); -#else -#error must have a working 64-bit integer datatype -#endif -#else /* !HAVE__BUILTIN_POPCOUNT */ - int result = 0; - - while (word != 0) - { - result += pg_number_of_ones[word & 255]; - word >>= 8; - } - - return result; -#endif /* HAVE__BUILTIN_POPCOUNT */ + word = word - ((word >> 1) & UINT64CONST(0x5555555555555555)); + word = (word & UINT64CONST(0x3333333333333333)) + + ((word >> 2) & UINT64CONST(0x3333333333333333)); + word = (word + (word >> 4)) & UINT64CONST(0xF0F0F0F0F0F0F0F); + return (int) ((word * UINT64CONST(0x101010101010101)) >> 56); } -- 2.31.1