Move the shift-and-test logic into a separate fls() function, which
can use __builtin_clz() if it's available.
This requires a new check for __builtin_clz in the configure script.
Results in a ~2% performance increase on PowerPC.
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
---
v3: respin as context diff
---
 configure.in                  | 13 +++++++++++++
 src/backend/utils/mmgr/aset.c | 34 +++++++++++++++++++++++++++-------
 2 files changed, 40 insertions(+), 7 deletions(-)
*** a/configure.in
--- b/configure.in
***************
*** 1361,1366 **** case $host_os in
--- 1361,1379 ---- AC_FUNC_FSEEKO;; esac
+ # GCC builtins
+ #
+ # We need AC_TRY_LINK here, as the prototype generated by AC_CHECK_FUNC
+ # will cause gcc to try to reference a non-builtin symbol.
+
+ AC_MSG_CHECKING([for __builtin_clz])
+ AC_TRY_LINK([],
+ [__builtin_clz(0);],
+ [AC_DEFINE(HAVE_BUILTIN_CLZ, 1,
+ [Define to 1 if you have __builtin_clz().])
+ AC_MSG_RESULT(yes)],
+ [AC_MSG_RESULT(no)])
+ # # Pthreads
*** a/src/backend/utils/mmgr/aset.c
--- b/src/backend/utils/mmgr/aset.c
***************
*** 255,260 **** static MemoryContextMethods AllocSetMethods = {
--- 255,285 ---- #define AllocAllocInfo(_cxt, _chunk) #endif
+ /*
+ * fls: find last set bit.
+ *
+ * Returns the 1-based index of the most-significant bit in x. The MSB
+ * is bit number 32, the LSB is bit number 1. If x is zero, the result is
+ * undefined.
+ */
+ static inline int
+ fls(unsigned int x)
+ {
+ #ifdef HAVE_BUILTIN_CLZ
+ return 32 - __builtin_clz(x);
+ #else
+ int ls = 0;
+
+ while (x != 0)
+ {
+ ls++;
+ x >>= 1;
+ }
+
+ return ls;
+ #endif
+ }
+ /* ---------- * AllocSetFreeIndex - *
***************
*** 268,281 **** AllocSetFreeIndex(Size size) { int idx = 0;
! if (size > 0) {
! size = (size - 1) >> ALLOC_MINBITS;
! while (size != 0)
! {
! idx++;
! size >>= 1;
! } Assert(idx < ALLOCSET_NUM_FREELISTS); }
--- 293,301 ---- { int idx = 0;
! if (size > (1 << ALLOC_MINBITS)) {
! idx = fls((size - 1) >> ALLOC_MINBITS); Assert(idx < ALLOCSET_NUM_FREELISTS); }