Move the shift-and-test logic into a separate fls() function, which
can use __builtin_clz() if it's available.
This requires a new check for __builtin_clz in the configure script.
Results in a ~2% performance increase on PowerPC.
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
---
v2: prevent fls(0)
---
 configure.in                  |   13 +++++++++++++
 src/backend/utils/mmgr/aset.c |   34 +++++++++++++++++++++++++++-------
 2 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/configure.in b/configure.in
index b8d2685..6a317b0 100644
--- a/configure.in
+++ b/configure.in
@@ -1361,6 +1361,19 @@ case $host_os in AC_FUNC_FSEEKO;;esac
+# GCC builtins
+#
+# Probe for __builtin_clz with AC_TRY_LINK rather than AC_CHECK_FUNC: the
+# prototype AC_CHECK_FUNC generates makes gcc reference a non-builtin symbol.
+
+AC_MSG_CHECKING([for __builtin_clz])
+AC_TRY_LINK([],
+ [__builtin_clz(0);],
+ [AC_DEFINE(HAVE_BUILTIN_CLZ, 1,
+ [Define to 1 if you have __builtin_clz().])
+ AC_MSG_RESULT(yes)],
+ [AC_MSG_RESULT(no)])
+## Pthreads
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index 0e2d4d5..9eb3117 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -255,6 +255,31 @@ static MemoryContextMethods AllocSetMethods = {#define AllocAllocInfo(_cxt, _chunk)#endif
+/*
+ * fls: find last set bit.  Returns the 1-based index of the most-significant
+ * set bit in x: the LSB is bit 1 and, for a 32-bit unsigned int, the MSB is
+ * bit 32.  x must be nonzero: __builtin_clz(0) is undefined (the fallback
+ * loop returns 0).  NOTE(review): BSD-derived <strings.h> also declares
+ * fls(); confirm this static definition does not clash there.
+ */
+static inline int
+fls(unsigned int x)
+{
+#ifdef HAVE_BUILTIN_CLZ
+ return 32 - __builtin_clz(x); /* 32 = assumed bit width of unsigned int */
+#else
+ int ls = 0;
+
+ while (x != 0) /* count the shifts needed to clear every set bit */
+ {
+ ls++;
+ x >>= 1;
+ }
+
+ return ls;
+#endif
+}
+/* ---------- * AllocSetFreeIndex - *
@@ -268,14 +293,9 @@ AllocSetFreeIndex(Size size){ int idx = 0;
- if (size > 0)
+ if (size > (1 << ALLOC_MINBITS)) {
- size = (size - 1) >> ALLOC_MINBITS;
- while (size != 0)
- {
- idx++;
- size >>= 1;
- }
+ idx = fls((size - 1) >> ALLOC_MINBITS); Assert(idx < ALLOCSET_NUM_FREELISTS); }