Re: Cleaning up historical portability baggage - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Cleaning up historical portability baggage
Date
Msg-id 189843.1660334922@sss.pgh.pa.us
Whole thread Raw
In response to Re: Cleaning up historical portability baggage  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: Cleaning up historical portability baggage
List pgsql-hackers
I wrote:
> I see that prairiedog was the only buildfarm animal failing the
> HAVE_PPC_LWARX_MUTEX_HINT test, and it seems pretty unlikely that
> there are any assemblers remaining in the wild that can't parse that.

Actually, after further investigation and testing, I think we could
drop the conditionality around PPC spinlock sequences altogether.
The commentary in pg_config_manual.h claims that "some pre-POWER4
machines" will fail on LWSYNC or LWARX with hint, but I've now
confirmed that the oldest PPC chips in my possession (prairiedog's
ppc7400, as well as a couple of ppc7450 machines) are all fine with
both.  Indeed, prairiedog would have been failing for some time now
if it didn't like LWSYNC, because port/atomics/arch-ppc.h is using
that unconditionally in some places :-(.  I think we can safely
assume that such machines no longer exist in the wild, or at least
are not going to be used to run Postgres v16.

The attached, expanded patch hard-wires USE_PPC_LWARX_MUTEX_HINT
and USE_PPC_LWSYNC as true, and syncs the assembler sequences in
arch-ppc.h with that decision.  I've checked this lightly on
tern's host as well as my own machines.

            regards, tom lane

diff --git a/configure b/configure
index cf2c4b85fe..35dcfbb709 100755
--- a/configure
+++ b/configure
@@ -15423,39 +15423,7 @@ $as_echo "#define HAVE_X86_64_POPCNTQ 1" >>confdefs.h
     fi
   ;;
   ppc*|powerpc*)
-    # On PPC, check if assembler supports LWARX instruction's mutex hint bit
-    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports lwarx hint bit" >&5
-$as_echo_n "checking whether assembler supports lwarx hint bit... " >&6; }
-if ${pgac_cv_have_ppc_mutex_hint+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-int
-main ()
-{
-int a = 0; int *p = &a; int r;
-     __asm__ __volatile__ (" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  pgac_cv_have_ppc_mutex_hint=yes
-else
-  pgac_cv_have_ppc_mutex_hint=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_ppc_mutex_hint" >&5
-$as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
-    if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
-
-$as_echo "#define HAVE_PPC_LWARX_MUTEX_HINT 1" >>confdefs.h
-
-    fi
-    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    # On PPC, check if compiler accepts "i"(x) when __builtin_constant_p(x).
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance" >&5
 $as_echo_n "checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance... " >&6; }
 if ${pgac_cv_have_i_constraint__builtin_constant_p+:} false; then :
diff --git a/configure.ac b/configure.ac
index b5798bcb0a..a2aa725455 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1687,18 +1687,7 @@ case $host_cpu in
     fi
   ;;
   ppc*|powerpc*)
-    # On PPC, check if assembler supports LWARX instruction's mutex hint bit
-    AC_CACHE_CHECK([whether assembler supports lwarx hint bit],
-                   [pgac_cv_have_ppc_mutex_hint],
-    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
-    [int a = 0; int *p = &a; int r;
-     __asm__ __volatile__ (" lwarx %0,0,%1,1\n" : "=&r"(r) : "r"(p));])],
-    [pgac_cv_have_ppc_mutex_hint=yes],
-    [pgac_cv_have_ppc_mutex_hint=no])])
-    if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
-    AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
-    fi
-    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    # On PPC, check if compiler accepts "i"(x) when __builtin_constant_p(x).
     AC_CACHE_CHECK([whether __builtin_constant_p(x) implies "i"(x) acceptance],
                    [pgac_cv_have_i_constraint__builtin_constant_p],
     [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index fc5ad5fd65..049c2d204f 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -349,9 +349,6 @@
 /* Define to 1 if you have the `posix_fallocate' function. */
 #undef HAVE_POSIX_FALLOCATE

-/* Define to 1 if the assembler supports PPC's LWARX mutex hint bit. */
-#undef HAVE_PPC_LWARX_MUTEX_HINT
-
 /* Define to 1 if you have the `ppoll' function. */
 #undef HAVE_PPOLL

diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index 5ee2c46267..844c3e0f09 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -227,32 +227,6 @@
  */
 #define DEFAULT_EVENT_SOURCE  "PostgreSQL"

-/*
- * On PPC machines, decide whether to use the mutex hint bit in LWARX
- * instructions.  Setting the hint bit will slightly improve spinlock
- * performance on POWER6 and later machines, but does nothing before that,
- * and will result in illegal-instruction failures on some pre-POWER4
- * machines.  By default we use the hint bit when building for 64-bit PPC,
- * which should be safe in nearly all cases.  You might want to override
- * this if you are building 32-bit code for a known-recent PPC machine.
- */
-#ifdef HAVE_PPC_LWARX_MUTEX_HINT    /* must have assembler support in any case */
-#if defined(__ppc64__) || defined(__powerpc64__)
-#define USE_PPC_LWARX_MUTEX_HINT
-#endif
-#endif
-
-/*
- * On PPC machines, decide whether to use LWSYNC instructions in place of
- * ISYNC and SYNC.  This provides slightly better performance, but will
- * result in illegal-instruction failures on some pre-POWER4 machines.
- * By default we use LWSYNC when building for 64-bit PPC, which should be
- * safe in nearly all cases.
- */
-#if defined(__ppc64__) || defined(__powerpc64__)
-#define USE_PPC_LWSYNC
-#endif
-
 /*
  * Assumed cache line size. This doesn't affect correctness, but can be used
  * for low-level optimizations. Currently, this is used to pad some data
diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h
index eb64513626..35a79042c0 100644
--- a/src/include/port/atomics/arch-ppc.h
+++ b/src/include/port/atomics/arch-ppc.h
@@ -90,12 +90,12 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
         (int32) *expected >= PG_INT16_MIN)
         __asm__ __volatile__(
             "    sync                \n"
-            "    lwarx   %0,0,%5        \n"
+            "    lwarx   %0,0,%5,1    \n"
             "    cmpwi   %0,%3        \n"
-            "    bne     $+12        \n"        /* branch to isync */
+            "    bne     $+12        \n"        /* branch to lwsync */
             "    stwcx.  %4,0,%5        \n"
             "    bne     $-16        \n"        /* branch to lwarx */
-            "    isync                \n"
+            "    lwsync                \n"
             "    mfcr    %1          \n"
 :            "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 :            "i"(*expected), "r"(newval), "r"(&ptr->value)
@@ -104,12 +104,12 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 #endif
         __asm__ __volatile__(
             "    sync                \n"
-            "    lwarx   %0,0,%5        \n"
+            "    lwarx   %0,0,%5,1    \n"
             "    cmpw    %0,%3        \n"
-            "    bne     $+12        \n"        /* branch to isync */
+            "    bne     $+12        \n"        /* branch to lwsync */
             "    stwcx.  %4,0,%5        \n"
             "    bne     $-16        \n"        /* branch to lwarx */
-            "    isync                \n"
+            "    lwsync                \n"
             "    mfcr    %1          \n"
 :            "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 :            "r"(*expected), "r"(newval), "r"(&ptr->value)
@@ -138,11 +138,11 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
         add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
         __asm__ __volatile__(
             "    sync                \n"
-            "    lwarx   %1,0,%4        \n"
+            "    lwarx   %1,0,%4,1    \n"
             "    addi    %0,%1,%3    \n"
             "    stwcx.  %0,0,%4        \n"
             "    bne     $-12        \n"        /* branch to lwarx */
-            "    isync                \n"
+            "    lwsync                \n"
 :            "=&r"(_t), "=&b"(res), "+m"(ptr->value)
 :            "i"(add_), "r"(&ptr->value)
 :            "memory", "cc");
@@ -150,11 +150,11 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 #endif
         __asm__ __volatile__(
             "    sync                \n"
-            "    lwarx   %1,0,%4        \n"
+            "    lwarx   %1,0,%4,1    \n"
             "    add     %0,%1,%3    \n"
             "    stwcx.  %0,0,%4        \n"
             "    bne     $-12        \n"        /* branch to lwarx */
-            "    isync                \n"
+            "    lwsync                \n"
 :            "=&r"(_t), "=&r"(res), "+m"(ptr->value)
 :            "r"(add_), "r"(&ptr->value)
 :            "memory", "cc");
@@ -180,12 +180,12 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
         (int64) *expected >= PG_INT16_MIN)
         __asm__ __volatile__(
             "    sync                \n"
-            "    ldarx   %0,0,%5        \n"
+            "    ldarx   %0,0,%5,1    \n"
             "    cmpdi   %0,%3        \n"
-            "    bne     $+12        \n"        /* branch to isync */
+            "    bne     $+12        \n"        /* branch to lwsync */
             "    stdcx.  %4,0,%5        \n"
             "    bne     $-16        \n"        /* branch to ldarx */
-            "    isync                \n"
+            "    lwsync                \n"
             "    mfcr    %1          \n"
 :            "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 :            "i"(*expected), "r"(newval), "r"(&ptr->value)
@@ -194,12 +194,12 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 #endif
         __asm__ __volatile__(
             "    sync                \n"
-            "    ldarx   %0,0,%5        \n"
+            "    ldarx   %0,0,%5,1    \n"
             "    cmpd    %0,%3        \n"
-            "    bne     $+12        \n"        /* branch to isync */
+            "    bne     $+12        \n"        /* branch to lwsync */
             "    stdcx.  %4,0,%5        \n"
             "    bne     $-16        \n"        /* branch to ldarx */
-            "    isync                \n"
+            "    lwsync                \n"
             "    mfcr    %1          \n"
 :            "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
 :            "r"(*expected), "r"(newval), "r"(&ptr->value)
@@ -224,11 +224,11 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
         add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
         __asm__ __volatile__(
             "    sync                \n"
-            "    ldarx   %1,0,%4        \n"
+            "    ldarx   %1,0,%4,1    \n"
             "    addi    %0,%1,%3    \n"
             "    stdcx.  %0,0,%4        \n"
             "    bne     $-12        \n"        /* branch to ldarx */
-            "    isync                \n"
+            "    lwsync                \n"
 :            "=&r"(_t), "=&b"(res), "+m"(ptr->value)
 :            "i"(add_), "r"(&ptr->value)
 :            "memory", "cc");
@@ -236,11 +236,11 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 #endif
         __asm__ __volatile__(
             "    sync                \n"
-            "    ldarx   %1,0,%4        \n"
+            "    ldarx   %1,0,%4,1    \n"
             "    add     %0,%1,%3    \n"
             "    stdcx.  %0,0,%4        \n"
             "    bne     $-12        \n"        /* branch to ldarx */
-            "    isync                \n"
+            "    lwsync                \n"
 :            "=&r"(_t), "=&r"(res), "+m"(ptr->value)
 :            "r"(add_), "r"(&ptr->value)
 :            "memory", "cc");
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 0877cf65b0..cc83d561b2 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -435,7 +435,8 @@ typedef unsigned int slock_t;
  *
  * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
  * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * On newer machines, we can use lwsync instead for better performance.
+ * But if the spinlock is in ordinary memory, we can use lwsync instead for
+ * better performance.
  *
  * Ordinarily, we'd code the branches here using GNU-style local symbols, that
  * is "1f" referencing "1:" and so on.  But some people run gcc on AIX with
@@ -450,23 +451,15 @@ tas(volatile slock_t *lock)
     int _res;

     __asm__ __volatile__(
-#ifdef USE_PPC_LWARX_MUTEX_HINT
 "    lwarx   %0,0,%3,1    \n"
-#else
-"    lwarx   %0,0,%3        \n"
-#endif
 "    cmpwi   %0,0        \n"
 "    bne     $+16        \n"        /* branch to li %1,1 */
 "    addi    %0,%0,1        \n"
 "    stwcx.  %0,0,%3        \n"
-"    beq     $+12        \n"        /* branch to lwsync/isync */
+"    beq     $+12        \n"        /* branch to lwsync */
 "    li      %1,1        \n"
 "    b       $+12        \n"        /* branch to end of asm sequence */
-#ifdef USE_PPC_LWSYNC
 "    lwsync                \n"
-#else
-"    isync                \n"
-#endif
 "    li      %1,0        \n"

 :    "=&b"(_t), "=r"(_res), "+m"(*lock)
@@ -477,23 +470,14 @@ tas(volatile slock_t *lock)

 /*
  * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * On newer machines, we can use lwsync instead for better performance.
+ * But we can use lwsync instead for better performance.
  */
-#ifdef USE_PPC_LWSYNC
 #define S_UNLOCK(lock)    \
 do \
 { \
     __asm__ __volatile__ ("    lwsync \n" ::: "memory"); \
     *((volatile slock_t *) (lock)) = 0; \
 } while (0)
-#else
-#define S_UNLOCK(lock)    \
-do \
-{ \
-    __asm__ __volatile__ ("    sync \n" ::: "memory"); \
-    *((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#endif /* USE_PPC_LWSYNC */

 #endif /* powerpc */

diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index cc82668457..243c5fa824 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -313,7 +313,6 @@ sub GenerateFiles
         HAVE_PAM_PAM_APPL_H         => undef,
         HAVE_POSIX_FADVISE          => undef,
         HAVE_POSIX_FALLOCATE        => undef,
-        HAVE_PPC_LWARX_MUTEX_HINT   => undef,
         HAVE_PPOLL                  => undef,
         HAVE_PS_STRINGS             => undef,
         HAVE_PTHREAD                => undef,

pgsql-hackers by date:

Previous
From: "David G. Johnston"
Date:
Subject: Re: doc: New cumulative stats subsystem obsoletes comment in maintenance.sgml
Next
From: Thomas Munro
Date:
Subject: Re: Cleaning up historical portability baggage