From b72327ad1cc2b35767cab6a8267892878c868f56 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 1 Aug 2024 16:38:05 +1200 Subject: [PATCH 2/2] Standardize macros for detecting architectures. Instead of repeating compilers' architecture macros throughout the tree and sometimes getting it wrong, let's detect them in one central place, and define our own macros of the form: PG_ARCH_{ARM,LOONGARCH,MIPS,PPC,RISCV,S390,SPARC,X86} PG_ARCH_{ARM,LOONGARCH,MIPS,PPC,RISCV,S390,SPARC,X86}_{32,64} This fixes the problem that MSVC builds were unintentionally using pessimistic fallback code defined by "port/atomics.h", due to inconsistent testing for architecture macros. A couple of other obscure places were also affected, but failing to include arch-x86.h on Windows seems pretty egregious: * pg_{read,write}_barrier() must be falling back to pg_memory_barrier() instead of pg_compiler_barrier() * pg_spin_delay() must be falling back to nothing at all * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY must not be defined Discussion: https://postgr.es/m/CA%2BhUKGKAf_i6w7hB_3pqZXQeqn%2BixvY%2BCMps_n%3DmJ5HAatMjMw%40mail.gmail.com --- contrib/pgcrypto/crypt-blowfish.c | 4 +- src/include/c.h | 57 ++++++++++++++++++++++++++++ src/include/jit/llvmjit_backport.h | 2 +- src/include/port/atomics.h | 6 +-- src/include/port/atomics/arch-arm.h | 4 +- src/include/port/atomics/arch-x86.h | 16 ++++---- src/include/port/pg_bitutils.h | 4 +- src/include/portability/instr_time.h | 2 +- src/include/storage/s_lock.h | 16 ++++---- src/port/pg_crc32c_armv8_choose.c | 12 +++--- src/port/pg_crc32c_sse42.c | 4 +- 11 files changed, 92 insertions(+), 35 deletions(-) diff --git a/contrib/pgcrypto/crypt-blowfish.c b/contrib/pgcrypto/crypt-blowfish.c index 5a1b1e10091..9c4e02e428b 100644 --- a/contrib/pgcrypto/crypt-blowfish.c +++ b/contrib/pgcrypto/crypt-blowfish.c @@ -38,10 +38,10 @@ #include "px-crypt.h" #include "px.h" -#ifdef __i386__ +#if defined(PG_ARCH_X86_32) #define BF_ASM 0 /* 1 */ #define BF_SCALE 1 -#elif 
defined(__x86_64__) +#elif defined(PG_ARCH_X86_64) #define BF_ASM 0 #define BF_SCALE 1 #else diff --git a/src/include/c.h b/src/include/c.h index 88d13ec9993..f9872ea20c7 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -585,6 +585,63 @@ typedef void (*pg_funcptr_t) (void); #define HAVE_PRAGMA_GCC_SYSTEM_HEADER 1 #endif +/* + * Project-standardized name for CPU architectures, to avoid having to repeat + * the names that different compilers use.  (Test 64-bit variants first: on + * several architectures the 32-bit macro is also predefined on 64-bit + * targets.) + */ +#if defined(__arm__) || defined(__arm) || defined(_M_ARM) +#define PG_ARCH_ARM_32 +#elif defined(__aarch64__) || defined(_M_ARM64) +#define PG_ARCH_ARM_64 +#elif defined(__loongarch__) && !defined(__loongarch64) +#define PG_ARCH_LOONGARCH_32 +#elif defined(__loongarch__) && defined(__loongarch64) +#define PG_ARCH_LOONGARCH_64 +#elif defined(__mips64) || defined(__mips64__) +#define PG_ARCH_MIPS_64 +#elif defined(__mips__) +#define PG_ARCH_MIPS_32 +#elif defined(__ppc64__) || defined(__powerpc64__) +#define PG_ARCH_PPC_64 +#elif defined(__ppc__) || defined(__powerpc__) +#define PG_ARCH_PPC_32 +#elif defined(__riscv) && __riscv_xlen == 64 +#define PG_ARCH_RISCV_64 +#elif defined(__riscv) && __riscv_xlen == 32 +#define PG_ARCH_RISCV_32 +#elif defined(__s390x__) +#define PG_ARCH_S390_64 +#elif defined(__s390__) +#define PG_ARCH_S390_32 +#elif defined(__sparcv9) +#define PG_ARCH_SPARC_64 +#elif defined(__sparc) +#define PG_ARCH_SPARC_32 +#elif defined(__i386__) || defined (__386) || defined(_M_IX86) +#define PG_ARCH_X86_32 +#elif defined(__x86_64__) || defined(__x86_64) || defined (__amd64) || defined(_M_AMD64) +#define PG_ARCH_X86_64 +#endif + +/* Same again without specifying the word size.
*/ +#if defined(PG_ARCH_ARM_32) || defined(PG_ARCH_ARM_64) +#define PG_ARCH_ARM +#elif defined(PG_ARCH_LOONGARCH_32) || defined(PG_ARCH_LOONGARCH_64) +#define PG_ARCH_LOONGARCH +#elif defined(PG_ARCH_MIPS_32) || defined(PG_ARCH_MIPS_64) +#define PG_ARCH_MIPS +#elif defined(PG_ARCH_PPC_32) || defined(PG_ARCH_PPC_64) +#define PG_ARCH_PPC +#elif defined(PG_ARCH_RISCV_32) || defined(PG_ARCH_RISCV_64) +#define PG_ARCH_RISCV +#elif defined(PG_ARCH_S390_32) || defined(PG_ARCH_S390_64) +#define PG_ARCH_S390 +#elif defined(PG_ARCH_SPARC_32) || defined(PG_ARCH_SPARC_64) +#define PG_ARCH_SPARC +#elif defined(PG_ARCH_X86_32) || defined(PG_ARCH_X86_64) +#define PG_ARCH_X86 +#endif + /* ---------------------------------------------------------------- * Section 2: bool, true, false diff --git a/src/include/jit/llvmjit_backport.h b/src/include/jit/llvmjit_backport.h index 71cfdfc832f..be6fbd64773 100644 --- a/src/include/jit/llvmjit_backport.h +++ b/src/include/jit/llvmjit_backport.h @@ -15,7 +15,7 @@ * class llvm::backport::SectionMemoryManager that we use as a workaround. * This header controls whether we use it. */ -#if defined(__aarch64__) && LLVM_VERSION_MAJOR < 22 +#if defined(PG_ARCH_ARM_64) && LLVM_VERSION_MAJOR < 22 #define USE_LLVM_BACKPORT_SECTION_MEMORY_MANAGER #endif diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h index d8b1d20fe60..8bd3b0d6dc9 100644 --- a/src/include/port/atomics.h +++ b/src/include/port/atomics.h @@ -63,11 +63,11 @@ * compiler barrier. 
* */ -#if defined(__arm__) || defined(__arm) || defined(__aarch64__) +#if defined(PG_ARCH_ARM) #include "port/atomics/arch-arm.h" -#elif defined(__i386__) || defined(__i386) || defined(__x86_64__) +#elif defined(PG_ARCH_X86) #include "port/atomics/arch-x86.h" -#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) +#elif defined(PG_ARCH_PPC) #include "port/atomics/arch-ppc.h" #endif diff --git a/src/include/port/atomics/arch-arm.h b/src/include/port/atomics/arch-arm.h index 90280c7b751..4da56fab10e 100644 --- a/src/include/port/atomics/arch-arm.h +++ b/src/include/port/atomics/arch-arm.h @@ -21,7 +21,7 @@ * 64 bit atomics on ARM32 are implemented using kernel fallbacks and thus * might be slow, so disable entirely. On ARM64 that problem doesn't exist. */ -#if !defined(__aarch64__) +#if !defined(PG_ARCH_ARM_64) #define PG_DISABLE_64_BIT_ATOMICS #else /* @@ -29,4 +29,4 @@ * general purpose register is atomic. */ #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY -#endif /* __aarch64__ */ +#endif /* PG_ARCH_ARM_64 */ diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h index bd6f4f56ca2..05bb27c6ae5 100644 --- a/src/include/port/atomics/arch-x86.h +++ b/src/include/port/atomics/arch-x86.h @@ -32,10 +32,10 @@ */ #if defined(__GNUC__) || defined(__INTEL_COMPILER) -#if defined(__i386__) || defined(__i386) +#if defined(PG_ARCH_X86_32) #define pg_memory_barrier_impl() \ __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc") -#elif defined(__x86_64__) +#elif defined(PG_ARCH_X86_64) #define pg_memory_barrier_impl() \ __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc") #endif @@ -67,14 +67,14 @@ typedef struct pg_atomic_uint32 * It's too complicated to write inline asm for 64bit types on 32bit and the * 486 can't do it anyway. 
*/ -#ifdef __x86_64__ +#ifdef PG_ARCH_X86_64 #define PG_HAVE_ATOMIC_U64_SUPPORT typedef struct pg_atomic_uint64 { /* alignment guaranteed due to being on a 64bit platform */ volatile uint64 value; } pg_atomic_uint64; -#endif /* __x86_64__ */ +#endif /* PG_ARCH_X86_64 */ #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ @@ -109,7 +109,7 @@ pg_spin_delay_impl(void) { __asm__ __volatile__(" rep; nop \n"); } -#elif defined(_MSC_VER) && defined(__x86_64__) +#elif defined(_MSC_VER) && defined(PG_ARCH_X86_64) #define PG_HAVE_SPIN_DELAY static __forceinline void pg_spin_delay_impl(void) @@ -192,7 +192,7 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) return res; } -#ifdef __x86_64__ +#ifdef PG_ARCH_X86_64 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 static inline bool @@ -231,7 +231,7 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) return res; } -#endif /* __x86_64__ */ +#endif /* PG_ARCH_X86_64 */ #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ @@ -241,6 +241,6 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) */ #if defined(__i568__) || defined(__i668__) || /* gcc i586+ */ \ (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \ - defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, msvc */ + defined(PG_ARCH_X86_64) #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY #endif /* 8 byte single-copy atomicity */ diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 7a00d197013..0ca0c986113 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -82,7 +82,7 @@ pg_leftmost_one_pos64(uint64 word) #error "cannot find integer type of the same size as uint64_t" #endif -#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) +#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64)) unsigned long result; bool non_zero; @@ -155,7 +155,7 @@ pg_rightmost_one_pos64(uint64 word) #error 
"cannot find integer type of the same size as uint64_t" #endif -#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) +#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64)) unsigned long result; bool non_zero; diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h index 92558e234ac..a8834defa86 100644 --- a/src/include/portability/instr_time.h +++ b/src/include/portability/instr_time.h @@ -95,7 +95,7 @@ typedef struct instr_time * PG_INSTR_TSC_CLOCK controls whether the TSC clock source is compiled in, and * potentially used based on timing_tsc_enabled. */ -#if defined(__x86_64__) || defined(_M_X64) +#if defined(PG_ARCH_X86_64) #define PG_INSTR_TICKS_TO_NS 1 #define PG_INSTR_TSC_CLOCK 1 #elif defined(WIN32) diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h index 28f83df96d6..4229af8965f 100644 --- a/src/include/storage/s_lock.h +++ b/src/include/storage/s_lock.h @@ -193,7 +193,7 @@ spin_delay(void) #endif /* __i386__ */ -#ifdef __x86_64__ /* AMD Opteron, Intel EM64T */ +#ifdef PG_ARCH_X86_64 /* AMD Opteron, Intel EM64T */ #define HAS_TEST_AND_SET typedef unsigned char slock_t; @@ -238,7 +238,7 @@ spin_delay(void) " rep; nop \n"); } -#endif /* __x86_64__ */ +#endif /* PG_ARCH_X86_64 */ /* @@ -247,7 +247,7 @@ spin_delay(void) * We use the int-width variant of the builtin because it works on more chips * than other widths. */ -#if defined(__arm__) || defined(__arm) || defined(__aarch64__) +#if defined(PG_ARCH_ARM) #ifdef HAVE_GCC__SYNC_INT32_TAS #define HAS_TEST_AND_SET @@ -263,7 +263,7 @@ tas(volatile slock_t *lock) #define S_UNLOCK(lock) __sync_lock_release(lock) -#if defined(__aarch64__) +#if defined(PG_ARCH_ARM_64) /* * On ARM64, it's a win to use a non-locking test before the TAS proper. 
It @@ -285,9 +285,9 @@ spin_delay(void) " isb; \n"); } -#endif /* __aarch64__ */ +#endif /* PG_ARCH_ARM_64 */ #endif /* HAVE_GCC__SYNC_INT32_TAS */ -#endif /* __arm__ || __arm || __aarch64__ */ +#endif /* PG_ARCH_ARM */ /* S/390 and S/390x Linux (32- and 64-bit zSeries) */ @@ -391,7 +391,7 @@ do \ /* PowerPC */ -#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) +#if defined(PG_ARCH_PPC) #define HAS_TEST_AND_SET typedef unsigned int slock_t; @@ -452,7 +452,7 @@ do \ #endif /* powerpc */ -#if defined(__mips__) +#if defined(PG_ARCH_MIPS) #define HAS_TEST_AND_SET typedef unsigned int slock_t; diff --git a/src/port/pg_crc32c_armv8_choose.c b/src/port/pg_crc32c_armv8_choose.c index 591e23df44b..ff9afd55e7d 100644 --- a/src/port/pg_crc32c_armv8_choose.c +++ b/src/port/pg_crc32c_armv8_choose.c @@ -27,14 +27,14 @@ #if defined(HAVE_ELF_AUX_INFO) || defined(HAVE_GETAUXVAL) #include <sys/auxv.h> /* Ancient glibc releases don't include the HWCAPxxx macros in sys/auxv.h */ -#if defined(__linux__) && (defined(__aarch64__) ? !defined(HWCAP_CRC32) : !defined(HWCAP2_CRC32)) +#if defined(__linux__) && (defined(PG_ARCH_ARM_64) ? 
!defined(HWCAP_CRC32) : !defined(HWCAP2_CRC32)) #include <asm/hwcap.h> #endif #endif #if defined(__NetBSD__) #include <sys/sysctl.h> -#if defined(__aarch64__) +#if defined(PG_ARCH_ARM_64) #include <aarch64/armreg.h> #endif #endif @@ -47,7 +47,7 @@ pg_crc32c_armv8_available(void) #if defined(HAVE_ELF_AUX_INFO) unsigned long value; -#ifdef __aarch64__ +#ifdef PG_ARCH_ARM_64 return elf_aux_info(AT_HWCAP, &value, sizeof(value)) == 0 && (value & HWCAP_CRC32) != 0; #else @@ -55,7 +55,7 @@ pg_crc32c_armv8_available(void) (value & HWCAP2_CRC32) != 0; #endif #elif defined(HAVE_GETAUXVAL) -#ifdef __aarch64__ +#ifdef PG_ARCH_ARM_64 return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0; #else return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0; @@ -74,7 +74,7 @@ pg_crc32c_armv8_available(void) size_t len; uint64 sysctlbuf[SYSCTL_CPU_ID_MAXSIZE]; -#if defined(__aarch64__) +#if defined(PG_ARCH_ARM_64) /* We assume cpu0 is representative of all the machine's CPUs. */ const char *path = "machdep.cpu0.cpu_id"; size_t expected_len = sizeof(struct aarch64_sysctl_cpu_id); @@ -112,7 +112,7 @@ pg_crc32c_armv8_available(void) static bool pg_pmull_available(void) { -#if defined(__aarch64__) && defined(HWCAP_PMULL) +#if defined(PG_ARCH_ARM_64) && defined(HWCAP_PMULL) #ifdef HAVE_ELF_AUX_INFO unsigned long value; diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index b8e77faf4d9..f0759ffcb26 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -39,7 +39,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) * and performance testing didn't show any performance gain from aligning * the begin address. */ -#ifdef __x86_64__ +#ifdef PG_ARCH_X86_64 while (p + 8 <= pend) { crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p)); @@ -63,7 +63,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) crc = _mm_crc32_u32(crc, *((const unsigned int *) p)); p += 4; } -#endif /* __x86_64__ */ +#endif /* PG_ARCH_X86_64 */ /* Process any remaining bytes one at a time.
*/ while (p < pend) -- 2.53.0