From a3e9a1302d710f9fa6c48594144a82fe0c3988d6 Mon Sep 17 00:00:00 2001
From: John Naylor
Date: Thu, 19 Feb 2026 18:27:02 +0700
Subject: [PATCH v4 6/6] Centralize detection of CPU features

Arm, take 1
---
 src/include/port/pg_cpu.h      | 29 +++++++++++
 src/port/pg_cpu_armv8.c        | 93 +++++++++++++++++++---------------
 src/port/pg_crc32c_armv8.c     | 20 ++++++++
 src/port/pg_popcount_aarch64.c | 26 +---------
 4 files changed, 103 insertions(+), 65 deletions(-)

diff --git a/src/include/port/pg_cpu.h b/src/include/port/pg_cpu.h
index 3c70fd43a23..3687e025083 100644
--- a/src/include/port/pg_cpu.h
+++ b/src/include/port/pg_cpu.h
@@ -48,6 +48,35 @@ x86_feature_available(X86FeatureId feature)
 	return X86Features[feature];
 }
 
+#elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
+
+typedef enum ArmFeatureId
+{
+	/* Have we run feature detection? */
+	INIT_PG_ARM,
+
+	PG_ARM_CRC32,
+	PG_ARM_SVE,
+} ArmFeatureId;
+#define ArmFeaturesSize (PG_ARM_SVE + 1)
+
+extern PGDLLIMPORT bool ArmFeatures[];
+
+extern void set_arm_features(void);
+
+/*
+ * Report whether the running CPU has the given feature, performing
+ * detection on the first call.
+ */
+static inline bool
+arm_feature_available(ArmFeatureId feature)
+{
+	if (ArmFeatures[INIT_PG_ARM] == false)
+		set_arm_features();
+
+	return ArmFeatures[feature];
+}
+
 #endif							/* defined(USE_SSE2) || defined(__i386__) */
 
 #endif							/* PG_CPU_H */
diff --git a/src/port/pg_cpu_armv8.c b/src/port/pg_cpu_armv8.c
index 6c22704b5fa..59a9229b71d 100644
--- a/src/port/pg_cpu_armv8.c
+++ b/src/port/pg_cpu_armv8.c
@@ -1,12 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * pg_cpu_armv8.c
- *	  Choose between ARMv8 and software CRC-32C implementation.
- *
- * On first call, checks if the CPU we're running on supports the ARMv8
- * CRC Extension. If it does, use the special instructions for CRC-32C
- * computation. Otherwise, fall back to the pure software implementation
- * (slicing-by-8).
+ *	  Runtime CPU feature detection for Armv8
  *
  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -41,27 +36,54 @@
 #endif
 #endif
 
-#include "port/pg_crc32c.h"
+#include "port/pg_cpu.h"
+
+/* Cached detection results, indexed by ArmFeatureId. */
+bool		ArmFeatures[ArmFeaturesSize] = {0};
 
-static bool
-pg_crc32c_armv8_available(void)
+#if defined(HAVE_ELF_AUX_INFO) || defined(HAVE_GETAUXVAL)
+/*
+ * Fetch one hardware-capability word (AT_HWCAP or AT_HWCAP2) from the
+ * auxiliary vector.  A failed elf_aux_info() lookup is reported as 0, i.e.
+ * "no optional features".
+ */
+static inline unsigned long
+pg_getauxval(unsigned long at_hwcap)
 {
 #if defined(HAVE_ELF_AUX_INFO)
 	unsigned long value;
 
-#ifdef __aarch64__
-	return elf_aux_info(AT_HWCAP, &value, sizeof(value)) == 0 &&
-		(value & HWCAP_CRC32) != 0;
-#else
-	return elf_aux_info(AT_HWCAP2, &value, sizeof(value)) == 0 &&
-		(value & HWCAP2_CRC32) != 0;
-#endif
+	if (elf_aux_info(at_hwcap, &value, sizeof(value)) == 0)
+		return value;
+	else
+		return 0;
 #elif defined(HAVE_GETAUXVAL)
+	return getauxval(at_hwcap);
+#endif
+}
+#endif							/* HAVE_ELF_AUX_INFO || HAVE_GETAUXVAL */
+
+/*
+ * Probe the CPU once and record the results in ArmFeatures[].
+ *
+ * Called from arm_feature_available() on first use.  Features we have no
+ * way to detect on this platform are left marked unavailable.
+ */
+void
+set_arm_features(void)
+{
+#if defined(HAVE_ELF_AUX_INFO) || defined(HAVE_GETAUXVAL)
 #ifdef __aarch64__
-	return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;
+	unsigned long hwcap = pg_getauxval(AT_HWCAP);
+
+	ArmFeatures[PG_ARM_CRC32] = (hwcap & HWCAP_CRC32) != 0;
+	ArmFeatures[PG_ARM_SVE] = (hwcap & HWCAP_SVE) != 0;
 #else
-	return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0;
+	unsigned long hwcap2 = pg_getauxval(AT_HWCAP2);
+
+	ArmFeatures[PG_ARM_CRC32] = (hwcap2 & HWCAP2_CRC32) != 0;
 #endif
+
 #elif defined(__NetBSD__)
 	/*
 	 * On NetBSD we can read the Instruction Set Attribute Registers via
@@ -92,9 +114,9 @@ pg_crc32c_armv8_available(void)
 	len = sizeof(sysctlbuf);
 	memset(sysctlbuf, 0, len);
 	if (sysctlbyname(path, sysctlbuf, &len, NULL, 0) != 0)
-		return false;			/* perhaps kernel is 64-bit and we aren't? */
+		goto done;				/* perhaps kernel is 64-bit and we aren't? */
 	if (len != expected_len)
-		return false;			/* kernel API change? */
+		goto done;				/* kernel API change? */
 
 	/* Fetch the CRC32 field from ISAR0. */
 	fld = (ISAR0 >> ISAR0_CRC32_BITPOS) & WIDTHMASK(ISAR0_CRC32_BITWIDTH);
@@ -104,27 +126,16 @@ pg_crc32c_armv8_available(void)
 	 * (CRC32B/CRC32H/CRC32W/CRC32X/CRC32CB/CRC32CH/CRC32CW/CRC32CX).  Assume
 	 * that any future nonzero value will be a superset of 1.
 	 */
-	return (fld != 0);
-#else
-	return false;
-#endif
-}
+	ArmFeatures[PG_ARM_CRC32] = (fld != 0);
+
+done:
+#endif							/* __NetBSD__ */
 
-/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
- */
-static pg_crc32c
-pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
-{
-	if (pg_crc32c_armv8_available())
-		pg_comp_crc32c = pg_comp_crc32c_armv8;
-	else
-		pg_comp_crc32c = pg_comp_crc32c_sb8;
-
-	return pg_comp_crc32c(crc, data, len);
+	/*
+	 * Mark detection as done even if a probe above failed, so that
+	 * arm_feature_available() doesn't repeat it on every call.
+	 */
+	ArmFeatures[INIT_PG_ARM] = true;
 }
 
-pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
-
-#endif /* __arm__ || __arm || __aarch64__ */
+#endif							/* __arm__ || __arm || __aarch64__ */
diff --git a/src/port/pg_crc32c_armv8.c b/src/port/pg_crc32c_armv8.c
index 9ca0f728d39..a02264a00dc 100644
--- a/src/port/pg_crc32c_armv8.c
+++ b/src/port/pg_crc32c_armv8.c
@@ -20,8 +20,11 @@
 #include <arm_acle.h>
 #endif
 
+#include "port/pg_cpu.h"
 #include "port/pg_crc32c.h"
 
+static pg_crc32c pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len);
+
 pg_crc32c
 pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len)
 {
@@ -77,3 +80,20 @@ pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len)
 
 	return crc;
 }
+
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ */
+static pg_crc32c
+pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
+{
+	if (arm_feature_available(PG_ARM_CRC32))
+		pg_comp_crc32c = pg_comp_crc32c_armv8;
+	else
+		pg_comp_crc32c = pg_comp_crc32c_sb8;
+
+	return pg_comp_crc32c(crc, data, len);
+}
+
+pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
diff --git a/src/port/pg_popcount_aarch64.c b/src/port/pg_popcount_aarch64.c
index f474ef45510..37adff67ce8 100644
--- a/src/port/pg_popcount_aarch64.c
+++ b/src/port/pg_popcount_aarch64.c
@@ -18,17 +18,10 @@
 
 #ifdef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
 #include <arm_sve.h>
-
-#if defined(HAVE_ELF_AUX_INFO) || defined(HAVE_GETAUXVAL)
-#include <sys/auxv.h>
-/* Ancient glibc releases don't include the HWCAPxxx macros in sys/auxv.h */
-#if defined(__linux__) && !defined(HWCAP_SVE)
-#include <asm/hwcap.h>
-#endif
-#endif
 #endif
 
 #include "port/pg_bitutils.h"
+#include "port/pg_cpu.h"
 
 /*
  * The Neon versions are built regardless of whether we are building the SVE
@@ -56,25 +49,10 @@ static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
 uint64		(*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
 uint64		(*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
 
-static inline bool
-pg_popcount_sve_available(void)
-{
-#ifdef HAVE_ELF_AUX_INFO
-	unsigned long value;
-
-	return elf_aux_info(AT_HWCAP, &value, sizeof(value)) == 0 &&
-		(value & HWCAP_SVE) != 0;
-#elif defined(HAVE_GETAUXVAL)
-	return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0;
-#else
-	return false;
-#endif
-}
-
 static inline void
 choose_popcount_functions(void)
 {
-	if (pg_popcount_sve_available())
+	if (arm_feature_available(PG_ARM_SVE))
 	{
 		pg_popcount_optimized = pg_popcount_sve;
 		pg_popcount_masked_optimized = pg_popcount_masked_sve;
-- 
2.53.0