From e8bf08ed784551daa77fccd9485aef0e2844dd7e Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 25 Feb 2025 12:54:43 -0800 Subject: [PATCH v10 5/5] Move all cpuid checks to one location --- configure | 4 +- configure.ac | 4 +- src/include/port/pg_cpucap.h | 32 +++++-- src/include/port/pg_crc32c.h | 6 +- src/port/Makefile | 2 - src/port/meson.build | 12 --- src/port/pg_bitutils.c | 20 +--- src/port/pg_cpucap.c | 167 ++++++++++++++++++++++++++++++++-- src/port/pg_cpucap_arm.c | 119 ------------------------ src/port/pg_cpucap_x86.c | 75 --------------- src/port/pg_crc32c_sse42.c | 3 +- src/port/pg_popcount_avx512.c | 71 +-------------- 12 files changed, 197 insertions(+), 318 deletions(-) delete mode 100644 src/port/pg_cpucap_arm.c delete mode 100644 src/port/pg_cpucap_x86.c diff --git a/configure b/configure index 0d31e6a236..172c93896c 100755 --- a/configure +++ b/configure @@ -17360,7 +17360,7 @@ else $as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o" { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5 $as_echo "SSE 4.2 with runtime check" >&6; } else @@ -17376,7 +17376,7 @@ $as_echo "ARMv8 CRC instructions" >&6; } $as_echo "#define USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o + PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o" { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions with runtime check" >&5 $as_echo "ARMv8 CRC instructions with runtime check" >&6; } else diff --git a/configure.ac b/configure.ac index 60d30f855d..c7bfad98ac 100644 --- a/configure.ac +++ b/configure.ac @@ -2115,7 +2115,7 @@ if test x"$USE_SSE42_CRC32C" = x"1"; then else if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.]) - PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o" AC_MSG_RESULT(SSE 4.2 with runtime check) else if test x"$USE_ARMV8_CRC32C" = x"1"; then @@ -2125,7 +2125,7 @@ else else if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then AC_DEFINE(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARMv8 CRC Extension with a runtime check.]) - PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o + PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o" AC_MSG_RESULT(ARMv8 CRC instructions with runtime check) else if test x"$USE_LOONGARCH_CRC32C" = x"1"; then diff --git a/src/include/port/pg_cpucap.h b/src/include/port/pg_cpucap.h index af3fabfcff..d623db43e4 100644 --- a/src/include/port/pg_cpucap.h +++ b/src/include/port/pg_cpucap.h @@ -14,17 +14,29 @@ #ifndef PG_CPUCAP_H #define PG_CPUCAP_H -#define PGCPUCAP_INIT (1 << 0) -#define PGCPUCAP_POPCNT (1 << 1) -#define PGCPUCAP_VPOPCNT (1 << 2) -#define PGCPUCAP_CRC32C (1 << 3) -#define PGCPUCAP_CLMUL (1 << 4) +enum pg_cpucap__ +{ + PG_CPU_FEATURE_INIT = 0, + // X86 + PG_CPU_FEATURE_SSE42 = 1, + PG_CPU_FEATURE_POPCNT = 2, + PG_CPU_FEATURE_PCLMUL = 3, -extern PGDLLIMPORT uint32 pg_cpucap; -extern void pg_cpucap_initialize(void); + /* SKX: */ + PG_CPU_FEATURE_AVX512F = 30, + PG_CPU_FEATURE_AVX512BW = 31, + + /* ICX */ + PG_CPU_FEATURE_AVX512VPOPCNTDQ = 40, + + // ARM + PG_CPU_FEATURE_ARMV8_CRC32C = 100, -/* arch-specific functions private to src/port */ -extern void pg_cpucap_crc32c(void); -extern void pg_cpucap_clmul(void); + PG_CPU_FEATURE_MAX +}; + + +extern void pg_cpucap_initialize(void); +bool pg_cpu_have(int feature_id); #endif /* PG_CPUCAP_H */ diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index 4f0ebb9923..41b648bea6 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -48,6 +48,7 @@ typedef uint32 pg_crc32c; ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) #define COMP_CRC32C_HW(crc, data, len) \ ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) +#define PGCPUCAP_CRC32C pg_cpu_have(PG_CPU_FEATURE_SSE42) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) #if defined(USE_SSE42_CRC32C) @@ -66,6 +67,7 @@ extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t le ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) #define COMP_CRC32C_HW(crc, data, len) \ ((crc) = pg_comp_crc32c_armv8((crc), (data), (len))) +#define PGCPUCAP_CRC32C pg_cpu_have(PG_CPU_FEATURE_ARMV8_CRC32C) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) #if defined(USE_ARMV8_CRC32C) @@ -125,13 +127,13 @@ pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) */ // WIP: how to best intialize in frontend? #ifndef FRONTEND - Assert(pg_cpucap & PGCPUCAP_INIT); + Assert(pg_cpu_have(PG_CPU_FEATURE) == 1); #endif #if defined(HAVE_CRC_COMPTIME) return COMP_CRC32C_HW(crc, data, len); #else - if (pg_cpucap & PGCPUCAP_CRC32C) + if (PGCPUCAP_CRC32C) return COMP_CRC32C_HW(crc, data, len); else return pg_comp_crc32c_sb8(crc, data, len); diff --git a/src/port/Makefile b/src/port/Makefile index 1fc03713b3..5a05179e92 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -45,8 +45,6 @@ OBJS = \ path.o \ pg_bitutils.o \ pg_cpucap.o \ - pg_cpucap_x86.o \ - pg_cpucap_arm.o \ pg_popcount_avx512.o \ pg_strong_random.o \ pgcheckdir.o \ diff --git a/src/port/meson.build b/src/port/meson.build index baa8e16200..922ab1ad73 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -78,18 +78,6 @@ if host_system != 'windows' replace_funcs_neg += [['pthread_barrier_wait']] endif -# arch-specific runtime checks -if host_cpu == 'x86' or host_cpu == 'x86_64' - pgport_sources += files( - 'pg_cpucap_x86.c' - ) - -elif host_cpu == 'arm' or host_cpu == 'aarch64' - pgport_sources += files( - 'pg_cpucap_arm.c' - ) -endif - # Replacement functionality to be built if corresponding configure symbol # is true replace_funcs_pos = [ diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index 5677525693..4cf05f55a6 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -12,14 +12,8 @@ */ #include "c.h" -#ifdef HAVE__GET_CPUID -#include -#endif -#ifdef HAVE__CPUID -#include -#endif - #include "port/pg_bitutils.h" +#include "port/pg_cpucap.h" /* @@ -133,17 +127,7 @@ uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) static bool pg_popcount_available(void) { - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 23)) != 0; /* POPCNT */ + return pg_cpu_have(PG_CPU_FEATURE_POPCNT); } /* diff --git a/src/port/pg_cpucap.c b/src/port/pg_cpucap.c index 301bd9fc2c..422c8f4faf 100644 --- a/src/port/pg_cpucap.c +++ b/src/port/pg_cpucap.c @@ -15,20 +15,173 @@ #include "port/pg_cpucap.h" +#ifdef HAVE__GET_CPUID +#include +#endif + +#ifdef HAVE__CPUID +#include +#endif + +static unsigned char pg_cpucap[PG_CPU_FEATURE_MAX]; + +#ifdef __x86_64__ +// for _xgetbv +#include +/* + * Does XGETBV say the ZMM registers are enabled? + * + * NB: Caller is responsible for verifying that xsave_available() returns true + * before calling this. + */ +#ifdef HAVE_XSAVE_INTRINSICS +pg_attribute_target("xsave") +#endif +static inline bool +zmm_regs_available(void) +{ +#ifdef HAVE_XSAVE_INTRINSICS + return (_xgetbv(0) & 0xe6) == 0xe6; +#else + return false; +#endif +} + +static void +pg_cpucap_x86(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; +#if defined(HAVE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); +#endif + + pg_cpucap[PG_CPU_FEATURE_SSE42] = (exx[2] & (1 << 20)) != 0; + pg_cpucap[PG_CPU_FEATURE_PCLMUL] = (exx[2] & (1 << 1)) != 0; + pg_cpucap[PG_CPU_FEATURE_POPCNT] = (exx[2] & (1 << 23)) != 0; + /* osxsave */ + if ((exx[2] & (1 << 27)) == 0) { + return; + } + /* avx512 os support */ + if (!zmm_regs_available()) { + return; + } + /* second cpuid call on leaf 7 to check extended avx512 support */ + memset(exx, 0, 4 * sizeof(exx[0])); +#if defined(HAVE__GET_CPUID_COUNT) + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUIDEX) + __cpuidex(exx, 7, 0); +#endif + + pg_cpucap[PG_CPU_FEATURE_AVX512F] = (exx[1] & (1 << 16)) != 0; + pg_cpucap[PG_CPU_FEATURE_AVX512BW] = (exx[1] & (1 << 30)) != 0; + pg_cpucap[PG_CPU_FEATURE_AVX512VPOPCNTDQ] = (exx[2] & (1 << 14)) != 0; + +} +#else // ARM +static bool +pg_crc32c_armv8_available(void) +{ +#if defined(HAVE_ELF_AUX_INFO) + unsigned long value; + +#ifdef __aarch64__ + return elf_aux_info(AT_HWCAP, &value, sizeof(value)) == 0 && + (value & HWCAP_CRC32) != 0; +#else + return elf_aux_info(AT_HWCAP2, &value, sizeof(value)) == 0 && + (value & HWCAP2_CRC32) != 0; +#endif +#elif defined(HAVE_GETAUXVAL) +#ifdef __aarch64__ + return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0; +#else + return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0; +#endif +#elif defined(__NetBSD__) + /* + * On NetBSD we can read the Instruction Set Attribute Registers via + * sysctl. For doubtless-historical reasons the sysctl interface is + * completely different on 64-bit than 32-bit, but the underlying + * registers contain the same fields. + */ +#define ISAR0_CRC32_BITPOS 16 +#define ISAR0_CRC32_BITWIDTH 4 +#define WIDTHMASK(w) ((1 << (w)) - 1) +#define SYSCTL_CPU_ID_MAXSIZE 64 + + size_t len; + uint64 sysctlbuf[SYSCTL_CPU_ID_MAXSIZE]; +#if defined(__aarch64__) + /* We assume cpu0 is representative of all the machine's CPUs. */ + const char *path = "machdep.cpu0.cpu_id"; + size_t expected_len = sizeof(struct aarch64_sysctl_cpu_id); +#define ISAR0 ((struct aarch64_sysctl_cpu_id *) sysctlbuf)->ac_aa64isar0 +#else + const char *path = "machdep.id_isar"; + size_t expected_len = 6 * sizeof(int); +#define ISAR0 ((int *) sysctlbuf)[5] +#endif + uint64 fld; + + /* Fetch the appropriate set of register values. */ + len = sizeof(sysctlbuf); + memset(sysctlbuf, 0, len); + if (sysctlbyname(path, sysctlbuf, &len, NULL, 0) != 0) + return false; /* perhaps kernel is 64-bit and we aren't? */ + if (len != expected_len) + return false; /* kernel API change? */ + + /* Fetch the CRC32 field from ISAR0. */ + fld = (ISAR0 >> ISAR0_CRC32_BITPOS) & WIDTHMASK(ISAR0_CRC32_BITWIDTH); + + /* + * Current documentation defines only the field values 0 (No CRC32) and 1 + * (CRC32B/CRC32H/CRC32W/CRC32X/CRC32CB/CRC32CH/CRC32CW/CRC32CX). Assume + * that any future nonzero value will be a superset of 1. + */ + return (fld != 0); +#else + return false; +#endif +} + +static void +pg_cpucap_arm(void) +{ + if (pg_crc32c_armv8_available()) { + pg_cpucap[PG_CPU_FEATURE_ARMV8_CRC32C] = 1; + } +} +#endif -/* starts uninitialized so we can detect errors of omission */ -uint32 pg_cpucap = 0; /* * This needs to be called in main() for every * program that calls a function that dispatches * according to CPU features. */ -void -pg_cpucap_initialize(void) +void pg_cpucap_initialize(void) { - pg_cpucap = PGCPUCAP_INIT; + /* Initialize everything to zero */ + memset(pg_cpucap, 0, sizeof(pg_cpucap[0]) * PG_CPU_FEATURE_MAX); + pg_cpucap[PG_CPU_FEATURE_INIT] = 1; + +#if defined( __i386__ ) || defined(i386) || defined(_M_IX86) || \ + defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) + pg_cpucap_x86(); +#elif defined(__arm__) || defined(__arm) || \ + defined(__aarch64__) || defined(_M_ARM64) + pg_cpucap_arm(); +#endif - pg_cpucap_crc32c(); - pg_cpucap_clmul(); +} + +/* Access to pg_cpucap for modules that need runtime CPUID information */ +bool pg_cpu_have(int feature_id) +{ + return pg_cpucap[feature_id]; } diff --git a/src/port/pg_cpucap_arm.c b/src/port/pg_cpucap_arm.c deleted file mode 100644 index e080a5a931..0000000000 --- a/src/port/pg_cpucap_arm.c +++ /dev/null @@ -1,119 +0,0 @@ -/*------------------------------------------------------------------------- - * - * pg_cpucap_arm.c - * Check if the CPU we're running on supports the ARMv8 CRC Extension. - * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * src/port/pg_cpucap_arm.c - * - *------------------------------------------------------------------------- - */ - -#ifndef FRONTEND -#include "postgres.h" -#else -#include "postgres_fe.h" -#endif - -#if defined(HAVE_ELF_AUX_INFO) || defined(HAVE_GETAUXVAL) -#include -#if defined(__linux__) && !defined(__aarch64__) && !defined(HWCAP2_CRC32) -#include -#endif -#endif - -#if defined(__NetBSD__) -#include -#if defined(__aarch64__) -#include -#endif -#endif - -#include "port/pg_crc32c.h" - -static bool -pg_crc32c_armv8_available(void) -{ -#if defined(HAVE_ELF_AUX_INFO) - unsigned long value; - -#ifdef __aarch64__ - return elf_aux_info(AT_HWCAP, &value, sizeof(value)) == 0 && - (value & HWCAP_CRC32) != 0; -#else - return elf_aux_info(AT_HWCAP2, &value, sizeof(value)) == 0 && - (value & HWCAP2_CRC32) != 0; -#endif -#elif defined(HAVE_GETAUXVAL) -#ifdef __aarch64__ - return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0; -#else - return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0; -#endif -#elif defined(__NetBSD__) - /* - * On NetBSD we can read the Instruction Set Attribute Registers via - * sysctl. For doubtless-historical reasons the sysctl interface is - * completely different on 64-bit than 32-bit, but the underlying - * registers contain the same fields. - */ -#define ISAR0_CRC32_BITPOS 16 -#define ISAR0_CRC32_BITWIDTH 4 -#define WIDTHMASK(w) ((1 << (w)) - 1) -#define SYSCTL_CPU_ID_MAXSIZE 64 - - size_t len; - uint64 sysctlbuf[SYSCTL_CPU_ID_MAXSIZE]; -#if defined(__aarch64__) - /* We assume cpu0 is representative of all the machine's CPUs. */ - const char *path = "machdep.cpu0.cpu_id"; - size_t expected_len = sizeof(struct aarch64_sysctl_cpu_id); -#define ISAR0 ((struct aarch64_sysctl_cpu_id *) sysctlbuf)->ac_aa64isar0 -#else - const char *path = "machdep.id_isar"; - size_t expected_len = 6 * sizeof(int); -#define ISAR0 ((int *) sysctlbuf)[5] -#endif - uint64 fld; - - /* Fetch the appropriate set of register values. */ - len = sizeof(sysctlbuf); - memset(sysctlbuf, 0, len); - if (sysctlbyname(path, sysctlbuf, &len, NULL, 0) != 0) - return false; /* perhaps kernel is 64-bit and we aren't? */ - if (len != expected_len) - return false; /* kernel API change? */ - - /* Fetch the CRC32 field from ISAR0. */ - fld = (ISAR0 >> ISAR0_CRC32_BITPOS) & WIDTHMASK(ISAR0_CRC32_BITWIDTH); - - /* - * Current documentation defines only the field values 0 (No CRC32) and 1 - * (CRC32B/CRC32H/CRC32W/CRC32X/CRC32CB/CRC32CH/CRC32CW/CRC32CX). Assume - * that any future nonzero value will be a superset of 1. - */ - return (fld != 0); -#else - return false; -#endif -} - -/* - * Check if hardware instructions for CRC computation are available. - */ -void -pg_cpucap_crc32c(void) -{ - if (pg_crc32c_armv8_available()) - pg_cpucap |= PGCPUCAP_CRC32C; -} - -void -pg_cpucap_clmul(void) -{ - // WIP: does this even make sense? -} diff --git a/src/port/pg_cpucap_x86.c b/src/port/pg_cpucap_x86.c deleted file mode 100644 index 3a62a3a582..0000000000 --- a/src/port/pg_cpucap_x86.c +++ /dev/null @@ -1,75 +0,0 @@ -/*------------------------------------------------------------------------- - * - * pg_cpucap_x86.c - * Check if the CPU we're running on supports SSE4.2. - * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * src/port/pg_cpucap_x86.c - * - *------------------------------------------------------------------------- - */ - -#include "c.h" - -#ifdef HAVE__GET_CPUID -#include -#endif - -#ifdef HAVE__CPUID -#include -#endif - -#include "port/pg_cpucap.h" - -static bool -pg_sse42_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 20)) != 0; /* SSE 4.2 */ -} - -static bool -pg_pclmul_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 1)) != 0; /* PCLMUL */ -} - -/* - * Check if hardware instructions for CRC computation are available. - */ -void -pg_cpucap_crc32c(void) -{ - if (pg_sse42_available()) - pg_cpucap |= PGCPUCAP_CRC32C; -} - -void -pg_cpucap_clmul(void) -{ - if (pg_pclmul_available()) - pg_cpucap |= PGCPUCAP_CLMUL; -} diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index fc3cf0d088..7131b4a326 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -18,6 +18,7 @@ #include #include "port/pg_crc32c.h" +#include "port/pg_cpucap.h" /* WIP: configure checks */ #ifdef __x86_64__ @@ -140,7 +141,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) const size_t orig_len PG_USED_FOR_ASSERTS_ONLY = len; #ifdef HAVE_PCLMUL_RUNTIME - if (len >= PCLMUL_THRESHOLD && (pg_cpucap & PGCPUCAP_CLMUL)) + if (len >= PCLMUL_THRESHOLD && (pg_cpu_have(PG_CPU_FEATURE_PCLMUL))) { return pg_comp_crc32c_pclmul(crc, data, len); } diff --git a/src/port/pg_popcount_avx512.c b/src/port/pg_popcount_avx512.c index dac895a0fc..7f5846b1bd 100644 --- a/src/port/pg_popcount_avx512.c +++ b/src/port/pg_popcount_avx512.c @@ -14,17 +14,10 @@ #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK -#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT) -#include -#endif - #include -#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX) -#include -#endif - #include "port/pg_bitutils.h" +#include "port/pg_cpucap.h" /* * It's probably unlikely that TRY_POPCNT_FAST won't be set if we are able to @@ -33,63 +26,6 @@ */ #ifdef TRY_POPCNT_FAST -/* - * Does CPUID say there's support for XSAVE instructions? - */ -static inline bool -xsave_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - return (exx[2] & (1 << 27)) != 0; /* osxsave */ -} - -/* - * Does XGETBV say the ZMM registers are enabled? - * - * NB: Caller is responsible for verifying that xsave_available() returns true - * before calling this. - */ -#ifdef HAVE_XSAVE_INTRINSICS -pg_attribute_target("xsave") -#endif -static inline bool -zmm_regs_available(void) -{ -#ifdef HAVE_XSAVE_INTRINSICS - return (_xgetbv(0) & 0xe6) == 0xe6; -#else - return false; -#endif -} - -/* - * Does CPUID say there's support for AVX-512 popcount and byte-and-word - * instructions? - */ -static inline bool -avx512_popcnt_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID_COUNT) - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUIDEX) - __cpuidex(exx, 7, 0); -#else -#error cpuid instruction not available -#endif - return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */ - (exx[1] & (1 << 30)) != 0; /* avx512-bw */ -} - /* * Returns true if the CPU supports the instructions required for the AVX-512 * pg_popcount() implementation. @@ -97,9 +33,8 @@ avx512_popcnt_available(void) bool pg_popcount_avx512_available(void) { - return xsave_available() && - zmm_regs_available() && - avx512_popcnt_available(); + return pg_cpu_have(PG_CPU_FEATURE_AVX512VPOPCNTDQ) && + pg_cpu_have(PG_CPU_FEATURE_AVX512BW); } /* -- 2.43.0