From 8a61c21ad94bc6241989b6cd6237e75b16137ea1 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 13 Feb 2026 18:11:39 +0700 Subject: [PATCH v2 3/4] Refactor the detection of ZMM registers - Call _xgetbv within set_x86_features rather than in a separate function - Use symbols for XCR mask bits rather than a magic constant A future commit will build on this to detect YMM registers without code duplication. --- src/port/pg_cpu_x86.c | 42 +++++++++++++++++++++----------------- src/port/pg_crc32c_sse42.c | 2 +- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/port/pg_cpu_x86.c b/src/port/pg_cpu_x86.c index 249283725c8..468972a3d12 100644 --- a/src/port/pg_cpu_x86.c +++ b/src/port/pg_cpu_x86.c @@ -31,31 +31,28 @@ #include "port/pg_cpu.h" +/* XSAVE state component bits that we need */ +#define XMM (1<<1) +#define YMM (1<<2) +#define OPMASK (1<<5) +#define ZMM0_15 (1<<6) +#define ZMM16_31 (1<<7) + bool X86Features[X86FeaturesSize] = {0}; -/* - * Does XGETBV say the ZMM registers are enabled? - * - * NB: Caller is responsible for verifying that osxsave is available - * before calling this. - */ -#ifdef HAVE_XSAVE_INTRINSICS -pg_attribute_target("xsave") -#endif static bool -zmm_regs_available(void) +mask_available(uint32 value, uint32 mask) { -#ifdef HAVE_XSAVE_INTRINSICS - return (_xgetbv(0) & 0xe6) == 0xe6; -#else - return false; -#endif + return (value & mask) == mask; } /* * Parse the CPU ID info for runtime checks.
*/ +#ifdef HAVE_XSAVE_INTRINSICS +pg_attribute_target("xsave") +#endif void set_x86_features(void) { @@ -75,22 +72,29 @@ set_x86_features(void) /* All these features depend on OSXSAVE */ if (exx[2] & (1 << 27)) { - /* second cpuid call on leaf 7 to check extended AVX-512 support */ + uint32 xcr0_val = 0; + /* second cpuid call on leaf 7 to check extended AVX-512 support */ memset(exx, 0, 4 * sizeof(exx[0])); - #if defined(HAVE__GET_CPUID_COUNT) __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUIDEX) __cpuidex(exx, 7, 0); #endif - if (zmm_regs_available()) +#ifdef HAVE_XSAVE_INTRINSICS + /* get value of Extended Control Register */ + xcr0_val = _xgetbv(0); +#endif + + /* Are ZMM registers enabled? */ + if (mask_available(xcr0_val, XMM | YMM | + OPMASK | ZMM0_15 | ZMM16_31)) { X86Features[PG_AVX512_BW] = exx[1] >> 30 & 1; X86Features[PG_AVX512_VL] = exx[1] >> 31 & 1; - X86Features[PG_VPCLMULQDQ] = exx[2] >> 10 & 1; + X86Features[PG_AVX512_VPCLMULQDQ] = exx[2] >> 10 & 1; X86Features[PG_AVX512_VPOPCNTDQ] = exx[2] >> 14 & 1; } } diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 2e740e12a7a..d1d9d74e5ab 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -179,7 +179,7 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) #ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK if (x86_feature_available(PG_AVX512_VL) && - x86_feature_available(PG_VPCLMULQDQ)) + x86_feature_available(PG_AVX512_VPCLMULQDQ)) pg_comp_crc32c = pg_comp_crc32c_avx512; #endif -- 2.53.0