From d704f3f76ba555e0c0ad8c3cfc2d953ea4baa162 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sat, 15 Feb 2025 19:18:16 +0700 Subject: [PATCH v8 1/4] Dispatch CRC computation by branching rather than indirect calls --- src/backend/postmaster/postmaster.c | 4 ++ src/include/port/pg_cpucap.h | 25 +++++++++ src/include/port/pg_crc32c.h | 78 +++++++++++++++++++++-------- src/port/Makefile | 1 + src/port/meson.build | 4 ++ src/port/pg_cpucap.c | 51 +++++++++++++++++++ src/port/pg_crc32c_armv8_choose.c | 26 +--------- src/port/pg_crc32c_sse42_choose.c | 26 +--------- 8 files changed, 145 insertions(+), 70 deletions(-) create mode 100644 src/include/port/pg_cpucap.h create mode 100644 src/port/pg_cpucap.c diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 5dd3b6a4fd4..43e35f8041f 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -99,6 +99,7 @@ #include "pg_getopt.h" #include "pgstat.h" #include "port/pg_bswap.h" +#include "port/pg_cpucap.h" #include "postmaster/autovacuum.h" #include "postmaster/bgworker_internals.h" #include "postmaster/pgarch.h" @@ -1951,6 +1952,9 @@ InitProcessGlobals(void) #ifndef WIN32 srandom(pg_prng_uint32(&pg_global_prng_state)); #endif + + /* detect CPU capabilities */ + pg_cpucap_initialize(); } /* diff --git a/src/include/port/pg_cpucap.h b/src/include/port/pg_cpucap.h new file mode 100644 index 00000000000..81edfedce5d --- /dev/null +++ b/src/include/port/pg_cpucap.h @@ -0,0 +1,25 @@ +/*------------------------------------------------------------------------- + * + * pg_cpucap.h + * Runtime detection of CPU capabilities. 
+ * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * src/include/port/pg_cpucap.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_CPUCAP_H +#define PG_CPUCAP_H + +#define PGCPUCAP_INIT (1 << 0) +#define PGCPUCAP_POPCNT (1 << 1) +#define PGCPUCAP_VPOPCNT (1 << 2) +#define PGCPUCAP_CRC32C (1 << 3) + +extern PGDLLIMPORT uint32 pg_cpucap; +extern void pg_cpucap_initialize(void); + +#endif /* PG_CPUCAP_H */ diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index 65ebeacf4b1..b565a0f2949 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -34,6 +34,7 @@ #define PG_CRC32C_H #include "port/pg_bswap.h" +#include "port/pg_cpucap.h" typedef uint32 pg_crc32c; @@ -41,52 +42,55 @@ typedef uint32 pg_crc32c; #define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF) #define EQ_CRC32C(c1, c2) ((c1) == (c2)) -#if defined(USE_SSE42_CRC32C) +#if defined(USE_SSE42_CRC32C) || defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) /* Use Intel SSE4.2 instructions. */ #define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) +#define COMP_CRC32C_HW(crc, data, len) \ ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) +#if defined(USE_SSE42_CRC32C) +#define HAVE_CRC_COMPTIME +#else +#define HAVE_CRC_RUNTIME +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +#endif + +extern bool pg_crc32c_sse42_available(void); extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); -#elif defined(USE_ARMV8_CRC32C) +#elif defined(USE_ARMV8_CRC32C) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) /* Use ARMv8 CRC Extension instructions. 
*/ #define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) +#define COMP_CRC32C_HW(crc, data, len) \ ((crc) = pg_comp_crc32c_armv8((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) +#if defined(USE_ARMV8_CRC32C) +#define HAVE_CRC_COMPTIME +#else +#define HAVE_CRC_RUNTIME +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +#endif + +extern bool pg_crc32c_armv8_available(void); extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); #elif defined(USE_LOONGARCH_CRC32C) /* Use LoongArch CRCC instructions. */ #define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) +#define COMP_CRC32C_HW(crc, data, len) \ ((crc) = pg_comp_crc32c_loongarch((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) +#define HAVE_CRC_COMPTIME extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len); -#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) - -/* - * Use Intel SSE 4.2 or ARMv8 instructions, but perform a runtime check first - * to check that they are available. - */ -#define COMP_CRC32C(crc, data, len) \ - ((crc) = pg_comp_crc32c((crc), (data), (len))) -#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) - -extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); -extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); - -#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK -extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); -#endif -#ifdef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK -extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); -#endif - #else /* * Use slicing-by-8 algorithm. 
@@ -105,6 +109,36 @@ extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t le extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +#endif /* end of CPU-specific symbols */ + +#if defined(HAVE_CRC_COMPTIME) || defined(HAVE_CRC_RUNTIME) +/* + * Check if the CPU we're running on supports special + * instructions for CRC-32C computation. Otherwise, fall + * back to the pure software implementation (slicing-by-8). + */ +static inline pg_crc32c +pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) +{ + /* + * If this is firing in a frontend program, first look if you forgot a + * call to pg_cpucap_initialize() in main(). See for example + * src/bin/pg_controldata/pg_controldata.c. + */ + // WIP: how to best initialize in frontend? +#ifndef FRONTEND + Assert(pg_cpucap & PGCPUCAP_INIT); +#endif + +#if defined(HAVE_CRC_COMPTIME) + return COMP_CRC32C_HW(crc, data, len); +#else + if (pg_cpucap & PGCPUCAP_CRC32C) + return COMP_CRC32C_HW(crc, data, len); + else + return pg_comp_crc32c_sb8(crc, data, len); #endif +} +#endif /* HAVE_CRC_COMPTIME || HAVE_CRC_RUNTIME */ #endif /* PG_CRC32C_H */ diff --git a/src/port/Makefile b/src/port/Makefile index 4c224319512..5a05179e926 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -44,6 +44,7 @@ OBJS = \ noblock.o \ path.o \ pg_bitutils.o \ + pg_cpucap.o \ pg_popcount_avx512.o \ pg_strong_random.o \ pgcheckdir.o \ diff --git a/src/port/meson.build b/src/port/meson.build index 7fcfa728d43..e1e7ce8fb87 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -7,6 +7,7 @@ pgport_sources = [ 'noblock.c', 'path.c', 'pg_bitutils.c', + 'pg_cpucap.c', 'pg_popcount_avx512.c', 'pg_strong_random.c', 'pgcheckdir.c', @@ -83,12 +84,15 @@ replace_funcs_pos = [ # x86/x64 ['pg_crc32c_sse42', 'USE_SSE42_CRC32C'], ['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], + # WIP sometime we'll need to build these based on host_cpu + ['pg_crc32c_sse42_choose', 
'USE_SSE42_CRC32C'], ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], ['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], # arm / aarch64 ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'], ['pg_crc32c_armv8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK', 'crc'], + ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C'], ['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'], ['pg_crc32c_sb8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'], diff --git a/src/port/pg_cpucap.c b/src/port/pg_cpucap.c new file mode 100644 index 00000000000..eba6e31c63f --- /dev/null +++ b/src/port/pg_cpucap.c @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------- + * + * pg_cpucap.c + * Runtime detection of CPU capabilities. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * src/port/pg_cpucap.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "port/pg_cpucap.h" +#include "port/pg_crc32c.h" + + +/* starts uninitialized so we can detect errors of omission */ +uint32 pg_cpucap = 0; + +/* + * Check if hardware instructions for CRC computation are available. + */ +static void +pg_cpucap_crc32c(void) +{ + /* WIP: It seems like we should use CPU arch symbols instead */ +#if defined(USE_SSE42_CRC32C) || defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) + if (pg_crc32c_sse42_available()) + pg_cpucap |= PGCPUCAP_CRC32C; + +#elif defined(USE_ARMV8_CRC32C) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) + if (pg_crc32c_armv8_available()) + pg_cpucap |= PGCPUCAP_CRC32C; +#endif +} + +/* + * This needs to be called in main() for every + * program that calls a function that dispatches + * according to CPU features. 
+ */ +void +pg_cpucap_initialize(void) +{ + pg_cpucap = PGCPUCAP_INIT; + + pg_cpucap_crc32c(); +} diff --git a/src/port/pg_crc32c_armv8_choose.c b/src/port/pg_crc32c_armv8_choose.c index ec12be1bbc3..e3654427c3f 100644 --- a/src/port/pg_crc32c_armv8_choose.c +++ b/src/port/pg_crc32c_armv8_choose.c @@ -1,12 +1,7 @@ /*------------------------------------------------------------------------- * * pg_crc32c_armv8_choose.c - * Choose between ARMv8 and software CRC-32C implementation. - * - * On first call, checks if the CPU we're running on supports the ARMv8 - * CRC Extension. If it does, use the special instructions for CRC-32C - * computation. Otherwise, fall back to the pure software implementation - * (slicing-by-8). + * Check if the CPU we're running on supports the ARMv8 CRC Extension. * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -40,7 +35,7 @@ #include "port/pg_crc32c.h" -static bool +bool pg_crc32c_armv8_available(void) { #if defined(HAVE_ELF_AUX_INFO) @@ -106,20 +101,3 @@ pg_crc32c_armv8_available(void) return false; #endif } - -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. 
- */ -static pg_crc32c -pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) -{ - if (pg_crc32c_armv8_available()) - pg_comp_crc32c = pg_comp_crc32c_armv8; - else - pg_comp_crc32c = pg_comp_crc32c_sb8; - - return pg_comp_crc32c(crc, data, len); -} - -pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index 65dbc4d4249..f4d3215bc55 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -1,12 +1,7 @@ /*------------------------------------------------------------------------- * * pg_crc32c_sse42_choose.c - * Choose between Intel SSE 4.2 and software CRC-32C implementation. - * - * On first call, checks if the CPU we're running on supports Intel SSE - * 4.2. If it does, use the special SSE instructions for CRC-32C - * computation. Otherwise, fall back to the pure software implementation - * (slicing-by-8). + * Check if the CPU we're running on supports SSE4.2. * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -30,7 +25,7 @@ #include "port/pg_crc32c.h" -static bool +bool pg_crc32c_sse42_available(void) { unsigned int exx[4] = {0, 0, 0, 0}; @@ -45,20 +40,3 @@ pg_crc32c_sse42_available(void) return (exx[2] & (1 << 20)) != 0; /* SSE 4.2 */ } - -/* - * This gets called on the first call. It replaces the function pointer - * so that subsequent calls are routed directly to the chosen implementation. - */ -static pg_crc32c -pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) -{ - if (pg_crc32c_sse42_available()) - pg_comp_crc32c = pg_comp_crc32c_sse42; - else - pg_comp_crc32c = pg_comp_crc32c_sb8; - - return pg_comp_crc32c(crc, data, len); -} - -pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; -- 2.48.1