From adfe02a6d169be865937b567bc1b2b2ffde60631 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Tue, 11 Mar 2025 11:20:20 +0700 Subject: [PATCH v14 4/8] Always do runtime check for x86 to simplify PCLMUL --- configure | 2 +- configure.ac | 2 +- src/include/port/pg_crc32c.h | 20 ++++++++++++++------ src/port/meson.build | 1 + src/port/pg_crc32c_sse42.c | 2 +- src/port/pg_crc32c_sse42_choose.c | 2 ++ 6 files changed, 20 insertions(+), 9 deletions(-) diff --git a/configure b/configure index 93fddd69981..91c0ffc8272 100755 --- a/configure +++ b/configure @@ -17684,7 +17684,7 @@ if test x"$USE_SSE42_CRC32C" = x"1"; then $as_echo "#define USE_SSE42_CRC32C 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_sse42.o" + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sse42_choose.o" { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5 $as_echo "SSE 4.2" >&6; } else diff --git a/configure.ac b/configure.ac index b6d02f5ecc7..a85bdbd4ff6 100644 --- a/configure.ac +++ b/configure.ac @@ -2151,7 +2151,7 @@ fi AC_MSG_CHECKING([which CRC-32C implementation to use]) if test x"$USE_SSE42_CRC32C" = x"1"; then AC_DEFINE(USE_SSE42_CRC32C, 1, [Define to 1 use Intel SSE 4.2 CRC instructions.]) - PG_CRC32C_OBJS="pg_crc32c_sse42.o" + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sse42_choose.o" AC_MSG_RESULT(SSE 4.2) else if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index 229f4f6a65a..28253b48018 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -47,7 +47,10 @@ typedef uint32 pg_crc32c; #define EQ_CRC32C(c1, c2) ((c1) == (c2)) #if defined(USE_SSE42_CRC32C) -/* Use Intel SSE4.2 instructions. */ +/* + * Use either Intel SSE 4.2 or PCLMUL instructions. We don't need a runtime check + * for SSE 4.2, so we can inline those in some cases. 
+ */ #include <nmmintrin.h> @@ -55,7 +58,11 @@ typedef uint32 pg_crc32c; ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +#ifdef USE_PCLMUL_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_pclmul(pg_crc32c crc, const void *data, size_t len); +#endif pg_attribute_no_sanitize_alignment() static inline @@ -67,9 +74,9 @@ pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) const unsigned char *p = data; /* - * For constant inputs, inline the computation to avoid the - * indirect function call. This also allows the compiler to unroll - * loops for small inputs. + * For constant inputs, inline the computation to avoid the indirect + * function call. This also allows the compiler to unroll loops for + * small inputs. */ #if SIZEOF_VOID_P >= 8 for (; len >= 8; p += 8, len -= 8) @@ -82,7 +89,8 @@ pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) return crc; } else - return pg_comp_crc32c_sse42(crc, data, len); + /* Otherwise, use a runtime check for PCLMUL instructions. */ + return pg_comp_crc32c(crc, data, len); } #elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) @@ -123,7 +131,7 @@ extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_ #elif defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) /* - * Use Intel SSE 4.2 or ARMv8 instructions, but perform a runtime check first + * Use ARMv8 instructions, but perform a runtime check first * to check that they are available. 
*/ #define COMP_CRC32C(crc, data, len) \ diff --git a/src/port/meson.build b/src/port/meson.build index 7fcfa728d43..8d70a4d510e 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -83,6 +83,7 @@ replace_funcs_pos = [ # x86/x64 ['pg_crc32c_sse42', 'USE_SSE42_CRC32C'], ['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], + ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C'], ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], ['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 2001e69850b..c57d6c6293b 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -152,7 +152,7 @@ pg_comp_crc32c_pclmul(pg_crc32c crc, const void *data, size_t length) len = end - buf; } - return pg_comp_crc32c_sse42_inline(crc0, buf, len); + return pg_comp_crc32c_sse42(crc0, buf, len); } #endif diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index abea0f90eb3..89a48c76894 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -55,8 +55,10 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) pg_comp_crc32c = pg_comp_crc32c_pclmul; #endif } +#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK else pg_comp_crc32c = pg_comp_crc32c_sb8; +#endif return pg_comp_crc32c(crc, data, len); } -- 2.48.1