From 561893beb4e3e008196b3e571685503e25a243f1 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Tue, 8 Aug 2023 12:58:07 +0700 Subject: [PATCH v4 2/2] Some minor adjustments to be squashed --- config/c-compiler.m4 | 4 ++++ configure | 7 +++++-- configure.ac | 7 +++++-- src/port/pg_crc32c_loongarch.c | 6 +++--- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 7777ad6e90..bd3e6d6623 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -669,6 +669,10 @@ undefine([Ac_cachevar])dnl # __builtin_loongarch_crcc_w_w_w and __builtin_loongarch_crcc_w_d_w # intrinsic functions. # +# We test for the 8-byte variant since platforms capable of running +# Postgres are 64-bit only (as of PG17), so we know CRC instructions +# are available without a runtime check. +# # If the intrinsics are supported, sets pgac_loongarch_crc32c_intrinsics. AC_DEFUN([PGAC_LOONGARCH_CRC32C_INTRINSICS], [define([Ac_cachevar], [AS_TR_SH([pgac_cv_loongarch_crc32c_intrinsics])])dnl diff --git a/configure b/configure index fe0b02aa80..6a80e374f1 100755 --- a/configure +++ b/configure @@ -18119,8 +18119,11 @@ fi # we're not targeting such a processor, but can nevertheless produce code that # uses the CRC instructions, compile both, and select at runtime. # -# You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 +# You can skip the runtime check by setting the appropriate USE_*_CRC32 flag to 1 # in the template or configure command line. +# +# If we are targeting a LoongArch processor, CRC instructions are +# always available (at least on 64 bit), so no runtime check is needed. if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_LOONGARCH_CRC32C" = x""; then # Use Intel SSE 4.2 if available. 
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then @@ -18139,8 +18142,8 @@ if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && if test x"$pgac_armv8_crc32c_intrinsics" = x"yes"; then USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 else + # LoongArch CRCC instructions. if test x"$pgac_loongarch_crc32c_intrinsics" = x"yes"; then - # LoongArch CRCC instructions. USE_LOONGARCH_CRC32C=1 else # fall back to slicing-by-8 algorithm, which doesn't require any diff --git a/configure.ac b/configure.ac index 57f0f836c7..6105af6996 100644 --- a/configure.ac +++ b/configure.ac @@ -2130,8 +2130,11 @@ AC_SUBST(CFLAGS_CRC) # we're not targeting such a processor, but can nevertheless produce code that # uses the CRC instructions, compile both, and select at runtime. # -# You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 +# You can skip the runtime check by setting the appropriate USE_*_CRC32 flag to 1 # in the template or configure command line. +# +# If we are targeting a LoongArch processor, CRC instructions are +# always available (at least on 64 bit), so no runtime check is needed. if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_LOONGARCH_CRC32C" = x""; then # Use Intel SSE 4.2 if available. if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then @@ -2150,8 +2153,8 @@ if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && if test x"$pgac_armv8_crc32c_intrinsics" = x"yes"; then USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 else + # LoongArch CRCC instructions. if test x"$pgac_loongarch_crc32c_intrinsics" = x"yes"; then - # LoongArch CRCC instructions. 
USE_LOONGARCH_CRC32C=1 else # fall back to slicing-by-8 algorithm, which doesn't require any diff --git a/src/port/pg_crc32c_loongarch.c b/src/port/pg_crc32c_loongarch.c index 2897920800..db9da80e1b 100644 --- a/src/port/pg_crc32c_loongarch.c +++ b/src/port/pg_crc32c_loongarch.c @@ -23,9 +23,9 @@ pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len) const unsigned char *pend = p + len; /* - * Loongarch desktop and server chips support unaligned memory access by default. - * However, aligned memory access is significantly faster. - * Process leading bytes so that the loop below starts with a pointer aligned to eight bytes. + * LoongArch doesn't require alignment, but aligned memory access is + * significantly faster. Process leading bytes so that the loop below + * starts with a pointer aligned to eight bytes. */ if (!PointerIsAligned(p, uint16) && p + 1 <= pend) -- 2.41.0