From a42eb01c87675698ae5972421f8896f85f048f2b Mon Sep 17 00:00:00 2001
From: Masahiko Sawada
Date: Mon, 24 Oct 2022 14:07:09 +0900
Subject: [PATCH v24 1/9] introduce vector8_min and vector8_highbit_mask

TODO: commit message

TODO: Remove uint64 case.

separate-commit TODO: move non-SIMD fallbacks to own header to clean up
the #ifdef maze.
---
 src/include/port/simd.h | 60 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/src/include/port/simd.h b/src/include/port/simd.h
index c836360d4b..f0bba33c53 100644
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -77,6 +77,7 @@ static inline bool vector8_has(const Vector8 v, const uint8 c);
 static inline bool vector8_has_zero(const Vector8 v);
 static inline bool vector8_has_le(const Vector8 v, const uint8 c);
 static inline bool vector8_is_highbit_set(const Vector8 v);
+static inline uint32 vector8_highbit_mask(const Vector8 v);
 #ifndef USE_NO_SIMD
 static inline bool vector32_is_highbit_set(const Vector32 v);
 #endif
@@ -96,6 +97,7 @@ static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);
  */
 #ifndef USE_NO_SIMD
 static inline Vector8 vector8_eq(const Vector8 v1, const Vector8 v2);
+static inline Vector8 vector8_min(const Vector8 v1, const Vector8 v2);
 static inline Vector32 vector32_eq(const Vector32 v1, const Vector32 v2);
 #endif
 
@@ -277,6 +279,48 @@ vector8_is_highbit_set(const Vector8 v)
 #endif
 }
 
+/*
+ * Return a bitmask made of the high bit of each element: bit i of the
+ * result is the high bit of the i'th byte of the vector.
+ */
+static inline uint32
+vector8_highbit_mask(const Vector8 v)
+{
+#ifdef USE_SSE2
+	return (uint32) _mm_movemask_epi8(v);
+#elif defined(USE_NEON)
+	/* bit weight of each lane within its 8-lane half of the vector */
+	static const uint8 mask[16] = {
+		1 << 0, 1 << 1, 1 << 2, 1 << 3,
+		1 << 4, 1 << 5, 1 << 6, 1 << 7,
+		1 << 0, 1 << 1, 1 << 2, 1 << 3,
+		1 << 4, 1 << 5, 1 << 6, 1 << 7,
+	};
+
+	/*
+	 * The arithmetic shift turns each lane into 0x00 or 0xFF according to its
+	 * high bit.  vshrq_n_s8 takes a signed vector, so cast the unsigned input
+	 * explicitly rather than relying on lax vector conversions.
+	 */
+	uint8x16_t	masked = vandq_u8(vld1q_u8(mask), (uint8x16_t) vshrq_n_s8((int8x16_t) v, 7));
+	uint8x16_t	maskedhi = vextq_u8(masked, masked, 8);
+
+	/*
+	 * Interleave lane i with lane i + 8 into 16-bit elements and add them up;
+	 * every lane carries a distinct bit within its byte, so nothing carries.
+	 */
+	return (uint32) vaddvq_u16((uint16x8_t) vzip1q_u8(masked, maskedhi));
+#else
+	uint32		mask = 0;
+
+	/* collect the high bit of the i'th byte into bit i of the result */
+	for (Size i = 0; i < sizeof(Vector8); i++)
+		mask |= (((const uint8 *) &v)[i] >> 7) << i;
+
+	return mask;
+#endif
+}
+
 /*
  * Exactly like vector8_is_highbit_set except for the input type, so it
  * looks at each byte separately.
@@ -372,4 +416,20 @@ vector32_eq(const Vector32 v1, const Vector32 v2)
 }
 #endif							/* ! USE_NO_SIMD */
 
+/*
+ * Return a vector holding, for each element position, the smaller of the
+ * corresponding elements of v1 and v2, compared as unsigned 8-bit values.
+ */
+#ifndef USE_NO_SIMD
+static inline Vector8
+vector8_min(const Vector8 v1, const Vector8 v2)
+{
+#ifdef USE_SSE2
+	return _mm_min_epu8(v1, v2);
+#elif defined(USE_NEON)
+	return vminq_u8(v1, v2);
+#endif
+}
+#endif							/* ! USE_NO_SIMD */
+
 #endif							/* SIMD_H */
-- 
2.31.1