Optimize vector8_has_le() on AArch64.

author Nathan Bossart <nathan@postgresql.org>

Fri, 3 Oct 2025 19:02:47 +0000 (14:02 -0500)

committer Nathan Bossart <nathan@postgresql.org>

Fri, 3 Oct 2025 19:02:47 +0000 (14:02 -0500)
author Nathan Bossart <nathan@postgresql.org>
Fri, 3 Oct 2025 19:02:47 +0000 (14:02 -0500)
committer Nathan Bossart <nathan@postgresql.org>
Fri, 3 Oct 2025 19:02:47 +0000 (14:02 -0500)
diff --git a/src/include/port/simd.h b/src/include/port/simd.h

index 97c5f3530221a8f911664d13bf92153257bc6b93..5f5737707a89b4a6ce34c1f172b3c51245477d03 100644 (file)
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -86,7 +86,6 @@ static inline uint32 vector8_highbit_mask(const Vector8 v);
  static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2);
  #ifndef USE_NO_SIMD
  static inline Vector32 vector32_or(const Vector32 v1, const Vector32 v2);
-static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);
  #endif
  
  /*
@@ -213,6 +212,10 @@ static inline bool
  vector8_has_le(const Vector8 v, const uint8 c)
  {
     bool        result = false;
+#ifdef USE_SSE2
+   Vector8     umin;
+   Vector8     cmpe;
+#endif
  
     /* pre-compute the result for assert checking */
  #ifdef USE_ASSERT_CHECKING
@@ -250,14 +253,12 @@ vector8_has_le(const Vector8 v, const uint8 c)
             }
         }
     }
-#else
-
-   /*
-    * Use saturating subtraction to find bytes <= c, which will present as
-    * NUL bytes.  This approach is a workaround for the lack of unsigned
-    * comparison instructions on some architectures.
-    */
-   result = vector8_has_zero(vector8_ssub(v, vector8_broadcast(c)));
+#elif defined(USE_SSE2)
+   umin = vector8_min(v, vector8_broadcast(c));
+   cmpe = vector8_eq(umin, v);
+   result = vector8_is_highbit_set(cmpe);
+#elif defined(USE_NEON)
+   result = vminvq_u8(v) <= c;
  #endif
  
     Assert(assert_result == result);
@@ -358,24 +359,6 @@ vector32_or(const Vector32 v1, const Vector32 v2)
  }
  #endif                         /* ! USE_NO_SIMD */
  
-/*
- * Return the result of subtracting the respective elements of the input
- * vectors using saturation (i.e., if the operation would yield a value less
- * than zero, zero is returned instead).  For more information on saturation
- * arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic
- */
-#ifndef USE_NO_SIMD
-static inline Vector8
-vector8_ssub(const Vector8 v1, const Vector8 v2)
-{
-#ifdef USE_SSE2
-   return _mm_subs_epu8(v1, v2);
-#elif defined(USE_NEON)
-   return vqsubq_u8(v1, v2);
-#endif
-}
-#endif                         /* ! USE_NO_SIMD */
-
  /*
   * Return a vector with all bits set in each lane where the corresponding
   * lanes in the inputs are equal.
author	Nathan Bossart <nathan@postgresql.org>
	Fri, 3 Oct 2025 19:02:47 +0000 (14:02 -0500)
committer	Nathan Bossart <nathan@postgresql.org>
	Fri, 3 Oct 2025 19:02:47 +0000 (14:02 -0500)