summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: John Naylor, 2025-06-23 11:03:56 +0000
committer: John Naylor, 2025-06-23 11:03:56 +0000
commit: 43da394304fba820830da2cef2c0214fe292c037 (patch)
tree: ad7a9c591f06d1a45cf89f6d625426cdb07ea029
parent: 2c0d8b95080e1d51c60d9c6f6a2e4460d6dfaf77 (diff)
Properly fix AVX-512 CRC calculation bug
The problem that led to the workaround in f83f14881c7 was not in fact a compiler bug, but a failure to zero the upper bits of the vector register containing the initial scalar CRC value. Fix that and revert the workaround.

Diagnosed-by: Nathan Bossart <[email protected]>
Diagnosed-by: Raghuveer Devulapalli <[email protected]>
Tested-by: Andy Fan <[email protected]>
Tested-by: Soumyadeep Chakraborty <[email protected]>
Reviewed-by: Nathan Bossart <[email protected]>
Reviewed-by: Raghuveer Devulapalli <[email protected]>
Discussion: https://postgr.es/m/PH8PR11MB82866B07AA6758D12F699C00FB70A@PH8PR11MB8286.namprd11.prod.outlook.com
-rw-r--r-- src/port/pg_crc32c_sse42.c 2
-rw-r--r-- src/port/pg_crc32c_sse42_choose.c 4
2 files changed, 2 insertions, 4 deletions
diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c
index 9af3474a6ca..1a717255355 100644
--- a/src/port/pg_crc32c_sse42.c
+++ b/src/port/pg_crc32c_sse42.c
@@ -123,7 +123,7 @@ pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t len)
__m512i k;
k = _mm512_broadcast_i32x4(_mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0));
- x0 = _mm512_xor_si512(_mm512_castsi128_si512(_mm_cvtsi32_si128(crc0)), x0);
+ x0 = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(crc0)), x0);
buf += 64;
/* Main loop. */
diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c
index 802e47788c1..74d2421ba2b 100644
--- a/src/port/pg_crc32c_sse42_choose.c
+++ b/src/port/pg_crc32c_sse42_choose.c
@@ -95,9 +95,7 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
__cpuidex(exx, 7, 0);
#endif
-#if defined(__clang__) && !defined(__OPTIMIZE__)
- /* Some versions of clang are broken at -O0 */
-#elif defined(USE_AVX512_CRC32C_WITH_RUNTIME_CHECK)
+#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
if (exx[2] & (1 << 10) && /* VPCLMULQDQ */
exx[1] & (1 << 31)) /* AVX512-VL */
pg_comp_crc32c = pg_comp_crc32c_avx512;