From 03f1371f881281fb2cbd7035ee52eb2f7f3b35c4 Mon Sep 17 00:00:00 2001 From: Chiranmoy Bhattacharya Date: Sun, 6 Jul 2025 19:25:28 +0530 Subject: [PATCH 1/2] hex coding regress test --- src/test/regress/expected/hex_coding.out | 63 ++++++++++++++++++++++++ src/test/regress/parallel_schedule | 5 ++ src/test/regress/sql/hex_coding.sql | 39 +++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 src/test/regress/expected/hex_coding.out create mode 100644 src/test/regress/sql/hex_coding.sql diff --git a/src/test/regress/expected/hex_coding.out b/src/test/regress/expected/hex_coding.out new file mode 100644 index 000000000000..e6d78fa4876d --- /dev/null +++ b/src/test/regress/expected/hex_coding.out @@ -0,0 +1,63 @@ +-- +-- tests for hex_encode and hex_decode in encode.c +-- +-- Build table for testing +CREATE TABLE BYTEA_TABLE(data BYTEA); +-- hex_decode is used for inserting into bytea column +-- Set bytea_output to hex so that hex_encode is used and tested +SET bytea_output = 'hex'; +INSERT INTO BYTEA_TABLE VALUES ('\xAB'); +INSERT INTO BYTEA_TABLE VALUES ('\x01ab'); +INSERT INTO BYTEA_TABLE VALUES ('\xDEADC0DE'); +INSERT INTO BYTEA_TABLE VALUES ('\xbaadf00d'); +INSERT INTO BYTEA_TABLE VALUES ('\x C001 c0ffee '); -- hex string with whitespaces +-- errors checking +INSERT INTO BYTEA_TABLE VALUES ('\xbadf00d'); -- odd number of hex digits +ERROR: invalid hexadecimal data: odd number of digits +LINE 1: INSERT INTO BYTEA_TABLE VALUES ('\xbadf00d'); + ^ +INSERT INTO BYTEA_TABLE VALUES ('\xdeadcode'); -- invalid hexadecimal digit: "o" +ERROR: invalid hexadecimal digit: "o" +LINE 1: INSERT INTO BYTEA_TABLE VALUES ('\xdeadcode'); + ^ +INSERT INTO BYTEA_TABLE VALUES ('\xC00LC0FFEE'); -- invalid hexadecimal digit: "L" +ERROR: invalid hexadecimal digit: "L" +LINE 1: INSERT INTO BYTEA_TABLE VALUES ('\xC00LC0FFEE'); + ^ +INSERT INTO BYTEA_TABLE VALUES ('\xC00LC*DE'); -- invalid hexadecimal digit: "*" +ERROR: invalid hexadecimal digit: "L" +LINE 1: INSERT INTO 
BYTEA_TABLE VALUES ('\xC00LC*DE'); + ^ +INSERT INTO BYTEA_TABLE VALUES ('\xbad f00d'); -- invalid hexadecimal digit: " " +ERROR: invalid hexadecimal digit: " " +LINE 1: INSERT INTO BYTEA_TABLE VALUES ('\xbad f00d'); + ^ +-- long hex strings to test SIMD implementation +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8))::bytea; +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8) || repeat('baadf00d', 8))::bytea; +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8) || ' ' || repeat('baad f00d', 8))::bytea; -- hex string with whitespaces +-- errors checking for SIMD implementation +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 4) || 'badf00d' || repeat('DEADC0DE', 4))::bytea; -- odd number of hex digits +ERROR: invalid hexadecimal data: odd number of digits +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 4) || 'baadfood'|| repeat('DEADC0DE', 4))::bytea; -- invalid hexadecimal digit: "o" +ERROR: invalid hexadecimal digit: "o" +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 4) || 'C00LC0FFEE' || repeat('DEADC0DE', 4))::bytea; -- invalid hexadecimal digit: "L" +ERROR: invalid hexadecimal digit: "L" +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8) || 'C00LC*DE' || repeat('DEADC0DE', 4))::bytea; -- invalid hexadecimal digit: "*" +ERROR: invalid hexadecimal digit: "L" +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8) || 'bad f00d' || repeat('DEADC0DE', 4))::bytea; -- invalid hexadecimal digit: " " +ERROR: invalid hexadecimal digit: " " +SELECT encode(data, 'hex') FROM BYTEA_TABLE; + encode +---------------------------------------------------------------------------------------------------------------------------------- + ab + 01ab + deadc0de + baadf00d + c001c0ffee + deadc0dedeadc0dedeadc0dedeadc0dedeadc0dedeadc0dedeadc0dedeadc0de + deadc0dedeadc0dedeadc0dedeadc0dedeadc0dedeadc0dedeadc0dedeadc0debaadf00dbaadf00dbaadf00dbaadf00dbaadf00dbaadf00dbaadf00dbaadf00d + 
deadc0dedeadc0dedeadc0dedeadc0dedeadc0dedeadc0dedeadc0dedeadc0debaadf00dbaadf00dbaadf00dbaadf00dbaadf00dbaadf00dbaadf00dbaadf00d +(8 rows) + +DROP TABLE BYTEA_TABLE; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index fbffc67ae601..876a3988ed07 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -109,6 +109,11 @@ test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combo # ---------- test: json jsonb json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson sqljson_queryfuncs sqljson_jsontable +# ---------- +# Another group of parallel tests for hex encode/decode +# ---------- +test: hex_coding + # ---------- # Another group of parallel tests # with depends on create_misc diff --git a/src/test/regress/sql/hex_coding.sql b/src/test/regress/sql/hex_coding.sql new file mode 100644 index 000000000000..97c51b62e907 --- /dev/null +++ b/src/test/regress/sql/hex_coding.sql @@ -0,0 +1,39 @@ +-- +-- tests for hex_encode and hex_decode in encode.c +-- + +-- Build table for testing +CREATE TABLE BYTEA_TABLE(data BYTEA); + +-- hex_decode is used for inserting into bytea column +-- Set bytea_output to hex so that hex_encode is used and tested +SET bytea_output = 'hex'; + +INSERT INTO BYTEA_TABLE VALUES ('\xAB'); +INSERT INTO BYTEA_TABLE VALUES ('\x01ab'); +INSERT INTO BYTEA_TABLE VALUES ('\xDEADC0DE'); +INSERT INTO BYTEA_TABLE VALUES ('\xbaadf00d'); +INSERT INTO BYTEA_TABLE VALUES ('\x C001 c0ffee '); -- hex string with whitespaces + +-- errors checking +INSERT INTO BYTEA_TABLE VALUES ('\xbadf00d'); -- odd number of hex digits +INSERT INTO BYTEA_TABLE VALUES ('\xdeadcode'); -- invalid hexadecimal digit: "o" +INSERT INTO BYTEA_TABLE VALUES ('\xC00LC0FFEE'); -- invalid hexadecimal digit: "L" +INSERT INTO BYTEA_TABLE VALUES ('\xC00LC*DE'); -- invalid hexadecimal digit: "*" +INSERT INTO BYTEA_TABLE VALUES ('\xbad f00d'); -- invalid hexadecimal digit: " " + +-- long hex strings 
to test SIMD implementation +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8))::bytea; +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8) || repeat('baadf00d', 8))::bytea; +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8) || ' ' || repeat('baad f00d', 8))::bytea; -- hex string with whitespaces + +-- errors checking for SIMD implementation +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 4) || 'badf00d' || repeat('DEADC0DE', 4))::bytea; -- odd number of hex digits +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 4) || 'baadfood'|| repeat('DEADC0DE', 4))::bytea; -- invalid hexadecimal digit: "o" +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 4) || 'C00LC0FFEE' || repeat('DEADC0DE', 4))::bytea; -- invalid hexadecimal digit: "L" +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8) || 'C00LC*DE' || repeat('DEADC0DE', 4))::bytea; -- invalid hexadecimal digit: "*" +INSERT INTO BYTEA_TABLE SELECT ('\x' || repeat('DEADC0DE', 8) || 'bad f00d' || repeat('DEADC0DE', 4))::bytea; -- invalid hexadecimal digit: " " + +SELECT encode(data, 'hex') FROM BYTEA_TABLE; + +DROP TABLE BYTEA_TABLE; From f84509b9b88c273c475cd66db2e99eda50e0dcb5 Mon Sep 17 00:00:00 2001 From: Chiranmoy Bhattacharya Date: Sun, 6 Jul 2025 19:35:46 +0530 Subject: [PATCH 2/2] SVE support for hex coding --- config/c-compiler.m4 | 85 ++++++++ configure | 104 +++++++++ configure.ac | 9 + meson.build | 81 +++++++ src/backend/utils/adt/Makefile | 1 + src/backend/utils/adt/encode.c | 6 +- src/backend/utils/adt/encode_aarch64.c | 280 +++++++++++++++++++++++++ src/backend/utils/adt/meson.build | 1 + src/include/pg_config.h.in | 3 + src/include/utils/builtins.h | 51 ++++- 10 files changed, 615 insertions(+), 6 deletions(-) create mode 100644 src/backend/utils/adt/encode_aarch64.c diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index da40bd6a6475..73d128266982 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -798,3 
+798,88 @@ if test x"$Ac_cachevar" = x"yes"; then fi undefine([Ac_cachevar])dnl ])# PGAC_SVE_POPCNT_INTRINSICS + +# PGAC_ARM_SVE_HEX_INTRINSICS +# ------------------------------ +# Check if the compiler supports the SVE intrinsic required for hex coding: +# svsub_x, svcmplt, svsel, svcmpgt, svtbl, svlsr_x, svand_z, svcreate2, +# svptest_any, svnot_z, svorr_z, svcntb, svld1, svwhilelt_b8, svst2, svld2, +# svget2, svst1 and svlsl_x. +# +# If the intrinsics are supported, sets pgac_arm_sve_hex_intrinsics. +AC_DEFUN([PGAC_ARM_SVE_HEX_INTRINSICS], +[define([Ac_cachevar], [AS_TR_SH([pgac_cv_arm_sve_hex_intrinsics])])dnl +AC_CACHE_CHECK([for svtbl, svlsr_x, svand_z, svcreate2, etc], [Ac_cachevar], +[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_sve.h> + + char input@<:@64@:>@; + char output@<:@128@:>@; + + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("arch=armv8-a+sve"))) + #endif + int get_hex_sve(svbool_t pred, svuint8_t vec, svuint8_t *res) + { + svuint8_t digit = svsub_x(pred, vec, 48), + upper = svsub_x(pred, vec, 55), + lower = svsub_x(pred, vec, 87); + svbool_t valid_digit = svcmplt(pred, digit, 10), + valid_upper = svcmplt(pred, upper, 16); + svuint8_t letter = svsel(valid_upper, upper, lower); + svbool_t valid_letter = svand_z(pred, svcmpgt(pred, letter, 9), + svcmplt(pred, letter, 16)); + if (svptest_any(pred, svnot_z(pred, svorr_z(pred, valid_digit, valid_letter)))) + return 0; + *res = svsel(valid_digit, digit, letter); + return 1; + } + + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("arch=armv8-a+sve"))) + #endif + static int hex_coding_test(void) + { + int len = 64, vec_len = svcntb(), vec_len_x2 = svcntb() * 2; + const char *hextbl = "0123456789abcdef"; + svuint8_t hextbl_vec = svld1(svwhilelt_b8(0, 16), (uint8_t *) hextbl); + char *src = input, *dst = output; + + /* hex encode */ + for (uint64_t i = 0; i < 64; i += vec_len, dst += 2 * vec_len, src += vec_len) + { + svbool_t pred = 
svwhilelt_b8((uint64_t) i, (uint64_t) len); + svuint8_t bytes = svld1(pred, (uint8_t *) src), + high = svlsr_x(pred, bytes, 4), + low = svand_z(pred, bytes, 0xF); + svuint8x2_t merged = svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low)); + svst2(pred, (uint8_t *) dst, merged); + } + + /* hex decode */ + len = 128; + + for (int i = 0; i < len; i += vec_len_x2) + { + svbool_t pred = svwhilelt_b8((uint64_t) i / 2, (uint64_t) len / 2); + svuint8x2_t bytes = svld2(pred, (uint8_t *) src + i); + svuint8_t high = svget2(bytes, 0), low = svget2(bytes, 1); + + if (svptest_any(pred, svorr_z(pred, svcmplt(pred, high, '0'), svcmplt(pred, low, '0')))) + break; + if (!get_hex_sve(pred, high, &high) || !get_hex_sve(pred, low, &low)) + break; + + svst1(pred, (uint8_t *) dst + i / 2, svorr_z(pred, svlsl_x(pred, high, 4), low)); + } + + /* return computed value, to prevent the above being optimized away */ + return output@<:@0@:>@; + }], + [return hex_coding_test();])], + [Ac_cachevar=yes], + [Ac_cachevar=no])]) +if test x"$Ac_cachevar" = x"yes"; then + pgac_arm_sve_hex_intrinsics=yes +fi +undefine([Ac_cachevar])dnl +])# PGAC_ARM_SVE_HEX_INTRINSICS diff --git a/configure b/configure index 39c68161cece..60354107f874 100755 --- a/configure +++ b/configure @@ -17735,6 +17735,110 @@ $as_echo "#define USE_SVE_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h fi fi +# Check for ARM SVE intrinsics for hex coding +# +if test x"$host_cpu" = x"aarch64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for svtbl, svlsr_x, svand_z, svcreate2, etc" >&5 +$as_echo_n "checking for svtbl, svlsr_x, svand_z, svcreate2, etc... " >&6; } +if ${pgac_cv_arm_sve_hex_intrinsics+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <arm_sve.h> + + char input[64]; + char output[128]; + + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("arch=armv8-a+sve"))) + #endif + int get_hex_sve(svbool_t pred, svuint8_t vec, svuint8_t *res) + { + svuint8_t digit = svsub_x(pred, vec, 48), + upper = svsub_x(pred, vec, 55), + lower = svsub_x(pred, vec, 87); + svbool_t valid_digit = svcmplt(pred, digit, 10), + valid_upper = svcmplt(pred, upper, 16); + svuint8_t letter = svsel(valid_upper, upper, lower); + svbool_t valid_letter = svand_z(pred, svcmpgt(pred, letter, 9), + svcmplt(pred, letter, 16)); + if (svptest_any(pred, svnot_z(pred, svorr_z(pred, valid_digit, valid_letter)))) + return 0; + *res = svsel(valid_digit, digit, letter); + return 1; + } + + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("arch=armv8-a+sve"))) + #endif + static int hex_coding_test(void) + { + int len = 64, vec_len = svcntb(), vec_len_x2 = svcntb() * 2; + const char *hextbl = "0123456789abcdef"; + svuint8_t hextbl_vec = svld1(svwhilelt_b8(0, 16), (uint8_t *) hextbl); + char *src = input, *dst = output; + + /* hex encode */ + for (uint64_t i = 0; i < 64; i += vec_len, dst += 2 * vec_len, src += vec_len) + { + svbool_t pred = svwhilelt_b8((uint64_t) i, (uint64_t) len); + svuint8_t bytes = svld1(pred, (uint8_t *) src), + high = svlsr_x(pred, bytes, 4), + low = svand_z(pred, bytes, 0xF); + svuint8x2_t merged = svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low)); + svst2(pred, (uint8_t *) dst, merged); + } + + /* hex decode */ + len = 128; + + for (int i = 0; i < len; i += vec_len_x2) + { + svbool_t pred = svwhilelt_b8((uint64_t) i / 2, (uint64_t) len / 2); + svuint8x2_t bytes = svld2(pred, (uint8_t *) src + i); + svuint8_t high = svget2(bytes, 0), low = svget2(bytes, 1); + + if (svptest_any(pred, svorr_z(pred, svcmplt(pred, high, '0'), svcmplt(pred, low, '0')))) + break; + if (!get_hex_sve(pred, high, &high) || !get_hex_sve(pred, low, &low)) + break; + + 
svst1(pred, 
(uint8_t *) dst + i / 2, svorr_z(pred, svlsl_x(pred, high, 4), low)); + } + + /* return computed value, to prevent the above being optimized away */ + return output[0]; + } +int +main () +{ +return hex_coding_test(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv_arm_sve_hex_intrinsics=yes +else + pgac_cv_arm_sve_hex_intrinsics=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_arm_sve_hex_intrinsics" >&5 +$as_echo "$pgac_cv_arm_sve_hex_intrinsics" >&6; } +if test x"$pgac_cv_arm_sve_hex_intrinsics" = x"yes"; then + pgac_arm_sve_hex_intrinsics=yes +fi + + if test x"$pgac_arm_sve_hex_intrinsics" = x"yes"; then + +$as_echo "#define USE_SVE_HEX_WITH_RUNTIME_CHECK 1" >>confdefs.h + + fi +fi + # Check for Intel SSE 4.2 intrinsics to do CRC calculations. # { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u32" >&5 diff --git a/configure.ac b/configure.ac index 066e3976c0aa..6ca57b8c4a79 100644 --- a/configure.ac +++ b/configure.ac @@ -2136,6 +2136,15 @@ if test x"$host_cpu" = x"aarch64"; then fi fi +# Check for ARM SVE intrinsics for hex coding +# +if test x"$host_cpu" = x"aarch64"; then + PGAC_ARM_SVE_HEX_INTRINSICS() + if test x"$pgac_arm_sve_hex_intrinsics" = x"yes"; then + AC_DEFINE(USE_SVE_HEX_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARM SVE intrinsic for hex coding.]) + fi +fi + # Check for Intel SSE 4.2 intrinsics to do CRC calculations. # PGAC_SSE42_CRC32_INTRINSICS() diff --git a/meson.build b/meson.build index ab8101d67b26..9a1c9103006a 100644 --- a/meson.build +++ b/meson.build @@ -2372,6 +2372,87 @@ int main(void) endif +############################################################### +# Check the availability of SVE intrinsics for hex coding. 
+############################################################### + +if host_cpu == 'aarch64' + + prog = ''' +#include <arm_sve.h> + +char input[64]; +char output[128]; + +#if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("arch=armv8-a+sve"))) +#endif +int get_hex_sve(svbool_t pred, svuint8_t vec, svuint8_t *res) +{ + svuint8_t digit = svsub_x(pred, vec, 48), + upper = svsub_x(pred, vec, 55), + lower = svsub_x(pred, vec, 87); + svbool_t valid_digit = svcmplt(pred, digit, 10), + valid_upper = svcmplt(pred, upper, 16); + svuint8_t letter = svsel(valid_upper, upper, lower); + svbool_t valid_letter = svand_z(pred, svcmpgt(pred, letter, 9), + svcmplt(pred, letter, 16)); + if (svptest_any(pred, svnot_z(pred, svorr_z(pred, valid_digit, valid_letter)))) + return 0; + *res = svsel(valid_digit, digit, letter); + return 1; +} + +#if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("arch=armv8-a+sve"))) +#endif +int main(void) +{ + int len = 64, vec_len = svcntb(), vec_len_x2 = svcntb() * 2; + const char hextbl[] = "0123456789abcdef"; + svuint8_t hextbl_vec = svld1(svwhilelt_b8(0, 16), (uint8_t *) hextbl); + char *src = input, *dst = output; + + /* hex encode */ + for (uint64_t i = 0; i < 64; i += vec_len, dst += 2 * vec_len, src += vec_len) + { + svbool_t pred = svwhilelt_b8((uint64_t) i, (uint64_t) len); + svuint8_t bytes = svld1(pred, (uint8_t *) src), + high = svlsr_x(pred, bytes, 4), + low = svand_z(pred, bytes, 0xF); + svuint8x2_t merged = svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low)); + svst2(pred, (uint8_t *) dst, merged); + } + + /* hex decode */ + len = 128; + + for (int i = 0; i < len; i += vec_len_x2) + { + svbool_t pred = svwhilelt_b8((uint64_t) i / 2, (uint64_t) len / 2); + svuint8x2_t bytes = svld2(pred, (uint8_t *) src + i); + svuint8_t high = svget2(bytes, 0), low = svget2(bytes, 1); + + if (svptest_any(pred, svorr_z(pred, svcmplt(pred, high, '0'), svcmplt(pred, low, '0')))) + break; 
+ if 
(!get_hex_sve(pred, high, &high) || !get_hex_sve(pred, low, &low)) + break; + + svst1(pred, (uint8_t *) dst + i / 2, svorr_z(pred, svlsl_x(pred, high, 4), low)); + } + + /* return computed value, to prevent the above being optimized away */ + return output[0]; +} +''' + + if cc.links(prog, name: 'SVE hex coding', args: test_c_args) + cdata.set('USE_SVE_HEX_WITH_RUNTIME_CHECK', 1) + endif + +endif + + ############################################################### # Select CRC-32C implementation. # diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index cc68ac545a5f..40eaee14899a 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -33,6 +33,7 @@ OBJS = \ dbsize.o \ domains.o \ encode.o \ + encode_aarch64.o \ enum.o \ expandeddatum.o \ expandedrecord.o \ diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 4ccaed815d17..fa62ce3107da 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -178,7 +178,7 @@ static const int8 hexlookup[128] = { }; uint64 -hex_encode(const char *src, size_t len, char *dst) +hex_encode_scalar(const char *src, size_t len, char *dst) { const char *end = src + len; @@ -208,13 +208,13 @@ get_hex(const char *cp, char *out) } uint64 -hex_decode(const char *src, size_t len, char *dst) +hex_decode_scalar(const char *src, size_t len, char *dst) { - return hex_decode_safe(src, len, dst, NULL); + return hex_decode_safe_scalar(src, len, dst, NULL); } uint64 -hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) +hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext) { const char *s, *srcend; diff --git a/src/backend/utils/adt/encode_aarch64.c b/src/backend/utils/adt/encode_aarch64.c new file mode 100644 index 000000000000..bf8157900f88 --- /dev/null +++ b/src/backend/utils/adt/encode_aarch64.c @@ -0,0 +1,280 @@ 
+/*------------------------------------------------------------------------- + * + * encode_aarch64.c + * Holds the SVE hex encode/decode 
implementations. + * + * Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/encode_aarch64.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "utils/builtins.h" + +#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK +#include + +#if defined(HAVE_ELF_AUX_INFO) || defined(HAVE_GETAUXVAL) +#include +#endif + +/* + * These are the SVE implementations of the hex encode/decode functions. + */ +static uint64 hex_encode_sve(const char *src, size_t len, char *dst); +static uint64 hex_decode_sve(const char *src, size_t len, char *dst); +static uint64 hex_decode_safe_sve(const char *src, size_t len, char *dst, Node *escontext); + +/* + * The function pointers are initially set to "choose" functions. These + * functions will first set the pointers to the right implementations (based on + * what the current CPU supports) and then will call the pointer to fulfill the + * caller's request. 
+ */ + +static uint64 hex_encode_choose(const char *src, size_t len, char *dst); +static uint64 hex_decode_choose(const char *src, size_t len, char *dst); +static uint64 hex_decode_safe_choose(const char *src, size_t len, char *dst, Node *escontext); +uint64 (*hex_encode_optimized) (const char *src, size_t len, char *dst) = hex_encode_choose; +uint64 (*hex_decode_optimized) (const char *src, size_t len, char *dst) = hex_decode_choose; +uint64 (*hex_decode_safe_optimized) (const char *src, size_t len, char *dst, Node *escontext) = hex_decode_safe_choose; + +static inline bool +check_sve_support(void) +{ +#ifdef HAVE_ELF_AUX_INFO + unsigned long value; + + return elf_aux_info(AT_HWCAP, &value, sizeof(value)) == 0 && + (value & HWCAP_SVE) != 0; +#elif defined(HAVE_GETAUXVAL) + return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0; +#else + return false; +#endif +} + +static inline void +choose_hex_functions(void) +{ + if (check_sve_support()) + { + hex_encode_optimized = hex_encode_sve; + hex_decode_optimized = hex_decode_sve; + hex_decode_safe_optimized = hex_decode_safe_sve; + } + else + { + hex_encode_optimized = hex_encode_scalar; + hex_decode_optimized = hex_decode_scalar; + hex_decode_safe_optimized = hex_decode_safe_scalar; + } +} + +static uint64 +hex_encode_choose(const char *src, size_t len, char *dst) +{ + choose_hex_functions(); + return hex_encode_optimized(src, len, dst); +} +static uint64 +hex_decode_choose(const char *src, size_t len, char *dst) +{ + choose_hex_functions(); + return hex_decode_optimized(src, len, dst); +} +static uint64 +hex_decode_safe_choose(const char *src, size_t len, char *dst, Node *escontext) +{ + choose_hex_functions(); + return hex_decode_safe_optimized(src, len, dst, escontext); +} + +pg_attribute_target("arch=armv8-a+sve") +uint64 +hex_encode_sve(const char *src, size_t len, char *dst) +{ + const char hextbl[] = "0123456789abcdef"; + uint32 vec_len = svcntb(); + svuint8_t hextbl_vec = svld1(svwhilelt_b8(0, 16), (uint8 *) hextbl); + 
svbool_t pred = svptrue_b8(); + size_t loop_bytes = len & ~((size_t) 2 * vec_len - 1); /* process 2 * vec_len byte chunk each iteration; mask is computed in size_t so the upper bits of len are kept */ + svuint8_t bytes, high, low; + svuint8x2_t zipped; + + for (size_t i = 0; i < loop_bytes; i += 2 * vec_len) + { + bytes = svld1(pred, (uint8 *) src); + + /* Right-shift to obtain the high nibble */ + high = svlsr_x(pred, bytes, 4); + + /* Mask the high nibble to obtain the low nibble */ + low = svand_z(pred, bytes, 0xF); + + /* + * Convert the high and low nibbles to hexadecimal digits using a + * vectorized table lookup and zip (interleave) the hexadecimal digits. + */ + zipped = svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low)); + svst2(pred, (uint8 *) dst, zipped); + + dst += 2 * vec_len; + src += vec_len; + + /* unrolled */ + bytes = svld1(pred, (uint8 *) src); + high = svlsr_x(pred, bytes, 4); + low = svand_z(pred, bytes, 0xF); + + zipped = svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low)); + svst2(pred, (uint8 *) dst, zipped); + + dst += 2 * vec_len; + src += vec_len; + } + + /* process remaining tail bytes */ + for (size_t i = loop_bytes; i < len; i += vec_len) + { + pred = svwhilelt_b8((uint64) i, (uint64) len); + bytes = svld1(pred, (uint8 *) src); + high = svlsr_x(pred, bytes, 4); + low = svand_z(pred, bytes, 0xF); + + zipped = svcreate2(svtbl(hextbl_vec, high), svtbl(hextbl_vec, low)); + svst2(pred, (uint8 *) dst, zipped); + + dst += 2 * vec_len; + src += vec_len; + } + + return (uint64) len * 2; +} + +/* + * get_hex_sve + * Returns true if the hexadecimal digits are successfully converted + * to nibbles and stored in 'res'; otherwise, returns false. + */ +pg_attribute_target("arch=armv8-a+sve") +static inline bool +get_hex_sve(svbool_t pred, svuint8_t vec, svuint8_t *res) +{ + /* + * Convert ASCII of '0'-'9' to integers 0-9 by subtracting 48 (ASCII of '0'). 
+ * Similarly, convert letters 'A'–'F' and 'a'–'f' to integers 10–15 by + * subtracting 55 ('A' - 10) and 87 ('a' - 10). 
+ */ + svuint8_t digit = svsub_x(pred, vec, '0'), + upper = svsub_x(pred, vec, 'A' - 10), + lower = svsub_x(pred, vec, 'a' - 10); + + /* + * Identify valid values in digits, upper, and lower vectors. + * Values 0-9 are valid in digits, while values 10-15 are valid + * in upper and lower. + * + * Example: + * vec: '0' '9' 'A' 'F' 'a' 'f' + * vec (in ASCII): 48 57 65 70 97 102 + * + * digit: 0 9 17 22 49 54 + * valid_digit: 1 1 0 0 0 0 + * + * upper: 249 2 10 15 42 47 + * valid_upper: 0 1 1 1 0 0 + * + * lower: 217 226 234 239 10 15 + * + * Note that values 0-9 are also marked valid in valid_upper, this will be + * handled later. + */ + svbool_t valid_digit = svcmplt(pred, digit, 10), + valid_upper = svcmplt(pred, upper, 16); + + /* + * Merge upper and lower vector using the logic: take the element from + * upper if it's true in valid_upper else pick the element in lower + * + * Mark the valid range i.e. 10-15 in letter vector + * + * letter: 217 2 10 15 10 15 + * valid_letter: 0 0 1 1 1 1 + */ + + svuint8_t letter = svsel(valid_upper, upper, lower); + svbool_t valid_letter = svand_z(pred, svcmpgt(pred, letter, 9), + svcmplt(pred, letter, 16)); + + /* + * Check for invalid hexadecimal digit. Each value must fall within + * the range 0-9 (true in valid_digit) or 10-15 (true in valid_letter) i.e. + * the OR of valid_digit and valid_letter should be all true. + */ + + if (svptest_any(pred, svnot_z(pred, svorr_z(pred, valid_digit, valid_letter)))) + return false; + + /* + * Finally, combine digit and letter vectors using the logic: + * take the element from digit if it's true in valid_digit else pick the + * element in letter. 
+ * + * res: 0 9 10 15 10 15 + */ + + *res = svsel(valid_digit, digit, letter); + return true; +} + +uint64 +hex_decode_sve(const char *src, size_t len, char *dst) +{ + return hex_decode_safe_sve(src, len, dst, NULL); +} + +pg_attribute_target("arch=armv8-a+sve") +uint64 +hex_decode_safe_sve(const char *src, size_t len, char *dst, Node *escontext) +{ + uint32 vec_len = svcntb(); + size_t i = 0, + loop_bytes = len & ~((size_t) 2 * vec_len - 1); /* process 2 * vec_len byte chunk each iteration; mask is computed in size_t so the upper bits of len are kept */ + svbool_t pred = svptrue_b8(); + const char *p = dst; + + while (i < loop_bytes) + { + svuint8x2_t bytes = svld2(pred, (uint8 *) src); + svuint8_t high = svget2(bytes, 0), + low = svget2(bytes, 1); + + /* fallback for characters with ASCII values below '0' */ + if (svptest_any(pred, svorr_z(pred, svcmplt(pred, high, '0'), svcmplt(pred, low, '0')))) + break; + + /* fallback if an invalid hexadecimal digit is found */ + if (!get_hex_sve(pred, high, &high) || !get_hex_sve(pred, low, &low)) + break; + + /* form the byte by left-shifting the high nibble and OR-ing it with the low nibble */ + svst1(pred, (uint8 *) dst, svorr_z(pred, svlsl_x(pred, high, 4), low)); + + i += 2 * vec_len; + src += 2 * vec_len; + dst += vec_len; + } + + if (len > i) /* fallback */ + return dst - p + hex_decode_safe_scalar(src, len - i, dst, escontext); + + return dst - p; +} + +#endif /* USE_SVE_HEX_WITH_RUNTIME_CHECK */ diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index dac372c3bea3..8b106d03d336 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -22,6 +22,7 @@ backend_sources += files( 'dbsize.c', 'domains.c', 'encode.c', + 'encode_aarch64.c', 'enum.c', 'expandeddatum.c', 'expandedrecord.c', diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index c4dc5d72bdb7..a6735bdd21f8 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ 
-678,6 +678,9 @@ /* Define to 1 to use AVX-512 popcount 
instructions with a runtime check. */ #undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK +/* Define to 1 to use SVE instructions for hex coding with a runtime check. */ +#undef USE_SVE_HEX_WITH_RUNTIME_CHECK + /* Define to 1 to build with Bonjour support. (--with-bonjour) */ #undef USE_BONJOUR diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 1c98c7d2255c..2f72d8df9d12 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -35,11 +35,56 @@ extern int errdatatype(Oid datatypeOid); extern int errdomainconstraint(Oid datatypeOid, const char *conname); /* encode.c */ -extern uint64 hex_encode(const char *src, size_t len, char *dst); -extern uint64 hex_decode(const char *src, size_t len, char *dst); -extern uint64 hex_decode_safe(const char *src, size_t len, char *dst, +extern uint64 hex_encode_scalar(const char *src, size_t len, char *dst); +extern uint64 hex_decode_scalar(const char *src, size_t len, char *dst); +extern uint64 hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext); +/* + * On AArch64, we can try to use an SVE optimized hex encode/decode on some systems. 
+ */ +#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK +extern PGDLLIMPORT uint64 (*hex_encode_optimized) (const char *src, size_t len, char *dst); +extern PGDLLIMPORT uint64 (*hex_decode_optimized) (const char *src, size_t len, char *dst); +extern PGDLLIMPORT uint64 (*hex_decode_safe_optimized) (const char *src, size_t len, char *dst, Node *escontext); +#endif + +static inline uint64 +hex_encode(const char *src, size_t len, char *dst) +{ +#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK + const size_t threshold = 16; /* shortest input routed to the optimized implementation */ + + if (len >= threshold) + return hex_encode_optimized(src, len, dst); +#endif + return hex_encode_scalar(src, len, dst); +} + +static inline uint64 +hex_decode(const char *src, size_t len, char *dst) +{ +#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK + const size_t threshold = 32; /* shortest input routed to the optimized implementation */ + + if (len >= threshold) + return hex_decode_optimized(src, len, dst); +#endif + return hex_decode_scalar(src, len, dst); +} + +static inline uint64 +hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) +{ +#ifdef USE_SVE_HEX_WITH_RUNTIME_CHECK + const size_t threshold = 32; /* shortest input routed to the optimized implementation */ + + if (len >= threshold) + return hex_decode_safe_optimized(src, len, dst, escontext); +#endif + return hex_decode_safe_scalar(src, len, dst, escontext); +} + /* int.c */ extern int2vector *buildint2vector(const int16 *int2s, int n);