Skip to content

Instantly share code, notes, and snippets.

@nielsdos
Last active February 3, 2025 19:02
Show Gist options
  • Save nielsdos/3b42ffaa4476bb5cb7eb498935072312 to your computer and use it in GitHub Desktop.
diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c
index 53dd4cae257..2fd5b4ed618 100644
--- a/ext/json/json_encoder.c
+++ b/ext/json/json_encoder.c
@@ -30,6 +30,10 @@
#include "zend_property_hooks.h"
#include "zend_lazy_objects.h"
+#include <nmmintrin.h>
+# pragma GCC push_options
+# pragma GCC target ("sse4.2")
+
static const char digits[] = "0123456789abcdef";
static zend_always_inline bool php_json_check_stack_limit(void)
@@ -408,21 +412,65 @@ zend_result php_json_escape_string(
0xffffffff, 0x500080c4, 0x10000000, 0x00000000,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff};
+ while (len >= 16) {
+ const __m128i input = _mm_loadu_si128((__m128i *) (s + pos));
+ const __m128i input_range = _mm_cmpgt_epi8(input, _mm_set1_epi8(31));
+
+ int input_range_mask = _mm_movemask_epi8(input_range);
+ if (input_range_mask != 0xffff) {
+ int shift = __builtin_clz(~input_range_mask);
+ pos += shift;
+ len -= shift;
+ break;
+ }
+
+#if 0
+ const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8(34));
+ const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8(38));
+ const __m128i result_39 = _mm_cmpeq_epi8(input, _mm_set1_epi8(39));
+ const __m128i result_47 = _mm_cmpeq_epi8(input, _mm_set1_epi8(47));
+ const __m128i result_60 = _mm_cmpeq_epi8(input, _mm_set1_epi8(60));
+ const __m128i result_62 = _mm_cmpeq_epi8(input, _mm_set1_epi8(62));
+ const __m128i result_92 = _mm_cmpeq_epi8(input, _mm_set1_epi8(92));
+
+ const __m128i result_34_38 = _mm_or_si128(result_34, result_38);
+ const __m128i result_39_47 = _mm_or_si128(result_39, result_47);
+ const __m128i result_60_62 = _mm_or_si128(result_60, result_62);
+
+ const __m128i result_34_38_39_47 = _mm_or_si128(result_34_38, result_39_47);
+ const __m128i result_60_62_92 = _mm_or_si128(result_60_62, result_92);
+
+ const __m128i result_individual_bytes = _mm_or_si128(result_34_38_39_47, result_60_62_92);
+ int mask = _mm_movemask_epi8(result_individual_bytes);
+#else
+ const __m128i result_individual_bytes = _mm_cmpistrm(_mm_setr_epi8(34, 38, 39, 47, 60, 62, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0), input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
+ int mask = _mm_cvtsi128_si32(result_individual_bytes);
+#endif
+ if (mask != 0) {
+ int shift = __builtin_ctz(mask);
+ pos += shift;
+ len -= shift;
+ break;
+ }
+
+ len -= 16;
+ pos += 16;
+ }
+
+ if (!len) {
+ break;
+ }
+
us = (unsigned char)s[pos];
if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) {
pos++;
len--;
- if (len == 0) {
- smart_str_appendl(buf, s, pos);
- break;
- }
} else {
if (pos) {
smart_str_appendl(buf, s, pos);
s += pos;
pos = 0;
}
- us = (unsigned char)s[0];
if (UNEXPECTED(us >= 0x80)) {
zend_result status;
us = php_next_utf8_char((unsigned char *)s, len, &pos, &status);
@@ -573,6 +621,10 @@ zend_result php_json_escape_string(
}
} while (len);
+ if (pos) {
+ smart_str_appendl(buf, s, pos);
+ }
+
smart_str_appendc(buf, '"');
return SUCCESS;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment