From c3d76103a6d94d137a2a35cec7c2578aed185078 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 7 Feb 2025 21:02:54 +0100
Subject: [PATCH 1/2] Split out a specialized function to decode multibyte
 UTF-8 sequences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Decoding purely multibyte UTF-8 is common, for example in the case of
JSON. Furthermore, we want to avoid the switch on the character set in
such hot code. Finally, we also add UNEXPECTED markers to move code to
the cold section, which reduces pressure on the µop and instruction
caches.
---
 UPGRADING.INTERNALS |   4 ++
 ext/standard/html.c | 154 ++++++++++++++++++++++++++------------------
 ext/standard/html.h |   1 +
 3 files changed, 97 insertions(+), 62 deletions(-)

diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index 56c7535158460..fb12e448d0d91 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -61,6 +61,10 @@ PHP 8.5 INTERNALS UPGRADE NOTES
     is still valid. This is useful when a GC cycle is collected and the
     database object can be destroyed prior to destroying the statement.
 
+- ext/standard
+  . Added `php_next_utf8_char_mb()` to decode the next UTF-8 multibyte
+    codepoint (i.e. >= 2 bytes).
+
 ========================
 4. OpCode changes
 ========================
diff --git a/ext/standard/html.c b/ext/standard/html.c
index 0c6231d590d88..44b27599aab06 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -53,12 +53,16 @@
 	(all) = (all) && !CHARSET_PARTIAL_SUPPORT((charset)) && ((doctype) != ENT_HTML_DOC_XML1); \
 } while (0)
 
-#define MB_FAILURE(pos, advance) do { \
+#define MB_FAILURE_NO_STATUS(pos, advance) do { \
 	*cursor = pos + (advance); \
-	*status = FAILURE; \
 	return 0; \
 } while (0)
 
+#define MB_FAILURE(pos, advance) do { \
+	*status = FAILURE; \
+	MB_FAILURE_NO_STATUS(pos, advance); \
+} while (0)
+
 #define CHECK_LEN(pos, chars_need) ((str_len - (pos)) >= (chars_need))
 
 /* valid as single byte character or leading byte */
@@ -85,6 +89,87 @@ static char *get_default_charset(void) {
 }
 /* }}} */
 
+/* Decodes the next UTF-8 multibyte codepoint (i.e. >= 2 bytes), using `c` as the
+ * leading byte. On failure returns 0 and moves *cursor past the rejected bytes. */
+PHPAPI unsigned int php_next_utf8_char_mb(
+		const unsigned char *str,
+		unsigned char c,
+		size_t str_len,
+		size_t *cursor)
+{
+	size_t pos = *cursor;
+	unsigned int this_char = 0;
+
+	/* We'll follow strategy 2. from section 3.6.1 of UTR #36:
+	 * "In a reported illegal byte sequence, do not include any
+	 *  non-initial byte that encodes a valid character or is a leading
+	 *  byte for a valid sequence." */
+
+	ZEND_ASSERT(c >= 0x80);
+
+	if (UNEXPECTED(c < 0xc2)) {
+		MB_FAILURE_NO_STATUS(pos, 1);
+	} else if (c < 0xe0) {
+		if (UNEXPECTED(!CHECK_LEN(pos, 2)))
+			MB_FAILURE_NO_STATUS(pos, 1);
+
+		if (UNEXPECTED(!utf8_trail(str[pos + 1]))) {
+			MB_FAILURE_NO_STATUS(pos, utf8_lead(str[pos + 1]) ? 1 : 2);
+		}
+		this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
+		if (UNEXPECTED(this_char < 0x80)) { /* non-shortest form */
+			MB_FAILURE_NO_STATUS(pos, 2);
+		}
+		pos += 2;
+	} else if (c < 0xf0) {
+		size_t avail = str_len - pos;
+
+		if (UNEXPECTED(avail < 3 ||
+				!utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]))) {
+			if (avail < 2 || utf8_lead(str[pos + 1]))
+				MB_FAILURE_NO_STATUS(pos, 1);
+			else if (avail < 3 || utf8_lead(str[pos + 2]))
+				MB_FAILURE_NO_STATUS(pos, 2);
+			else
+				MB_FAILURE_NO_STATUS(pos, 3);
+		}
+
+		this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
+		if (UNEXPECTED(this_char < 0x800)) { /* non-shortest form */
+			MB_FAILURE_NO_STATUS(pos, 3);
+		} else if (UNEXPECTED(this_char >= 0xd800 && this_char <= 0xdfff)) { /* surrogate */
+			MB_FAILURE_NO_STATUS(pos, 3);
+		}
+		pos += 3;
+	} else if (c < 0xf5) {
+		size_t avail = str_len - pos;
+
+		if (UNEXPECTED(avail < 4 ||
+				!utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) ||
+				!utf8_trail(str[pos + 3]))) {
+			if (avail < 2 || utf8_lead(str[pos + 1]))
+				MB_FAILURE_NO_STATUS(pos, 1);
+			else if (avail < 3 || utf8_lead(str[pos + 2]))
+				MB_FAILURE_NO_STATUS(pos, 2);
+			else if (avail < 4 || utf8_lead(str[pos + 3]))
+				MB_FAILURE_NO_STATUS(pos, 3);
+			else
+				MB_FAILURE_NO_STATUS(pos, 4);
+		}
+
+		this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
+		if (UNEXPECTED(this_char < 0x10000 || this_char > 0x10FFFF)) { /* non-shortest form or outside range */
+			MB_FAILURE_NO_STATUS(pos, 4);
+		}
+		pos += 4;
+	} else {
+		MB_FAILURE_NO_STATUS(pos, 1);
+	}
+
+	*cursor = pos;
+	return this_char;
+}
+
 /* {{{ get_next_char */
 static inline unsigned int get_next_char(
 		enum entity_charset charset,
@@ -105,72 +190,17 @@ static inline unsigned int get_next_char(
 	switch (charset) {
 	case cs_utf_8:
 		{
-			/* We'll follow strategy 2. from section 3.6.1 of UTR #36:
-			 * "In a reported illegal byte sequence, do not include any
-			 *  non-initial byte that encodes a valid character or is a leading
-			 *  byte for a valid sequence." */
 			unsigned char c;
 			c = str[pos];
 			if (c < 0x80) {
 				this_char = c;
 				pos++;
-			} else if (c < 0xc2) {
-				MB_FAILURE(pos, 1);
-			} else if (c < 0xe0) {
-				if (!CHECK_LEN(pos, 2))
-					MB_FAILURE(pos, 1);
-
-				if (!utf8_trail(str[pos + 1])) {
-					MB_FAILURE(pos, utf8_lead(str[pos + 1]) ? 1 : 2);
-				}
-				this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
-				if (this_char < 0x80) { /* non-shortest form */
-					MB_FAILURE(pos, 2);
-				}
-				pos += 2;
-			} else if (c < 0xf0) {
-				size_t avail = str_len - pos;
-
-				if (avail < 3 ||
-						!utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2])) {
-					if (avail < 2 || utf8_lead(str[pos + 1]))
-						MB_FAILURE(pos, 1);
-					else if (avail < 3 || utf8_lead(str[pos + 2]))
-						MB_FAILURE(pos, 2);
-					else
-						MB_FAILURE(pos, 3);
-				}
-
-				this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
-				if (this_char < 0x800) { /* non-shortest form */
-					MB_FAILURE(pos, 3);
-				} else if (this_char >= 0xd800 && this_char <= 0xdfff) { /* surrogate */
-					MB_FAILURE(pos, 3);
-				}
-				pos += 3;
-			} else if (c < 0xf5) {
-				size_t avail = str_len - pos;
-
-				if (avail < 4 ||
-						!utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) ||
-						!utf8_trail(str[pos + 3])) {
-					if (avail < 2 || utf8_lead(str[pos + 1]))
-						MB_FAILURE(pos, 1);
-					else if (avail < 3 || utf8_lead(str[pos + 2]))
-						MB_FAILURE(pos, 2);
-					else if (avail < 4 || utf8_lead(str[pos + 3]))
-						MB_FAILURE(pos, 3);
-					else
-						MB_FAILURE(pos, 4);
-				}
-
-				this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
-				if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */
-					MB_FAILURE(pos, 4);
-				}
-				pos += 4;
 			} else {
-				MB_FAILURE(pos, 1);
+				this_char = php_next_utf8_char_mb(str, c, str_len, cursor);
+				if (UNEXPECTED(this_char == 0)) {
+					*status = FAILURE;
+				}
+				return this_char;
 			}
 		}
 		break;
diff --git a/ext/standard/html.h b/ext/standard/html.h
index 40c595ba5d89c..d0a713f55de54 100644
--- a/ext/standard/html.h
+++ b/ext/standard/html.h
@@ -48,5 +48,6 @@ PHPAPI zend_string *php_escape_html_entities(const unsigned char *old, size_t ol
 PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t oldlen, int all, int flags, const char *hint_charset, bool double_encode, bool quiet);
 PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int flags, const char *hint_charset);
 PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, zend_result *status);
+PHPAPI unsigned int php_next_utf8_char_mb(const unsigned char *str, unsigned char c, size_t str_len, size_t *cursor);
 
 #endif /* HTML_H */

From 860b11f21c6a3779e028fdb99c0915a41480eb93 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 7 Feb 2025 21:13:28 +0100
Subject: [PATCH 2/2] Optimize JSON string encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are a couple of optimizations that work together:
- We now use the specialized php_next_utf8_char_mb() helper function to
  avoid pressure on the µop and instruction cache.
- The encoder no longer emits UTF-8 bytes under PHP_JSON_UNESCAPED_UNICODE
  until it actually has to. Emitting them in bulk improves performance.
- Code layout tweaks
  * Use a specialized php_json_append() and assertions to avoid
    allocating the initial buffer, as this is already done upfront.
  * Factor out the call to smart_str_extend() to above the UTF-16 check
    to avoid code bloat.
- Use SIMD, either with SSE2 or SSE4.2. A resolver is used when SSE4.2
  is not configured at compile time.
---
 UPGRADING               |   3 +
 ext/json/json_encoder.c | 403 +++++++++++++++++++++++++++++++---------
 2 files changed, 317 insertions(+), 89 deletions(-)

diff --git a/UPGRADING b/UPGRADING
index 2ce2ba3c120b9..d8d9c2d341846 100644
--- a/UPGRADING
+++ b/UPGRADING
@@ -457,6 +457,9 @@ PHP 8.5 UPGRADE NOTES
 14. Performance Improvements
 ========================================
 
+- JSON:
+  . Encoding JSON strings without special characters is now faster.
+
 - ReflectionProperty:
   . Improved performance of the following methods: getValue(), getRawValue(),
     isInitialized(), setValue(), setRawValue().
diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c
index c147b8eb23d73..2e3e27f8cfc02 100644
--- a/ext/json/json_encoder.c
+++ b/ext/json/json_encoder.c
@@ -29,6 +29,260 @@
 #include "zend_enum.h"
 #include "zend_property_hooks.h"
 #include "zend_lazy_objects.h"
+#include "zend_bitset.h"
+
+#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO)
+# include <nmmintrin.h>
+#endif
+#ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO
+# include "zend_cpuinfo.h"
+#endif
+
+#ifdef __SSE2__
+# define JSON_USE_SIMD
+#endif
+
+typedef enum php_json_simd_result {
+	PHP_JSON_STOP,
+	PHP_JSON_SLOW,
+	PHP_JSON_NON_ASCII,
+} php_json_simd_result;
+
+/* Specialization of smart_str_appendl() to avoid performance loss due to code bloat */
+static zend_always_inline void php_json_append(smart_str *dest, const char *src, size_t len)
+{
+	/* dest has a minimum size of the input length,
+	 * this avoids generating initial allocation code */
+	ZEND_ASSERT(dest->s);
+
+	smart_str_appendl(dest, src, len);
+}
+
+static zend_always_inline bool php_json_printable_ascii_escape(smart_str *buf, unsigned char us, int options)
+{
+	ZEND_ASSERT(buf->s);
+
+	switch (us) {
+		case '"':
+			if (options & PHP_JSON_HEX_QUOT) {
+				php_json_append(buf, "\\u0022", 6);
+			} else {
+				php_json_append(buf, "\\\"", 2);
+			}
+			break;
+
+		case '\\':
+			php_json_append(buf, "\\\\", 2);
+			break;
+
+		case '/':
+			if (options & PHP_JSON_UNESCAPED_SLASHES) {
+				smart_str_appendc(buf, '/');
+			} else {
+				php_json_append(buf, "\\/", 2);
+			}
+			break;
+
+		case '<':
+			if (options & PHP_JSON_HEX_TAG) {
+				php_json_append(buf, "\\u003C", 6);
+			} else {
+				smart_str_appendc(buf, '<');
+			}
+			break;
+
+		case '>':
+			if (options & PHP_JSON_HEX_TAG) {
+				php_json_append(buf, "\\u003E", 6);
+			} else {
+				smart_str_appendc(buf, '>');
+			}
+			break;
+
+		case '&':
+			if (options & PHP_JSON_HEX_AMP) {
+				php_json_append(buf, "\\u0026", 6);
+			} else {
+				smart_str_appendc(buf, '&');
+			}
+			break;
+
+		case '\'':
+			if (options & PHP_JSON_HEX_APOS) {
+				php_json_append(buf, "\\u0027", 6);
+			} else {
+				smart_str_appendc(buf, '\'');
+			}
+			break;
+
+		default:
+			return false;
+	}
+
+	return true;
+}
+
+#ifdef JSON_USE_SIMD
+static zend_always_inline int php_json_sse2_compute_escape_intersection(const __m128i mask, const __m128i input)
+{
+	(void) mask;
+
+	const __m128i result_34 = _mm_cmpeq_epi8(input, _mm_set1_epi8('"'));
+	const __m128i result_38 = _mm_cmpeq_epi8(input, _mm_set1_epi8('&'));
+	const __m128i result_39 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\''));
+	const __m128i result_47 = _mm_cmpeq_epi8(input, _mm_set1_epi8('/'));
+	const __m128i result_60 = _mm_cmpeq_epi8(input, _mm_set1_epi8('<'));
+	const __m128i result_62 = _mm_cmpeq_epi8(input, _mm_set1_epi8('>'));
+	const __m128i result_92 = _mm_cmpeq_epi8(input, _mm_set1_epi8('\\'));
+
+	const __m128i result_34_38 = _mm_or_si128(result_34, result_38);
+	const __m128i result_39_47 = _mm_or_si128(result_39, result_47);
+	const __m128i result_60_62 = _mm_or_si128(result_60, result_62);
+
+	const __m128i result_34_38_39_47 = _mm_or_si128(result_34_38, result_39_47);
+	const __m128i result_60_62_92 = _mm_or_si128(result_60_62, result_92);
+
+	const __m128i result_individual_bytes = _mm_or_si128(result_34_38_39_47, result_60_62_92);
+	return _mm_movemask_epi8(result_individual_bytes);
+}
+
+#if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO)
+static const char php_json_escape_noslashes_lut[2][8][16] = {
+	/* !PHP_JSON_UNESCAPED_SLASHES */
+	{
+		[0] = {'"', '\\', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_AMP] = {'"', '\\', '&', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_APOS] = {'"', '\\', '\'', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS] = {'"', '\\', '&', '\'', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_TAG] = {'"', '\\', '<', '>', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_AMP|PHP_JSON_HEX_TAG] = {'"', '\\', '&', '<', '>', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG] = {'"', '\\', '\'', '<', '>', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG] = {'"', '\\', '&', '\'', '<', '>', '/', 0, 0, 0, 0, 0, 0, 0, 0, 0}
+	},
+
+	/* PHP_JSON_UNESCAPED_SLASHES */
+	{
+		[0] = {'"', '\\', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_AMP] = {'"', '\\', '&', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_APOS] = {'"', '\\', '\'', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS] = {'"', '\\', '&', '\'', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_TAG] = {'"', '\\', '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_AMP|PHP_JSON_HEX_TAG] = {'"', '\\', '&', '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG] = {'"', '\\', '\'', '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		[PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG] = {'"', '\\', '&', '\'', '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+	}
+};
+/* Returns the 16-byte set of characters to escape for `options`; loaded unaligned because the char LUT has no 16-byte alignment guarantee. */
+static zend_always_inline __m128i php_json_create_sse_escape_mask(int options)
+{
+	const int slashes = (options & PHP_JSON_UNESCAPED_SLASHES) ? 1 : 0; /* selects the LUT half without '/' */
+	const int masked = options & (PHP_JSON_HEX_AMP|PHP_JSON_HEX_APOS|PHP_JSON_HEX_TAG); /* row index within that half */
+	return _mm_loadu_si128((const __m128i *) php_json_escape_noslashes_lut[slashes][masked]);
+}
+
+ZEND_INTRIN_SSE4_2_FUNC_DECL(int php_json_sse42_compute_escape_intersection_real(const __m128i mask, const __m128i input));
+zend_always_inline int php_json_sse42_compute_escape_intersection_real(const __m128i mask, const __m128i input)
+{
+	const __m128i result_individual_bytes = _mm_cmpistrm(mask, input, _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
+	return _mm_cvtsi128_si32(result_individual_bytes);
+}
+#endif
+
+#ifdef ZEND_INTRIN_SSE4_2_FUNC_PROTO
+static int php_json_sse42_compute_escape_intersection(const __m128i mask, const __m128i input) __attribute__((ifunc("php_json_resolve_escape_intersection")));
+
+typedef int (*php_json_compute_escape_intersection_t)(const __m128i mask, const __m128i input);
+
+ZEND_NO_SANITIZE_ADDRESS
+ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
+static php_json_compute_escape_intersection_t php_json_resolve_escape_intersection(void) {
+	if (zend_cpu_supports_sse42()) {
+		return php_json_sse42_compute_escape_intersection_real;
+	}
+	return php_json_sse2_compute_escape_intersection;
+}
+#endif
+/* Scans 16-byte blocks at *s + *pos. Returns PHP_JSON_STOP once *len reaches 0, PHP_JSON_NON_ASCII when a block starts with a byte < 0x20 or >= 0x80, otherwise PHP_JSON_SLOW: the caller must handle the byte at (*s)[*pos] itself. */
+static zend_always_inline php_json_simd_result php_json_process_simd_block(
+	smart_str *buf,
+	const __m128i sse_escape_mask,
+	const char **restrict s,
+	size_t *restrict pos,
+	size_t *restrict len,
+	int options
+)
+{
+	while (*len >= sizeof(__m128i)) {
+		const __m128i input = _mm_loadu_si128((const __m128i *) (*s + *pos));
+		/* Signed compare against 32: flags bytes < 0x20 and, being negative as signed, bytes >= 0x80 as well */
+		const __m128i input_range = _mm_cmplt_epi8(input, _mm_set1_epi8(32));
+
+		int max_shift = sizeof(__m128i); /* count of leading bytes in this block that are printable ASCII */
+
+		int input_range_mask = _mm_movemask_epi8(input_range);
+		if (input_range_mask != 0) {
+			if (UNEXPECTED(input_range_mask & 1)) {
+				/* The very first byte of the block is flagged, so there is nothing to gain here: not worth it */
+				return PHP_JSON_NON_ASCII;
+			}
+			max_shift = zend_ulong_ntz(input_range_mask); /* index of the first flagged byte */
+		}
+
+#ifdef ZEND_INTRIN_SSE4_2_NATIVE
+		int mask = php_json_sse42_compute_escape_intersection_real(sse_escape_mask, input);
+#elif defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO)
+		int mask = php_json_sse42_compute_escape_intersection(sse_escape_mask, input);
+#else
+		int mask = php_json_sse2_compute_escape_intersection(_mm_setzero_si128(), input);
+#endif
+		if (mask != 0) {
+			if (UNEXPECTED(max_shift < sizeof(__m128i))) {
+				int shift = zend_ulong_ntz(mask); /* first character in the escape set */
+				*pos += MIN(max_shift, shift); /* skip up to whichever comes first: escape or flagged byte */
+				*len -= MIN(max_shift, shift);
+				return PHP_JSON_SLOW;
+			}
+
+			php_json_append(buf, *s, *pos); /* flush the pending run that precedes this block */
+			*s += *pos;
+			const char *s_backup = *s;
+
+			/* It's more important to keep this loop tight than to optimize this with
+			 * a trailing zero count. */
+			for (; mask; mask >>= 1, *s += 1) {
+				if (UNEXPECTED(mask & 1)) {
+					bool handled = php_json_printable_ascii_escape(buf, (*s)[0], options);
+					ZEND_ASSERT(handled);
+				} else {
+					ZEND_ASSERT(buf->s);
+					smart_str_appendc(buf, (*s)[0]);
+				}
+			}
+
+			*pos = sizeof(__m128i) - (*s - s_backup); /* bytes of this block after the last escape stay pending */
+		} else {
+			if (max_shift < sizeof(__m128i)) {
+				*pos += max_shift; /* keep the printable prefix pending and stop at the flagged byte */
+				*len -= max_shift;
+				return PHP_JSON_SLOW;
+			}
+			*pos += sizeof(__m128i); /* whole block is plain ASCII: extend the pending run */
+		}
+
+		*len -= sizeof(__m128i);
+	}
+
+	return UNEXPECTED(!*len) ? PHP_JSON_STOP : PHP_JSON_SLOW; /* fewer than 16 bytes left: the caller finishes byte by byte */
+}
+
+# if defined(ZEND_INTRIN_SSE4_2_NATIVE) || defined(ZEND_INTRIN_SSE4_2_FUNC_PROTO)
+#  define JSON_DEFINE_ESCAPE_MASK(name, options) const __m128i name = php_json_create_sse_escape_mask(options)
+# else
+#  define JSON_DEFINE_ESCAPE_MASK(name, options) const __m128i name = _mm_setzero_si128()
+# endif
+#else
+# define JSON_DEFINE_ESCAPE_MASK(name, options)
+#endif
 
 static const char digits[] = "0123456789abcdef";
 
@@ -394,54 +648,64 @@ zend_result php_json_escape_string(
 		}
 
 	}
-	checkpoint = buf->s ? ZSTR_LEN(buf->s) : 0;
 
 	/* pre-allocate for string length plus 2 quotes */
 	smart_str_alloc(buf, len+2, 0);
+	checkpoint = ZSTR_LEN(buf->s);
 	smart_str_appendc(buf, '"');
 
 	pos = 0;
 
+	JSON_DEFINE_ESCAPE_MASK(sse_escape_mask, options);
+
 	do {
 		static const uint32_t charmap[8] = {
 			0xffffffff, 0x500080c4, 0x10000000, 0x00000000,
 			0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff};
 
+		php_json_simd_result result = PHP_JSON_SLOW;
+#ifdef JSON_USE_SIMD
+		result = php_json_process_simd_block(buf, sse_escape_mask, &s, &pos, &len, options);
+		if (UNEXPECTED(result == PHP_JSON_STOP)) {
+			break;
+		}
+#endif
+
 		us = (unsigned char)s[pos];
-		if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) {
+		if (EXPECTED(result != PHP_JSON_NON_ASCII && !ZEND_BIT_TEST(charmap, us))) {
 			pos++;
 			len--;
-			if (len == 0) {
-				smart_str_appendl(buf, s, pos);
-				break;
-			}
 		} else {
-			if (pos) {
-				smart_str_appendl(buf, s, pos);
-				s += pos;
-				pos = 0;
-			}
-			us = (unsigned char)s[0];
 			if (UNEXPECTED(us >= 0x80)) {
-				zend_result status;
-				us = php_next_utf8_char((unsigned char *)s, len, &pos, &status);
+				size_t pos_old = pos;
+				const char *cur = s + pos;
+				pos = 0;
+				us = php_next_utf8_char_mb((unsigned char *)cur, us, len, &pos);
+				len -= pos;
+				pos += pos_old;
 
 				/* check whether UTF8 character is correct */
-				if (UNEXPECTED(status != SUCCESS)) {
+				if (UNEXPECTED(!us)) {
+					if (pos_old && (options & (PHP_JSON_INVALID_UTF8_IGNORE|PHP_JSON_INVALID_UTF8_SUBSTITUTE))) {
+						php_json_append(buf, s, pos_old);
+					}
+					s += pos;
+					pos = 0;
+
 					if (options & PHP_JSON_INVALID_UTF8_IGNORE) {
 						/* ignore invalid UTF8 character */
 					} else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) {
 						/* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */
 						if (options & PHP_JSON_UNESCAPED_UNICODE) {
-							smart_str_appendl(buf, "\xef\xbf\xbd", 3);
+							php_json_append(buf, "\xef\xbf\xbd", 3);
 						} else {
-							smart_str_appendl(buf, "\\ufffd", 6);
+							php_json_append(buf, "\\ufffd", 6);
 						}
 					} else {
 						ZSTR_LEN(buf->s) = checkpoint;
 						encoder->error_code = PHP_JSON_ERROR_UTF8;
 						if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
-							smart_str_appendl(buf, "null", 4);
+							php_json_append(buf, "null", 4);
 						}
 						return FAILURE;
 					}
@@ -452,16 +716,22 @@ zend_result php_json_escape_string(
 				} else if ((options & PHP_JSON_UNESCAPED_UNICODE)
 				    && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
 						|| us < 0x2028 || us > 0x2029)) {
-					smart_str_appendl(buf, s, pos);
+					/* No need to emit any bytes, just move the cursor. */
 				} else {
+					php_json_append(buf, s, pos_old);
+					s += pos;
+					pos = 0;
+
+					ZEND_ASSERT(buf->s);
+
 					/* From http://en.wikipedia.org/wiki/UTF16 */
+					dst = smart_str_extend(buf, 6 + ((us >= 0x10000) ? 6 : 0));
 					if (us >= 0x10000) {
 						unsigned int next_us;
 
 						us -= 0x10000;
 						next_us = (unsigned short)((us & 0x3ff) | 0xdc00);
 						us = (unsigned short)((us >> 10) | 0xd800);
-						dst = smart_str_extend(buf, 6);
 						dst[0] = '\\';
 						dst[1] = 'u';
 						dst[2] = digits[(us >> 12) & 0xf];
@@ -469,8 +739,8 @@ zend_result php_json_escape_string(
 						dst[4] = digits[(us >> 4) & 0xf];
 						dst[5] = digits[us & 0xf];
 						us = next_us;
+						dst += 6;
 					}
-					dst = smart_str_extend(buf, 6);
 					dst[0] = '\\';
 					dst[1] = 'u';
 					dst[2] = digits[(us >> 12) & 0xf];
@@ -478,93 +748,45 @@ zend_result php_json_escape_string(
 					dst[4] = digits[(us >> 4) & 0xf];
 					dst[5] = digits[us & 0xf];
 				}
-				s += pos;
-				len -= pos;
-				pos = 0;
 			} else {
+				if (pos) {
+					php_json_append(buf, s, pos);
+					s += pos;
+					pos = 0;
+				}
 				s++;
 				switch (us) {
-					case '"':
-						if (options & PHP_JSON_HEX_QUOT) {
-							smart_str_appendl(buf, "\\u0022", 6);
-						} else {
-							smart_str_appendl(buf, "\\\"", 2);
-						}
-						break;
-
-					case '\\':
-						smart_str_appendl(buf, "\\\\", 2);
-						break;
-
-					case '/':
-						if (options & PHP_JSON_UNESCAPED_SLASHES) {
-							smart_str_appendc(buf, '/');
-						} else {
-							smart_str_appendl(buf, "\\/", 2);
-						}
-						break;
-
 					case '\b':
-						smart_str_appendl(buf, "\\b", 2);
+						php_json_append(buf, "\\b", 2);
 						break;
 
 					case '\f':
-						smart_str_appendl(buf, "\\f", 2);
+						php_json_append(buf, "\\f", 2);
 						break;
 
 					case '\n':
-						smart_str_appendl(buf, "\\n", 2);
+						php_json_append(buf, "\\n", 2);
 						break;
 
 					case '\r':
-						smart_str_appendl(buf, "\\r", 2);
+						php_json_append(buf, "\\r", 2);
 						break;
 
 					case '\t':
-						smart_str_appendl(buf, "\\t", 2);
-						break;
-
-					case '<':
-						if (options & PHP_JSON_HEX_TAG) {
-							smart_str_appendl(buf, "\\u003C", 6);
-						} else {
-							smart_str_appendc(buf, '<');
-						}
-						break;
-
-					case '>':
-						if (options & PHP_JSON_HEX_TAG) {
-							smart_str_appendl(buf, "\\u003E", 6);
-						} else {
-							smart_str_appendc(buf, '>');
-						}
-						break;
-
-					case '&':
-						if (options & PHP_JSON_HEX_AMP) {
-							smart_str_appendl(buf, "\\u0026", 6);
-						} else {
-							smart_str_appendc(buf, '&');
-						}
-						break;
-
-					case '\'':
-						if (options & PHP_JSON_HEX_APOS) {
-							smart_str_appendl(buf, "\\u0027", 6);
-						} else {
-							smart_str_appendc(buf, '\'');
-						}
+						php_json_append(buf, "\\t", 2);
 						break;
 
 					default:
-						ZEND_ASSERT(us < ' ');
-						dst = smart_str_extend(buf, 6);
-						dst[0] = '\\';
-						dst[1] = 'u';
-						dst[2] = '0';
-						dst[3] = '0';
-						dst[4] = digits[(us >> 4) & 0xf];
-						dst[5] = digits[us & 0xf];
+						if (!php_json_printable_ascii_escape(buf, us, options)) {
+							ZEND_ASSERT(us < ' ');
+							dst = smart_str_extend(buf, 6);
+							dst[0] = '\\';
+							dst[1] = 'u';
+							dst[2] = '0';
+							dst[3] = '0';
+							dst[4] = digits[(us >> 4) & 0xf];
+							dst[5] = digits[us & 0xf];
+						}
 						break;
 				}
 				len--;
@@ -572,6 +794,9 @@ zend_result php_json_escape_string(
 		}
 	} while (len);
 
+	php_json_append(buf, s, pos);
+
+	ZEND_ASSERT(buf->s);
 	smart_str_appendc(buf, '"');
 
 	return SUCCESS;