Convert jsonpath's input function to report errors softly

Reviewed by Tom Lane

Discussion: https://postgr.es/m/[email protected]
author: Andrew Dunstan 2022-12-24 20:19:14 +0000
committer: Andrew Dunstan 2022-12-24 20:21:20 +0000
commit: e37fe1db6ef930f657be28fe764f7e642b93464a (patch)
tree: d7f72770eb4350c6a9192c52e42932019efa0ed2 /src/backend/utils/adt/jsonpath_scan.l
parent: 780ec9f1b2a44c118d1246325404ad0ed2226cbf (diff)
1 files changed, 141 insertions, 52 deletions
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 948f379e76..59652c76dc 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -25,6 +25,7 @@
 #include "jsonpath_gram.h"
 
 #include "mb/pg_wchar.h"
+#include "nodes/miscnodes.h"
 #include "nodes/pg_list.h"
 }
 
@@ -39,8 +40,8 @@ static int	scanbuflen;
 static void addstring(bool init, char *s, int l);
 static void addchar(bool init, char c);
 static enum yytokentype checkKeyword(void);
-static void parseUnicode(char *s, int l);
-static void parseHexChar(char *s);
+static bool parseUnicode(char *s, int l, struct Node *escontext);
+static bool parseHexChar(char *s, struct Node *escontext);
 
 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
 #undef fprintf
@@ -147,25 +148,48 @@ hex_fail	\\x{hex_dig}{0,1}
 
 <xnq,xq,xvq>\\v				{ addchar(false, '\v'); }
 
-<xnq,xq,xvq>{unicode}+		{ parseUnicode(yytext, yyleng); }
+<xnq,xq,xvq>{unicode}+		{
+								if (!parseUnicode(yytext, yyleng, escontext))
+									yyterminate();
+							}
 
-<xnq,xq,xvq>{hex_char}		{ parseHexChar(yytext); }
+<xnq,xq,xvq>{hex_char}		{
+								if (!parseHexChar(yytext, escontext))
+									yyterminate();
+							}
 
-<xnq,xq,xvq>{unicode}*{unicodefail}	{ jsonpath_yyerror(NULL, "invalid unicode sequence"); }
+<xnq,xq,xvq>{unicode}*{unicodefail} {
+								jsonpath_yyerror(NULL, escontext,
+												 "invalid unicode sequence");
+								yyterminate();
+							}
 
-<xnq,xq,xvq>{hex_fail}		{ jsonpath_yyerror(NULL, "invalid hex character sequence"); }
+<xnq,xq,xvq>{hex_fail}		{
+								jsonpath_yyerror(NULL, escontext,
+												 "invalid hex character sequence");
+								yyterminate();
+							}
 
 <xnq,xq,xvq>{unicode}+\\	{
 								/* throw back the \\, and treat as unicode */
 								yyless(yyleng - 1);
-								parseUnicode(yytext, yyleng);
+								if (!parseUnicode(yytext, yyleng, escontext))
+									yyterminate();
 							}
 
 <xnq,xq,xvq>\\.				{ addchar(false, yytext[1]); }
 
-<xnq,xq,xvq>\\				{ jsonpath_yyerror(NULL, "unexpected end after backslash"); }
+<xnq,xq,xvq>\\				{
+							  jsonpath_yyerror(NULL, escontext,
+											   "unexpected end after backslash");
+							  yyterminate();
+							}
 
-<xq,xvq><<EOF>>				{ jsonpath_yyerror(NULL, "unexpected end of quoted string"); }
+<xq,xvq><<EOF>>				{
+							  jsonpath_yyerror(NULL, escontext,
+											   "unexpected end of quoted string");
+							  yyterminate();
+							}
 
 <xq>\"							{
 									yylval->str = scanstring;
@@ -187,8 +211,12 @@ hex_fail	\\x{hex_dig}{0,1}
 
 <xc>\*							{ }
 
-<xc><<EOF>>						{ jsonpath_yyerror(NULL, "unexpected end of comment"); }
-
+<xc><<EOF>>						{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"unexpected end of comment");
+									yyterminate();
+								}
 \&\&							{ return AND_P; }
 
 \|\|							{ return OR_P; }
@@ -253,11 +281,30 @@ hex_fail	\\x{hex_dig}{0,1}
 									return INT_P;
 								}
 
-{realfail}						{ jsonpath_yyerror(NULL, "invalid numeric literal"); }
-{integer_junk}					{ jsonpath_yyerror(NULL, "trailing junk after numeric literal"); }
-{decimal_junk}					{ jsonpath_yyerror(NULL, "trailing junk after numeric literal"); }
-{real_junk}						{ jsonpath_yyerror(NULL, "trailing junk after numeric literal"); }
-
+{realfail}						{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"invalid numeric literal");
+									yyterminate();
+								}
+{integer_junk}					{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"trailing junk after numeric literal");
+									yyterminate();
+								}
+{decimal_junk}					{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"trailing junk after numeric literal");
+									yyterminate();
+								}
+{real_junk}						{
+									jsonpath_yyerror(
+										NULL, escontext,
+										"trailing junk after numeric literal");
+									yyterminate();
+								}
 \"								{
 									addchar(true, '\0');
 									BEGIN xq;
@@ -281,18 +328,23 @@ hex_fail	\\x{hex_dig}{0,1}
 /* LCOV_EXCL_STOP */
 
 void
-jsonpath_yyerror(JsonPathParseResult **result, const char *message)
+jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext,
+				 const char *message)
 {
+	/* don't overwrite escontext if it's already been set */
+	if (SOFT_ERROR_OCCURRED(escontext))
+		return;
+
 	if (*yytext == YY_END_OF_BUFFER_CHAR)
 	{
-		ereport(ERROR,
+		errsave(escontext,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 /* translator: %s is typically "syntax error" */
 				 errmsg("%s at end of jsonpath input", _(message))));
 	}
 	else
 	{
-		ereport(ERROR,
+		errsave(escontext,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 /* translator: first %s is typically "syntax error" */
 				 errmsg("%s at or near \"%s\" of jsonpath input",
@@ -463,14 +515,14 @@ addchar(bool init, char c)
 
 /* Interface to jsonpath parser */
 JsonPathParseResult *
-parsejsonpath(const char *str, int len)
+parsejsonpath(const char *str, int len, struct Node *escontext)
 {
 	JsonPathParseResult	*parseresult;
 
 	jsonpath_scanner_init(str, len);
 
-	if (jsonpath_yyparse((void *) &parseresult) != 0)
-		jsonpath_yyerror(NULL, "bogus input"); /* shouldn't happen */
+	if (jsonpath_yyparse((void *) &parseresult, escontext) != 0)
+		jsonpath_yyerror(NULL, escontext, "bogus input"); /* shouldn't happen */
 
 	jsonpath_scanner_finish();
 
@@ -478,27 +530,36 @@ parsejsonpath(const char *str, int len)
 }
 
 /* Turn hex character into integer */
-static int
-hexval(char c)
+static bool
+hexval(char c, int *result, struct Node *escontext)
 {
 	if (c >= '0' && c <= '9')
-		return c - '0';
+	{
+		*result = c - '0';
+		return true;
+	}
 	if (c >= 'a' && c <= 'f')
-		return c - 'a' + 0xA;
+	{
+		*result = c - 'a' + 0xA;
+		return true;
+	}
 	if (c >= 'A' && c <= 'F')
-		return c - 'A' + 0xA;
-	jsonpath_yyerror(NULL, "invalid hexadecimal digit");
-	return 0; /* not reached */
+	{
+		*result = c - 'A' + 0xA;
+		return true;
+	}
+	jsonpath_yyerror(NULL, escontext, "invalid hexadecimal digit");
+	return false;
 }
 
 /* Add given unicode character to scanstring */
-static void
-addUnicodeChar(int ch)
+static bool
+addUnicodeChar(int ch, struct Node *escontext)
 {
 	if (ch == 0)
 	{
 		/* We can't allow this, since our TEXT type doesn't */
-		ereport(ERROR,
+		ereturn(escontext, false,
 				(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
 				 errmsg("unsupported Unicode escape sequence"),
 				  errdetail("\\u0000 cannot be converted to text.")));
@@ -507,30 +568,42 @@ addUnicodeChar(int ch)
 	{
 		char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
 
-		pg_unicode_to_server(ch, (unsigned char *) cbuf);
+		/*
+		 * If we're trapping the error status, call the noerror form of the
+		 * conversion function. Otherwise call the normal form which provides
+		 * more detailed errors.
+		 */
+
+		if (! escontext  || ! IsA(escontext, ErrorSaveContext))
+			pg_unicode_to_server(ch, (unsigned char *) cbuf);
+		else if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
+			ereturn(escontext, false,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("could not convert unicode to server encoding")));
 		addstring(false, cbuf, strlen(cbuf));
 	}
+	return true;
 }
 
 /* Add unicode character, processing any surrogate pairs */
-static void
-addUnicode(int ch, int *hi_surrogate)
+static bool
+addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
 {
 	if (is_utf16_surrogate_first(ch))
 	{
 		if (*hi_surrogate != -1)
-			ereport(ERROR,
+			ereturn(escontext, false,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("invalid input syntax for type %s", "jsonpath"),
 					 errdetail("Unicode high surrogate must not follow "
 							   "a high surrogate.")));
 		*hi_surrogate = ch;
-		return;
+		return true;
 	}
 	else if (is_utf16_surrogate_second(ch))
 	{
 		if (*hi_surrogate == -1)
-			ereport(ERROR,
+			ereturn(escontext, false,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("invalid input syntax for type %s", "jsonpath"),
 					 errdetail("Unicode low surrogate must follow a high "
@@ -540,22 +613,22 @@ addUnicode(int ch, int *hi_surrogate)
 	}
 	else if (*hi_surrogate != -1)
 	{
-		ereport(ERROR,
+		ereturn(escontext, false,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 				 errmsg("invalid input syntax for type %s", "jsonpath"),
 				 errdetail("Unicode low surrogate must follow a high "
 						   "surrogate.")));
 	}
 
-	addUnicodeChar(ch);
+	return addUnicodeChar(ch, escontext);
 }
 
 /*
  * parseUnicode was adopted from json_lex_string() in
  * src/backend/utils/adt/json.c
  */
-static void
-parseUnicode(char *s, int l)
+static bool
+parseUnicode(char *s, int l, struct Node *escontext)
 {
 	int			i = 2;
 	int			hi_surrogate = -1;
@@ -563,41 +636,57 @@ parseUnicode(char *s, int l)
 	for (i = 2; i < l; i += 2)	/* skip '\u' */
 	{
 		int			ch = 0;
-		int			j;
+		int			j, si;
 
 		if (s[i] == '{')	/* parse '\u{XX...}' */
 		{
 			while (s[++i] != '}' && i < l)
-				ch = (ch << 4) | hexval(s[i]);
+			{
+				if (!hexval(s[i], &si, escontext))
+					return false;
+				ch = (ch << 4) | si;
+			}
 			i++;	/* skip '}' */
 		}
 		else		/* parse '\uXXXX' */
 		{
 			for (j = 0; j < 4 && i < l; j++)
-				ch = (ch << 4) | hexval(s[i++]);
+			{
+				if (!hexval(s[i++], &si, escontext))
+					return false;
+				ch = (ch << 4) | si;
+			}
 		}
 
-		addUnicode(ch, &hi_surrogate);
+		if (! addUnicode(ch, &hi_surrogate, escontext))
+			return false;
 	}
 
 	if (hi_surrogate != -1)
 	{
-		ereport(ERROR,
+		ereturn(escontext, false,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 				 errmsg("invalid input syntax for type %s", "jsonpath"),
 				 errdetail("Unicode low surrogate must follow a high "
 						   "surrogate.")));
 	}
+
+	return true;
 }
 
 /* Parse sequence of hex-encoded characters */
-static void
-parseHexChar(char *s)
+static bool
+parseHexChar(char *s, struct Node *escontext)
 {
-	int			ch = (hexval(s[2]) << 4) |
-					  hexval(s[3]);
+	int s2, s3, ch;
+	if (!hexval(s[2], &s2, escontext))
+		return false;
+	if (!hexval(s[3], &s3, escontext))
+		return false;
+
+	ch = (s2 << 4) | s3;
 
-	addUnicodeChar(ch);
+	return addUnicodeChar(ch, escontext);
 }
 
 /*
author	Andrew Dunstan	2022-12-24 20:19:14 +0000
committer	Andrew Dunstan	2022-12-24 20:21:20 +0000
commit	e37fe1db6ef930f657be28fe764f7e642b93464a (patch)
tree	d7f72770eb4350c6a9192c52e42932019efa0ed2 /src/backend/utils/adt/jsonpath_scan.l
parent	780ec9f1b2a44c118d1246325404ad0ed2226cbf (diff)