summary refs log tree commit diff
path: root/src/backend/utils/adt/jsonpath_scan.l
diff options
context:
space:
mode:
author Andrew Dunstan 2022-12-24 20:19:14 +0000
committer Andrew Dunstan 2022-12-24 20:21:20 +0000
commit e37fe1db6ef930f657be28fe764f7e642b93464a (patch)
tree d7f72770eb4350c6a9192c52e42932019efa0ed2 /src/backend/utils/adt/jsonpath_scan.l
parent 780ec9f1b2a44c118d1246325404ad0ed2226cbf (diff)
Convert jsonpath's input function to report errors softly
Reviewed by Tom Lane
Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src/backend/utils/adt/jsonpath_scan.l')
-rw-r--r-- src/backend/utils/adt/jsonpath_scan.l 193
1 files changed, 141 insertions, 52 deletions
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 948f379e76..59652c76dc 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -25,6 +25,7 @@
#include "jsonpath_gram.h"
#include "mb/pg_wchar.h"
+#include "nodes/miscnodes.h"
#include "nodes/pg_list.h"
}
@@ -39,8 +40,8 @@ static int scanbuflen;
static void addstring(bool init, char *s, int l);
static void addchar(bool init, char c);
static enum yytokentype checkKeyword(void);
-static void parseUnicode(char *s, int l);
-static void parseHexChar(char *s);
+static bool parseUnicode(char *s, int l, struct Node *escontext);
+static bool parseHexChar(char *s, struct Node *escontext);
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
#undef fprintf
@@ -147,25 +148,48 @@ hex_fail \\x{hex_dig}{0,1}
<xnq,xq,xvq>\\v { addchar(false, '\v'); }
-<xnq,xq,xvq>{unicode}+ { parseUnicode(yytext, yyleng); }
+<xnq,xq,xvq>{unicode}+ {
+ if (!parseUnicode(yytext, yyleng, escontext))
+ yyterminate();
+ }
-<xnq,xq,xvq>{hex_char} { parseHexChar(yytext); }
+<xnq,xq,xvq>{hex_char} {
+ if (!parseHexChar(yytext, escontext))
+ yyterminate();
+ }
-<xnq,xq,xvq>{unicode}*{unicodefail} { jsonpath_yyerror(NULL, "invalid unicode sequence"); }
+<xnq,xq,xvq>{unicode}*{unicodefail} {
+ jsonpath_yyerror(NULL, escontext,
+ "invalid unicode sequence");
+ yyterminate();
+ }
-<xnq,xq,xvq>{hex_fail} { jsonpath_yyerror(NULL, "invalid hex character sequence"); }
+<xnq,xq,xvq>{hex_fail} {
+ jsonpath_yyerror(NULL, escontext,
+ "invalid hex character sequence");
+ yyterminate();
+ }
<xnq,xq,xvq>{unicode}+\\ {
/* throw back the \\, and treat as unicode */
yyless(yyleng - 1);
- parseUnicode(yytext, yyleng);
+ if (!parseUnicode(yytext, yyleng, escontext))
+ yyterminate();
}
<xnq,xq,xvq>\\. { addchar(false, yytext[1]); }
-<xnq,xq,xvq>\\ { jsonpath_yyerror(NULL, "unexpected end after backslash"); }
+<xnq,xq,xvq>\\ {
+ jsonpath_yyerror(NULL, escontext,
+ "unexpected end after backslash");
+ yyterminate();
+ }
-<xq,xvq><<EOF>> { jsonpath_yyerror(NULL, "unexpected end of quoted string"); }
+<xq,xvq><<EOF>> {
+ jsonpath_yyerror(NULL, escontext,
+ "unexpected end of quoted string");
+ yyterminate();
+ }
<xq>\" {
yylval->str = scanstring;
@@ -187,8 +211,12 @@ hex_fail \\x{hex_dig}{0,1}
<xc>\* { }
-<xc><<EOF>> { jsonpath_yyerror(NULL, "unexpected end of comment"); }
-
+<xc><<EOF>> {
+ jsonpath_yyerror(
+ NULL, escontext,
+ "unexpected end of comment");
+ yyterminate();
+ }
\&\& { return AND_P; }
\|\| { return OR_P; }
@@ -253,11 +281,30 @@ hex_fail \\x{hex_dig}{0,1}
return INT_P;
}
-{realfail} { jsonpath_yyerror(NULL, "invalid numeric literal"); }
-{integer_junk} { jsonpath_yyerror(NULL, "trailing junk after numeric literal"); }
-{decimal_junk} { jsonpath_yyerror(NULL, "trailing junk after numeric literal"); }
-{real_junk} { jsonpath_yyerror(NULL, "trailing junk after numeric literal"); }
-
+{realfail} {
+ jsonpath_yyerror(
+ NULL, escontext,
+ "invalid numeric literal");
+ yyterminate();
+ }
+{integer_junk} {
+ jsonpath_yyerror(
+ NULL, escontext,
+ "trailing junk after numeric literal");
+ yyterminate();
+ }
+{decimal_junk} {
+ jsonpath_yyerror(
+ NULL, escontext,
+ "trailing junk after numeric literal");
+ yyterminate();
+ }
+{real_junk} {
+ jsonpath_yyerror(
+ NULL, escontext,
+ "trailing junk after numeric literal");
+ yyterminate();
+ }
\" {
addchar(true, '\0');
BEGIN xq;
@@ -281,18 +328,23 @@ hex_fail \\x{hex_dig}{0,1}
/* LCOV_EXCL_STOP */
void
-jsonpath_yyerror(JsonPathParseResult **result, const char *message)
+jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext,
+ const char *message)
{
+ /* don't overwrite escontext if it's already been set */
+ if (SOFT_ERROR_OCCURRED(escontext))
+ return;
+
if (*yytext == YY_END_OF_BUFFER_CHAR)
{
- ereport(ERROR,
+ errsave(escontext,
(errcode(ERRCODE_SYNTAX_ERROR),
/* translator: %s is typically "syntax error" */
errmsg("%s at end of jsonpath input", _(message))));
}
else
{
- ereport(ERROR,
+ errsave(escontext,
(errcode(ERRCODE_SYNTAX_ERROR),
/* translator: first %s is typically "syntax error" */
errmsg("%s at or near \"%s\" of jsonpath input",
@@ -463,14 +515,14 @@ addchar(bool init, char c)
/* Interface to jsonpath parser */
JsonPathParseResult *
-parsejsonpath(const char *str, int len)
+parsejsonpath(const char *str, int len, struct Node *escontext)
{
JsonPathParseResult *parseresult;
jsonpath_scanner_init(str, len);
- if (jsonpath_yyparse((void *) &parseresult) != 0)
- jsonpath_yyerror(NULL, "bogus input"); /* shouldn't happen */
+ if (jsonpath_yyparse((void *) &parseresult, escontext) != 0)
+ jsonpath_yyerror(NULL, escontext, "bogus input"); /* shouldn't happen */
jsonpath_scanner_finish();
@@ -478,27 +530,36 @@ parsejsonpath(const char *str, int len)
}
/* Turn hex character into integer */
-static int
-hexval(char c)
+static bool
+hexval(char c, int *result, struct Node *escontext)
{
if (c >= '0' && c <= '9')
- return c - '0';
+ {
+ *result = c - '0';
+ return true;
+ }
if (c >= 'a' && c <= 'f')
- return c - 'a' + 0xA;
+ {
+ *result = c - 'a' + 0xA;
+ return true;
+ }
if (c >= 'A' && c <= 'F')
- return c - 'A' + 0xA;
- jsonpath_yyerror(NULL, "invalid hexadecimal digit");
- return 0; /* not reached */
+ {
+ *result = c - 'A' + 0xA;
+ return true;
+ }
+ jsonpath_yyerror(NULL, escontext, "invalid hexadecimal digit");
+ return false;
}
/* Add given unicode character to scanstring */
-static void
-addUnicodeChar(int ch)
+static bool
+addUnicodeChar(int ch, struct Node *escontext)
{
if (ch == 0)
{
/* We can't allow this, since our TEXT type doesn't */
- ereport(ERROR,
+ ereturn(escontext, false,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"),
errdetail("\\u0000 cannot be converted to text.")));
@@ -507,30 +568,42 @@ addUnicodeChar(int ch)
{
char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
- pg_unicode_to_server(ch, (unsigned char *) cbuf);
+ /*
+ * If we're trapping the error status, call the noerror form of the
+ * conversion function. Otherwise call the normal form which provides
+ * more detailed errors.
+ */
+
+ if (! escontext || ! IsA(escontext, ErrorSaveContext))
+ pg_unicode_to_server(ch, (unsigned char *) cbuf);
+ else if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
+ ereturn(escontext, false,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("could not convert unicode to server encoding")));
addstring(false, cbuf, strlen(cbuf));
}
+ return true;
}
/* Add unicode character, processing any surrogate pairs */
-static void
-addUnicode(int ch, int *hi_surrogate)
+static bool
+addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
{
if (is_utf16_surrogate_first(ch))
{
if (*hi_surrogate != -1)
- ereport(ERROR,
+ ereturn(escontext, false,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode high surrogate must not follow "
"a high surrogate.")));
*hi_surrogate = ch;
- return;
+ return true;
}
else if (is_utf16_surrogate_second(ch))
{
if (*hi_surrogate == -1)
- ereport(ERROR,
+ ereturn(escontext, false,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode low surrogate must follow a high "
@@ -540,22 +613,22 @@ addUnicode(int ch, int *hi_surrogate)
}
else if (*hi_surrogate != -1)
{
- ereport(ERROR,
+ ereturn(escontext, false,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode low surrogate must follow a high "
"surrogate.")));
}
- addUnicodeChar(ch);
+ return addUnicodeChar(ch, escontext);
}
/*
* parseUnicode was adopted from json_lex_string() in
* src/backend/utils/adt/json.c
*/
-static void
-parseUnicode(char *s, int l)
+static bool
+parseUnicode(char *s, int l, struct Node *escontext)
{
int i = 2;
int hi_surrogate = -1;
@@ -563,41 +636,57 @@ parseUnicode(char *s, int l)
for (i = 2; i < l; i += 2) /* skip '\u' */
{
int ch = 0;
- int j;
+ int j, si;
if (s[i] == '{') /* parse '\u{XX...}' */
{
while (s[++i] != '}' && i < l)
- ch = (ch << 4) | hexval(s[i]);
+ {
+ if (!hexval(s[i], &si, escontext))
+ return false;
+ ch = (ch << 4) | si;
+ }
i++; /* skip '}' */
}
else /* parse '\uXXXX' */
{
for (j = 0; j < 4 && i < l; j++)
- ch = (ch << 4) | hexval(s[i++]);
+ {
+ if (!hexval(s[i++], &si, escontext))
+ return false;
+ ch = (ch << 4) | si;
+ }
}
- addUnicode(ch, &hi_surrogate);
+ if (! addUnicode(ch, &hi_surrogate, escontext))
+ return false;
}
if (hi_surrogate != -1)
{
- ereport(ERROR,
+ ereturn(escontext, false,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unicode low surrogate must follow a high "
"surrogate.")));
}
+
+ return true;
}
/* Parse sequence of hex-encoded characters */
-static void
-parseHexChar(char *s)
+static bool
+parseHexChar(char *s, struct Node *escontext)
{
- int ch = (hexval(s[2]) << 4) |
- hexval(s[3]);
+ int s2, s3, ch;
+ if (!hexval(s[2], &s2, escontext))
+ return false;
+ if (!hexval(s[3], &s3, escontext))
+ return false;
+
+ ch = (s2 << 4) | s3;
- addUnicodeChar(ch);
+ return addUnicodeChar(ch, escontext);
}
/*