@@ -97,6 +97,7 @@ static bool is_utf16_surrogate_first(pg_wchar c);
9797static bool is_utf16_surrogate_second (pg_wchar c);
9898static pg_wchar surrogate_pair_to_codepoint (pg_wchar first, pg_wchar second);
9999static void addunicode (pg_wchar c, yyscan_t yyscanner);
100+ static bool check_uescapechar (unsigned char escape);
100101
101102#define yyerror (msg ) scanner_yyerror(msg, yyscanner)
102103
@@ -150,7 +151,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
150151 * <xe> extended quoted strings (support backslash escape sequences)
151152 * <xdolq> $foo$ quoted strings
152153 * <xui> quoted identifier with Unicode escapes
154+ * <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
153155 * <xus> quoted string with Unicode escapes
156+ * <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
154157 * <xeu> Unicode surrogate pair in extended quoted string
155158 */
156159
@@ -162,7 +165,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
162165%x xq
163166%x xdolq
164167%x xui
168+ %x xuiend
165169%x xus
170+ %x xusend
166171%x xeu
167172
168173/*
@@ -279,17 +284,17 @@ xdinside [^"]+
279284/* Unicode escapes */
280285uescape [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }[^ ' ]{quote }
281286/* error rule to avoid backup */
282- uescapefail ( " - " | [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* " -" | [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }[^ ' ]| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* | [uU ][eE ][sS ][cC ][aA ][pP ]| [uU ][eE ][sS ][cC ][aA ]| [uU ][eE ][sS ][cC ]| [uU ][eE ][sS ]| [uU ][eE ]| [uU ])
287+ uescapefail [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* " -" | [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }[^ ' ]| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* {quote }| [uU ][eE ][sS ][cC ][aA ][pP ][eE ]{whitespace }* | [uU ][eE ][sS ][cC ][aA ][pP ]| [uU ][eE ][sS ][cC ][aA ]| [uU ][eE ][sS ][cC ]| [uU ][eE ][sS ]| [uU ][eE ]| [uU ]
283288
284289/* Quoted identifier with Unicode escapes */
285290xuistart [uU ]&{dquote }
286- xuistop1 {dquote }{whitespace }* {uescapefail }?
287- xuistop2 {dquote }{whitespace }* {uescape }
288291
289292/* Quoted string with Unicode escapes */
290293xusstart [uU ]&{quote }
291- xusstop1 {quote }{whitespace }* {uescapefail }?
292- xusstop2 {quote }{whitespace }* {uescape }
294+
295+ /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
296+ xustop1 {uescapefail }?
297+ xustop2 {uescape }
293298
294299/* error rule to avoid backup */
295300xufailed [uU ]&
@@ -536,15 +541,31 @@ other .
536541 yylval->str = litbufdup (yyscanner);
537542 return SCONST;
538543 }
539- <xus >{xusstop1 } {
544+ <xus >{quotestop } |
545+ <xus >{quotefail } {
540546 /* throw back all but the quote */
541547 yyless (1 );
548+ /* handle possible UESCAPE in xusend mode */
549+ BEGIN (xusend);
550+ }
551+ <xusend >{whitespace }
552+ <xusend >{other } |
553+ <xusend >{xustop1 } {
554+ /* no UESCAPE after the quote, throw back everything */
555+ yyless (0 );
542556 BEGIN (INITIAL);
543557 yylval->str = litbuf_udeescape (' \\ ' , yyscanner);
544558 return SCONST;
545559 }
546- <xus >{xusstop2 } {
560+ <xusend >{xustop2 } {
561+ /* found UESCAPE after the end quote */
547562 BEGIN (INITIAL);
563+ if (!check_uescapechar (yytext[yyleng-2 ]))
564+ {
565+ SET_YYLLOC ();
566+ ADVANCE_YYLLOC (yyleng-2 );
567+ yyerror (" invalid Unicode escape character" );
568+ }
548569 yylval->str = litbuf_udeescape (yytext[yyleng-2 ], yyscanner);
549570 return SCONST;
550571 }
@@ -702,26 +723,41 @@ other .
702723 yylval->str = ident;
703724 return IDENT;
704725 }
705- <xui >{xuistop1 } {
726+ <xui >{dquote } {
727+ yyless (1 );
728+ /* handle possible UESCAPE in xuiend mode */
729+ BEGIN (xuiend);
730+ }
731+ <xuiend >{whitespace } { }
732+ <xuiend >{other } |
733+ <xuiend >{xustop1 } {
734+ /* no UESCAPE after the quote, throw back everything */
706735 char *ident;
707736
737+ yyless (0 );
738+
708739 BEGIN (INITIAL);
709740 if (yyextra->literallen == 0 )
710741 yyerror (" zero-length delimited identifier" );
711742 ident = litbuf_udeescape (' \\ ' , yyscanner);
712743 if (yyextra->literallen >= NAMEDATALEN)
713744 truncate_identifier (ident, yyextra->literallen , true );
714745 yylval->str = ident;
715- /* throw back all but the quote */
716- yyless (1 );
717746 return IDENT;
718747 }
719- <xui >{xuistop2 } {
748+ <xuiend >{xustop2 } {
749+ /* found UESCAPE after the end quote */
720750 char *ident;
721751
722752 BEGIN (INITIAL);
723753 if (yyextra->literallen == 0 )
724754 yyerror (" zero-length delimited identifier" );
755+ if (!check_uescapechar (yytext[yyleng-2 ]))
756+ {
757+ SET_YYLLOC ();
758+ ADVANCE_YYLLOC (yyleng-2 );
759+ yyerror (" invalid Unicode escape character" );
760+ }
725761 ident = litbuf_udeescape (yytext[yyleng - 2 ], yyscanner);
726762 if (yyextra->literallen >= NAMEDATALEN)
727763 truncate_identifier (ident, yyextra->literallen , true );
@@ -1203,22 +1239,29 @@ addunicode(pg_wchar c, core_yyscan_t yyscanner)
12031239 addlit (buf, pg_mblen (buf), yyscanner);
12041240}
12051241
1206- static char *
1207- litbuf_udeescape (unsigned char escape, core_yyscan_t yyscanner)
1242+ /* Is 'escape' acceptable as a Unicode escape character (UESCAPE syntax)? */
1243+ static bool
1244+ check_uescapechar (unsigned char escape)
12081245{
1209- char *new ;
1210- char *litbuf, *in, *out;
1211- pg_wchar pair_first = 0 ;
1212-
12131246 if (isxdigit (escape)
12141247 || escape == ' +'
12151248 || escape == ' \' '
12161249 || escape == ' "'
12171250 || scanner_isspace (escape))
12181251 {
1219- ADVANCE_YYLLOC (yyextra->literallen + yyleng + 1 );
1220- yyerror (" invalid Unicode escape character" );
1252+ return false ;
12211253 }
1254+ else
1255+ return true ;
1256+ }
1257+
1258+ /* Like litbufdup, but handle Unicode escapes. */
1259+ static char *
1260+ litbuf_udeescape (unsigned char escape, core_yyscan_t yyscanner)
1261+ {
1262+ char *new ;
1263+ char *litbuf, *in, *out;
1264+ pg_wchar pair_first = 0 ;
12221265
12231266 /* Make literalbuf null-terminated to simplify the scanning loop */
12241267 litbuf = yyextra->literalbuf ;
0 commit comments