2424 * Portions Copyright (c) 1994, Regents of the University of California
2525 *
2626 * IDENTIFICATION
27- * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.150 2009/04/14 22:18:47 tgl Exp $
27+ * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.151 2009/04/19 21:08:54 tgl Exp $
2828 *
2929 *-------------------------------------------------------------------------
3030 */
@@ -60,7 +60,7 @@ bool escape_string_warning = true;
6060bool standard_conforming_strings = false ;
6161
6262static bool warn_on_first_escape;
63- static bool saw_high_bit = false ;
63+ static bool saw_non_ascii = false ;
6464
6565/*
6666 * literalbuf is used to accumulate literal values when multiple rules
@@ -453,7 +453,7 @@ other .
453453
454454{xqstart } {
455455 warn_on_first_escape = true ;
456- saw_high_bit = false ;
456+ saw_non_ascii = false ;
457457 SET_YYLLOC ();
458458 if (standard_conforming_strings)
459459 BEGIN (xq);
@@ -463,7 +463,7 @@ other .
463463 }
464464{xestart } {
465465 warn_on_first_escape = false ;
466- saw_high_bit = false ;
466+ saw_non_ascii = false ;
467467 SET_YYLLOC ();
468468 BEGIN (xe);
469469 startlit ();
@@ -477,10 +477,11 @@ other .
477477<xq ,xe >{quotefail } {
478478 yyless (1 );
479479 BEGIN (INITIAL);
480- /* check that the data remains valid if it might have been
480+ /*
481+ * check that the data remains valid if it might have been
481482 * made invalid by unescaping any chars.
482483 */
483- if (saw_high_bit )
484+ if (saw_non_ascii )
484485 pg_verifymbstr (literalbuf, literallen, false );
485486 yylval.str = litbufdup ();
486487 return SCONST;
@@ -526,16 +527,16 @@ other .
526527
527528 check_escape_warning ();
528529 addlitchar (c);
529- if (IS_HIGHBIT_SET (c))
530- saw_high_bit = true ;
530+ if (c == ' \0 ' || IS_HIGHBIT_SET (c))
531+ saw_non_ascii = true ;
531532 }
532533<xe >{xehexesc } {
533534 unsigned char c = strtoul (yytext+2 , NULL , 16 );
534535
535536 check_escape_warning ();
536537 addlitchar (c);
537- if (IS_HIGHBIT_SET (c))
538- saw_high_bit = true ;
538+ if (c == ' \0 ' || IS_HIGHBIT_SET (c))
539+ saw_non_ascii = true ;
539540 }
540541<xq ,xe ,xus >{quotecontinue } {
541542 /* ignore */
@@ -1083,21 +1084,18 @@ litbuf_udeescape(unsigned char escape)
10831084 }
10841085
10851086 *out = ' \0 ' ;
1087+ /*
1088+ * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
1089+ * codes; but it's probably not worth the trouble, since this isn't
1090+ * likely to be a performance-critical path.
1091+ */
10861092 pg_verifymbstr (new , out - new , false );
10871093 return new ;
10881094}
10891095
10901096static unsigned char
10911097unescape_single_char (unsigned char c)
10921098{
1093- /* Normally we wouldn't expect to see \n where n has its high bit set
1094- * but we set the flag to check the string if we do get it, so
1095- * that this doesn't become a way of getting around the coding validity
1096- * checks.
1097- */
1098- if (IS_HIGHBIT_SET (c))
1099- saw_high_bit = true ;
1100-
11011099 switch (c)
11021100 {
11031101 case ' b' :
@@ -1111,6 +1109,10 @@ unescape_single_char(unsigned char c)
11111109 case ' t' :
11121110 return ' \t ' ;
11131111 default :
1112+ /* check for backslash followed by non-7-bit-ASCII */
1113+ if (c == ' \0 ' || IS_HIGHBIT_SET (c))
1114+ saw_non_ascii = true ;
1115+
11141116 return c;
11151117 }
11161118}
0 commit comments