summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Eisentraut 2022-02-16 09:32:36 +0000
committer Peter Eisentraut 2022-02-16 09:37:31 +0000
commit 2549f0661bd28571d7200d6f82f752a7ee5d47e1 (patch)
tree 04e905d232763697d1a94cc4cc69c0154ca31372
parent 70e81861fadd9112fa2d425c762e163910a4ee52 (diff)
Reject trailing junk after numeric literals
After this, the PostgreSQL lexers no longer accept numeric literals with trailing non-digits, such as 123abc, which were previously scanned as two tokens: 123 and abc. That old behavior was undocumented and surprising, and it might also interfere with some extended numeric literal syntax being contemplated for the future. Reviewed-by: John Naylor <[email protected]> Discussion: https://www.postgresql.org/message-id/flat/[email protected]
-rw-r--r-- src/backend/parser/scan.l 37
-rw-r--r-- src/fe_utils/psqlscan.l 31
-rw-r--r-- src/interfaces/ecpg/preproc/pgc.l 36
-rw-r--r-- src/test/regress/expected/numerology.out 77
-rw-r--r-- src/test/regress/sql/numerology.sql 1
5 files changed, 96 insertions, 86 deletions
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index f555ac6e6d..882e081aae 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -387,7 +387,7 @@ operator {op_chars}+
*
* {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
*
- * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * {realfail} is added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
digit [0-9]
@@ -396,10 +396,14 @@ integer {digit}+
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
decimalfail {digit}+\.\.
real ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1 ({integer}|{decimal})[Ee]
-realfail2 ({integer}|{decimal})[Ee][-+]
+realfail ({integer}|{decimal})[Ee][-+]
+
+integer_junk {integer}{ident_start}
+decimal_junk {decimal}{ident_start}
+real_junk {real}{ident_start}
param \${integer}
+param_junk \${integer}{ident_start}
other .
@@ -974,6 +978,10 @@ other .
yylval->ival = atol(yytext + 1);
return PARAM;
}
+{param_junk} {
+ SET_YYLLOC();
+ yyerror("trailing junk after parameter");
+ }
{integer} {
SET_YYLLOC();
@@ -995,20 +1003,21 @@ other .
yylval->str = pstrdup(yytext);
return FCONST;
}
-{realfail1} {
- /*
- * throw back the [Ee], and figure out whether what
- * remains is an {integer} or {decimal}.
- */
- yyless(yyleng - 1);
+{realfail} {
SET_YYLLOC();
- return process_integer_literal(yytext, yylval);
+ yyerror("trailing junk after numeric literal");
}
-{realfail2} {
- /* throw back the [Ee][+-], and proceed as above */
- yyless(yyleng - 2);
+{integer_junk} {
SET_YYLLOC();
- return process_integer_literal(yytext, yylval);
+ yyerror("trailing junk after numeric literal");
+ }
+{decimal_junk} {
+ SET_YYLLOC();
+ yyerror("trailing junk after numeric literal");
+ }
+{real_junk} {
+ SET_YYLLOC();
+ yyerror("trailing junk after numeric literal");
}
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 941ed06553..ae531ec240 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -325,7 +325,7 @@ operator {op_chars}+
*
* {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
*
- * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * {realfail} is added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
digit [0-9]
@@ -334,10 +334,14 @@ integer {digit}+
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
decimalfail {digit}+\.\.
real ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1 ({integer}|{decimal})[Ee]
-realfail2 ({integer}|{decimal})[Ee][-+]
+realfail ({integer}|{decimal})[Ee][-+]
+
+integer_junk {integer}{ident_start}
+decimal_junk {decimal}{ident_start}
+real_junk {real}{ident_start}
param \${integer}
+param_junk \${integer}{ident_start}
/* psql-specific: characters allowed in variable names */
variable_char [A-Za-z\200-\377_0-9]
@@ -839,6 +843,9 @@ other .
{param} {
ECHO;
}
+{param_junk} {
+ ECHO;
+ }
{integer} {
ECHO;
@@ -854,18 +861,16 @@ other .
{real} {
ECHO;
}
-{realfail1} {
- /*
- * throw back the [Ee], and figure out whether what
- * remains is an {integer} or {decimal}.
- * (in psql, we don't actually care...)
- */
- yyless(yyleng - 1);
+{realfail} {
ECHO;
}
-{realfail2} {
- /* throw back the [Ee][+-], and proceed as above */
- yyless(yyleng - 2);
+{integer_junk} {
+ ECHO;
+ }
+{decimal_junk} {
+ ECHO;
+ }
+{real_junk} {
ECHO;
}
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index 9286a0355d..2367b860f5 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -353,7 +353,7 @@ operator {op_chars}+
*
* {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
*
- * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * {realfail} is added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
digit [0-9]
@@ -362,10 +362,14 @@ integer {digit}+
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
decimalfail {digit}+\.\.
real ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1 ({integer}|{decimal})[Ee]
-realfail2 ({integer}|{decimal})[Ee][-+]
+realfail ({integer}|{decimal})[Ee][-+]
+
+integer_junk {integer}{ident_start}
+decimal_junk {decimal}{ident_start}
+real_junk {real}{ident_start}
param \${integer}
+param_junk \${integer}{ident_start}
/* special characters for other dbms */
/* we have to react differently in compat mode */
@@ -917,6 +921,9 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
base_yylval.ival = atol(yytext+1);
return PARAM;
}
+{param_junk} {
+ mmfatal(PARSE_ERROR, "trailing junk after parameter");
+ }
{ip} {
base_yylval.str = mm_strdup(yytext);
@@ -941,22 +948,31 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
base_yylval.str = mm_strdup(yytext);
return FCONST;
}
-{realfail1} {
+{realfail} {
/*
- * throw back the [Ee], and figure out whether what
+ * throw back the [Ee][+-], and figure out whether what
* remains is an {integer} or {decimal}.
*/
- yyless(yyleng - 1);
- return process_integer_literal(yytext, &base_yylval);
- }
-{realfail2} {
- /* throw back the [Ee][+-], and proceed as above */
yyless(yyleng - 2);
return process_integer_literal(yytext, &base_yylval);
}
} /* <C,SQL> */
<SQL>{
+ /*
+ * Note that some trailing junk is valid in C (such as 100LL), so we
+ * contain this to SQL mode.
+ */
+{integer_junk} {
+ mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
+ }
+{decimal_junk} {
+ mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
+ }
+{real_junk} {
+ mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
+ }
+
:{identifier}((("->"|\.){identifier})|(\[{array}\]))* {
base_yylval.str = mm_strdup(yytext+1);
return CVARIABLE;
diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out
index 2ffc73e854..77d4843417 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -6,64 +6,45 @@
-- Trailing junk in numeric literals
--
SELECT 123abc;
- abc
------
- 123
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "123a"
+LINE 1: SELECT 123abc;
+ ^
SELECT 0x0o;
- x0o
------
- 0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0x"
+LINE 1: SELECT 0x0o;
+ ^
SELECT 1_2_3;
- _2_3
-------
- 1
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "1_"
+LINE 1: SELECT 1_2_3;
+ ^
SELECT 0.a;
- a
----
- 0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0.a"
+LINE 1: SELECT 0.a;
+ ^
SELECT 0.0a;
- a
------
- 0.0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0.0a"
+LINE 1: SELECT 0.0a;
+ ^
SELECT .0a;
- a
------
- 0.0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near ".0a"
+LINE 1: SELECT .0a;
+ ^
SELECT 0.0e1a;
- a
----
- 0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0.0e1a"
+LINE 1: SELECT 0.0e1a;
+ ^
SELECT 0.0e;
- e
------
- 0.0
-(1 row)
-
+ERROR: trailing junk after numeric literal at or near "0.0e"
+LINE 1: SELECT 0.0e;
+ ^
SELECT 0.0e+a;
-ERROR: syntax error at or near "+"
+ERROR: trailing junk after numeric literal at or near "0.0e+"
LINE 1: SELECT 0.0e+a;
- ^
+ ^
PREPARE p1 AS SELECT $1a;
-EXECUTE p1(1);
- a
----
- 1
-(1 row)
-
+ERROR: trailing junk after parameter at or near "$1a"
+LINE 1: PREPARE p1 AS SELECT $1a;
+ ^
--
-- Test implicit type conversions
-- This fails for Postgres v6.1 (and earlier?)
diff --git a/src/test/regress/sql/numerology.sql b/src/test/regress/sql/numerology.sql
index fb75f97832..be7d6dfe0c 100644
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -17,7 +17,6 @@ SELECT 0.0e1a;
SELECT 0.0e;
SELECT 0.0e+a;
PREPARE p1 AS SELECT $1a;
-EXECUTE p1(1);
--
-- Test implicit type conversions