diff options
| author | Tom Lane | 2004-02-24 21:45:18 +0000 |
|---|---|---|
| committer | Tom Lane | 2004-02-24 21:45:18 +0000 |
| commit | bc04d02f6a626887a7e500032d899ec07e11a936 (patch) | |
| tree | d962f5bb01d13f17029a67bb287d8222fa7c1ed3 | |
| parent | 0760253d3be09dcc5fcde7086c32dcc55d18acbf (diff) | |
Implement dollar-quoting in the backend lexer and psql. Documentation
is still lacking, as is support in plpgsql and other places, but this is
the basic feature. Patch by Andrew Dunstan, some tweaking by Tom Lane.
Also, enable %option nodefault in these two lexers, and patch some gaps
revealed thereby.

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/backend/parser/scan.l | 58 |
| -rw-r--r-- | src/bin/psql/prompt.c | 4 |
| -rw-r--r-- | src/bin/psql/prompt.h | 1 |
| -rw-r--r-- | src/bin/psql/psqlscan.l | 72 |

4 files changed, 127 insertions, 8 deletions
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index f3d52790c7..bdeb57e796 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -37,6 +37,7 @@ extern YYSTYPE yylval; static int xcdepth = 0; /* depth of nesting in slash-star comments */ +static char *dolqstart; /* current $foo$ quote start string */ /* * literalbuf is used to accumulate literal values when multiple rules @@ -74,6 +75,7 @@ unsigned char unescape_single_char(unsigned char c); %option 8bit %option never-interactive +%option nodefault %option nounput %option noyywrap %option prefix="base_yy" @@ -94,6 +96,7 @@ unsigned char unescape_single_char(unsigned char c); * <xd> delimited identifiers (double-quoted identifiers) * <xh> hexadecimal numeric string * <xq> quoted strings + * <xdolq> $foo$ quoted strings */ %x xb @@ -101,6 +104,7 @@ unsigned char unescape_single_char(unsigned char c); %x xd %x xh %x xq +%x xdolq /* * In order to make the world safe for Windows and Mac clients as well as @@ -175,6 +179,17 @@ xqescape [\\][^0-7] xqoctesc [\\][0-7]{1,3} xqcat {quote}{whitespace_with_newline}{quote} +/* $foo$ style quotes ("dollar quoting") + * The quoted string starts with $foo$ where "foo" is an optional string + * in the form of an identifier, except that it may not contain "$", + * and extends to the first occurrence of an identical string. + * There is *no* processing of the quoted text. + */ +dolq_start [A-Za-z\200-\377_] +dolq_cont [A-Za-z\200-\377_0-9] +dolqdelim \$({dolq_start}{dolq_cont}*)?\$ +dolqinside [^$]+ + /* Double quote * Allows embedded spaces and other special characters into identifiers. */ @@ -242,7 +257,8 @@ param \${integer} other . /* - * Quoted strings must allow some special characters such as single-quote + * Dollar quoted strings are totally opaque, and no escaping is done on them. + * Other quoted strings must allow some special characters such as single-quote * and newline. 
* Embedded single-quotes are implemented both in the SQL standard * style of two adjacent single quotes "''" and in the Postgres/Java style @@ -388,8 +404,46 @@ other . <xq>{xqcat} { /* ignore */ } +<xq>. { + /* This is only needed for \ just before EOF */ + addlitchar(yytext[0]); + } <xq><<EOF>> { yyerror("unterminated quoted string"); } +{dolqdelim} { + token_start = yytext; + dolqstart = pstrdup(yytext); + BEGIN(xdolq); + startlit(); + } +<xdolq>{dolqdelim} { + if (strcmp(yytext, dolqstart) == 0) + { + pfree(dolqstart); + BEGIN(INITIAL); + yylval.str = litbufdup(); + return SCONST; + } + else + { + /* + * When we fail to match $...$ to dolqstart, transfer + * the $... part to the output, but put back the final + * $ for rescanning. Consider $delim$...$junk$delim$ + */ + addlit(yytext, yyleng-1); + yyless(yyleng-1); + } + } +<xdolq>{dolqinside} { + addlit(yytext, yyleng); + } +<xdolq>. { + /* This is only needed for $ inside the quoted text */ + addlitchar(yytext[0]); + } +<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); } + {xdstart} { token_start = yytext; BEGIN(xd); @@ -407,7 +461,7 @@ other . 
yylval.str = ident; return IDENT; } -<xd>{xddouble} { +<xd>{xddouble} { addlitchar('"'); } <xd>{xdinside} { diff --git a/src/bin/psql/prompt.c b/src/bin/psql/prompt.c index c3cc9e4083..1e05e5c37e 100644 --- a/src/bin/psql/prompt.c +++ b/src/bin/psql/prompt.c @@ -85,6 +85,7 @@ get_prompt(promptStatus_t status) case PROMPT_CONTINUE: case PROMPT_SINGLEQUOTE: case PROMPT_DOUBLEQUOTE: + case PROMPT_DOLLARQUOTE: case PROMPT_COMMENT: case PROMPT_PAREN: prompt_name = "PROMPT2"; @@ -199,6 +200,9 @@ get_prompt(promptStatus_t status) case PROMPT_DOUBLEQUOTE: buf[0] = '"'; break; + case PROMPT_DOLLARQUOTE: + buf[0] = '$'; + break; case PROMPT_COMMENT: buf[0] = '*'; break; diff --git a/src/bin/psql/prompt.h b/src/bin/psql/prompt.h index 031d4c1774..5880111103 100644 --- a/src/bin/psql/prompt.h +++ b/src/bin/psql/prompt.h @@ -15,6 +15,7 @@ typedef enum _promptStatus PROMPT_COMMENT, PROMPT_SINGLEQUOTE, PROMPT_DOUBLEQUOTE, + PROMPT_DOLLARQUOTE, PROMPT_PAREN, PROMPT_COPY } promptStatus_t; diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l index 1c5438dcdd..f23b9d398b 100644 --- a/src/bin/psql/psqlscan.l +++ b/src/bin/psql/psqlscan.l @@ -92,6 +92,7 @@ typedef struct PsqlScanStateData int start_state; /* saved YY_START */ int paren_depth; /* depth of nesting in parentheses */ int xcdepth; /* depth of nesting in slash-star comments */ + char *dolqstart; /* current $foo$ quote start string */ } PsqlScanStateData; static PsqlScanState cur_state; /* current state while active */ @@ -123,6 +124,7 @@ static void emit(const char *txt, int len); %option 8bit %option never-interactive +%option nodefault %option nounput %option noyywrap @@ -151,6 +153,7 @@ static void emit(const char *txt, int len); * <xd> delimited identifiers (double-quoted identifiers) * <xh> hexadecimal numeric string * <xq> quoted strings + * <xdolq> $foo$ quoted strings */ %x xb @@ -158,6 +161,7 @@ static void emit(const char *txt, int len); %x xd %x xh %x xq +%x xdolq /* Additional exclusive states for psql 
only: lex backslash commands */ %x xslashcmd %x xslasharg @@ -241,6 +245,17 @@ xqescape [\\][^0-7] xqoctesc [\\][0-7]{1,3} xqcat {quote}{whitespace_with_newline}{quote} +/* $foo$ style quotes ("dollar quoting") + * The quoted string starts with $foo$ where "foo" is an optional string + * in the form of an identifier, except that it may not contain "$", + * and extends to the first occurrence of an identical string. + * There is *no* processing of the quoted text. + */ +dolq_start [A-Za-z\200-\377_] +dolq_cont [A-Za-z\200-\377_0-9] +dolqdelim \$({dolq_start}{dolq_cont}*)?\$ +dolqinside [^$]+ + /* Double quote * Allows embedded spaces and other special characters into identifiers. */ @@ -308,7 +323,8 @@ param \${integer} other . /* - * Quoted strings must allow some special characters such as single-quote + * Dollar quoted strings are totally opaque, and no escaping is done on them. + * Other quoted strings must allow some special characters such as single-quote * and newline. * Embedded single-quotes are implemented both in the SQL standard * style of two adjacent single quotes "''" and in the Postgres/Java style @@ -427,6 +443,41 @@ other . <xq>{xqcat} { ECHO; } +<xq>. { + /* This is only needed for \ just before EOF */ + ECHO; + } + +{dolqdelim} { + cur_state->dolqstart = pg_strdup(yytext); + BEGIN(xdolq); + ECHO; + } +<xdolq>{dolqdelim} { + if (strcmp(yytext, cur_state->dolqstart) == 0) + { + free(cur_state->dolqstart); + cur_state->dolqstart = NULL; + BEGIN(INITIAL); + } + else + { + /* + * When we fail to match $...$ to dolqstart, transfer + * the $... part to the output, but put back the final + * $ for rescanning. Consider $delim$...$junk$delim$ + */ + yyless(yyleng-1); + } + ECHO; + } +<xdolq>{dolqinside} { + ECHO; + } +<xdolq>. { + /* This is only needed for $ inside the quoted text */ + ECHO; + } {xdstart} { BEGIN(xd); @@ -436,7 +487,7 @@ other . 
BEGIN(INITIAL); ECHO; } -<xd>{xddouble} { +<xd>{xddouble} { ECHO; } <xd>{xdinside} { @@ -754,7 +805,7 @@ other . "\\". { emit(yytext + 1, 1); } -{other} { ECHO; } +{other}|\n { ECHO; } } @@ -766,7 +817,7 @@ other . "`" { return LEXRES_OK; } -{other} { ECHO; } +{other}|\n { ECHO; } } @@ -811,7 +862,7 @@ other . BEGIN(xslashdefaultarg); } -{other} { ECHO; } +{other}|\n { ECHO; } } @@ -833,7 +884,7 @@ other . "\\\\" { return LEXRES_OK; } -{other} { +{other}|\n { yyless(0); return LEXRES_OK; } @@ -865,6 +916,8 @@ psql_scan_destroy(PsqlScanState state) { psql_scan_finish(state); + psql_scan_reset(state); + free(state); } @@ -1008,6 +1061,10 @@ psql_scan(PsqlScanState state, result = PSCAN_INCOMPLETE; *prompt = PROMPT_SINGLEQUOTE; break; + case xdolq: + result = PSCAN_INCOMPLETE; + *prompt = PROMPT_DOLLARQUOTE; + break; default: /* can't get here */ fprintf(stderr, "invalid YY_START\n"); @@ -1082,6 +1139,9 @@ psql_scan_reset(PsqlScanState state) state->start_state = INITIAL; state->paren_depth = 0; state->xcdepth = 0; /* not really necessary */ + if (state->dolqstart) + free(state->dolqstart); + state->dolqstart = NULL; } /* |