diff options
author | Pavan Deolasee | 2016-03-03 05:35:30 +0000 |
---|---|---|
committer | Pavan Deolasee | 2016-10-18 10:00:18 +0000 |
commit | 455ff923454e78d80b77639a381db9b05c776577 (patch) | |
tree | 4ada4c3e541c5afd33d5f70be516dd5ee3232494 | |
parent | e99d0a73f172b68394f43b5c00b11f282b1731dc (diff) |
Collect and return query substrings corresponding to each SQL statement
while parsing a multi-statement query separated by ';'
raw_parser() returns a list of parsetrees after parsing a multi-statement SQL
query, where each parsetree corresponds to one SQL statement. It does not have
any mechanism to return the source text of each SQL statement. In Postgres-XL,
we send out the query text as is to remote datanodes and coordinators while
dealing with utility statements. Not having access to the individual SQL
statements is a problem because we end up sending the same text again and
again, leading to various issues.
This patch adds a rudimentary mechanism to return a list of query strings
along with the list of parsetrees.
-rw-r--r-- | src/backend/commands/tablecmds.c | 2 | ||||
-rw-r--r-- | src/backend/parser/gram.y | 64 | ||||
-rw-r--r-- | src/backend/parser/parse_type.c | 2 | ||||
-rw-r--r-- | src/backend/parser/parser.c | 5 | ||||
-rw-r--r-- | src/backend/parser/scan.l | 36 | ||||
-rw-r--r-- | src/backend/tcop/postgres.c | 38 | ||||
-rw-r--r-- | src/include/parser/gramparse.h | 1 | ||||
-rw-r--r-- | src/include/parser/parser.h | 2 | ||||
-rw-r--r-- | src/include/parser/scanner.h | 8 | ||||
-rw-r--r-- | src/include/tcop/tcopprot.h | 1 | ||||
-rw-r--r-- | src/pl/plpgsql/src/pl_gram.y | 2 |
11 files changed, 146 insertions, 15 deletions
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 98d532b162..e43b76b83a 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8858,7 +8858,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd, * parse_analyze() or the rewriter, but instead we need to pass them * through parse_utilcmd.c to make them ready for execution. */ - raw_parsetree_list = raw_parser(cmd); + raw_parsetree_list = raw_parser(cmd, NULL); querytree_list = NIL; foreach(list_item, raw_parsetree_list) { diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 2f4bb9ab69..bf55f5060b 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -123,6 +123,14 @@ typedef struct ImportQual List *table_names; } ImportQual; +typedef struct StmtMulti +{ + List *parsetrees; + List *queries; + int offset; + char *lastQuery; +} StmtMulti; + /* ConstraintAttributeSpec yields an integer bitmask of these flags: */ #define CAS_NOT_DEFERRABLE 0x01 #define CAS_DEFERRABLE 0x02 @@ -231,6 +239,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); InsertStmt *istmt; VariableSetStmt *vsetstmt; /* PGXC_BEGIN */ + struct StmtMulti *stmtmulti; DistributeBy *distby; PGXCSubCluster *subclus; /* PGXC_END */ @@ -354,7 +363,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <ival> import_qualification_type %type <importqual> import_qualification -%type <list> stmtblock stmtmulti +%type <stmtmulti> stmtmulti +%type <list> stmtblock OptTableElementList TableElementList OptInherit definition OptTypedTableElementList TypedTableElementList reloptions opt_reloptions @@ -759,7 +769,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); */ stmtblock: stmtmulti { - pg_yyget_extra(yyscanner)->parsetree = $1; + pg_yyget_extra(yyscanner)->parsetree = $1 ? 
$1->parsetrees : NIL; + pg_yyget_extra(yyscanner)->queries = $1 ? $1->queries : NIL; } ; @@ -767,16 +778,59 @@ stmtblock: stmtmulti stmtmulti: stmtmulti ';' stmt { if ($3 != NULL) - $$ = lappend($1, $3); + { + char *query = scanner_get_query(@3, -1, yyscanner); + /* + * Because of the way multi-commands are parsed by the + * parser, when the earlier command was parsed and + * reduced to a 'stmtmulti', we did not have the + * end-of-the-query marker. But now that we have seen + * the ';' token, add '\0' at the corresponding offset + * to get a separated command. + */ + if ($1->lastQuery) + $1->lastQuery[@2 - $1->offset] = '\0'; + $1->offset = @2; + $1->parsetrees = lappend($1->parsetrees, $3); + $1->queries = lappend($1->queries, makeString(query)); + $1->lastQuery = query; + $$ = $1; + } else $$ = $1; } | stmt { if ($1 != NULL) - $$ = list_make1($1); + { + StmtMulti *n = (StmtMulti *) palloc0(sizeof (StmtMulti)); + char *query = scanner_get_query(@1, -1, yyscanner); + n->lastQuery = query; + + /* + * Keep track of the offset where $1 started. We don't + * have the offset where it ends so we copy the entire + * query to the end. 
If later, we find a ';' followed + * by another command, we'll add the '\0' at the + * appropriate offset + * + * XXX May be there is a better way to get the matching + * portion of the query string, but this does the trick + * for regression as well as the problem we are trying + * to solve with multi-command queries + */ + n->offset = @1; + + /* + * Collect both parsetree as well as the original query + * that resulted in the parsetree + */ + n->parsetrees = list_make1($1); + n->queries = list_make1(makeString(query)); + $$ = n; + } else - $$ = NIL; + $$ = NULL; } ; diff --git a/src/backend/parser/parse_type.c b/src/backend/parser/parse_type.c index 661663994e..69023cea8d 100644 --- a/src/backend/parser/parse_type.c +++ b/src/backend/parser/parse_type.c @@ -739,7 +739,7 @@ typeStringToTypeName(const char *str) ptserrcontext.previous = error_context_stack; error_context_stack = &ptserrcontext; - raw_parsetree_list = raw_parser(buf.data); + raw_parsetree_list = raw_parser(buf.data, NULL); error_context_stack = ptserrcontext.previous; diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c index fdf5a6a1ca..6a01faa247 100644 --- a/src/backend/parser/parser.c +++ b/src/backend/parser/parser.c @@ -32,7 +32,7 @@ * Returns a list of raw (un-analyzed) parse trees. 
*/ List * -raw_parser(const char *str) +raw_parser(const char *str, List **queries) { core_yyscan_t yyscanner; base_yy_extra_type yyextra; @@ -57,6 +57,9 @@ raw_parser(const char *str) if (yyresult) /* error */ return NIL; + if (queries) + *queries = yyextra.queries; + return yyextra.parsetree; } diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 82b20c6e5f..0d36717f4e 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -1103,6 +1103,10 @@ scanner_init(const char *str, yyext->keywords = keywords; yyext->num_keywords = num_keywords; +#ifdef XCP + yyext->query = pstrdup(str); +#endif + yyext->backslash_quote = backslash_quote; yyext->escape_string_warning = escape_string_warning; yyext->standard_conforming_strings = standard_conforming_strings; @@ -1526,3 +1530,35 @@ core_yyfree(void *ptr, core_yyscan_t yyscanner) if (ptr) pfree(ptr); } + +/* + * Return a copy of a substring of the original query string, starting at + * 'start' offset and 'len' bytes long, Be mindful of the invalid arguments + * being passed by the caller + */ +char * +scanner_get_query(int start, int len, core_yyscan_t yyscanner) +{ + char *query; + + /* + * If the caller passes a wrong offset, just assume 0 + */ + if (start == -1) + start = 0; + if (start > strlen(yyextra->query)) + return NULL; + /* + * Similarly, if the passed-in length is more than remaining + * bytes in the string, just return whatever is available + */ + if (len == -1) + len = strlen(yyextra->query) - start; + else if (len + start > strlen(yyextra->query)) + return NULL; + + query = palloc0(len + 1); + memcpy(query, yyextra->query + start, len); + query[len] = '\0'; + return query; +} diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 4605eedd77..6ffe015136 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -754,7 +754,7 @@ ProcessClientWriteInterrupt(bool blocked) * commands are not processed any further than the raw parse stage. 
*/ List * -pg_parse_query(const char *query_string) +pg_parse_query_internal(const char *query_string, List **querysource_list) { List *raw_parsetree_list; @@ -763,7 +763,7 @@ pg_parse_query(const char *query_string) if (log_parser_stats) ResetUsage(); - raw_parsetree_list = raw_parser(query_string); + raw_parsetree_list = raw_parser(query_string, querysource_list); if (log_parser_stats) ShowUsage("PARSER STATISTICS"); @@ -786,6 +786,18 @@ pg_parse_query(const char *query_string) return raw_parsetree_list; } +List * +pg_parse_query(const char *query_string) +{ + return pg_parse_query_internal(query_string, NULL); +} + +List * +pg_parse_query_get_source(const char *query_string, List **querysource_list) +{ + return pg_parse_query_internal(query_string, querysource_list); +} + /* * Given a raw parsetree (gram.y output), and optionally information about * types of parameter symbols ($n), perform parse analysis and rule rewriting. @@ -1044,6 +1056,8 @@ exec_simple_query(const char *query_string) MemoryContext oldcontext; List *parsetree_list; ListCell *parsetree_item; + List *querysource_list; + ListCell *querysource_item; bool save_log_statement_stats = log_statement_stats; bool was_logged = false; bool isTopLevel; @@ -1092,7 +1106,7 @@ exec_simple_query(const char *query_string) * Do basic parsing of the query or queries (this should be safe even if * we are in aborted transaction state!) */ - parsetree_list = pg_parse_query(query_string); + parsetree_list = pg_parse_query_get_source(query_string, &querysource_list); #ifdef XCP if (IS_PGXC_LOCAL_COORDINATOR && list_length(parsetree_list) > 1) @@ -1159,9 +1173,10 @@ exec_simple_query(const char *query_string) /* * Run through the raw parsetree(s) and process each one. 
*/ - foreach(parsetree_item, parsetree_list) + forboth(parsetree_item, parsetree_list, querysource_item, querysource_list) { Node *parsetree = (Node *) lfirst(parsetree_item); + char *querysource = ((Value *) lfirst(querysource_item))->val.str; bool snapshot_set = false; const char *commandTag; char completionTag[COMPLETION_TAG_BUFSIZE]; @@ -1274,10 +1289,23 @@ exec_simple_query(const char *query_string) * We don't have to copy anything into the portal, because everything * we are passing here is in MessageContext, which will outlive the * portal anyway. + * + * The query_string may contain multiple commands separated by ';' and + * we have a separate parsetree corresponding to each such command. + * Since we later may send down the query to the remote nodes + * (especially for utility queries), using the query_string is a + * problem because the same query will be sent out multiple times, one + * for each command processed. So we taught the parser to return the + * portion of the query_string along with the parsetree and use that + * while defining a portal below. + * + * XXX Since the portal expects to see a valid query_string, if the + * substring is available, use the original query_string. Not elegant, + * but far better than what we were doing earlier */ PortalDefineQuery(portal, NULL, - query_string, + querysource ? querysource : query_string, commandTag, plantree_list, NULL); diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h index 100fdfb213..6c49917da3 100644 --- a/src/include/parser/gramparse.h +++ b/src/include/parser/gramparse.h @@ -53,6 +53,7 @@ typedef struct base_yy_extra_type * State variables that belong to the grammar. 
*/ List *parsetree; /* final parse result is delivered here */ + List *queries; } base_yy_extra_type; /* diff --git a/src/include/parser/parser.h b/src/include/parser/parser.h index 4208d545ab..af77da59aa 100644 --- a/src/include/parser/parser.h +++ b/src/include/parser/parser.h @@ -32,7 +32,7 @@ extern PGDLLIMPORT bool standard_conforming_strings; /* Primary entry point for the raw parsing functions */ -extern List *raw_parser(const char *str); +extern List *raw_parser(const char *str, List **queries); /* Utility functions exported by gram.y (perhaps these should be elsewhere) */ extern List *SystemFuncName(char *name); diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h index 9e38c0d197..b0d890e82c 100644 --- a/src/include/parser/scanner.h +++ b/src/include/parser/scanner.h @@ -72,6 +72,13 @@ typedef struct core_yy_extra_type char *scanbuf; Size scanbuflen; +#ifdef XCP + /* + * Pointer to the original query string + */ + char *query; +#endif + /* * The keyword list to use. 
*/ @@ -126,5 +133,6 @@ extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner); extern int scanner_errposition(int location, core_yyscan_t yyscanner); extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn(); +extern char *scanner_get_query(int start, int len, core_yyscan_t yyscanner); #endif /* SCANNER_H */ diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h index 5abc26e864..de4591f110 100644 --- a/src/include/tcop/tcopprot.h +++ b/src/include/tcop/tcopprot.h @@ -47,6 +47,7 @@ typedef enum extern int log_statement; extern List *pg_parse_query(const char *query_string); +extern List *pg_parse_query_get_source(const char *query_string, List **queries); extern List *pg_analyze_and_rewrite(Node *parsetree, const char *query_string, Oid *paramTypes, int numParams); extern List *pg_analyze_and_rewrite_params(Node *parsetree, diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index 00978909a3..80bb07041a 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -3470,7 +3470,7 @@ check_sql_expr(const char *stmt, int location, int leaderlen) error_context_stack = &syntax_errcontext; oldCxt = MemoryContextSwitchTo(compile_tmp_cxt); - (void) raw_parser(stmt); + (void) raw_parser(stmt, NULL); MemoryContextSwitchTo(oldCxt); /* Restore former ereport callback */ |