summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavan Deolasee2016-03-03 05:35:30 +0000
committerPavan Deolasee2016-10-18 10:00:18 +0000
commit455ff923454e78d80b77639a381db9b05c776577 (patch)
tree4ada4c3e541c5afd33d5f70be516dd5ee3232494
parente99d0a73f172b68394f43b5c00b11f282b1731dc (diff)
Collect and return query substrings corresponding to each SQL statement
while parsing a multi-statement query separated by ';'. raw_parser() returns a list of parsetrees after parsing a multi-statement SQL query, where each parsetree corresponds to one SQL statement. It does not have any mechanism to return the source text of each SQL statement. In Postgres-XL, we send the query text as-is to remote datanodes and coordinators while dealing with utility statements. Not having access to the text of the individual SQL statements is a problem because we end up sending the same full text again and again, leading to various issues. This patch adds a rudimentary mechanism to return a list of query strings along with the list of parsetrees.
-rw-r--r--src/backend/commands/tablecmds.c2
-rw-r--r--src/backend/parser/gram.y64
-rw-r--r--src/backend/parser/parse_type.c2
-rw-r--r--src/backend/parser/parser.c5
-rw-r--r--src/backend/parser/scan.l36
-rw-r--r--src/backend/tcop/postgres.c38
-rw-r--r--src/include/parser/gramparse.h1
-rw-r--r--src/include/parser/parser.h2
-rw-r--r--src/include/parser/scanner.h8
-rw-r--r--src/include/tcop/tcopprot.h1
-rw-r--r--src/pl/plpgsql/src/pl_gram.y2
11 files changed, 146 insertions, 15 deletions
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 98d532b162..e43b76b83a 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8858,7 +8858,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
* parse_analyze() or the rewriter, but instead we need to pass them
* through parse_utilcmd.c to make them ready for execution.
*/
- raw_parsetree_list = raw_parser(cmd);
+ raw_parsetree_list = raw_parser(cmd, NULL);
querytree_list = NIL;
foreach(list_item, raw_parsetree_list)
{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2f4bb9ab69..bf55f5060b 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -123,6 +123,14 @@ typedef struct ImportQual
List *table_names;
} ImportQual;
+typedef struct StmtMulti	/* semantic value of the stmtmulti production */
+{
+ List *parsetrees;	/* raw parse trees, one per non-empty statement */
+ List *queries;	/* makeString() nodes holding each statement's source text, parallel to parsetrees */
+ int offset;	/* scan location subtracted from a ';' location to index into lastQuery */
+ char *lastQuery;	/* text stored in the last "queries" entry; cut in place once the next ';' is seen */
+} StmtMulti;
+
/* ConstraintAttributeSpec yields an integer bitmask of these flags: */
#define CAS_NOT_DEFERRABLE 0x01
#define CAS_DEFERRABLE 0x02
@@ -231,6 +239,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
InsertStmt *istmt;
VariableSetStmt *vsetstmt;
/* PGXC_BEGIN */
+ struct StmtMulti *stmtmulti;
DistributeBy *distby;
PGXCSubCluster *subclus;
/* PGXC_END */
@@ -354,7 +363,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <ival> import_qualification_type
%type <importqual> import_qualification
-%type <list> stmtblock stmtmulti
+%type <stmtmulti> stmtmulti
+%type <list> stmtblock
OptTableElementList TableElementList OptInherit definition
OptTypedTableElementList TypedTableElementList
reloptions opt_reloptions
@@ -759,7 +769,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
*/
stmtblock: stmtmulti
{
- pg_yyget_extra(yyscanner)->parsetree = $1;
+ pg_yyget_extra(yyscanner)->parsetree = $1 ? $1->parsetrees : NIL;	/* $1 is NULL when stmtmulti collected no statement */
+ pg_yyget_extra(yyscanner)->queries = $1 ? $1->queries : NIL;	/* per-statement source text, handed back by raw_parser() */
}
;
@@ -767,16 +778,59 @@ stmtblock: stmtmulti
stmtmulti: stmtmulti ';' stmt
{
if ($3 != NULL)
- $$ = lappend($1, $3);
+					{
+						StmtMulti  *n = $1;
+						char	   *query = scanner_get_query(@3, -1, yyscanner);
+
+						if (n == NULL)
+						{
+							/*
+							 * Everything before this statement was empty
+							 * (e.g. ";SELECT 1"), so there is no
+							 * accumulator yet and nothing to terminate.
+							 */
+							n = (StmtMulti *) palloc0(sizeof(StmtMulti));
+						}
+						else if (n->lastQuery && @2 >= n->offset)
+						{
+							/*
+							 * When the earlier command was reduced to a
+							 * 'stmtmulti' we had not yet seen where it
+							 * ends, so its text was copied through the end
+							 * of the input.  Now that the ';' token has
+							 * been seen, put a '\0' at the matching
+							 * position to get a separated command.
+							 */
+							n->lastQuery[@2 - n->offset] = '\0';
+						}
+
+						/*
+						 * 'query' starts at @3 (scanner_get_query() maps an
+						 * unknown location of -1 to 0), so that is the base
+						 * the next ';' location must be measured from.
+						 * Using @2 here would make the next truncation
+						 * index too large by the distance between the ';'
+						 * and this statement, which can run past the end of
+						 * the buffer.
+						 */
+						n->offset = (@3 >= 0) ? @3 : 0;
+						n->parsetrees = lappend(n->parsetrees, $3);
+						n->queries = lappend(n->queries, makeString(query));
+						n->lastQuery = query;
+						$$ = n;
+					}
else
$$ = $1;
}
| stmt
{
if ($1 != NULL)
- $$ = list_make1($1);
+					{
+						StmtMulti  *n = (StmtMulti *) palloc0(sizeof(StmtMulti));
+						char	   *query = scanner_get_query(@1, -1, yyscanner);
+
+						n->lastQuery = query;
+
+						/*
+						 * Keep track of the offset where $1 started.  We
+						 * don't have the offset where it ends, so the copy
+						 * runs to the end of the input.  If we later find a
+						 * ';' followed by another command, a '\0' is added
+						 * at the appropriate offset.
+						 *
+						 * XXX Maybe there is a better way to get the
+						 * matching portion of the query string, but this
+						 * does the trick for regression as well as the
+						 * problem we are trying to solve with multi-command
+						 * queries.
+						 */
+						n->offset = @1;
+
+						/*
+						 * Collect both the parsetree and the original
+						 * query text that resulted in the parsetree.
+						 */
+						n->parsetrees = list_make1($1);
+						n->queries = list_make1(makeString(query));
+						$$ = n;
+					}
else
- $$ = NIL;
+						$$ = NULL;
}
;
diff --git a/src/backend/parser/parse_type.c b/src/backend/parser/parse_type.c
index 661663994e..69023cea8d 100644
--- a/src/backend/parser/parse_type.c
+++ b/src/backend/parser/parse_type.c
@@ -739,7 +739,7 @@ typeStringToTypeName(const char *str)
ptserrcontext.previous = error_context_stack;
error_context_stack = &ptserrcontext;
- raw_parsetree_list = raw_parser(buf.data);
+ raw_parsetree_list = raw_parser(buf.data, NULL);
error_context_stack = ptserrcontext.previous;
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c
index fdf5a6a1ca..6a01faa247 100644
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -32,7 +32,7 @@
* Returns a list of raw (un-analyzed) parse trees.
*/
List *
-raw_parser(const char *str)
+raw_parser(const char *str, List **queries)
{
core_yyscan_t yyscanner;
base_yy_extra_type yyextra;
@@ -57,6 +57,9 @@ raw_parser(const char *str)
if (yyresult) /* error */
return NIL;
+ if (queries)
+ *queries = yyextra.queries;
+
return yyextra.parsetree;
}
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 82b20c6e5f..0d36717f4e 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -1103,6 +1103,10 @@ scanner_init(const char *str,
yyext->keywords = keywords;
yyext->num_keywords = num_keywords;
+#ifdef XCP
+ yyext->query = pstrdup(str);
+#endif
+
yyext->backslash_quote = backslash_quote;
yyext->escape_string_warning = escape_string_warning;
yyext->standard_conforming_strings = standard_conforming_strings;
@@ -1526,3 +1530,35 @@ core_yyfree(void *ptr, core_yyscan_t yyscanner)
if (ptr)
pfree(ptr);
}
+
+/*
+ * scanner_get_query
+ *		Return a palloc'd, NUL-terminated copy of a substring of the original
+ *		query string saved in yyextra->query.
+ *
+ * 'start' is the byte offset of the substring; -1 (unknown location) is
+ * treated as 0.  'len' is the number of bytes to copy; -1 means "everything
+ * from 'start' to the end of the query".
+ *
+ * Returns NULL if the requested range does not lie within the query string,
+ * so callers must be prepared for a NULL result.
+ *
+ * NOTE: yyextra->query is declared and filled in only under #ifdef XCP.
+ */
+char *
+scanner_get_query(int start, int len, core_yyscan_t yyscanner)
+{
+	size_t		querylen = strlen(yyextra->query);
+	size_t		off;
+	size_t		nbytes;
+	char	   *query;
+
+	/* An unknown location means "from the beginning". */
+	if (start == -1)
+		start = 0;
+
+	/* Reject offsets outside the string explicitly, without sign tricks. */
+	if (start < 0 || (size_t) start > querylen)
+		return NULL;
+	off = (size_t) start;
+
+	/*
+	 * Compare against the remaining length rather than computing
+	 * 'len + start', which could overflow or let a negative 'len' through.
+	 */
+	if (len == -1)
+		nbytes = querylen - off;
+	else if (len < 0 || (size_t) len > querylen - off)
+		return NULL;
+	else
+		nbytes = (size_t) len;
+
+	/* palloc0 zero-fills, so the copy is always NUL-terminated. */
+	query = palloc0(nbytes + 1);
+	memcpy(query, yyextra->query + off, nbytes);
+	return query;
+}
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 4605eedd77..6ffe015136 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -754,7 +754,7 @@ ProcessClientWriteInterrupt(bool blocked)
* commands are not processed any further than the raw parse stage.
*/
List *
-pg_parse_query(const char *query_string)
+pg_parse_query_internal(const char *query_string, List **querysource_list)
{
List *raw_parsetree_list;
@@ -763,7 +763,7 @@ pg_parse_query(const char *query_string)
if (log_parser_stats)
ResetUsage();
- raw_parsetree_list = raw_parser(query_string);
+ raw_parsetree_list = raw_parser(query_string, querysource_list);
if (log_parser_stats)
ShowUsage("PARSER STATISTICS");
@@ -786,6 +786,18 @@ pg_parse_query(const char *query_string)
return raw_parsetree_list;
}
+/*
+ * pg_parse_query
+ *		Raw-parse query_string without collecting the per-statement source
+ *		text (no query-source list is requested from the internal routine).
+ */
+List *
+pg_parse_query(const char *query_string)
+{
+	List	   *raw_parsetree_list;
+
+	raw_parsetree_list = pg_parse_query_internal(query_string, NULL);
+
+	return raw_parsetree_list;
+}
+
+/*
+ * pg_parse_query_get_source
+ *		Raw-parse query_string and also hand back, through
+ *		*querysource_list, the list of source strings that the parser
+ *		collected for the returned parse trees.
+ *
+ * querysource_list may be NULL if the caller does not want the strings.
+ */
+List *
+pg_parse_query_get_source(const char *query_string, List **querysource_list)
+{
+	/*
+	 * raw_parser() returns early on a parse failure without touching the
+	 * output list, and callers typically pass the address of an
+	 * uninitialized local.  Give it a defined value up front so that it is
+	 * always safe to iterate over.
+	 */
+	if (querysource_list)
+		*querysource_list = NIL;
+
+	return pg_parse_query_internal(query_string, querysource_list);
+}
+
/*
* Given a raw parsetree (gram.y output), and optionally information about
* types of parameter symbols ($n), perform parse analysis and rule rewriting.
@@ -1044,6 +1056,8 @@ exec_simple_query(const char *query_string)
MemoryContext oldcontext;
List *parsetree_list;
ListCell *parsetree_item;
+ List *querysource_list;
+ ListCell *querysource_item;
bool save_log_statement_stats = log_statement_stats;
bool was_logged = false;
bool isTopLevel;
@@ -1092,7 +1106,7 @@ exec_simple_query(const char *query_string)
* Do basic parsing of the query or queries (this should be safe even if
* we are in aborted transaction state!)
*/
- parsetree_list = pg_parse_query(query_string);
+ parsetree_list = pg_parse_query_get_source(query_string, &querysource_list);
#ifdef XCP
if (IS_PGXC_LOCAL_COORDINATOR && list_length(parsetree_list) > 1)
@@ -1159,9 +1173,10 @@ exec_simple_query(const char *query_string)
/*
* Run through the raw parsetree(s) and process each one.
*/
- foreach(parsetree_item, parsetree_list)
+ forboth(parsetree_item, parsetree_list, querysource_item, querysource_list)
{
Node *parsetree = (Node *) lfirst(parsetree_item);
+ char *querysource = ((Value *) lfirst(querysource_item))->val.str;
bool snapshot_set = false;
const char *commandTag;
char completionTag[COMPLETION_TAG_BUFSIZE];
@@ -1274,10 +1289,23 @@ exec_simple_query(const char *query_string)
* We don't have to copy anything into the portal, because everything
* we are passing here is in MessageContext, which will outlive the
* portal anyway.
+ *
+ * The query_string may contain multiple commands separated by ';' and
+ * we have a separate parsetree corresponding to each such command.
+ * Since we later may send down the query to the remote nodes
+ * (especially for utility queries), using the query_string is a
+ * problem because the same query will be sent out multiple times, one
+ * for each command processed. So we taught the parser to return the
+ * portion of the query_string along with the parsetree and use that
+ * while defining a portal below.
+ *
+ * XXX Since the portal expects to see a valid query_string, fall back to
+ * the original query_string if the substring is not available. Not
+ * elegant, but far better than what we were doing earlier
*/
PortalDefineQuery(portal,
NULL,
- query_string,
+ querysource ? querysource : query_string,
commandTag,
plantree_list,
NULL);
diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h
index 100fdfb213..6c49917da3 100644
--- a/src/include/parser/gramparse.h
+++ b/src/include/parser/gramparse.h
@@ -53,6 +53,7 @@ typedef struct base_yy_extra_type
* State variables that belong to the grammar.
*/
List *parsetree; /* final parse result is delivered here */
+ List *queries;
} base_yy_extra_type;
/*
diff --git a/src/include/parser/parser.h b/src/include/parser/parser.h
index 4208d545ab..af77da59aa 100644
--- a/src/include/parser/parser.h
+++ b/src/include/parser/parser.h
@@ -32,7 +32,7 @@ extern PGDLLIMPORT bool standard_conforming_strings;
/* Primary entry point for the raw parsing functions */
-extern List *raw_parser(const char *str);
+extern List *raw_parser(const char *str, List **queries);
/* Utility functions exported by gram.y (perhaps these should be elsewhere) */
extern List *SystemFuncName(char *name);
diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h
index 9e38c0d197..b0d890e82c 100644
--- a/src/include/parser/scanner.h
+++ b/src/include/parser/scanner.h
@@ -72,6 +72,13 @@ typedef struct core_yy_extra_type
char *scanbuf;
Size scanbuflen;
+#ifdef XCP
+ /*
+ * Pointer to the original query string
+ */
+ char *query;
+#endif
+
/*
* The keyword list to use.
*/
@@ -126,5 +133,6 @@ extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
core_yyscan_t yyscanner);
extern int scanner_errposition(int location, core_yyscan_t yyscanner);
extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn();
+extern char *scanner_get_query(int start, int len, core_yyscan_t yyscanner);
#endif /* SCANNER_H */
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index 5abc26e864..de4591f110 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -47,6 +47,7 @@ typedef enum
extern int log_statement;
extern List *pg_parse_query(const char *query_string);
+extern List *pg_parse_query_get_source(const char *query_string, List **queries);
extern List *pg_analyze_and_rewrite(Node *parsetree, const char *query_string,
Oid *paramTypes, int numParams);
extern List *pg_analyze_and_rewrite_params(Node *parsetree,
diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y
index 00978909a3..80bb07041a 100644
--- a/src/pl/plpgsql/src/pl_gram.y
+++ b/src/pl/plpgsql/src/pl_gram.y
@@ -3470,7 +3470,7 @@ check_sql_expr(const char *stmt, int location, int leaderlen)
error_context_stack = &syntax_errcontext;
oldCxt = MemoryContextSwitchTo(compile_tmp_cxt);
- (void) raw_parser(stmt);
+ (void) raw_parser(stmt, NULL);
MemoryContextSwitchTo(oldCxt);
/* Restore former ereport callback */