Tweak the core scanner so that it can be used by plpgsql too.

Changes:

* Pass in the keyword lookup array instead of having it be hardwired. (This incidentally allows elimination of some duplicate coding in ecpg.)
* Re-order the token declarations in gram.y so that non-keyword tokens have numbers that won't change when keywords are added or removed.
* Add ".." and ":=" to the set of tokens recognized by scan.l. (Since these combinations are nowhere legal in core SQL, this does not change anything except the precise wording of the error you get if you write one of them.)
author: Tom Lane 2009-07-14 20:24:10 +0000
committer: Tom Lane 2009-07-14 20:24:10 +0000
commit: 580632c2d79de4147e26563627b76aeb31c01fca (patch)
tree: 69681419f89e1dcc8eecabaa50decc1ca1ecdfe4
parent: a25726eae584bf56d3ddcf984bc1b54765088492 (diff)
15 files changed, 124 insertions, 101 deletions
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 3f4eca77cf..8faf593433 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -421,10 +421,23 @@ static TypeName *TableFuncTypeName(List *columns);
 
 
 /*
- * If you make any token changes, update the keyword table in
- * src/include/parser/kwlist.h and add new keywords to the appropriate one of
- * the reserved-or-not-so-reserved keyword lists, below; search
- * this file for "Name classification hierarchy".
+ * Non-keyword token types.  These are hard-wired into the "flex" lexer.
+ * They must be listed first so that their numeric codes do not depend on
+ * the set of keywords.  PL/pgsql depends on this so that it can share the
+ * same lexer.  If you add/change tokens here, fix PL/pgsql to match!
+ *
+ * DOT_DOT and COLON_EQUALS are unused in the core SQL grammar, and so will
+ * always provoke parse errors.  They are needed by PL/pgsql.
+ */
+%token <str>	IDENT FCONST SCONST BCONST XCONST Op
+%token <ival>	ICONST PARAM
+%token			TYPECAST DOT_DOT COLON_EQUALS
+
+/*
+ * If you want to make any keyword changes, update the keyword table in
+ * src/include/parser/kwlist.h and add new keywords to the appropriate one
+ * of the reserved-or-not-so-reserved keyword lists, below; search
+ * this file for "Keyword category lists".
  */
 
 /* ordinary key words in alphabetical order */
@@ -515,17 +528,15 @@ static TypeName *TableFuncTypeName(List *columns);
 
 	ZONE
 
-/* The grammar thinks these are keywords, but they are not in the kwlist.h
+/*
+ * The grammar thinks these are keywords, but they are not in the kwlist.h
  * list and so can never be entered directly.  The filter in parser.c
  * creates these tokens when required.
  */
 %token			NULLS_FIRST NULLS_LAST WITH_TIME
 
-/* Special token types, not actually keywords - see the "lex" file */
-%token <str>	IDENT FCONST SCONST BCONST XCONST Op
-%token <ival>	ICONST PARAM
 
-/* precedence: lowest to highest */
+/* Precedence: lowest to highest */
 %nonassoc	SET				/* see relation_expr_opt_alias */
 %left		UNION EXCEPT
 %left		INTERSECT
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index 5a56a1f17c..732f3065ff 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -16,7 +16,6 @@
 #include "postgres.h"
 
 #include "parser/gramparse.h"
-#include "parser/keywords.h"
 
 #define PG_KEYWORD(a,b,c) {a,b,c},
 
@@ -25,5 +24,4 @@ const ScanKeyword ScanKeywords[] = {
 #include "parser/kwlist.h"
 };
 
-/* End of ScanKeywords, for use in kwlookup.c and elsewhere */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int	NumScanKeywords = lengthof(ScanKeywords);
diff --git a/src/backend/parser/kwlookup.c b/src/backend/parser/kwlookup.c
index f941837fb1..e6675e6512 100644
--- a/src/backend/parser/kwlookup.c
+++ b/src/backend/parser/kwlookup.c
@@ -6,9 +6,6 @@
  * NB - this file is also used by ECPG and several frontend programs in
  * src/bin/ including pg_dump and psql
  *
- * Note that this file expects that the ScanKeywords array is defined
- * and that LastScanKeyword points to its element one past the last.
- *
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
@@ -39,7 +36,9 @@
  * receive a different case-normalization mapping.
  */
 const ScanKeyword *
-ScanKeywordLookup(const char *text)
+ScanKeywordLookup(const char *text,
+				  const ScanKeyword *keywords,
+				  int num_keywords)
 {
 	int			len,
 				i;
@@ -69,8 +68,8 @@ ScanKeywordLookup(const char *text)
 	/*
 	 * Now do a binary search using plain strcmp() comparison.
 	 */
-	low = &ScanKeywords[0];
-	high = LastScanKeyword - 1;
+	low = keywords;
+	high = keywords + (num_keywords - 1);
 	while (low <= high)
 	{
 		const ScanKeyword *middle;
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c
index 0e6c6c7c27..b5370a8b14 100644
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -39,7 +39,7 @@ raw_parser(const char *str)
 	int			yyresult;
 
 	/* initialize the flex scanner */
-	yyscanner = scanner_init(str, &yyextra);
+	yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords);
 
 	/* filtered_base_yylex() only needs this much initialization */
 	yyextra.have_lookahead = false;
@@ -79,7 +79,7 @@ pg_parse_string_token(const char *token)
 	YYSTYPE		yylval;
 	YYLTYPE		yylloc;
 
-	yyscanner = scanner_init(token, &yyextra);
+	yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords);
 
 	ctoken = base_yylex(&yylval, &yylloc, yyscanner);
 
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 0d423c8ef7..f404f9dc8b 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -304,6 +304,10 @@ identifier		{ident_start}{ident_cont}*
 
 typecast		"::"
 
+/* these two token types are used by PL/pgsql, though not in core SQL */
+dot_dot			\.\.
+colon_equals	":="
+
 /*
  * "self" is the set of chars that should be returned as single-character
  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
@@ -450,11 +454,21 @@ other			.
 
 					SET_YYLLOC();
 					yyless(1);				/* eat only 'n' this time */
-					/* nchar had better be a keyword! */
-					keyword = ScanKeywordLookup("nchar");
-					Assert(keyword != NULL);
-					yylval->keyword = keyword->name;
-					return keyword->value;
+
+					keyword = ScanKeywordLookup("nchar",
+												yyextra->keywords,
+												yyextra->num_keywords);
+					if (keyword != NULL)
+					{
+						yylval->keyword = keyword->name;
+						return keyword->value;
+					}
+					else
+					{
+						/* If NCHAR isn't a keyword, just return "n" */
+						yylval->str = pstrdup("n");
+						return IDENT;
+					}
 				}
 
 {xqstart}		{
@@ -680,6 +694,16 @@ other			.
 					return TYPECAST;
 				}
 
+{dot_dot}		{
+					SET_YYLLOC();
+					return DOT_DOT;
+				}
+
+{colon_equals}	{
+					SET_YYLLOC();
+					return COLON_EQUALS;
+				}
+
 {self}			{
 					SET_YYLLOC();
 					return yytext[0];
@@ -830,7 +854,9 @@ other			.
 					SET_YYLLOC();
 
 					/* Is it a keyword? */
-					keyword = ScanKeywordLookup(yytext);
+					keyword = ScanKeywordLookup(yytext,
+												yyextra->keywords,
+												yyextra->num_keywords);
 					if (keyword != NULL)
 					{
 						yylval->keyword = keyword->name;
@@ -939,7 +965,10 @@ scanner_yyerror(const char *message, base_yyscan_t yyscanner)
  * Called before any actual parsing is done
  */
 base_yyscan_t
-scanner_init(const char *str, base_yy_extra_type *yyext)
+scanner_init(const char *str,
+			 base_yy_extra_type *yyext,
+			 const ScanKeyword *keywords,
+			 int num_keywords)
 {
 	Size		slen = strlen(str);
 	yyscan_t	scanner;
@@ -949,6 +978,9 @@ scanner_init(const char *str, base_yy_extra_type *yyext)
 
 	base_yyset_extra(yyext, scanner);
 
+	yyext->keywords = keywords;
+	yyext->num_keywords = num_keywords;
+
 	/*
 	 * Make a scan buffer with special termination needed by flex.
 	 */
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index 06c17a3dfd..93e0e17f37 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -334,7 +334,7 @@ pg_get_keywords(PG_FUNCTION_ARGS)
 
 	funcctx = SRF_PERCALL_SETUP();
 
-	if (&ScanKeywords[funcctx->call_cntr] < LastScanKeyword)
+	if (funcctx->call_cntr < NumScanKeywords)
 	{
 		char	   *values[3];
 		HeapTuple	tuple;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 1562a5444c..ac063af0e6 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6219,7 +6219,9 @@ quote_identifier(const char *ident)
 		 * Note: ScanKeywordLookup() does case-insensitive comparison, but
 		 * that's fine, since we already know we have all-lower-case.
 		 */
-		const ScanKeyword *keyword = ScanKeywordLookup(ident);
+		const ScanKeyword *keyword = ScanKeywordLookup(ident,
+													   ScanKeywords,
+													   NumScanKeywords);
 
 		if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
 			safe = false;
diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c
index 93bd4d4273..178eb7c8d5 100644
--- a/src/bin/pg_dump/dumputils.c
+++ b/src/bin/pg_dump/dumputils.c
@@ -130,7 +130,9 @@ fmtId(const char *rawid)
 		 * Note: ScanKeywordLookup() does case-insensitive comparison, but
 		 * that's fine, since we already know we have all-lower-case.
 		 */
-		const ScanKeyword *keyword = ScanKeywordLookup(rawid);
+		const ScanKeyword *keyword = ScanKeywordLookup(rawid,
+													   ScanKeywords,
+													   NumScanKeywords);
 
 		if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
 			need_quotes = true;
diff --git a/src/bin/pg_dump/keywords.c b/src/bin/pg_dump/keywords.c
index 99cdf6e116..29c64b2613 100644
--- a/src/bin/pg_dump/keywords.c
+++ b/src/bin/pg_dump/keywords.c
@@ -27,5 +27,4 @@ const ScanKeyword ScanKeywords[] = {
 #include "parser/kwlist.h"
 };
 
-/* End of ScanKeywords, for use in kwlookup.c */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int	NumScanKeywords = lengthof(ScanKeywords);
diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h
index 40382fd699..a54a1b1bb4 100644
--- a/src/include/parser/gramparse.h
+++ b/src/include/parser/gramparse.h
@@ -20,6 +20,7 @@
 #define GRAMPARSE_H
 
 #include "nodes/parsenodes.h"
+#include "parser/keywords.h"
 
 /*
  * We track token locations in terms of byte offsets from the start of the
@@ -50,6 +51,12 @@ typedef struct base_yy_extra_type
 	Size		scanbuflen;
 
 	/*
+	 * The keyword list to use.
+	 */
+	const ScanKeyword *keywords;
+	int			num_keywords;
+
+	/*
 	 * literalbuf is used to accumulate literal values when multiple rules
 	 * are needed to parse a single literal.  Call startlit() to reset buffer
 	 * to empty, addlit() to add text.  NOTE: the string in literalbuf is
@@ -106,7 +113,10 @@ extern int	filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
 								base_yyscan_t yyscanner);
 
 /* from scan.l */
-extern base_yyscan_t scanner_init(const char *str, base_yy_extra_type *yyext);
+extern base_yyscan_t scanner_init(const char *str,
+								  base_yy_extra_type *yyext,
+								  const ScanKeyword *keywords,
+								  int num_keywords);
 extern void scanner_finish(base_yyscan_t yyscanner);
 extern int	base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
 					   base_yyscan_t yyscanner);
diff --git a/src/include/parser/keywords.h b/src/include/parser/keywords.h
index 4c56c14ea3..51f9c94b89 100644
--- a/src/include/parser/keywords.h
+++ b/src/include/parser/keywords.h
@@ -29,8 +29,10 @@ typedef struct ScanKeyword
 } ScanKeyword;
 
 extern const ScanKeyword ScanKeywords[];
-extern const ScanKeyword *LastScanKeyword;
+extern const int	NumScanKeywords;
 
-extern const ScanKeyword *ScanKeywordLookup(const char *text);
+extern const ScanKeyword *ScanKeywordLookup(const char *text,
+											const ScanKeyword *keywords,
+											int num_keywords);
 
 #endif   /* KEYWORDS_H */
diff --git a/src/interfaces/ecpg/preproc/c_keywords.c b/src/interfaces/ecpg/preproc/c_keywords.c
index 62e729db5e..9bf504a106 100644
--- a/src/interfaces/ecpg/preproc/c_keywords.c
+++ b/src/interfaces/ecpg/preproc/c_keywords.c
@@ -1,10 +1,10 @@
 /*-------------------------------------------------------------------------
  *
- * keywords.c
+ * c_keywords.c
  *	  lexical token lookup for reserved words in postgres embedded SQL
  *
  * $PostgreSQL$
- * §
+ *
  *-------------------------------------------------------------------------
  */
 #include "postgres_fe.h"
@@ -55,8 +55,31 @@ static const ScanKeyword ScanCKeywords[] = {
 	{"year", YEAR_P, 0},
 };
 
+
+/*
+ * Do a binary search using plain strcmp() comparison.  This is much like
+ * ScanKeywordLookup(), except we want case-sensitive matching.
+ */
 const ScanKeyword *
 ScanCKeywordLookup(const char *text)
 {
-	return DoLookup(text, &ScanCKeywords[0], endof(ScanCKeywords) - 1);
+	const ScanKeyword *low = &ScanCKeywords[0];
+	const ScanKeyword *high = &ScanCKeywords[lengthof(ScanCKeywords) - 1];
+
+	while (low <= high)
+	{
+		const ScanKeyword *middle;
+		int			difference;
+
+		middle = low + (high - low) / 2;
+		difference = strcmp(middle->name, text);
+		if (difference == 0)
+			return middle;
+		else if (difference < 0)
+			low = middle + 1;
+		else
+			high = middle - 1;
+	}
+
+	return NULL;
 }
diff --git a/src/interfaces/ecpg/preproc/ecpg_keywords.c b/src/interfaces/ecpg/preproc/ecpg_keywords.c
index 9a7fde7181..833e4e3bc9 100644
--- a/src/interfaces/ecpg/preproc/ecpg_keywords.c
+++ b/src/interfaces/ecpg/preproc/ecpg_keywords.c
@@ -75,79 +75,26 @@ static const ScanKeyword ScanECPGKeywords[] = {
 	{"whenever", SQL_WHENEVER, 0},
 };
 
-/* This is all taken from src/backend/parser/keyword.c and adjusted for our needs. */
-/*
- * Do a binary search using plain strcmp() comparison.
- */
-const ScanKeyword *
-DoLookup(const char *word, const ScanKeyword *low, const ScanKeyword *high)
-{
-	while (low <= high)
-	{
-		const ScanKeyword *middle;
-		int			difference;
-
-		middle = low + (high - low) / 2;
-		difference = strcmp(middle->name, word);
-		if (difference == 0)
-			return middle;
-		else if (difference < 0)
-			low = middle + 1;
-		else
-			high = middle - 1;
-	}
-
-	return NULL;
-}
-
 /*
  * ScanECPGKeywordLookup - see if a given word is a keyword
  *
  * Returns a pointer to the ScanKeyword table entry, or NULL if no match.
- *
- * The match is done case-insensitively.  Note that we deliberately use a
- * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
- * even if we are in a locale where tolower() would produce more or different
- * translations.  This is to conform to the SQL99 spec, which says that
- * keywords are to be matched in this way even though non-keyword identifiers
- * receive a different case-normalization mapping.
+ * Keywords are matched using the same case-folding rules as in the backend.
  */
 const ScanKeyword *
 ScanECPGKeywordLookup(const char *text)
 {
-	int			len,
-				i;
-	char		word[NAMEDATALEN];
 	const ScanKeyword *res;
 
 	/* First check SQL symbols defined by the backend. */
-
-	res = ScanKeywordLookup(text);
+	res = ScanKeywordLookup(text, ScanKeywords, NumScanKeywords);
 	if (res)
 		return res;
 
-	len = strlen(text);
-	/* We assume all keywords are shorter than NAMEDATALEN. */
-	if (len >= NAMEDATALEN)
-		return NULL;
-
-	/*
-	 * Apply an ASCII-only downcasing. We must not use tolower() since it may
-	 * produce the wrong translation in some locales (eg, Turkish).
-	 */
-	for (i = 0; i < len; i++)
-	{
-		char		ch = text[i];
-
-		if (ch >= 'A' && ch <= 'Z')
-			ch += 'a' - 'A';
-		word[i] = ch;
-	}
-	word[len] = '\0';
-
-	/*
-	 * Now do a binary search using plain strcmp() comparison.
-	 */
+	/* Try ECPG-specific keywords. */
+	res = ScanKeywordLookup(text, ScanECPGKeywords, lengthof(ScanECPGKeywords));
+	if (res)
+		return res;
 
-	return DoLookup(word, &ScanECPGKeywords[0], endof(ScanECPGKeywords) - 1);
+	return NULL;
 }
diff --git a/src/interfaces/ecpg/preproc/extern.h b/src/interfaces/ecpg/preproc/extern.h
index 013359aab4..d760e23443 100644
--- a/src/interfaces/ecpg/preproc/extern.h
+++ b/src/interfaces/ecpg/preproc/extern.h
@@ -101,7 +101,6 @@ extern void remove_variables(int);
 extern struct variable *new_variable(const char *, struct ECPGtype *, int);
 extern const ScanKeyword *ScanCKeywordLookup(const char *);
 extern const ScanKeyword *ScanECPGKeywordLookup(const char *text);
-extern const ScanKeyword *DoLookup(const char *, const ScanKeyword *, const ScanKeyword *);
 extern void scanner_init(const char *);
 extern void parser_init(void);
 extern void scanner_finish(void);
diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c
index fa6db2ed04..57eecef889 100644
--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@@ -26,5 +26,4 @@ const ScanKeyword ScanKeywords[] = {
 #include "parser/kwlist.h"
 };
 
-/* End of ScanKeywords, for use in kwlookup.c */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int	NumScanKeywords = lengthof(ScanKeywords);
author	Tom Lane	2009-07-14 20:24:10 +0000
committer	Tom Lane	2009-07-14 20:24:10 +0000
commit	580632c2d79de4147e26563627b76aeb31c01fca (patch)
tree	69681419f89e1dcc8eecabaa50decc1ca1ecdfe4
parent	a25726eae584bf56d3ddcf984bc1b54765088492 (diff)