Fix bugs in plpgsql and ecpg caused by assuming that isspace() would only return true for exactly the characters treated as whitespace by their flex scanners.

Per report from Victor Snezhko and subsequent investigation. Also fix a passel of unsafe usages of <ctype.h> functions, that is, ye olde char-vs-unsigned-char issue. I won't miss <ctype.h> when we are finally able to stop using it.
author: Tom Lane 2006-09-22 21:39:58 +0000
committer: Tom Lane 2006-09-22 21:39:58 +0000
commit: 4acb714ad4497d1c4971520b26dfd404eb420e2a (patch)
tree: 3af6ee3acda79529ee9250a013e77301c57a540f
parent: 1ab048fe5c0d252df9c796f2e8daedab1113d9fb (diff)
19 files changed, 94 insertions, 43 deletions
diff --git a/contrib/fuzzystrmatch/dmetaphone.c b/contrib/fuzzystrmatch/dmetaphone.c
index 6883dbebb6..4f4fa18f4f 100644
--- a/contrib/fuzzystrmatch/dmetaphone.c
+++ b/contrib/fuzzystrmatch/dmetaphone.c
@@ -318,7 +318,7 @@ MakeUpper(metastring * s)
 	char	   *i;
 
 	for (i = s->str; *i; i++)
-		*i = toupper(*i);
+		*i = toupper((unsigned char) *i);
 }
 
 
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index b905ff7f3d..051a411a99 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -51,7 +51,7 @@ get_val( HSParser *state, bool ignoreeq, bool *escaped ) {
 				elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
 			} else if ( *(state->ptr) == '\\' ) {
 				st = GV_WAITESCIN;
-			} else if ( !isspace(*(state->ptr)) ) {
+			} else if ( !isspace((unsigned char) *(state->ptr)) ) {
 				*(state->cur) = *(state->ptr);
 				state->cur++;
 				st = GV_INVAL;
@@ -65,7 +65,7 @@ get_val( HSParser *state, bool ignoreeq, bool *escaped ) {
 			} else if ( *(state->ptr) == ',' && ignoreeq ) {
 				state->ptr--;
 				return true;
-			} else if ( isspace(*(state->ptr)) ) {
+			} else if ( isspace((unsigned char) *(state->ptr)) ) {
 				return true;
 			} else if ( *(state->ptr) == '\0' ) {
 				state->ptr--;
@@ -146,7 +146,7 @@ parse_hstore( HSParser *state ) {
 				st = WGT;
 			} else if ( *(state->ptr) == '\0' ) {
 				elog(ERROR,"Unexpectd end of string");
-			} else if (!isspace(*(state->ptr))) {
+			} else if (!isspace((unsigned char) *(state->ptr))) {
 				elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
 			}
 		} else if ( st == WGT ) {
@@ -177,7 +177,7 @@ parse_hstore( HSParser *state ) {
 				st = WKEY;
 			} else if ( *(state->ptr) == '\0' ) {
 				return;
-			} else if (!isspace(*(state->ptr))) {
+			} else if (!isspace((unsigned char) *(state->ptr))) {
 				elog(ERROR,"Syntax error near '%c' at postion %d", *(state->ptr), (int4)(state->ptr-state->begin));
 			}
 		} else 
diff --git a/contrib/isn/isn.c b/contrib/isn/isn.c
index b84e6a2658..f8025e5b8e 100644
--- a/contrib/isn/isn.c
+++ b/contrib/isn/isn.c
@@ -72,13 +72,16 @@ bool check_table(const char *(*TABLE)[2], const unsigned TABLE_index[10][2])
 		aux2 = TABLE[i][1];
 
 		/* must always start with a digit: */
-		if(!isdigit(*aux1) || !isdigit(*aux2)) goto invalidtable;
+		if (!isdigit((unsigned char) *aux1) || !isdigit((unsigned char) *aux2))
+			goto invalidtable;
 		a = *aux1 - '0';
 		b = *aux2 - '0';
 
 		/* must always have the same format and length: */
 		while(*aux1 && *aux2) {
-			if(!(isdigit(*aux1) && isdigit(*aux2)) && (*aux1!=*aux2 || *aux1 != '-')) 
+			if (!(isdigit((unsigned char) *aux1) &&
+				  isdigit((unsigned char) *aux2)) &&
+				(*aux1 != *aux2 || *aux1 != '-')) 
 				goto invalidtable;
 			aux1++;
 			aux2++;
@@ -124,7 +127,7 @@ unsigned dehyphenate(char *bufO, char *bufI)
 {
 	unsigned ret = 0;
 	while(*bufI) {
-		if(isdigit(*bufI)) {
+		if(isdigit((unsigned char) *bufI)) {
 			*bufO++ = *bufI;
 			ret++;
 		}
@@ -183,7 +186,7 @@ unsigned hyphenate(char *bufO, char *bufI, const char *(*TABLE)[2], const unsign
 
 			firstdig++, ean_aux1++, ean_aux2++;
 			if(!(*ean_aux1 && *ean_aux2 && *firstdig)) break;
-			if(!isdigit(*ean_aux1)) ean_aux1++, ean_aux2++;
+			if(!isdigit((unsigned char) *ean_aux1)) ean_aux1++, ean_aux2++;
 		} else {
 			/* check in what direction we should go and move the pointer accordingly */
 			if(*firstdig < *ean_aux1 && !ean_in1) upper = search;
@@ -227,7 +230,7 @@ unsigned weight_checkdig(char *isn, unsigned size)
 {
 	unsigned weight = 0;
 	while(*isn && size>1) {
-		if(isdigit(*isn)) {
+		if(isdigit((unsigned char) *isn)) {
 			weight += size-- * (*isn - '0');
 		}
 		isn++;
@@ -254,7 +257,7 @@ unsigned checkdig(char *num, unsigned size)
 		pos = 1;
 	}
 	while(*num && size>1) {
-		if(isdigit(*num)) {
+		if(isdigit((unsigned char) *num)) {
 			if(pos++%2) check3 += *num - '0';
 			else check += *num - '0';
 			size--;
@@ -366,7 +369,7 @@ void ean2ISBN(char *isn)
 	hyphenate(isn, isn+4, NULL, NULL);
 	check = weight_checkdig(isn, 10);
 	aux = strchr(isn, '\0');
-	while(!isdigit(*--aux));
+	while(!isdigit((unsigned char) *--aux));
 	if(check == 10) *aux = 'X';
 	else *aux = check + '0';
 }
@@ -411,7 +414,7 @@ ean13 str2ean(const char *num)
 {
 	ean13 ean = 0;	/* current ean */
 	while(*num) {
-		if(isdigit(*num)) ean = 10 * ean + (*num - '0');
+		if(isdigit((unsigned char) *num)) ean = 10 * ean + (*num - '0');
 		num++;
 	}
     return (ean<<1); /* also give room to a flag */
@@ -570,7 +573,7 @@ bool string2ean(const char *str, bool errorOK, ean13 *result,
 	/* recognize and validate the number: */
 	while(*aux2 && length <= 13) {
 		last = (*(aux2+1) == '!' || *(aux2+1) == '\0'); /* is the last character */
-		digit = (isdigit(*aux2)!=0); /* is current character a digit? */
+		digit = (isdigit((unsigned char) *aux2)!=0); /* is current character a digit? */
 		if(*aux2=='?' && last) /* automagically calculate check digit if it's '?' */
 			magic = digit = true;
 		if(length == 0 &&  (*aux2=='M' || *aux2=='m')) {
@@ -583,13 +586,13 @@ bool string2ean(const char *str, bool errorOK, ean13 *result,
 			/* only ISSN can be here */
 			if(type != INVALID) goto eaninvalid;
 			type = ISSN;
-			*aux1++ = toupper(*aux2);
+			*aux1++ = toupper((unsigned char) *aux2);
 			length++;
 		} else if(length == 9 && (digit || *aux2=='X' || *aux2=='x') && last) {
 			/* only ISBN and ISMN can be here */
 			if(type != INVALID && type != ISMN) goto eaninvalid;
 			if(type == INVALID) type = ISBN; /* ISMN must start with 'M' */
-			*aux1++ = toupper(*aux2);
+			*aux1++ = toupper((unsigned char) *aux2);
 			length++;
 		} else if(length == 11 && digit && last) {
 			/* only UPC can be here */
diff --git a/contrib/ltree/crc32.c b/contrib/ltree/crc32.c
index fea972b466..7362306915 100644
--- a/contrib/ltree/crc32.c
+++ b/contrib/ltree/crc32.c
@@ -8,7 +8,7 @@
 
 #ifdef LOWER_NODE
 #include <ctype.h>
-#define TOLOWER(x)	tolower(x)
+#define TOLOWER(x)	tolower((unsigned char) (x))
 #else
 #define TOLOWER(x)	(x)
 #endif
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 353e06512b..ff1ace2989 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -332,7 +332,7 @@ lquery_in(PG_FUNCTION_ARGS)
 		{
 			if (*ptr == ',')
 				state = LQPRS_WAITSNUM;
-			else if (isdigit((unsigned int) *ptr))
+			else if (isdigit((unsigned char) *ptr))
 			{
 				curqlevel->low = atoi(ptr);
 				state = LQPRS_WAITND;
@@ -342,7 +342,7 @@ lquery_in(PG_FUNCTION_ARGS)
 		}
 		else if (state == LQPRS_WAITSNUM)
 		{
-			if (isdigit((unsigned int) *ptr))
+			if (isdigit((unsigned char) *ptr))
 			{
 				curqlevel->high = atoi(ptr);
 				state = LQPRS_WAITCLOSE;
@@ -359,7 +359,7 @@ lquery_in(PG_FUNCTION_ARGS)
 		{
 			if (*ptr == '}')
 				state = LQPRS_WAITEND;
-			else if (!isdigit((unsigned int) *ptr))
+			else if (!isdigit((unsigned char) *ptr))
 				UNCHAR;
 		}
 		else if (state == LQPRS_WAITND)
@@ -371,7 +371,7 @@ lquery_in(PG_FUNCTION_ARGS)
 			}
 			else if (*ptr == ',')
 				state = LQPRS_WAITSNUM;
-			else if (!isdigit((unsigned int) *ptr))
+			else if (!isdigit((unsigned char) *ptr))
 				UNCHAR;
 		}
 		else if (state == LQPRS_WAITEND)
diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c
index 783005f330..76011b4e92 100644
--- a/contrib/ltree/ltxtquery_io.c
+++ b/contrib/ltree/ltxtquery_io.c
@@ -81,7 +81,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, uint1
 					*lenval = 1;
 					*flag = 0;
 				}
-				else if (!isspace((unsigned int) *(state->buf)))
+				else if (!isspace((unsigned char) *(state->buf)))
 					ereport(ERROR,
 							(errcode(ERRCODE_SYNTAX_ERROR),
 							 errmsg("operand syntax error")));
diff --git a/contrib/pgcrypto/imath.c b/contrib/pgcrypto/imath.c
index 67587dcc28..70ee74706b 100644
--- a/contrib/pgcrypto/imath.c
+++ b/contrib/pgcrypto/imath.c
@@ -1799,7 +1799,7 @@ mp_result mp_int_read_cstring(mp_int z, mp_size radix, const char *str, char **e
     return MP_RANGE;
 
   /* Skip leading whitespace */
-  while(isspace((int)*str))
+  while(isspace((unsigned char) *str))
     ++str;
 
   /* Handle leading sign tag (+/-, positive default) */
@@ -3127,10 +3127,10 @@ static int       s_ch2val(char c, int r)
 {
   int out;
 
-  if(isdigit((int)c))
+  if(isdigit((unsigned char)c))
     out = c - '0';
-  else if(r > 10 && isalpha((int)c))
-    out = toupper(c) - 'A' + 10;
+  else if(r > 10 && isalpha((unsigned char) c))
+    out = toupper((unsigned char) c) - 'A' + 10;
   else
     return -1;
 
@@ -3151,7 +3151,7 @@ static char      s_val2ch(int v, int caps)
     char out = (v - 10) + 'a';
 
     if(caps)
-      return toupper(out);
+      return toupper((unsigned char) out);
     else
       return out;
   }
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 19675138c8..5abb1334b1 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -145,6 +145,9 @@ static unsigned char unescape_single_char(unsigned char c);
  * did not end with a newline.
  *
  * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix scanner_isspace()
+ * to agree, and see also the plpgsql lexer.
  */
 
 space			[ \t\n\r\f]
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index 504bab81d6..152e07ff88 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -183,3 +183,26 @@ truncate_identifier(char *ident, int len, bool warn)
 		ident[len] = '\0';
 	}
 }
+
+/*
+ * scanner_isspace() --- return TRUE if flex scanner considers char whitespace
+ *
+ * This should be used instead of the potentially locale-dependent isspace()
+ * function when it's important to match the lexer's behavior.
+ *
+ * In principle we might need similar functions for isalnum etc, but for the
+ * moment only isspace seems needed.
+ */
+bool
+scanner_isspace(char ch)
+{
+	/* This must match scan.l's list of {space} characters */
+	/* and plpgsql's scan.l as well */
+	if (ch == ' ' ||
+		ch == '\t' ||
+		ch == '\n' ||
+		ch == '\r' ||
+		ch == '\f')
+		return true;
+	return false;
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 7fcddacfae..7999c8e8f7 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -6155,7 +6155,7 @@ assign_custom_variable_classes(const char *newval, bool doit, GucSource source)
 	initStringInfo(&buf);
 	while ((c = *cp++) != 0)
 	{
-		if (isspace(c))
+		if (isspace((unsigned char) c))
 		{
 			if (symLen > 0)
 				hasSpaceAfterToken = true;
@@ -6173,7 +6173,7 @@ assign_custom_variable_classes(const char *newval, bool doit, GucSource source)
 			continue;
 		}
 
-		if (hasSpaceAfterToken || !isalnum(c))
+		if (hasSpaceAfterToken || !isalnum((unsigned char) c))
 		{
 			/*
 			 * Syntax error due to token following space after token or non
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 48073ea713..373f5530cb 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -367,10 +367,10 @@ main(int argc, char **argv)
 
 					new_obj_name->next = NULL;
 					new_obj_name->name = strdup(optarg);
-					new_obj_name->is_include = islower(c) ? true : false;
+					new_obj_name->is_include = islower((unsigned char) c) ? true : false;
 
 					/* add new entry to the proper list */
-					if (tolower(c) == 'n')
+					if (tolower((unsigned char) c) == 'n')
 					{
 						if (!schemaList_tail)
 							schemaList_tail = schemaList = new_obj_name;
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 43f8f5c977..2a8cbd6cea 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1090,8 +1090,8 @@ psql_completion(char *text, int start, int end)
 	/* Complete "AS ON <sth with a 'T' :)>" with a "TO" */
 	else if (pg_strcasecmp(prev3_wd, "AS") == 0 &&
 			 pg_strcasecmp(prev2_wd, "ON") == 0 &&
-			 (toupper((unsigned char) prev_wd[4]) == 'T' ||
-			  toupper((unsigned char) prev_wd[5]) == 'T'))
+			 (pg_toupper((unsigned char) prev_wd[4]) == 'T' ||
+			  pg_toupper((unsigned char) prev_wd[5]) == 'T'))
 		COMPLETE_WITH_CONST("TO");
 	/* Complete "AS ON <sth> TO" with a table name */
 	else if (pg_strcasecmp(prev4_wd, "AS") == 0 &&
diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h
index 1803249743..80ae82b350 100644
--- a/src/include/parser/scansup.h
+++ b/src/include/parser/scansup.h
@@ -22,4 +22,6 @@ extern char *downcase_truncate_identifier(const char *ident, int len,
 
 extern void truncate_identifier(char *ident, int len, bool warn);
 
+extern bool scanner_isspace(char ch);
+
 #endif   /* SCANSUP_H */
diff --git a/src/include/port.h b/src/include/port.h
index 85f9da07e6..6bc7022ce7 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -59,7 +59,7 @@ extern void get_parent_directory(char *path);
 ( \
 	((filename)[0] == '/') || \
 	(filename)[0] == '\\' || \
-	(isalpha((filename)[0]) && (filename)[1] == ':' && \
+	(isalpha((unsigned char) ((filename)[0])) && (filename)[1] == ':' && \
 	((filename)[2] == '\\' || (filename)[2] == '/')) \
 )
 #endif
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index b709bbbc72..d8632bf78f 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -47,6 +47,7 @@ static void addlit(char *ytext, int yleng);
 static void addlitchar (unsigned char);
 static void parse_include (void);
 static void check_escape_warning(void);
+static bool ecpg_isspace(char ch);
 
 char *token_start;
 int state_before;
@@ -245,6 +246,9 @@ param			\${integer}
  * did not end with a newline.
  *
  * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix ecpg_isspace()
+ * to agree.
  */
 
 ccomment		"//".*\n
@@ -872,7 +876,7 @@ cppline			{space}*#(.*\\{space})*.*{newline}
 					 *	contains at least one non-space character plus the ";"
 					 */
 					for (i = strlen(yytext)-2;
-						 i > 0 && isspace((unsigned char) yytext[i]);
+						 i > 0 && ecpg_isspace(yytext[i]);
 						 i-- )
 						;
 					yytext[i+1] = '\0';
@@ -1060,7 +1064,7 @@ cppline			{space}*#(.*\\{space})*.*{newline}
 						 *	contains at least one non-space character plus the ";"
 						 */
 						for (i = strlen(yytext)-2;
-							 i > 0 && isspace((unsigned char) yytext[i]);
+							 i > 0 && ecpg_isspace(yytext[i]);
 							 i-- )
 							;
 						yytext[i+1] = '\0';
@@ -1252,7 +1256,7 @@ parse_include(void)
 	 * yytext contains at least one non-space character plus the ";" 
 	 */
   	for (i = strlen(yytext)-2;
-		 i > 0 && isspace((unsigned char) yytext[i]);
+		 i > 0 && ecpg_isspace(yytext[i]);
 		 i--)
 		;
 
@@ -1328,3 +1332,18 @@ check_escape_warning(void)
 	       	mmerror (PARSE_ERROR, ET_WARNING, "nonstandard use of escape in a string literal");
 	warn_on_first_escape = false;   /* warn only once per string */
 }
+
+/*
+ * ecpg_isspace() --- return TRUE if flex scanner considers char whitespace
+ */
+static bool
+ecpg_isspace(char ch)
+{
+	if (ch == ' ' ||
+		ch == '\t' ||
+		ch == '\n' ||
+		ch == '\r' ||
+		ch == '\f')
+		return true;
+	return false;
+}
diff --git a/src/interfaces/libpq/fe-auth.c b/src/interfaces/libpq/fe-auth.c
index 40d46c5fb1..0583c8fe7e 100644
--- a/src/interfaces/libpq/fe-auth.c
+++ b/src/interfaces/libpq/fe-auth.c
@@ -89,7 +89,7 @@ pg_an_to_ln(char *aname)
 		*p = '\0';
 #ifdef WIN32
 	for (p = aname; *p; p++)
-		*p = pg_tolower(*p);
+		*p = pg_tolower((unsigned char) *p);
 #endif
 
 	return aname;
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 7a7baf5bb7..ae7fa2f0ba 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -26,6 +26,7 @@
 #include "funcapi.h"
 #include "optimizer/clauses.h"
 #include "parser/parse_expr.h"
+#include "parser/scansup.h"
 #include "tcop/tcopprot.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
@@ -2527,7 +2528,7 @@ exec_stmt_dynexecute(PLpgSQL_execstate *estate,
 				char	   *ptr;
 
 				for (ptr = querystr; *ptr; ptr++)
-					if (!isspace((unsigned char) *ptr))
+					if (!scanner_isspace(*ptr))
 						break;
 				if (*ptr == 'S' || *ptr == 's')
 					ereport(ERROR,
diff --git a/src/pl/plpgsql/src/pl_funcs.c b/src/pl/plpgsql/src/pl_funcs.c
index b6377872a1..9c169e6068 100644
--- a/src/pl/plpgsql/src/pl_funcs.c
+++ b/src/pl/plpgsql/src/pl_funcs.c
@@ -381,7 +381,7 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
 			/* Normal identifier: extends till dot or whitespace */
 			const char *thisstart = s;
 
-			while (*s && *s != '.' && !isspace((unsigned char) *s))
+			while (*s && *s != '.' && !scanner_isspace(*s))
 				s++;
 			/* Downcase and truncate to NAMEDATALEN */
 			curident = downcase_truncate_identifier(thisstart, s - thisstart,
@@ -400,11 +400,11 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
 		/* If not done, skip whitespace, dot, whitespace */
 		if (*s)
 		{
-			while (*s && isspace((unsigned char) *s))
+			while (*s && scanner_isspace(*s))
 				s++;
 			if (*s++ != '.')
 				elog(ERROR, "expected dot between identifiers: %s", sstart);
-			while (*s && isspace((unsigned char) *s))
+			while (*s && scanner_isspace(*s))
 				s++;
 			if (*s == '\0')
 				elog(ERROR, "expected another identifier: %s", sstart);
diff --git a/src/port/path.c b/src/port/path.c
index 410c285e4b..355788e9d6 100644
--- a/src/port/path.c
+++ b/src/port/path.c
@@ -69,7 +69,7 @@ skip_drive(const char *path)
 		while (*path && !IS_DIR_SEP(*path))
 			path++;
 	}
-	else if (isalpha(path[0]) && path[1] == ':')
+	else if (isalpha((unsigned char) path[0]) && path[1] == ':')
 	{
 		path += 2;
 	}
author	Tom Lane	2006-09-22 21:39:58 +0000
committer	Tom Lane	2006-09-22 21:39:58 +0000
commit	4acb714ad4497d1c4971520b26dfd404eb420e2a (patch)
tree	3af6ee3acda79529ee9250a013e77301c57a540f
parent	1ab048fe5c0d252df9c796f2e8daedab1113d9fb (diff)