summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tom Lane 2009-04-07 15:53:54 +0000
committer Tom Lane 2009-04-07 15:53:54 +0000
commit 8c9977b0b8973fa268509256738d65063e88bfea (patch)
tree eccd02a1e3a8ed68fe7f0582e3fdd2ed349490a6
parent f358e6131448a60c092acec2169858ac8d46349c (diff)
Defend against non-ASCII letters in fuzzystrmatch code. The functions
still don't behave very sanely for multibyte encodings, but at least they won't be indexing off the ends of static arrays.
-rw-r--r-- contrib/fuzzystrmatch/fuzzystrmatch.c 36
1 files changed, 27 insertions, 9 deletions
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index 09d3f9c11d..7553919547 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -74,7 +74,15 @@ static void _soundex(const char *instr, char *outstr);
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
static const char *soundex_table = "01230120022455012623010202";
-#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
+static char
+soundex_code(char letter)
+{
+ letter = toupper((unsigned char) letter);
+ /* Defend against non-ASCII letters */
+ if (letter >= 'A' && letter <= 'Z')
+ return soundex_table[letter - 'A'];
+ return letter;
+}
/*
@@ -143,27 +151,37 @@ static int _metaphone(char *word, int max_phonemes, char **phoned_word);
/*-- Character encoding array & accessing macros --*/
/* Stolen directly out of the book... */
-char _codes[26] = {
+static const char _codes[26] = {
1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
/* a b c d e f g h i j k l m n o p q r s t u v w x y z */
};
+static int
+getcode(char c)
+{
+ if (isalpha((unsigned char) c))
+ {
+ c = toupper((unsigned char) c);
+ /* Defend against non-ASCII letters */
+ if (c >= 'A' && c <= 'Z')
+ return _codes[c - 'A'];
+ }
+ return 0;
+}
-#define ENCODE(c) (isalpha((unsigned char) (c)) ? _codes[((toupper((unsigned char) (c))) - 'A')] : 0)
-
-#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */
+#define isvowel(c) (getcode(c) & 1) /* AEIOU */
/* These letters are passed through unchanged */
-#define NOCHANGE(c) (ENCODE(c) & 2) /* FJMNR */
+#define NOCHANGE(c) (getcode(c) & 2) /* FJMNR */
/* These form dipthongs when preceding H */
-#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */
+#define AFFECTH(c) (getcode(c) & 4) /* CGPST */
/* These make C and G soft */
-#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */
+#define MAKESOFT(c) (getcode(c) & 8) /* EIY */
/* These prevent GH from becoming F */
-#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */
+#define NOGHTOF(c) (getcode(c) & 16) /* BDH */
/*