diff options
author | Tom Lane | 2009-04-07 15:53:54 +0000 |
---|---|---|
committer | Tom Lane | 2009-04-07 15:53:54 +0000 |
commit | 8c9977b0b8973fa268509256738d65063e88bfea (patch) | |
tree | eccd02a1e3a8ed68fe7f0582e3fdd2ed349490a6 | |
parent | f358e6131448a60c092acec2169858ac8d46349c (diff) |
Defend against non-ASCII letters in fuzzystrmatch code. The functions
still don't behave very sanely for multibyte encodings, but at least
they won't be indexing off the ends of static arrays.
-rw-r--r-- | contrib/fuzzystrmatch/fuzzystrmatch.c | 36 |
1 file changed, 27 insertions, 9 deletions
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c index 09d3f9c11d..7553919547 100644 --- a/contrib/fuzzystrmatch/fuzzystrmatch.c +++ b/contrib/fuzzystrmatch/fuzzystrmatch.c @@ -74,7 +74,15 @@ static void _soundex(const char *instr, char *outstr); /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ static const char *soundex_table = "01230120022455012623010202"; -#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A'] +static char +soundex_code(char letter) +{ + letter = toupper((unsigned char) letter); + /* Defend against non-ASCII letters */ + if (letter >= 'A' && letter <= 'Z') + return soundex_table[letter - 'A']; + return letter; +} /* @@ -143,27 +151,37 @@ static int _metaphone(char *word, int max_phonemes, char **phoned_word); /*-- Character encoding array & accessing macros --*/ /* Stolen directly out of the book... */ -char _codes[26] = { +static const char _codes[26] = { 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0 /* a b c d e f g h i j k l m n o p q r s t u v w x y z */ }; +static int +getcode(char c) +{ + if (isalpha((unsigned char) c)) + { + c = toupper((unsigned char) c); + /* Defend against non-ASCII letters */ + if (c >= 'A' && c <= 'Z') + return _codes[c - 'A']; + } + return 0; +} -#define ENCODE(c) (isalpha((unsigned char) (c)) ? 
_codes[((toupper((unsigned char) (c))) - 'A')] : 0) - -#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */ +#define isvowel(c) (getcode(c) & 1) /* AEIOU */ /* These letters are passed through unchanged */ -#define NOCHANGE(c) (ENCODE(c) & 2) /* FJMNR */ +#define NOCHANGE(c) (getcode(c) & 2) /* FJMNR */ /* These form dipthongs when preceding H */ -#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */ +#define AFFECTH(c) (getcode(c) & 4) /* CGPST */ /* These make C and G soft */ -#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */ +#define MAKESOFT(c) (getcode(c) & 8) /* EIY */ /* These prevent GH from becoming F */ -#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */ +#define NOGHTOF(c) (getcode(c) & 16) /* BDH */ /* |