@@ -78,7 +78,8 @@ static const char _codes[26] =
78
78
};
79
79
80
80
81
- #define ENCODE (c ) (isalpha(c) ? _codes[((toupper(c)) - 'A')] : 0)
81
+ /* Note: these macros require an uppercase letter input! */
82
+ #define ENCODE (c ) (isalpha(c) ? _codes[((c) - 'A')] : 0)
82
83
83
84
#define isvowel (c ) (ENCODE(c) & 1) /* AEIOU */
84
85
@@ -101,16 +102,19 @@ static const char _codes[26] =
101
102
/* I suppose I could have been using a character pointer instead of
102
103
* accessing the array directly... */
103
104
105
+ #define Convert_Raw (c ) toupper(c)
104
106
/* Look at the next letter in the word */
105
- #define Next_Letter (toupper(word[w_idx+1]))
107
+ #define Read_Raw_Next_Letter (word[w_idx+1])
108
+ #define Read_Next_Letter (Convert_Raw(Read_Raw_Next_Letter))
106
109
/* Look at the current letter in the word */
107
- #define Curr_Letter (toupper(word[w_idx]))
110
+ #define Read_Raw_Curr_Letter (word[w_idx])
111
+ #define Read_Curr_Letter (Convert_Raw(Read_Raw_Curr_Letter))
108
112
/* Go N letters back. */
109
- #define Look_Back_Letter (n ) (w_idx >= n ? toupper (word[w_idx-n]) : '\0')
113
+ #define Look_Back_Letter (n ) (w_idx >= n ? Convert_Raw (word[w_idx-n]) : '\0')
110
114
/* Previous letter. I dunno, should this return null on failure? */
111
- #define Prev_Letter (Look_Back_Letter(1))
115
+ #define Read_Prev_Letter (Look_Back_Letter(1))
112
116
/* Look two letters down. It makes sure you don't walk off the string. */
113
- #define After_Next_Letter (Next_Letter != '\0' ? toupper (word[w_idx+2]) \
117
+ #define Read_After_Next_Letter (Read_Raw_Next_Letter != '\0' ? Convert_Raw (word[w_idx+2]) \
114
118
: '\0')
115
119
#define Look_Ahead_Letter (n ) (toupper(Lookahead((char *) word+w_idx, n)))
116
120
@@ -119,15 +123,13 @@ static const char _codes[26] =
119
123
/* I probably could have just used strlen... */
120
124
/* Lookahead: peek at the character how_far positions into word without
 * reading past its terminating NUL.
 *
 * word    - NUL-terminated string to scan from its first character.
 * how_far - number of characters to step forward; a value <= 0 leaves the
 *           scan at word[0].
 *
 * Returns word[how_far] when the string is at least that long, otherwise
 * the terminating '\0' at which the scan stopped.
 */
static char Lookahead (char * word , int how_far )
121
125
{
122
- char letter_ahead = '\0' ; /* null by default */
123
126
int idx ;
124
127
for (idx = 0 ; word [idx ] != '\0' && idx < how_far ; idx ++ );
125
128
/* Edge forward in the string... */
126
129
127
- letter_ahead = word [idx ]; /* idx will be either == to how_far or
128
- * at the end of the string
130
+ return word [idx ]; /* idx will be either == to how_far or
131
+ * at the end of the string where it will be null
129
132
*/
130
- return letter_ahead ;
131
133
}
132
134
133
135
@@ -164,6 +166,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
164
166
int w_idx = 0 ; /* point in the phonization we're at. */
165
167
size_t p_idx = 0 ; /* end of the phoned phrase */
166
168
size_t max_buffer_len = 0 ; /* maximum length of the destination buffer */
169
+ char curr_letter ;
167
170
ZEND_ASSERT (word != NULL );
168
171
ZEND_ASSERT (max_phonemes >= 0 );
169
172
@@ -179,18 +182,20 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
179
182
180
183
/*-- The first phoneme has to be processed specially. --*/
181
184
/* Find our first letter */
182
- for (; !isalpha (Curr_Letter ); w_idx ++ ) {
185
+ for (; !isalpha (curr_letter = Read_Raw_Curr_Letter ); w_idx ++ ) {
183
186
/* On the off chance we were given nothing but crap... */
184
- if (Curr_Letter == '\0' ) {
187
+ if (curr_letter == '\0' ) {
185
188
End_Phoned_Word ();
186
189
return ;
187
190
}
188
191
}
189
192
190
- switch (Curr_Letter ) {
193
+ curr_letter = Convert_Raw (curr_letter );
194
+
195
+ switch (curr_letter ) {
191
196
/* AE becomes E */
192
197
case 'A' :
193
- if (Next_Letter == 'E' ) {
198
+ if (Read_Next_Letter == 'E' ) {
194
199
Phonize ('E' );
195
200
w_idx += 2 ;
196
201
}
@@ -204,24 +209,26 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
204
209
case 'G' :
205
210
case 'K' :
206
211
case 'P' :
207
- if (Next_Letter == 'N' ) {
212
+ if (Read_Next_Letter == 'N' ) {
208
213
Phonize ('N' );
209
214
w_idx += 2 ;
210
215
}
211
216
break ;
212
217
/* WH becomes W,
213
218
WR becomes R
214
219
W if followed by a vowel */
215
- case 'W' :
216
- if (Next_Letter == 'R' ) {
217
- Phonize (Next_Letter );
220
+ case 'W' : {
221
+ char next_letter = Read_Next_Letter ;
222
+ if (next_letter == 'R' ) {
223
+ Phonize ('R' );
218
224
w_idx += 2 ;
219
- } else if (Next_Letter == 'H' || isvowel (Next_Letter )) {
225
+ } else if (next_letter == 'H' || isvowel (next_letter )) {
220
226
Phonize ('W' );
221
227
w_idx += 2 ;
222
228
}
223
229
/* else ignore */
224
230
break ;
231
+ }
225
232
/* X becomes S */
226
233
case 'X' :
227
234
Phonize ('S' );
@@ -236,7 +243,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
236
243
case 'I' :
237
244
case 'O' :
238
245
case 'U' :
239
- Phonize (Curr_Letter );
246
+ Phonize (curr_letter );
240
247
w_idx ++ ;
241
248
break ;
242
249
default :
@@ -247,7 +254,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
247
254
248
255
249
256
/* On to the metaphoning */
250
- for (; Curr_Letter != '\0' &&
257
+ for (; ( curr_letter = Read_Raw_Curr_Letter ) != '\0' &&
251
258
(max_phonemes == 0 || Phone_Len < (size_t )max_phonemes );
252
259
w_idx ++ ) {
253
260
/* How many letters to skip because an earlier encoding handled
@@ -263,18 +270,23 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
263
270
*/
264
271
265
272
/* Ignore non-alphas */
266
- if (!isalpha (Curr_Letter ))
273
+ if (!isalpha (curr_letter ))
267
274
continue ;
268
275
276
+ curr_letter = Convert_Raw (curr_letter );
277
+ /* Note: we can't cache curr_letter from the previous loop
278
+ * because of the skip_letter variable. */
279
+ char prev_letter = Read_Prev_Letter ;
280
+
269
281
/* Drop duplicates, except CC */
270
- if (Curr_Letter == Prev_Letter &&
271
- Curr_Letter != 'C' )
282
+ if (curr_letter == prev_letter &&
283
+ curr_letter != 'C' )
272
284
continue ;
273
285
274
- switch (Curr_Letter ) {
286
+ switch (curr_letter ) {
275
287
/* B -> B unless in MB */
276
288
case 'B' :
277
- if (Prev_Letter != 'M' )
289
+ if (prev_letter != 'M' )
278
290
Phonize ('B' );
279
291
break ;
280
292
/* 'sh' if -CIA- or -CH, but not SCH, except SCHW.
@@ -283,20 +295,20 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
283
295
* dropped if -SCI-, SCE-, -SCY- (handed in S)
284
296
* else K
285
297
*/
286
- case 'C' :
287
- if ( MAKESOFT ( Next_Letter )) { /* C[IEY] */
288
- if (After_Next_Letter == 'A' &&
289
- Next_Letter == 'I' ) { /* CIA */
298
+ case 'C' : {
299
+ char next_letter = Read_Next_Letter ;
300
+ if (MAKESOFT ( next_letter )) { /* C[IEY] */
301
+ if ( next_letter == 'I' && Read_After_Next_Letter == 'A' ) { /* CIA */
290
302
Phonize (SH );
291
303
}
292
304
/* SC[IEY] */
293
- else if (Prev_Letter == 'S' ) {
305
+ else if (prev_letter == 'S' ) {
294
306
/* Dropped */
295
307
} else {
296
308
Phonize ('S' );
297
309
}
298
- } else if (Next_Letter == 'H' ) {
299
- if ((!traditional ) && (After_Next_Letter == 'R' || Prev_Letter == 'S' )) { /* Christ, School */
310
+ } else if (next_letter == 'H' ) {
311
+ if ((!traditional ) && (prev_letter == 'S' || Read_After_Next_Letter == 'R' )) { /* Christ, School */
300
312
Phonize ('K' );
301
313
} else {
302
314
Phonize (SH );
@@ -306,12 +318,13 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
306
318
Phonize ('K' );
307
319
}
308
320
break ;
321
+ }
309
322
/* J if in -DGE-, -DGI- or -DGY-
310
323
* else T
311
324
*/
312
325
case 'D' :
313
- if (Next_Letter == 'G' &&
314
- MAKESOFT (After_Next_Letter )) {
326
+ if (Read_Next_Letter == 'G' &&
327
+ MAKESOFT (Read_After_Next_Letter )) {
315
328
Phonize ('J' );
316
329
skip_letter ++ ;
317
330
} else
@@ -323,47 +336,50 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
323
336
* else J if in -GE-, -GI, -GY and not GG
324
337
* else K
325
338
*/
326
- case 'G' :
327
- if (Next_Letter == 'H' ) {
339
+ case 'G' : {
340
+ char next_letter = Read_Next_Letter ;
341
+ if (next_letter == 'H' ) {
328
342
if (!(NOGHTOF (Look_Back_Letter (3 )) ||
329
343
Look_Back_Letter (4 ) == 'H' )) {
330
344
Phonize ('F' );
331
345
skip_letter ++ ;
332
346
} else {
333
347
/* silent */
334
348
}
335
- } else if (Next_Letter == 'N' ) {
336
- if (Isbreak (After_Next_Letter ) ||
337
- (After_Next_Letter == 'E' &&
349
+ } else if (next_letter == 'N' ) {
350
+ char after_next_letter = Read_After_Next_Letter ;
351
+ if (Isbreak (after_next_letter ) ||
352
+ (after_next_letter == 'E' &&
338
353
Look_Ahead_Letter (3 ) == 'D' )) {
339
354
/* dropped */
340
355
} else
341
356
Phonize ('K' );
342
- } else if (MAKESOFT (Next_Letter ) &&
343
- Prev_Letter != 'G' ) {
357
+ } else if (MAKESOFT (next_letter ) &&
358
+ prev_letter != 'G' ) {
344
359
Phonize ('J' );
345
360
} else {
346
361
Phonize ('K' );
347
362
}
348
363
break ;
364
+ }
349
365
/* H if before a vowel and not after C,G,P,S,T */
350
366
case 'H' :
351
- if (isvowel (Next_Letter ) &&
352
- !AFFECTH (Prev_Letter ))
367
+ if (isvowel (Read_Next_Letter ) &&
368
+ !AFFECTH (prev_letter ))
353
369
Phonize ('H' );
354
370
break ;
355
371
/* dropped if after C
356
372
* else K
357
373
*/
358
374
case 'K' :
359
- if (Prev_Letter != 'C' )
375
+ if (prev_letter != 'C' )
360
376
Phonize ('K' );
361
377
break ;
362
378
/* F if before H
363
379
* else P
364
380
*/
365
381
case 'P' :
366
- if (Next_Letter == 'H' ) {
382
+ if (Read_Next_Letter == 'H' ) {
367
383
Phonize ('F' );
368
384
} else {
369
385
Phonize ('P' );
@@ -377,44 +393,50 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
377
393
/* 'sh' in -SH-, -SIO- or -SIA- or -SCHW-
378
394
* else S
379
395
*/
380
- case 'S' :
381
- if (Next_Letter == 'I' &&
382
- (After_Next_Letter == 'O' ||
383
- After_Next_Letter == 'A' )) {
396
+ case 'S' : {
397
+ char next_letter = Read_Next_Letter ;
398
+ char after_next_letter ;
399
+ if (next_letter == 'I' &&
400
+ ((after_next_letter = Read_After_Next_Letter ) == 'O' ||
401
+ after_next_letter == 'A' )) {
384
402
Phonize (SH );
385
- } else if (Next_Letter == 'H' ) {
403
+ } else if (next_letter == 'H' ) {
386
404
Phonize (SH );
387
405
skip_letter ++ ;
388
- } else if ((!traditional ) && (Next_Letter == 'C' && Look_Ahead_Letter (2 ) == 'H' && Look_Ahead_Letter (3 ) == 'W' )) {
406
+ } else if ((!traditional ) && (next_letter == 'C' && Look_Ahead_Letter (2 ) == 'H' && Look_Ahead_Letter (3 ) == 'W' )) {
389
407
Phonize (SH );
390
408
skip_letter += 2 ;
391
409
} else {
392
410
Phonize ('S' );
393
411
}
394
412
break ;
413
+ }
395
414
/* 'sh' in -TIA- or -TIO-
396
415
* else 'th' before H
397
416
* else T
398
417
*/
399
- case 'T' :
400
- if (Next_Letter == 'I' &&
401
- (After_Next_Letter == 'O' ||
402
- After_Next_Letter == 'A' )) {
418
+ case 'T' : {
419
+ char next_letter = Read_Next_Letter ;
420
+ char after_next_letter ;
421
+ if (next_letter == 'I' &&
422
+ ((after_next_letter = Read_After_Next_Letter ) == 'O' ||
423
+ after_next_letter == 'A' )) {
403
424
Phonize (SH );
404
- } else if (Next_Letter == 'H' ) {
425
+ } else if (next_letter == 'H' ) {
405
426
Phonize (TH );
406
427
skip_letter ++ ;
407
- } else if (!(Next_Letter == 'C' && After_Next_Letter == 'H' )) {
428
+ } else if (!(next_letter == 'C' && Read_After_Next_Letter == 'H' )) {
408
429
Phonize ('T' );
409
430
}
410
431
break ;
432
+ }
411
433
/* F */
412
434
case 'V' :
413
435
Phonize ('F' );
414
436
break ;
415
437
/* W before a vowel, else dropped */
416
438
case 'W' :
417
- if (isvowel (Next_Letter ))
439
+ if (isvowel (Read_Next_Letter ))
418
440
Phonize ('W' );
419
441
break ;
420
442
/* KS */
@@ -424,7 +446,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
424
446
break ;
425
447
/* Y if followed by a vowel */
426
448
case 'Y' :
427
- if (isvowel (Next_Letter ))
449
+ if (isvowel (Read_Next_Letter ))
428
450
Phonize ('Y' );
429
451
break ;
430
452
/* S */
@@ -438,7 +460,7 @@ static void metaphone(unsigned char *word, size_t word_len, zend_long max_phonem
438
460
case 'M' :
439
461
case 'N' :
440
462
case 'R' :
441
- Phonize (Curr_Letter );
463
+ Phonize (curr_letter );
442
464
break ;
443
465
default :
444
466
/* nothing */
0 commit comments