20
20
#include "common/unicode_category.h"
21
21
#include "mb/pg_wchar.h"
22
22
23
/*
 * Outcome of mapping a single character with casemap(). The value tells the
 * caller which output, if any, holds the mapping result.
 */
enum CaseMapResult
{
	/* no mapping found; the caller copies the source character unchanged */
	CASEMAP_SELF = 0,

	/* the character maps to exactly one character */
	CASEMAP_SIMPLE,

	/* the character maps to an array of up to MAX_CASE_EXPANSION characters */
	CASEMAP_SPECIAL,
};
29
+
23
30
static const pg_case_map * find_case_map (pg_wchar ucs );
24
31
static size_t convert_case (char * dst , size_t dstsize , const char * src , ssize_t srclen ,
25
32
CaseKind str_casekind , bool full , WordBoundaryNext wbnext ,
26
33
void * wbstate );
27
- static bool check_special_conditions (int conditions , const char * str ,
28
- size_t len , size_t offset );
34
+ static enum CaseMapResult casemap (pg_wchar u1 , CaseKind casekind , bool full ,
35
+ const char * src , size_t srclen , size_t srcoff ,
36
+ pg_wchar * u2 , const pg_wchar * * special );
29
37
30
38
pg_wchar
31
39
unicode_lowercase_simple (pg_wchar code )
@@ -214,8 +222,9 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
214
222
{
215
223
pg_wchar u1 = utf8_to_unicode ((unsigned char * ) src + srcoff );
216
224
int u1len = unicode_utf8len (u1 );
217
- const pg_case_map * casemap = find_case_map (u1 );
218
- const pg_special_case * special = NULL ;
225
+ pg_wchar simple = 0 ;
226
+ const pg_wchar * special = NULL ;
227
+ enum CaseMapResult casemap_result ;
219
228
220
229
if (str_casekind == CaseTitle )
221
230
{
@@ -228,56 +237,47 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
228
237
chr_casekind = CaseLower ;
229
238
}
230
239
231
- /*
232
- * Find special case that matches the conditions, if any.
233
- *
234
- * Note: only a single special mapping per codepoint is currently
235
- * supported, though Unicode allows for multiple special mappings for
236
- * a single codepoint.
237
- */
238
- if (full && casemap && casemap -> special_case )
239
- {
240
- int16 conditions = casemap -> special_case -> conditions ;
241
-
242
- Assert (casemap -> special_case -> codepoint == u1 );
243
- if (check_special_conditions (conditions , src , srclen , srcoff ))
244
- special = casemap -> special_case ;
245
- }
240
+ casemap_result = casemap (u1 , chr_casekind , full , src , srclen , srcoff ,
241
+ & simple , & special );
246
242
247
- /* perform mapping, update result_len, and write to dst */
248
- if (special )
243
+ switch (casemap_result )
249
244
{
250
- for (int i = 0 ; i < MAX_CASE_EXPANSION ; i ++ )
251
- {
252
- pg_wchar u2 = special -> map [chr_casekind ][i ];
253
- size_t u2len = unicode_utf8len (u2 );
254
-
255
- if (u2 == '\0' )
256
- break ;
257
-
258
- if (result_len + u2len <= dstsize )
259
- unicode_to_utf8 (u2 , (unsigned char * ) dst + result_len );
260
-
261
- result_len += u2len ;
262
- }
263
- }
264
- else if (casemap )
265
- {
266
- pg_wchar u2 = casemap -> simplemap [chr_casekind ];
267
- pg_wchar u2len = unicode_utf8len (u2 );
268
-
269
- if (result_len + u2len <= dstsize )
270
- unicode_to_utf8 (u2 , (unsigned char * ) dst + result_len );
271
-
272
- result_len += u2len ;
273
- }
274
- else
275
- {
276
- /* no mapping; copy bytes from src */
277
- if (result_len + u1len <= dstsize )
278
- memcpy (dst + result_len , src + srcoff , u1len );
279
-
280
- result_len += u1len ;
245
+ case CASEMAP_SELF :
246
+ /* no mapping; copy bytes from src */
247
+ Assert (simple == 0 );
248
+ Assert (special == NULL );
249
+ if (result_len + u1len <= dstsize )
250
+ memcpy (dst + result_len , src + srcoff , u1len );
251
+
252
+ result_len += u1len ;
253
+ break ;
254
+ case CASEMAP_SIMPLE :
255
+ {
256
+ /* replace with single character */
257
+ pg_wchar u2 = simple ;
258
+ pg_wchar u2len = unicode_utf8len (u2 );
259
+
260
+ Assert (special == NULL );
261
+ if (result_len + u2len <= dstsize )
262
+ unicode_to_utf8 (u2 , (unsigned char * ) dst + result_len );
263
+
264
+ result_len += u2len ;
265
+ }
266
+ break ;
267
+ case CASEMAP_SPECIAL :
268
+ /* replace with up to MAX_CASE_EXPANSION characters */
269
+ Assert (simple == 0 );
270
+ for (int i = 0 ; i < MAX_CASE_EXPANSION && special [i ]; i ++ )
271
+ {
272
+ pg_wchar u2 = special [i ];
273
+ size_t u2len = unicode_utf8len (u2 );
274
+
275
+ if (result_len + u2len <= dstsize )
276
+ unicode_to_utf8 (u2 , (unsigned char * ) dst + result_len );
277
+
278
+ result_len += u2len ;
279
+ }
280
+ break ;
281
281
}
282
282
283
283
srcoff += u1len ;
@@ -351,6 +351,10 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)
351
351
return true;
352
352
}
353
353
354
+ /*
355
+ * Unicode allows for special casing to be applied only under certain
356
+ * circumstances. The only currently-supported condition is Final_Sigma.
357
+ */
354
358
static bool
355
359
check_special_conditions (int conditions , const char * str , size_t len ,
356
360
size_t offset )
@@ -365,6 +369,51 @@ check_special_conditions(int conditions, const char *str, size_t len,
365
369
return false;
366
370
}
367
371
372
+ /*
373
+ * Map the given character to the requested case.
374
+ *
375
+ * If full is true, and a special case mapping is found and the conditions are
376
+ * met, 'special' is set to the mapping result (which is an array of up to
377
+ * MAX_CASE_EXPANSION characters) and CASEMAP_SPECIAL is returned.
378
+ *
379
+ * Otherwise, search for a simple mapping, and if found, set 'simple' to the
380
+ * result and return CASEMAP_SIMPLE.
381
+ *
382
+ * If no mapping is found, return CASEMAP_SELF, and the caller should copy the
383
+ * character without modification.
384
+ */
385
+ static enum CaseMapResult
386
+ casemap (pg_wchar u1 , CaseKind casekind , bool full ,
387
+ const char * src , size_t srclen , size_t srcoff ,
388
+ pg_wchar * simple , const pg_wchar * * special )
389
+ {
390
+ const pg_case_map * map ;
391
+
392
+ if (u1 < 0x80 )
393
+ {
394
+ * simple = case_map [u1 ].simplemap [casekind ];
395
+
396
+ return CASEMAP_SIMPLE ;
397
+ }
398
+
399
+ map = find_case_map (u1 );
400
+
401
+ if (map == NULL )
402
+ return CASEMAP_SELF ;
403
+
404
+ if (full && map -> special_case != NULL &&
405
+ check_special_conditions (map -> special_case -> conditions ,
406
+ src , srclen , srcoff ))
407
+ {
408
+ * special = map -> special_case -> map [casekind ];
409
+ return CASEMAP_SPECIAL ;
410
+ }
411
+
412
+ * simple = map -> simplemap [casekind ];
413
+
414
+ return CASEMAP_SIMPLE ;
415
+ }
416
+
368
417
/* find entry in simple case map, if any */
369
418
static const pg_case_map *
370
419
find_case_map (pg_wchar ucs )
0 commit comments