Skip to content

Commit 6bf0c44

Browse files
committed
Optimize SJIS-Mobile#KDDI decoder for speed
From my microbenchmarks, the new decoder makes encoding conversion from SJIS-Mobile#KDDI about 30-50% faster.
1 parent 43cdfa3 commit 6bf0c44

File tree

1 file changed

+45
-44
lines changed

1 file changed

+45
-44
lines changed

ext/mbstring/libmbfl/filters/mbfilter_sjis.c

Lines changed: 45 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -2344,59 +2344,60 @@ static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t
23442344
break;
23452345
}
23462346
unsigned char c2 = *p++;
2347+
uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
23472348

2348-
if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
2349-
uint32_t w = 0;
2350-
unsigned int s1, s2;
2351-
SJIS_DECODE(c, c2, s1, s2);
2352-
unsigned int s = ((s1 - 0x21) * 94) + s2 - 0x21;
2353-
2354-
if (s <= 137) {
2355-
if (s == 31) {
2356-
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
2357-
} else if (s == 32) {
2358-
w = 0xFF5E; /* FULLWIDTH TILDE */
2359-
} else if (s == 33) {
2360-
w = 0x2225; /* PARALLEL TO */
2361-
} else if (s == 60) {
2362-
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
2363-
} else if (s == 80) {
2364-
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
2365-
} else if (s == 81) {
2366-
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
2367-
} else if (s == 137) {
2368-
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
2369-
}
2349+
if (w <= 137) {
2350+
if (w == 31) {
2351+
*out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
2352+
continue;
2353+
} else if (w == 32) {
2354+
*out++ = 0xFF5E; /* FULLWIDTH TILDE */
2355+
continue;
2356+
} else if (w == 33) {
2357+
*out++ = 0x2225; /* PARALLEL TO */
2358+
continue;
2359+
} else if (w == 60) {
2360+
*out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
2361+
continue;
2362+
} else if (w == 80) {
2363+
*out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */
2364+
continue;
2365+
} else if (w == 81) {
2366+
*out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */
2367+
continue;
2368+
} else if (w == 137) {
2369+
*out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */
2370+
continue;
23702371
}
2372+
}
23712373

2374+
if (w >= mb_tbl_code2uni_kddi1_min && w <= mb_tbl_code2uni_kddi2_max) {
2375+
int snd = 0;
2376+
w = mbfilter_sjis_emoji_kddi2unicode(w, &snd);
23722377
if (!w) {
2373-
if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) {
2374-
int snd = 0;
2375-
w = mbfilter_sjis_emoji_kddi2unicode(s, &snd);
2376-
if (snd) {
2377-
*out++ = snd;
2378-
}
2379-
} else if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
2380-
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
2381-
} else if (s < jisx0208_ucs_table_size) {
2382-
w = jisx0208_ucs_table[s];
2383-
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
2384-
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
2385-
}
2386-
2387-
if (!w) {
2388-
if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) {
2389-
w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min];
2390-
} else if (s >= (94*94) && s < (114*94)) {
2391-
w = s - (94*94) + 0xE000;
2392-
}
2378+
w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
2379+
if (w >= (94*94) && w < (114*94)) {
2380+
w = w - (94*94) + 0xE000;
23932381
}
2382+
} else if (snd) {
2383+
*out++ = snd;
23942384
}
2395-
2396-
*out++ = w ? w : MBFL_BAD_INPUT;
2385+
} else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) {
2386+
w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min];
2387+
} else if (w < jisx0208_ucs_table_size) {
2388+
w = jisx0208_ucs_table[w];
2389+
} else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) {
2390+
w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min];
2391+
} else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) {
2392+
w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min];
2393+
} else if (w >= (94*94) && w < (114*94)) {
2394+
w = w - (94*94) + 0xE000;
23972395
} else {
23982396
*out++ = MBFL_BAD_INPUT;
2397+
continue;
23992398
}
2399+
2400+
*out++ = w ? w : MBFL_BAD_INPUT;
24002401
} else {
24012402
*out++ = MBFL_BAD_INPUT;
24022403
}

0 commit comments

Comments
 (0)