Skip to content

Commit 005e49e

Browse files
committed
Optimize MacJapanese decoder for speed
On longer MacJapanese strings, conversion speed is boosted by 60-80%. On medium-length strings, conversion speed is boosted by around 20-30%. For very short strings, there is no appreciable difference.
1 parent 4072a76 commit 005e49e

File tree

2 files changed

+76
-72
lines changed

2 files changed

+76
-72
lines changed

ext/mbstring/libmbfl/filters/mbfilter_sjis.c

Lines changed: 75 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -966,91 +966,95 @@ static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *
966966
break;
967967
}
968968
unsigned char c2 = *p++;
969+
uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
969970

970-
if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) {
971-
unsigned int w = 0, s1 = 0, s2 = 0;
972-
SJIS_DECODE(c, c2, s1, s2);
973-
unsigned int s = (s1 - 0x21)*94 + s2 - 0x21;
974-
975-
if (s <= 0x89) {
976-
if (s == 0x1C) {
977-
w = 0x2014; /* EM DASH */
978-
} else if (s == 0x1F) {
979-
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
980-
} else if (s == 0x20) {
981-
w = 0x301C; /* FULLWIDTH TILDE */
982-
} else if (s == 0x21) {
983-
w = 0x2016; /* PARALLEL TO */
984-
} else if (s == 0x3C) {
985-
w = 0x2212; /* FULLWIDTH HYPHEN-MINUS */
986-
} else if (s == 0x50) {
987-
w = 0xA2; /* FULLWIDTH CENT SIGN */
988-
} else if (s == 0x51) {
989-
w = 0xA3; /* FULLWIDTH POUND SIGN */
990-
} else if (s == 0x89) {
991-
w = 0xAC; /* FULLWIDTH NOT SIGN */
992-
}
993-
if (w) {
994-
*out++ = w;
995-
continue;
996-
}
971+
if (w <= 0x89) {
972+
if (w == 0x1C) {
973+
*out++ = 0x2014; /* EM DASH */
974+
continue;
975+
} else if (w == 0x1F) {
976+
*out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
977+
continue;
978+
} else if (w == 0x20) {
979+
*out++ = 0x301C; /* FULLWIDTH TILDE */
980+
continue;
981+
} else if (w == 0x21) {
982+
*out++ = 0x2016; /* PARALLEL TO */
983+
continue;
984+
} else if (w == 0x3C) {
985+
*out++ = 0x2212; /* FULLWIDTH HYPHEN-MINUS */
986+
continue;
987+
} else if (w == 0x50) {
988+
*out++ = 0xA2; /* FULLWIDTH CENT SIGN */
989+
continue;
990+
} else if (w == 0x51) {
991+
*out++ = 0xA3; /* FULLWIDTH POUND SIGN */
992+
continue;
993+
} else if (w == 0x89) {
994+
*out++ = 0xAC; /* FULLWIDTH NOT SIGN */
995+
continue;
997996
}
998-
999-
for (int i = 0; i < 7; i++) {
1000-
if (s >= code_tbl[i][0] && s <= code_tbl[i][1]) {
1001-
*out++ = s - code_tbl[i][0] + code_tbl[i][2];
1002-
goto next_iteration;
997+
} else {
998+
if (w >= 0x2F0 && w <= 0x3A3) {
999+
for (int i = 0; i < 7; i++) {
1000+
if (w >= code_tbl[i][0] && w <= code_tbl[i][1]) {
1001+
*out++ = w - code_tbl[i][0] + code_tbl[i][2];
1002+
goto next_iteration;
1003+
}
10031004
}
10041005
}
10051006

1006-
for (int i = 0; i < code_tbl_m_len; i++) {
1007-
if (s == code_tbl_m[i][0]) {
1008-
int n = 5;
1009-
if (code_tbl_m[i][1] == 0xF860) {
1010-
n = 3;
1011-
} else if (code_tbl_m[i][1] == 0xF861) {
1012-
n = 4;
1013-
}
1014-
if ((limit - out) < n) {
1015-
p -= 2;
1016-
goto finished;
1017-
}
1018-
for (int j = 1; j <= n; j++) {
1019-
*out++ = code_tbl_m[i][j];
1007+
if (w >= 0x340 && w <= 0x523) {
1008+
for (int i = 0; i < code_tbl_m_len; i++) {
1009+
if (w == code_tbl_m[i][0]) {
1010+
int n = 5;
1011+
if (code_tbl_m[i][1] == 0xF860) {
1012+
n = 3;
1013+
} else if (code_tbl_m[i][1] == 0xF861) {
1014+
n = 4;
1015+
}
1016+
if ((limit - out) < n) {
1017+
p -= 2;
1018+
goto finished;
1019+
}
1020+
for (int j = 1; j <= n; j++) {
1021+
*out++ = code_tbl_m[i][j];
1022+
}
1023+
goto next_iteration;
10201024
}
1021-
goto next_iteration;
10221025
}
10231026
}
10241027

1025-
for (int i = 0; i < 8; i++) {
1026-
if (s >= code_ofst_tbl[i][0] && s <= code_ofst_tbl[i][1]) {
1027-
w = code_map[i][s - code_ofst_tbl[i][0]];
1028-
if (!w) {
1029-
*out++ = MBFL_BAD_INPUT;
1028+
if (w >= 0x3AC && w <= 0x20A5) {
1029+
for (int i = 0; i < 8; i++) {
1030+
if (w >= code_ofst_tbl[i][0] && w <= code_ofst_tbl[i][1]) {
1031+
uint32_t w2 = code_map[i][w - code_ofst_tbl[i][0]];
1032+
if (!w2) {
1033+
*out++ = MBFL_BAD_INPUT;
1034+
goto next_iteration;
1035+
}
1036+
if ((limit - out) < 2) {
1037+
p -= 2;
1038+
goto finished;
1039+
}
1040+
*out++ = w2;
1041+
if (w >= 0x43E && w <= 0x441) {
1042+
*out++ = 0xF87A;
1043+
} else if (w == 0x3B1 || w == 0x3B7) {
1044+
*out++ = 0xF87F;
1045+
} else if (w == 0x4B8 || w == 0x4B9 || w == 0x4C4) {
1046+
*out++ = 0x20DD;
1047+
} else if (w == 0x1ED9 || w == 0x1EDA || w == 0x1EE8 || w == 0x1EF3 || (w >= 0x1EF5 && w <= 0x1EFB) || w == 0x1F05 || w == 0x1F06 || w == 0x1F18 || (w >= 0x1FF2 && w <= 0x20A5)) {
1048+
*out++ = 0xF87E;
1049+
}
10301050
goto next_iteration;
10311051
}
1032-
if ((limit - out) < 2) {
1033-
p -= 2;
1034-
goto finished;
1035-
}
1036-
*out++ = w;
1037-
if (s >= 0x43E && s <= 0x441) {
1038-
*out++ = 0xF87A;
1039-
} else if (s == 0x3B1 || s == 0x3B7) {
1040-
*out++ = 0xF87F;
1041-
} else if (s == 0x4B8 || s == 0x4B9 || s == 0x4C4) {
1042-
*out++ = 0x20DD;
1043-
} else if (s == 0x1ED9 || s == 0x1EDA || s == 0x1EE8 || s == 0x1EF3 || (s >= 0x1EF5 && s <= 0x1EFB) || s == 0x1F05 || s == 0x1F06 || s == 0x1F18 || (s >= 0x1FF2 && s <= 0x20A5)) {
1044-
*out++ = 0xF87E;
1045-
}
1046-
goto next_iteration;
10471052
}
10481053
}
1054+
}
10491055

1050-
if (s < jisx0208_ucs_table_size) {
1051-
w = jisx0208_ucs_table[s];
1052-
}
1053-
1056+
if (w < jisx0208_ucs_table_size) {
1057+
w = jisx0208_ucs_table[w];
10541058
if (!w)
10551059
w = MBFL_BAD_INPUT;
10561060
*out++ = w;

ext/mbstring/libmbfl/filters/sjis_mac2uni.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ static const unsigned short code_tbl[][3] = {
223223
{0x038a, 0x03a3, 0x249c},
224224
};
225225

226-
static const unsigned short code_ofst_tbl[] [2]= {
226+
static const unsigned short code_ofst_tbl[][2] = {
227227
{0x03ac, 0x03c9},
228228
{0x0406, 0x0420},
229229
{0x0432, 0x0441},

0 commit comments

Comments
 (0)