32
32
33
33
#include "unicode_table_cp936.h"
34
34
35
+ static int mbfl_filt_conv_euccn_wchar_flush (mbfl_convert_filter * filter );
36
+
35
37
static const unsigned char mblen_table_euccn [] = { /* 0xA1-0xFE */
36
38
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
37
39
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
@@ -70,7 +72,7 @@ const struct mbfl_convert_vtbl vtbl_euccn_wchar = {
70
72
mbfl_filt_conv_common_ctor ,
71
73
NULL ,
72
74
mbfl_filt_conv_euccn_wchar ,
73
- mbfl_filt_conv_common_flush ,
75
+ mbfl_filt_conv_euccn_wchar_flush ,
74
76
NULL ,
75
77
};
76
78
@@ -86,51 +88,46 @@ const struct mbfl_convert_vtbl vtbl_wchar_euccn = {
86
88
87
89
/*
 * CK(statement): evaluate `statement` exactly once and, if its value is
 * negative, return -1 from the enclosing function. Wrapped in do/while(0) so
 * it behaves as a single statement after `if`/`else`.
 *
 * The parameter list must follow the macro name with no whitespace in between;
 * otherwise the preprocessor treats CK as an object-like macro.
 */
#define CK(statement)	do { if ((statement) < 0) return (-1); } while (0)
88
90
89
- /*
90
- * EUC-CN => wchar
91
- */
92
- int
93
- mbfl_filt_conv_euccn_wchar (int c , mbfl_convert_filter * filter )
91
+ int mbfl_filt_conv_euccn_wchar (int c , mbfl_convert_filter * filter )
94
92
{
95
93
int c1 , w ;
96
94
97
95
switch (filter -> status ) {
98
96
case 0 :
99
- if (c >= 0 && c < 0x80 ) { /* latin */
97
+ if (c >= 0 && c < 0x80 ) { /* latin */
100
98
CK ((* filter -> output_function )(c , filter -> data ));
101
- } else if (c > 0xa0 && c < 0xff ) { /* dbcs lead byte */
99
+ } else if (( c >= 0xA1 && c <= 0xA9 ) || ( c >= 0xB0 && c <= 0xF7 )) { /* dbcs lead byte */
102
100
filter -> status = 1 ;
103
101
filter -> cache = c ;
104
102
} else {
105
- w = c & MBFL_WCSGROUP_MASK ;
106
- w |= MBFL_WCSGROUP_THROUGH ;
107
- CK ((* filter -> output_function )(w , filter -> data ));
103
+ CK ((* filter -> output_function )(c | MBFL_WCSGROUP_THROUGH , filter -> data ));
108
104
}
109
105
break ;
110
106
111
- case 1 : /* dbcs second byte */
107
+ case 1 : /* dbcs second byte */
112
108
filter -> status = 0 ;
113
109
c1 = filter -> cache ;
114
- if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff ) {
110
+ if (c > 0xA0 && c < 0xFF ) {
115
111
w = (c1 - 0x81 )* 192 + (c - 0x40 );
116
112
if (w >= 0 && w < cp936_ucs_table_size ) {
117
- w = cp936_ucs_table [w ];
113
+ if (w == 0x1864 ) {
114
+ w = 0x30FB ;
115
+ } else if (w == 0x186A ) {
116
+ w = 0x2015 ;
117
+ } else if ((w >= 0x1921 && w <= 0x192A ) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E ) || (w >= 0x1DBB && w <= 0x1DC4 )) {
118
+ w = 0 ;
119
+ } else {
120
+ w = cp936_ucs_table [w ];
121
+ }
118
122
} else {
119
123
w = 0 ;
120
124
}
121
125
if (w <= 0 ) {
122
- w = (c1 << 8 ) | c ;
123
- w &= MBFL_WCSPLANE_MASK ;
124
- w |= MBFL_WCSPLANE_GB2312 ;
126
+ w = (c1 << 8 ) | c | MBFL_WCSPLANE_GB2312 ;
125
127
}
126
128
CK ((* filter -> output_function )(w , filter -> data ));
127
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
128
- CK ((* filter -> output_function )(c , filter -> data ));
129
129
} else {
130
- w = (c1 << 8 ) | c ;
131
- w &= MBFL_WCSGROUP_MASK ;
132
- w |= MBFL_WCSGROUP_THROUGH ;
133
- CK ((* filter -> output_function )(w , filter -> data ));
130
+ CK ((* filter -> output_function )((c1 << 8 ) | c | MBFL_WCSGROUP_THROUGH , filter -> data ));
134
131
}
135
132
break ;
136
133
@@ -142,62 +139,81 @@ mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter)
142
139
return c ;
143
140
}
144
141
145
- /*
146
- * wchar => EUC-CN
147
- */
148
- int
149
- mbfl_filt_conv_wchar_euccn (int c , mbfl_convert_filter * filter )
142
+ int mbfl_filt_conv_wchar_euccn (int c , mbfl_convert_filter * filter )
150
143
{
151
- int c1 , c2 , s ;
144
+ int s = 0 ;
152
145
153
- s = 0 ;
154
146
if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max ) {
155
- s = ucs_a1_cp936_table [c - ucs_a1_cp936_table_min ];
147
+ if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261 ) {
148
+ s = 0 ;
149
+ } else {
150
+ s = ucs_a1_cp936_table [c - ucs_a1_cp936_table_min ];
151
+ }
156
152
} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max ) {
157
- s = ucs_a2_cp936_table [c - ucs_a2_cp936_table_min ];
153
+ if (c == 0x2015 ) {
154
+ s = 0xA1AA ;
155
+ } else if (c == 0x2014 || (c >= 0x2170 && c <= 0x2179 )) {
156
+ s = 0 ;
157
+ } else {
158
+ s = ucs_a2_cp936_table [c - ucs_a2_cp936_table_min ];
159
+ }
158
160
} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max ) {
159
- s = ucs_a3_cp936_table [c - ucs_a3_cp936_table_min ];
161
+ if (c == 0x30FB ) {
162
+ s = 0xA1A4 ;
163
+ } else {
164
+ s = ucs_a3_cp936_table [c - ucs_a3_cp936_table_min ];
165
+ }
160
166
} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max ) {
161
167
s = ucs_i_cp936_table [c - ucs_i_cp936_table_min ];
162
168
} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max ) {
163
- if (c == 0xff04 ) {
164
- s = 0xa1e7 ;
165
- } else if (c == 0xff5e ) {
166
- s = 0xa1ab ;
167
- } else if (c >= 0xff01 && c <= 0xff5d ) {
168
- s = c - 0xff01 + 0xa3a1 ;
169
- } else if (c >= 0xffe0 && c <= 0xffe5 ) {
170
- s = ucs_hff_s_cp936_table [c - 0xffe0 ];
169
+ if (c == 0xFF04 ) {
170
+ s = 0xA1E7 ;
171
+ } else if (c == 0xFF5E ) {
172
+ s = 0xA1AB ;
173
+ } else if (c >= 0xFF01 && c <= 0xFF5D ) {
174
+ s = c - 0xFF01 + 0xA3A1 ;
175
+ } else if (c >= 0xFFE0 && c <= 0xFFE5 ) {
176
+ s = ucs_hff_s_cp936_table [c - 0xFFE0 ];
171
177
}
172
178
}
173
- c1 = (s >> 8 ) & 0xff ;
174
- c2 = s & 0xff ;
175
179
176
- if (c1 < 0xa1 || c2 < 0xa1 ) { /* exclude CP936 extension */
177
- s = c ;
180
+ /* exclude CP936 extensions */
181
+ if (((s >> 8 ) & 0xFF ) < 0xA1 || (s & 0xFF ) < 0xA1 ) {
182
+ s = 0 ;
178
183
}
179
184
180
185
if (s <= 0 ) {
181
- c1 = c & ~MBFL_WCSPLANE_MASK ;
182
- if (c1 == MBFL_WCSPLANE_GB2312 ) {
183
- s = c & MBFL_WCSPLANE_MASK ;
184
- }
185
- if (c == 0 ) {
186
- s = 0 ;
186
+ if (c < 0x80 ) {
187
+ s = c ;
187
188
} else if (s <= 0 ) {
188
189
s = -1 ;
189
190
}
190
191
}
192
+
191
193
if (s >= 0 ) {
192
- if (s < 0x80 ) { /* latin */
194
+ if (s < 0x80 ) { /* latin */
193
195
CK ((* filter -> output_function )(s , filter -> data ));
194
196
} else {
195
- CK ((* filter -> output_function )((s >> 8 ) & 0xff , filter -> data ));
196
- CK ((* filter -> output_function )(s & 0xff , filter -> data ));
197
+ CK ((* filter -> output_function )((s >> 8 ) & 0xFF , filter -> data ));
198
+ CK ((* filter -> output_function )(s & 0xFF , filter -> data ));
197
199
}
198
200
} else {
199
201
CK (mbfl_filt_conv_illegal_output (c , filter ));
200
202
}
201
203
202
204
return c ;
203
205
}
206
+
207
+ static int mbfl_filt_conv_euccn_wchar_flush (mbfl_convert_filter * filter )
208
+ {
209
+ if (filter -> status == 1 ) {
210
+ /* 2-byte character was truncated */
211
+ CK ((* filter -> output_function )(filter -> cache | MBFL_WCSGROUP_THROUGH , filter -> data ));
212
+ }
213
+
214
+ if (filter -> flush_function ) {
215
+ (* filter -> flush_function )(filter -> data );
216
+ }
217
+
218
+ return 0 ;
219
+ }
0 commit comments