31
31
#include "mbfilter_hz.h"
32
32
33
33
#include "unicode_table_cp936.h"
34
+ #include "unicode_table_gb2312.h"
35
+
36
+ static int mbfl_filt_conv_hz_wchar_flush (mbfl_convert_filter * filter );
34
37
35
38
const mbfl_encoding mbfl_encoding_hz = {
36
39
mbfl_no_encoding_hz ,
@@ -49,7 +52,7 @@ const struct mbfl_convert_vtbl vtbl_hz_wchar = {
49
52
mbfl_filt_conv_common_ctor ,
50
53
NULL ,
51
54
mbfl_filt_conv_hz_wchar ,
52
- mbfl_filt_conv_common_flush ,
55
+ mbfl_filt_conv_hz_wchar_flush ,
53
56
NULL ,
54
57
};
55
58
@@ -65,68 +68,73 @@ const struct mbfl_convert_vtbl vtbl_wchar_hz = {
65
68
66
69
/*
 * Evaluate `statement` exactly once and make the enclosing function return -1
 * if the result is negative. Used to propagate failures reported by
 * output callbacks.
 *
 * The '(' must follow the macro name with no whitespace in between; otherwise
 * the preprocessor defines an object-like macro and CK(expr) no longer expands
 * to the check.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
67
70
68
- /*
69
- * HZ => wchar
70
- */
71
- int
72
- mbfl_filt_conv_hz_wchar (int c , mbfl_convert_filter * filter )
71
+ int mbfl_filt_conv_hz_wchar (int c , mbfl_convert_filter * filter )
73
72
{
74
73
int c1 , s , w ;
75
74
76
75
switch (filter -> status & 0xf ) {
77
- /* case 0x00: ASCII */
78
- /* case 0x10: GB2312 */
76
+ /* case 0x00: ASCII */
77
+ /* case 0x10: GB2312 */
79
78
case 0 :
80
- if (c == 0x7e ) {
79
+ if (c == '~' ) {
81
80
filter -> status += 2 ;
82
- } else if (filter -> status == 0x10 && c > 0x20 && c < 0x7f ) { /* DBCS first char */
81
+ } else if (filter -> status == 0x10 && ((c > 0x20 && c <= 0x29 ) || (c >= 0x30 && c <= 0x77 ))) {
82
+ /* DBCS first char */
83
83
filter -> cache = c ;
84
84
filter -> status += 1 ;
85
- } else if (c >= 0 && c < 0x80 ) { /* latin, CTLs */
85
+ } else if (filter -> status == 0 && c >= 0 && c < 0x80 ) { /* latin, CTLs */
86
86
CK ((* filter -> output_function )(c , filter -> data ));
87
87
} else {
88
- w = c & MBFL_WCSGROUP_MASK ;
89
- w |= MBFL_WCSGROUP_THROUGH ;
90
- CK ((* filter -> output_function )(w , filter -> data ));
88
+ CK ((* filter -> output_function )(c | MBFL_WCSGROUP_THROUGH , filter -> data ));
91
89
}
92
90
break ;
93
91
94
- /* case 0x11: GB2312 second char */
92
+ /* case 0x11: GB2312 second char */
95
93
case 1 :
96
94
filter -> status &= ~0xf ;
97
95
c1 = filter -> cache ;
98
- if (c1 > 0x20 && c1 < 0x7f && c > 0x20 && c < 0x7f ) {
96
+ if (c1 > 0x20 && c1 < 0x7F && c > 0x20 && c < 0x7F ) {
99
97
s = (c1 - 1 )* 192 + c + 0x40 ; /* GB2312 */
100
98
if (s >= 0 && s < cp936_ucs_table_size ) {
101
- w = cp936_ucs_table [s ];
99
+ if (s == 0x1864 ) {
100
+ w = 0x30FB ;
101
+ } else if (s == 0x186A ) {
102
+ w = 0x2015 ;
103
+ } else if (s == 0x186C ) {
104
+ w = 0x2225 ;
105
+ } else if ((s >= 0x1920 && s <= 0x192A ) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F ) || (s >= 0x1DBB && s <= 0x1DC4 )) {
106
+ w = 0 ;
107
+ } else {
108
+ w = cp936_ucs_table [s ];
109
+ }
102
110
} else {
103
111
w = 0 ;
104
112
}
105
113
if (w <= 0 ) {
106
- w = (c1 << 8 ) | c ;
107
- w &= MBFL_WCSPLANE_MASK ;
108
- w |= MBFL_WCSPLANE_GB2312 ;
114
+ w = (c1 << 8 ) | c | MBFL_WCSPLANE_GB2312 ;
109
115
}
110
116
CK ((* filter -> output_function )(w , filter -> data ));
111
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
112
- CK ((* filter -> output_function )(c , filter -> data ));
113
117
} else {
114
- w = (c1 << 8 ) | c ;
115
- w &= MBFL_WCSGROUP_MASK ;
116
- w |= MBFL_WCSGROUP_THROUGH ;
118
+ w = (c1 << 8 ) | c | MBFL_WCSGROUP_THROUGH ;
117
119
CK ((* filter -> output_function )(w , filter -> data ));
118
120
}
119
121
break ;
120
122
121
123
/* '~' */
122
124
case 2 :
123
- if (c == 0x7d ) { /* '}' */
124
- filter -> status = 0x0 ;
125
- } else if (c == 0x7b ) { /* '{' */
125
+ if (c == '}' && filter -> status == 0x12 ) {
126
+ filter -> status = 0 ;
127
+ } else if (c == '{' && filter -> status == 2 ) {
126
128
filter -> status = 0x10 ;
127
- } else if (c == 0x7e ) { /* '~' */
128
- filter -> status = 0x0 ;
129
- CK ((* filter -> output_function )(0x007e , filter -> data ));
129
+ } else if (c == '~' && filter -> status == 2 ) {
130
+ CK ((* filter -> output_function )('~' , filter -> data ));
131
+ } else if (c == '\n' ) {
132
+ /* "~\n" is a line continuation; no output is needed, nor should we shift modes */
133
+ filter -> status -= 2 ;
134
+ } else {
135
+ /* Invalid character after ~ */
136
+ filter -> status -= 2 ;
137
+ CK ((* filter -> output_function )(c | MBFL_WCSGROUP_THROUGH , filter -> data ));
130
138
}
131
139
break ;
132
140
@@ -138,66 +146,94 @@ mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter)
138
146
return c ;
139
147
}
140
148
141
- /*
142
- * wchar => HZ
143
- */
144
- int
145
- mbfl_filt_conv_wchar_hz (int c , mbfl_convert_filter * filter )
149
+ static int mbfl_filt_conv_hz_wchar_flush (mbfl_convert_filter * filter )
150
+ {
151
+ if (filter -> status == 0x11 ) {
152
+ /* 2-byte character was truncated */
153
+ CK ((* filter -> output_function )(filter -> cache | MBFL_WCSGROUP_THROUGH , filter -> data ));
154
+ }
155
+
156
+ if (filter -> flush_function ) {
157
+ (* filter -> flush_function )(filter -> data );
158
+ }
159
+
160
+ return 0 ;
161
+ }
162
+
163
+ int mbfl_filt_conv_wchar_hz (int c , mbfl_convert_filter * filter )
146
164
{
147
- int s ;
165
+ int s = 0 ;
148
166
149
- s = 0 ;
150
167
if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max ) {
151
- s = ucs_a1_cp936_table [c - ucs_a1_cp936_table_min ];
168
+ if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261 || c == 0x2CA || c == 0x2CB || c == 0x2D9 ) {
169
+ s = 0 ;
170
+ } else {
171
+ s = ucs_a1_cp936_table [c - ucs_a1_cp936_table_min ];
172
+ }
152
173
} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max ) {
153
- s = ucs_a2_cp936_table [c - ucs_a2_cp936_table_min ];
174
+ if (c == 0x2015 ) {
175
+ s = 0xA1AA ;
176
+ } else if (c == 0x2010 || c == 0x2013 || c == 0x2014 || c == 0x2016 || c == 0x2025 || c == 0x2035 ||
177
+ c == 0x2105 || c == 0x2109 || c == 0x2121 || (c >= 0x2170 && c <= 0x2179 ) || (c >= 0x2196 && c <= 0x2199 ) ||
178
+ c == 0x2215 || c == 0x221F || c == 0x2223 || c == 0x2252 || c == 0x2266 || c == 0x2267 || c == 0x2295 ||
179
+ (c >= 0x2550 && c <= 0x2573 ) || c == 0x22BF || c == 0x2609 || (c >= 0x2581 && c <= 0x258F ) ||
180
+ (c >= 0x2593 && c <= 0x2595 ) || c == 0x25BC || c == 0x25BD || (c >= 0x25E2 && c <= 0x25E5 )) {
181
+ s = 0 ;
182
+ } else {
183
+ s = ucs_a2_cp936_table [c - ucs_a2_cp936_table_min ];
184
+ }
154
185
} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max ) {
155
- s = ucs_a3_cp936_table [c - ucs_a3_cp936_table_min ];
156
- } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max ) {
157
- s = ucs_i_cp936_table [c - ucs_i_cp936_table_min ];
186
+ if (c == 0x30FB ) {
187
+ s = 0xA1A4 ;
188
+ } else if (c == 0x3006 || c == 0x3007 || c == 0x3012 || c == 0x3231 || c == 0x32A3 || c >= 0x3300 ||
189
+ (c >= 0x3018 && c <= 0x3040 ) || (c >= 0x309B && c <= 0x309E ) || (c >= 0x30FC && c <= 0x30FE )) {
190
+ s = 0 ;
191
+ } else {
192
+ s = ucs_a3_cp936_table [c - ucs_a3_cp936_table_min ];
193
+ }
194
+ } else if (c >= ucs_i_gb2312_table_min && c < ucs_i_gb2312_table_max ) {
195
+ s = ucs_i_gb2312_table [c - ucs_i_gb2312_table_min ];
158
196
} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max ) {
159
- if (c == 0xff04 ) {
160
- s = 0xa1e7 ;
161
- } else if (c == 0xff5e ) {
162
- s = 0xa1ab ;
163
- } else if (c >= 0xff01 && c <= 0xff5d ) {
164
- s = c - 0xff01 + 0xa3a1 ;
165
- } else if (c >= 0xffe0 && c <= 0xffe5 ) {
166
- s = ucs_hff_s_cp936_table [c - 0xffe0 ];
197
+ if (c == 0xFF04 ) {
198
+ s = 0xA1E7 ;
199
+ } else if (c == 0xFF5E ) {
200
+ s = 0xA1AB ;
201
+ } else if (c >= 0xFF01 && c <= 0xFF5D ) {
202
+ s = c - 0xFF01 + 0xA3A1 ;
203
+ } else if (c == 0xFFE0 || c == 0xFFE1 || c == 0xFFE3 || c == 0xFFE5 ) {
204
+ s = ucs_hff_s_cp936_table [c - 0xFFE0 ];
167
205
}
168
206
}
207
+
169
208
if (s & 0x8000 ) {
170
209
s -= 0x8080 ;
171
210
}
172
211
173
212
if (s <= 0 ) {
174
- if (c == 0 ) {
175
- s = 0 ;
176
- } else if (s <= 0 ) {
177
- s = -1 ;
178
- }
179
- } else if ((s >= 0x80 && s < 0x2121 ) || (s > 0x8080 )) {
213
+ s = (c == 0 ) ? 0 : -1 ;
214
+ } else if ((s >= 0x80 && s < 0x2121 ) || s > 0x8080 ) {
180
215
s = -1 ;
181
216
}
217
+
182
218
if (s >= 0 ) {
183
219
if (s < 0x80 ) { /* ASCII */
184
220
if ((filter -> status & 0xff00 ) != 0 ) {
185
- CK ((* filter -> output_function )(0x7e , filter -> data )); /* '~' */
186
- CK ((* filter -> output_function )(0x7d , filter -> data )); /* '}' */
221
+ CK ((* filter -> output_function )('~' , filter -> data ));
222
+ CK ((* filter -> output_function )('}' , filter -> data ));
187
223
}
188
224
filter -> status = 0 ;
189
- if (s == 0x7e ) {
190
- CK ((* filter -> output_function )(0x7e , filter -> data ));
225
+ if (s == 0x7E ) {
226
+ CK ((* filter -> output_function )('~' , filter -> data ));
191
227
}
192
228
CK ((* filter -> output_function )(s , filter -> data ));
193
229
} else { /* GB 2312-80 */
194
- if ((filter -> status & 0xff00 ) != 0x200 ) {
195
- CK ((* filter -> output_function )(0x7e , filter -> data )); /* '~' */
196
- CK ((* filter -> output_function )(0x7b , filter -> data )); /* '{' */
230
+ if ((filter -> status & 0xFF00 ) != 0x200 ) {
231
+ CK ((* filter -> output_function )('~' , filter -> data ));
232
+ CK ((* filter -> output_function )('{' , filter -> data ));
197
233
}
198
234
filter -> status = 0x200 ;
199
- CK ((* filter -> output_function )((s >> 8 ) & 0x7f , filter -> data ));
200
- CK ((* filter -> output_function )(s & 0x7f , filter -> data ));
235
+ CK ((* filter -> output_function )((s >> 8 ) & 0x7F , filter -> data ));
236
+ CK ((* filter -> output_function )(s & 0x7F , filter -> data ));
201
237
}
202
238
} else {
203
239
CK (mbfl_filt_conv_illegal_output (c , filter ));
@@ -206,14 +242,13 @@ mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter)
206
242
return c ;
207
243
}
208
244
209
- int
210
- mbfl_filt_conv_any_hz_flush (mbfl_convert_filter * filter )
245
+ int mbfl_filt_conv_any_hz_flush (mbfl_convert_filter * filter )
211
246
{
212
247
/* back to latin */
213
- if (( filter -> status & 0xff00 ) != 0 ) {
214
- CK ((* filter -> output_function )(0x7e , filter -> data )); /* ~ */
215
- CK ((* filter -> output_function )(0x7d , filter -> data )); /* '{' */
248
+ if (filter -> status & 0xFF00 ) {
249
+ CK ((* filter -> output_function )('~' , filter -> data ));
250
+ CK ((* filter -> output_function )('}' , filter -> data ));
216
251
}
217
- filter -> status &= 0xff ;
252
+ filter -> status = 0 ;
218
253
return 0 ;
219
254
}
0 commit comments