Skip to content

Commit cbd108a

Browse files
author
Yasuo Ohgaki
committed
1 parent 06bbfe1 commit cbd108a

34 files changed

+464
-161
lines changed

ext/iconv/iconv.c

+39-5
Original file line numberDiff line numberDiff line change
@@ -220,21 +220,55 @@ static char _generic_superset_name[] = ICONV_UCS4_ENCODING;
220220
#define GENERIC_SUPERSET_NBYTES 4
221221
/* }}} */
222222

223-
static PHP_INI_MH(OnUpdateStringIconvCharset)
223+
224+
static PHP_INI_MH(OnUpdateInputEncoding)
225+
{
226+
if (new_value_length >= ICONV_CSNMAXLEN) {
227+
return FAILURE;
228+
}
229+
if (new_value_length) {
230+
OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
231+
} else {
232+
OnUpdateString(entry, PG(input_encoding), strlen(PG(input_encoding))+1, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
233+
}
234+
return SUCCESS;
235+
}
236+
237+
238+
static PHP_INI_MH(OnUpdateOutputEncoding)
224239
{
225240
if(new_value_length >= ICONV_CSNMAXLEN) {
226241
return FAILURE;
227242
}
228-
OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
243+
if (new_value_length) {
244+
OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
245+
} else {
246+
OnUpdateString(entry, PG(output_encoding), strlen(PG(output_encoding))+1, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
247+
}
229248
return SUCCESS;
230249
}
231250

251+
252+
static PHP_INI_MH(OnUpdateInternalEncoding)
253+
{
254+
if(new_value_length >= ICONV_CSNMAXLEN) {
255+
return FAILURE;
256+
}
257+
if (new_value_length) {
258+
OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
259+
} else {
260+
OnUpdateString(entry, PG(internal_encoding), strlen(PG(internal_encoding))+1, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
261+
}
262+
return SUCCESS;
263+
}
264+
265+
232266
/* {{{ PHP_INI
233267
*/
234268
PHP_INI_BEGIN()
235-
STD_PHP_INI_ENTRY("iconv.input_encoding", ICONV_INPUT_ENCODING, PHP_INI_ALL, OnUpdateStringIconvCharset, input_encoding, zend_iconv_globals, iconv_globals)
236-
STD_PHP_INI_ENTRY("iconv.output_encoding", ICONV_OUTPUT_ENCODING, PHP_INI_ALL, OnUpdateStringIconvCharset, output_encoding, zend_iconv_globals, iconv_globals)
237-
STD_PHP_INI_ENTRY("iconv.internal_encoding", ICONV_INTERNAL_ENCODING, PHP_INI_ALL, OnUpdateStringIconvCharset, internal_encoding, zend_iconv_globals, iconv_globals)
269+
STD_PHP_INI_ENTRY("iconv.input_encoding", "", PHP_INI_ALL, OnUpdateInputEncoding, input_encoding, zend_iconv_globals, iconv_globals)
270+
STD_PHP_INI_ENTRY("iconv.output_encoding", "", PHP_INI_ALL, OnUpdateOutputEncoding, output_encoding, zend_iconv_globals, iconv_globals)
271+
STD_PHP_INI_ENTRY("iconv.internal_encoding", "", PHP_INI_ALL, OnUpdateInternalEncoding, internal_encoding, zend_iconv_globals, iconv_globals)
238272
PHP_INI_END()
239273
/* }}} */
240274

ext/iconv/php_iconv.h

-6
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,9 @@ ZEND_END_MODULE_GLOBALS(iconv)
7979
#endif
8080

8181
#ifdef HAVE_IBM_ICONV
82-
# define ICONV_INPUT_ENCODING "ISO8859-1"
83-
# define ICONV_OUTPUT_ENCODING "ISO8859-1"
84-
# define ICONV_INTERNAL_ENCODING "ISO8859-1"
8582
# define ICONV_ASCII_ENCODING "IBM-850"
8683
# define ICONV_UCS4_ENCODING "UCS-4"
8784
#else
88-
# define ICONV_INPUT_ENCODING "ISO-8859-1"
89-
# define ICONV_OUTPUT_ENCODING "ISO-8859-1"
90-
# define ICONV_INTERNAL_ENCODING "ISO-8859-1"
9185
# define ICONV_ASCII_ENCODING "ASCII"
9286
# define ICONV_UCS4_ENCODING "UCS-4LE"
9387
#endif
+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
--TEST--
2+
Encoding INI test
3+
--SKIPIF--
4+
<?php extension_loaded('iconv') or die('skip iconv not available'); ?>
5+
--INI--
6+
default_charset=ISO-8859-1
7+
internal_encoding=
8+
input_encoding=
9+
output_encoding=
10+
iconv.internal_encoding=ISO-8859-1
11+
iconv.http_input=ISO-8859-1
12+
iconv.http_output=ISO-8859-1
13+
--FILE--
14+
<?php
15+
echo "Getting INI\n";
16+
var_dump(ini_get('default_charset'));
17+
var_dump(ini_get('internal_encoding'));
18+
var_dump(ini_get('input_encoding'));
19+
var_dump(ini_get('output_encoding'));
20+
21+
var_dump(ini_get('iconv.internal_encoding'));
22+
var_dump(ini_get('iconv.input_encoding'));
23+
var_dump(ini_get('iconv.output_encoding'));
24+
25+
echo "Setting INI\n";
26+
var_dump(ini_set('default_charset', 'UTF-8'));
27+
var_dump(ini_set('internal_encoding', 'UTF-8'));
28+
var_dump(ini_set('input_encoding', 'UTF-8'));
29+
var_dump(ini_set('output_encoding', 'UTF-8'));
30+
var_dump(ini_set('iconv.internal_encoding', 'UTF-8'));
31+
var_dump(ini_set('iconv.input_encoding', 'UTF-8'));
32+
var_dump(ini_set('iconv.output_encoding', 'UTF-8'));
33+
34+
echo "Getting INI\n";
35+
var_dump(ini_get('default_charset'));
36+
var_dump(ini_get('internal_encoding'));
37+
var_dump(ini_get('input_encoding'));
38+
var_dump(ini_get('output_encoding'));
39+
40+
var_dump(ini_get('iconv.internal_encoding'));
41+
var_dump(ini_get('iconv.input_encoding'));
42+
var_dump(ini_get('iconv.output_encoding'));
43+
44+
--EXPECT--
45+
Getting INI
46+
string(10) "ISO-8859-1"
47+
string(0) ""
48+
string(0) ""
49+
string(0) ""
50+
string(10) "ISO-8859-1"
51+
string(0) ""
52+
string(0) ""
53+
Setting INI
54+
string(10) "ISO-8859-1"
55+
string(0) ""
56+
string(0) ""
57+
string(0) ""
58+
string(10) "ISO-8859-1"
59+
string(0) ""
60+
string(0) ""
61+
Getting INI
62+
string(5) "UTF-8"
63+
string(5) "UTF-8"
64+
string(5) "UTF-8"
65+
string(5) "UTF-8"
66+
string(5) "UTF-8"
67+
string(5) "UTF-8"
68+
string(5) "UTF-8"

ext/iconv/tests/iconv_set_encoding_variation.phpt

+27-27
Original file line numberDiff line numberDiff line change
@@ -183,17 +183,17 @@ string(3) "0.5"
183183
bool(true)
184184
bool(true)
185185
bool(true)
186-
string(0) ""
187-
string(0) ""
188-
string(0) ""
186+
string(5) "UTF-8"
187+
string(5) "UTF-8"
188+
string(5) "UTF-8"
189189

190190
-- Iteration 11 --
191191
bool(true)
192192
bool(true)
193193
bool(true)
194-
string(0) ""
195-
string(0) ""
196-
string(0) ""
194+
string(5) "UTF-8"
195+
string(5) "UTF-8"
196+
string(5) "UTF-8"
197197

198198
-- Iteration 12 --
199199
bool(true)
@@ -207,9 +207,9 @@ string(1) "1"
207207
bool(true)
208208
bool(true)
209209
bool(true)
210-
string(0) ""
211-
string(0) ""
212-
string(0) ""
210+
string(5) "UTF-8"
211+
string(5) "UTF-8"
212+
string(5) "UTF-8"
213213

214214
-- Iteration 14 --
215215
bool(true)
@@ -223,25 +223,25 @@ string(1) "1"
223223
bool(true)
224224
bool(true)
225225
bool(true)
226-
string(0) ""
227-
string(0) ""
228-
string(0) ""
226+
string(5) "UTF-8"
227+
string(5) "UTF-8"
228+
string(5) "UTF-8"
229229

230230
-- Iteration 16 --
231231
bool(true)
232232
bool(true)
233233
bool(true)
234-
string(0) ""
235-
string(0) ""
236-
string(0) ""
234+
string(5) "UTF-8"
235+
string(5) "UTF-8"
236+
string(5) "UTF-8"
237237

238238
-- Iteration 17 --
239239
bool(true)
240240
bool(true)
241241
bool(true)
242-
string(0) ""
243-
string(0) ""
244-
string(0) ""
242+
string(5) "UTF-8"
243+
string(5) "UTF-8"
244+
string(5) "UTF-8"
245245

246246
-- Iteration 18 --
247247
bool(true)
@@ -279,17 +279,17 @@ string(5) "UTF-8"
279279
bool(true)
280280
bool(true)
281281
bool(true)
282-
string(0) ""
283-
string(0) ""
284-
string(0) ""
282+
string(5) "UTF-8"
283+
string(5) "UTF-8"
284+
string(5) "UTF-8"
285285

286286
-- Iteration 23 --
287287
bool(true)
288288
bool(true)
289289
bool(true)
290-
string(0) ""
291-
string(0) ""
292-
string(0) ""
290+
string(5) "UTF-8"
291+
string(5) "UTF-8"
292+
string(5) "UTF-8"
293293

294294
-- Iteration 24 --
295295

@@ -301,7 +301,7 @@ NULL
301301

302302
Warning: iconv_set_encoding() expects parameter 2 to be string, resource given in %s on line %d
303303
NULL
304-
string(0) ""
305-
string(0) ""
306-
string(0) ""
304+
string(5) "UTF-8"
305+
string(5) "UTF-8"
306+
string(5) "UTF-8"
307307
Done

ext/mbstring/mbstring.c

+34-51
Original file line numberDiff line numberDiff line change
@@ -1236,6 +1236,11 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input)
12361236
if (MBSTRG(http_input_list)) {
12371237
pefree(MBSTRG(http_input_list), 1);
12381238
}
1239+
if (SUCCESS == php_mb_parse_encoding_list(PG(input_encoding), strlen(PG(input_encoding))+1, &list, &size, 1 TSRMLS_CC)) {
1240+
MBSTRG(http_input_list) = list;
1241+
MBSTRG(http_input_list_size) = 0;
1242+
return SUCCESS;
1243+
}
12391244
MBSTRG(http_input_list) = NULL;
12401245
MBSTRG(http_input_list_size) = 0;
12411246
return SUCCESS;
@@ -1261,18 +1266,20 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
12611266
const mbfl_encoding *encoding;
12621267

12631268
if (new_value == NULL || new_value_length == 0) {
1264-
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1265-
MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1266-
return SUCCESS;
1267-
}
1268-
1269-
encoding = mbfl_name2encoding(new_value);
1270-
if (!encoding) {
1271-
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1272-
MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1273-
return FAILURE;
1269+
encoding = mbfl_name2encoding(PG(output_encoding));
1270+
if (!encoding) {
1271+
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1272+
MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1273+
return SUCCESS;
1274+
}
1275+
} else {
1276+
encoding = mbfl_name2encoding(new_value);
1277+
if (!encoding) {
1278+
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1279+
MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1280+
return FAILURE;
1281+
}
12741282
}
1275-
12761283
MBSTRG(http_output_encoding) = encoding;
12771284
MBSTRG(current_http_output_encoding) = encoding;
12781285
return SUCCESS;
@@ -1285,47 +1292,17 @@ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_v
12851292
const mbfl_encoding *encoding;
12861293

12871294
if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
1288-
switch (MBSTRG(language)) {
1289-
case mbfl_no_language_uni:
1290-
encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1291-
break;
1292-
case mbfl_no_language_japanese:
1293-
encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp);
1294-
break;
1295-
case mbfl_no_language_korean:
1296-
encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr);
1297-
break;
1298-
case mbfl_no_language_simplified_chinese:
1299-
encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn);
1300-
break;
1301-
case mbfl_no_language_traditional_chinese:
1302-
encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw);
1303-
break;
1304-
case mbfl_no_language_russian:
1305-
encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r);
1306-
break;
1307-
case mbfl_no_language_german:
1308-
encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15);
1309-
break;
1310-
case mbfl_no_language_armenian:
1311-
encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8);
1312-
break;
1313-
case mbfl_no_language_turkish:
1314-
encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9);
1315-
break;
1316-
default:
1317-
encoding = mbfl_no2encoding(mbfl_no_encoding_8859_1);
1318-
break;
1319-
}
1320-
}
1295+
/* falls back to UTF-8 if an unknown encoding name is given */
1296+
encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1297+
}
13211298
MBSTRG(internal_encoding) = encoding;
13221299
MBSTRG(current_internal_encoding) = encoding;
13231300
#if HAVE_MBREGEX
13241301
{
13251302
const char *enc_name = new_value;
13261303
if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
1327-
/* falls back to EUC-JP if an unknown encoding name is given */
1328-
enc_name = "EUC-JP";
1304+
/* falls back to UTF-8 if an unknown encoding name is given */
1305+
enc_name = "UTF-8";
13291306
php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
13301307
}
13311308
php_mb_regex_set_mbctype(new_value TSRMLS_CC);
@@ -1343,7 +1320,11 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
13431320
}
13441321
if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
13451322
|| stage == PHP_INI_STAGE_RUNTIME) {
1346-
return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1323+
if (new_value_length) {
1324+
return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1325+
} else {
1326+
return _php_mb_ini_mbstring_internal_encoding_set(PG(internal_encoding), strlen(PG(internal_encoding))+1 TSRMLS_CC);
1327+
}
13471328
} else {
13481329
/* the corresponding mbstring globals need to be set according to the
13491330
* ini value in the later stage because it never falls back to the
@@ -1450,8 +1431,8 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
14501431
PHP_INI_BEGIN()
14511432
PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
14521433
PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
1453-
PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
1454-
PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
1434+
PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
1435+
PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
14551436
STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
14561437
PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
14571438
STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
@@ -2162,8 +2143,10 @@ PHP_FUNCTION(mb_output_handler)
21622143

21632144
/* feed the string */
21642145
mbfl_string_init(&string);
2165-
string.no_language = MBSTRG(language);
2166-
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2146+
/* these are not needed. convd has encoding info.
2147+
string.no_language = MBSTRG(language);
2148+
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2149+
*/
21672150
string.val = (unsigned char *)arg_string;
21682151
string.len = arg_string_len;
21692152
mbfl_buffer_converter_feed(MBSTRG(outconv), &string);

0 commit comments

Comments
 (0)