Skip to content

Commit f9a435a

Browse files
committed
Merge branch 'pull-request/1094'
* pull-request/1094:
  added php_mb_check_code_point for mb_substitute_character
  news entry for PR #1094
2 parents a793b79 + c28a6f4 commit f9a435a

File tree

3 files changed

+90
-3
lines changed

3 files changed

+90
-3
lines changed

NEWS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ PHP NEWS
6262
handles multibyte replacement char nicely). (Masakielastic, Yasuo)
6363
. Added array input support to mb_convert_encoding(). (Yasuo)
6464
. Added array input support to mb_check_encoding(). (Yasuo)
65+
. Fixed bug #69079 (enhancement for mb_substitute_character). (masakielastic)
6566

6667
- Mcrypt:
6768
. The deprecated mcrypt extension has been moved to PECL. (leigh)

ext/mbstring/mbstring.c

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis
104104

105105
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
106106

107+
static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
108+
109+
static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc);
110+
111+
static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
107112
/* }}} */
108113

109114
/* {{{ php_mb_default_identify_list */
@@ -1992,6 +1997,73 @@ PHP_FUNCTION(mb_detect_order)
19921997
}
19931998
/* }}} */
19941999

2000+
static inline int php_mb_check_code_point(long cp)
2001+
{
2002+
enum mbfl_no_encoding no_enc;
2003+
char* buf;
2004+
char buf_len;
2005+
2006+
no_enc = MBSTRG(current_internal_encoding)->no_encoding;
2007+
2008+
if (php_mb_is_no_encoding_utf8(no_enc)) {
2009+
2010+
if ((cp > 0 && 0xd800 > cp) || (cp > 0xdfff && 0x110000 > cp)) {
2011+
return 1;
2012+
}
2013+
2014+
return 0;
2015+
} else if (php_mb_is_no_encoding_unicode(no_enc)) {
2016+
2017+
if (0 > cp || cp > 0x10ffff) {
2018+
return 0;
2019+
}
2020+
2021+
return 1;
2022+
2023+
// backward compatibility
2024+
} else if (php_mb_is_unsupported_no_encoding(no_enc)) {
2025+
return cp < 0xffff && cp > 0x0;
2026+
}
2027+
2028+
if (cp < 0x100) {
2029+
buf_len = 1;
2030+
buf = (char *) safe_emalloc(buf_len, 1, 1);
2031+
buf[0] = cp;
2032+
buf[1] = 0;
2033+
} else if (cp < 0x10000) {
2034+
buf_len = 2;
2035+
buf = (char *) safe_emalloc(buf_len, 1, 1);
2036+
buf[0] = cp >> 8;
2037+
buf[1] = cp & 0xff;
2038+
buf[2] = 0;
2039+
} else if (cp < 0x1000000) {
2040+
buf_len = 3;
2041+
buf = (char *) safe_emalloc(buf_len, 1, 1);
2042+
buf[0] = cp >> 16;
2043+
buf[1] = (cp >> 8) & 0xff;
2044+
buf[2] = cp & 0xff;
2045+
buf[3] = 0;
2046+
} else {
2047+
buf_len = 4;
2048+
buf = (char *) safe_emalloc(buf_len, 1, 1);
2049+
buf[0] = cp >> 24;
2050+
buf[1] = (cp >> 16) & 0xff;
2051+
buf[2] = (cp >> 8) & 0xff;
2052+
buf[3] = cp & 0xff;
2053+
buf[4] = 0;
2054+
}
2055+
2056+
if (php_mb_check_encoding(buf, buf_len, NULL)) {
2057+
efree(buf);
2058+
2059+
return 1;
2060+
}
2061+
2062+
efree(buf);
2063+
2064+
return 0;
2065+
}
2066+
19952067
/* {{{ proto mixed mb_substitute_character([mixed substchar])
19962068
Sets the current substitute_character or returns the current substitute_character */
19972069
PHP_FUNCTION(mb_substitute_character)
@@ -2026,7 +2098,7 @@ PHP_FUNCTION(mb_substitute_character)
20262098
} else {
20272099
convert_to_long_ex(arg1);
20282100

2029-
if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2101+
if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
20302102
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
20312103
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
20322104
} else {
@@ -2037,7 +2109,7 @@ PHP_FUNCTION(mb_substitute_character)
20372109
break;
20382110
default:
20392111
convert_to_long_ex(arg1);
2040-
if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
2112+
if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
20412113
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
20422114
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
20432115
} else {
@@ -3124,7 +3196,7 @@ PHP_FUNCTION(mb_strimwidth)
31243196
if (from < 0) {
31253197
from += swidth;
31263198
}
3127-
3199+
31283200
if (from < 0 || (size_t)from > str_len) {
31293201
php_error_docref(NULL, E_WARNING, "Start position is out of range");
31303202
RETURN_FALSE;

ext/mbstring/tests/bug69079.phpt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
--TEST--
2+
Bug #69079 (enhancement for mb_substitute_character)
3+
--SKIPIF--
4+
<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
5+
--FILE--
6+
<?php
// Regression script for bug #69079: both calls are expected to print
// bool(true) (see the --EXPECT-- section).
7+
mb_internal_encoding('UTF-8');
8+
// 0x1f600 is above the 0xffff limit that the previous inline range check enforced.
var_dump(mb_substitute_character(0x1f600));
9+
mb_internal_encoding('EUC-JP-2004');
10+
// 0x8fa1ef is passed as an integer holding a multi-byte sequence
// (presumably the EUC-JP-2004 bytes 8F A1 EF — TODO confirm against the encoding table).
var_dump(mb_substitute_character(0x8fa1ef));
11+
?>
12+
--EXPECT--
13+
bool(true)
14+
bool(true)

0 commit comments

Comments
 (0)