Skip to content

Commit eb670d0

Browse files
bug #358 Update to Unicode 13 and other fixes (nicolas-grekas)
This PR was squashed before being merged into the 1.23-dev branch.

Discussion
----------

Update to Unicode 13 and other fixes

Commits
-------

fe2c571 Update to Unicode 13
34bcae6 Various fixes
2 parents c5fd6cb + fe2c571 commit eb670d0

File tree

15 files changed

+997
-159
lines changed

15 files changed

+997
-159
lines changed

CHANGELOG.md

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
1-
# 1.23.1
2-
3-
* Fix `IntlDateFormatter::format()` on 32b platforms
4-
51
# 1.23.0
62

73
* added `CURLStringFile` to the PHP 8.1 polyfill
84
* added `enum_exists()` to the PHP 8.1 polyfill
95
* added `MYSQLI_REFRESH_REPLICA` constant to the PHP 8.1 polyfill
6+
* added `ReturnTypeWillChange` to the PHP 8.1 polyfill
7+
* updated Unicode maps to v13
108
* removed `INTL_IDNA_VARIANT_2003` on PHP 8
119
* fixed return value of `mb_parse_str()`
10+
* fixed `IntlDateFormatter::format()` on 32-bit platforms
11+
* fixed `mb_chr()` in the PHP 7.2 polyfill
12+
* fixed `mb_strrchr()`, `mb_detect_order()`, `iconv_substr()`, `grapheme_substr()` on PHP 8
1213

1314
# 1.22.1
1415

src/Iconv/Resources/charset/translit.php

+4-1
Original file line numberDiff line numberDiff line change
@@ -3995,8 +3995,8 @@
39953995
'' => 'Fr.',
39963996
'' => 'L.',
39973997
'' => 'Pts',
3998-
'' => 'TL',
39993998
'' => 'Rs',
3999+
'' => 'TL',
40004000
'' => '(P)',
40014001
'' => 'P',
40024002
'' => 'Rx',
@@ -4043,6 +4043,8 @@
40434043
'' => '-',
40444044
'' => '-',
40454045
'' => '-',
4046+
'¡' => '!',
4047+
'¿' => '?',
40464048
'˂' => '<',
40474049
'˃' => '>',
40484050
'˄' => '^',
@@ -4087,6 +4089,7 @@
40874089
'' => '>',
40884090
'' => '[',
40894091
'' => ']',
4092+
'±' => '+/-',
40904093
'×' => '*',
40914094
'÷' => '/',
40924095
'˖' => '+',

src/Iconv/bootstrap80.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ function iconv_strpos(?string $haystack, ?string $needle, ?int $offset = 0, ?str
5151
function iconv_strrpos(?string $haystack, ?string $needle, ?string $encoding = null): int|false { null === $encoding && $encoding = p\Iconv::$internalEncoding; return mb_strrpos((string) $haystack, (string) $needle, 0, $encoding); }
5252
}
5353
if (!function_exists('iconv_substr')) {
54-
function iconv_substr(?string $string, ?int $offset, ?int $length = null, ?string $encoding = null): string|false { null === $encoding && $encoding = p\Iconv::$internalEncoding; return mb_substr((string) $string, (int) $offset, (int) $length, $encoding); }
54+
function iconv_substr(?string $string, ?int $offset, ?int $length = null, ?string $encoding = null): string|false { null === $encoding && $encoding = p\Iconv::$internalEncoding; return mb_substr((string) $string, (int) $offset, $length, $encoding); }
5555
}
5656
if (!function_exists('iconv_mime_decode')) {
5757
function iconv_mime_decode($string, $mode = 0, $encoding = null) { null === $encoding && $encoding = p\Iconv::$internalEncoding; return mb_decode_mimeheader($string, $mode, $encoding); }
@@ -72,7 +72,7 @@ function iconv_strpos(?string $haystack, ?string $needle, ?int $offset = 0, ?str
7272
function iconv_strrpos(?string $haystack, ?string $needle, ?string $encoding = null): int|false { return p\Iconv::iconv_strrpos((string) $haystack, (string) $needle, $encoding); }
7373
}
7474
if (!function_exists('iconv_substr')) {
75-
function iconv_substr(?string $string, ?int $offset, ?int $length = null, ?string $encoding = null): string|false { return p\Iconv::iconv_substr((string) $string, (string) $offset, (int) $length, $encoding); }
75+
// Polyfill-backed iconv_substr(): forwards to p\Iconv::iconv_substr().
// $string is normalised to a string and $offset to an int (null becomes 0), matching the
// mbstring-backed variant of this wrapper; $length and $encoding are passed through
// unchanged so that null keeps meaning "not provided".
function iconv_substr(?string $string, ?int $offset, ?int $length = null, ?string $encoding = null): string|false { return p\Iconv::iconv_substr((string) $string, (int) $offset, $length, $encoding); }
7676
}
7777
if (!function_exists('iconv_mime_decode')) {
7878
function iconv_mime_decode(?string $string, ?int $mode = 0, ?string $encoding = null): string|false { return p\Iconv::iconv_mime_decode((string) $string, (int) $mode, $encoding); }

src/Intl/Grapheme/bootstrap80.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,5 @@ function grapheme_strrpos(?string $haystack, ?string $needle, ?int $offset = 0):
4646
function grapheme_strstr(?string $haystack, ?string $needle, ?bool $beforeNeedle = false): string|false { return p\Grapheme::grapheme_strstr((string) $haystack, (string) $needle, (bool) $beforeNeedle); }
4747
}
4848
if (!function_exists('grapheme_substr')) {
49-
function grapheme_substr(?string $string, ?int $offset, ?int $length = null): string|false { return p\Grapheme::grapheme_substr((string) $string, (int) $offset, (int) $length); }
49+
function grapheme_substr(?string $string, ?int $offset, ?int $length = null): string|false { return p\Grapheme::grapheme_substr((string) $string, (int) $offset, $length); }
5050
}

src/Intl/Idn/Resources/unidata/Regex.php

+11-11
Large diffs are not rendered by default.

src/Mbstring/Resources/unidata/lowerCase.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@
8181
'Ī' => 'ī',
8282
'Ĭ' => 'ĭ',
8383
'Į' => 'į',
84-
'İ' => 'i',
84+
'İ' => '',
8585
'IJ' => 'ij',
8686
'Ĵ' => 'ĵ',
8787
'Ķ' => 'ķ',

src/Mbstring/Resources/unidata/upperCase.php

+102-27
Original file line numberDiff line numberDiff line change
@@ -746,41 +746,41 @@
746746
'' => '',
747747
'' => '',
748748
'' => '',
749-
'' => '',
750-
'' => '',
751-
'' => '',
752-
'' => '',
753-
'' => '',
754-
'' => '',
755-
'' => '',
756-
'' => '',
757-
'' => '',
758-
'' => '',
759-
'' => '',
760-
'' => '',
761-
'' => '',
762-
'' => '',
763-
'' => '',
764-
'' => '',
765-
'' => '',
766-
'' => '',
767-
'' => '',
768-
'' => '',
769-
'' => '',
770-
'' => '',
771-
'' => '',
772-
'' => '',
749+
'' => 'ἈΙ',
750+
'' => 'ἉΙ',
751+
'' => 'ἊΙ',
752+
'' => 'ἋΙ',
753+
'' => 'ἌΙ',
754+
'' => 'ἍΙ',
755+
'' => 'ἎΙ',
756+
'' => 'ἏΙ',
757+
'' => 'ἨΙ',
758+
'' => 'ἩΙ',
759+
'' => 'ἪΙ',
760+
'' => 'ἫΙ',
761+
'' => 'ἬΙ',
762+
'' => 'ἭΙ',
763+
'' => 'ἮΙ',
764+
'' => 'ἯΙ',
765+
'' => 'ὨΙ',
766+
'' => 'ὩΙ',
767+
'' => 'ὪΙ',
768+
'' => 'ὫΙ',
769+
'' => 'ὬΙ',
770+
'' => 'ὭΙ',
771+
'' => 'ὮΙ',
772+
'' => 'ὯΙ',
773773
'' => '',
774774
'' => '',
775-
'' => '',
775+
'' => 'ΑΙ',
776776
'' => 'Ι',
777-
'' => '',
777+
'' => 'ΗΙ',
778778
'' => '',
779779
'' => '',
780780
'' => '',
781781
'' => '',
782782
'' => '',
783-
'' => '',
783+
'' => 'ΩΙ',
784784
'' => '',
785785
'' => '',
786786
'' => '',
@@ -1411,4 +1411,79 @@
14111411
'𞥁' => '𞤟',
14121412
'𞥂' => '𞤠',
14131413
'𞥃' => '𞤡',
1414+
'ß' => 'SS',
1415+
'' => 'FF',
1416+
'' => 'FI',
1417+
'' => 'FL',
1418+
'' => 'FFI',
1419+
'' => 'FFL',
1420+
'' => 'ST',
1421+
'' => 'ST',
1422+
'և' => 'ԵՒ',
1423+
'' => 'ՄՆ',
1424+
'' => 'ՄԵ',
1425+
'' => 'ՄԻ',
1426+
'' => 'ՎՆ',
1427+
'' => 'ՄԽ',
1428+
'ʼn' => 'ʼN',
1429+
'ΐ' => 'Ϊ́',
1430+
'ΰ' => 'Ϋ́',
1431+
'ǰ' => '',
1432+
'' => '',
1433+
'' => '',
1434+
'' => '',
1435+
'' => '',
1436+
'' => '',
1437+
'' => 'Υ̓',
1438+
'' => 'Υ̓̀',
1439+
'' => 'Υ̓́',
1440+
'' => 'Υ̓͂',
1441+
'' => 'Α͂',
1442+
'' => 'Η͂',
1443+
'' => 'Ϊ̀',
1444+
'' => 'Ϊ́',
1445+
'' => 'Ι͂',
1446+
'' => 'Ϊ͂',
1447+
'' => 'Ϋ̀',
1448+
'' => 'Ϋ́',
1449+
'' => 'Ρ̓',
1450+
'' => 'Υ͂',
1451+
'' => 'Ϋ͂',
1452+
'' => 'Ω͂',
1453+
'' => 'ἈΙ',
1454+
'' => 'ἉΙ',
1455+
'' => 'ἊΙ',
1456+
'' => 'ἋΙ',
1457+
'' => 'ἌΙ',
1458+
'' => 'ἍΙ',
1459+
'' => 'ἎΙ',
1460+
'' => 'ἏΙ',
1461+
'' => 'ἨΙ',
1462+
'' => 'ἩΙ',
1463+
'' => 'ἪΙ',
1464+
'' => 'ἫΙ',
1465+
'' => 'ἬΙ',
1466+
'' => 'ἭΙ',
1467+
'' => 'ἮΙ',
1468+
'' => 'ἯΙ',
1469+
'' => 'ὨΙ',
1470+
'' => 'ὩΙ',
1471+
'' => 'ὪΙ',
1472+
'' => 'ὫΙ',
1473+
'' => 'ὬΙ',
1474+
'' => 'ὭΙ',
1475+
'' => 'ὮΙ',
1476+
'' => 'ὯΙ',
1477+
'' => 'ΑΙ',
1478+
'' => 'ΗΙ',
1479+
'' => 'ΩΙ',
1480+
'' => 'ᾺΙ',
1481+
'' => 'ΆΙ',
1482+
'' => 'ῊΙ',
1483+
'' => 'ΉΙ',
1484+
'' => 'ῺΙ',
1485+
'' => 'ΏΙ',
1486+
'' => 'Α͂Ι',
1487+
'' => 'Η͂Ι',
1488+
'' => 'Ω͂Ι',
14141489
);

src/Mbstring/bootstrap80.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ function mb_check_encoding(array|string|null $value = null, ?string $encoding =
4848
function mb_detect_encoding(?string $string, array|string|null $encodings = null, ?bool $strict = false): string|false { return p\Mbstring::mb_detect_encoding((string) $string, $encodings, (bool) $strict); }
4949
}
5050
if (!function_exists('mb_detect_order')) {
51-
function mb_detect_order(array|string|null $encoding = null): array|bool { return p\Mbstring::mb_detect_order((string) $encoding); }
51+
function mb_detect_order(array|string|null $encoding = null): array|bool { return p\Mbstring::mb_detect_order($encoding); }
5252
}
5353
if (!function_exists('mb_parse_str')) {
5454
function mb_parse_str(?string $string, &$result = []): bool { parse_str((string) $string, $result); return (bool) $result; }
@@ -78,7 +78,7 @@ function mb_stripos(?string $haystack, ?string $needle, ?int $offset = 0, ?strin
7878
function mb_stristr(?string $haystack, ?string $needle, ?bool $before_needle = false, ?string $encoding = null): string|false { return p\Mbstring::mb_stristr((string) $haystack, (string) $needle, (bool) $before_needle, $encoding); }
7979
}
8080
if (!function_exists('mb_strrchr')) {
81-
function mb_strrchr(?string $haystack, ?string $needle, ?bool $before_needle = false, ?string $encoding = null): string|false { return p\Mbstring::mb_strrchr((string) $haystack, (string) $needle, $before_needle, (bool) $encoding); }
81+
function mb_strrchr(?string $haystack, ?string $needle, ?bool $before_needle = false, ?string $encoding = null): string|false { return p\Mbstring::mb_strrchr((string) $haystack, (string) $needle, (bool) $before_needle, $encoding); }
8282
}
8383
if (!function_exists('mb_strrichr')) {
8484
function mb_strrichr(?string $haystack, ?string $needle, ?bool $before_needle = false, ?string $encoding = null): string|false { return p\Mbstring::mb_strrichr((string) $haystack, (string) $needle, (bool) $before_needle, $encoding); }

src/Php72/Php72.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ public static function mb_chr($code, $encoding = null)
182182
$s = \chr(0xF0 | $code >> 18).\chr(0x80 | $code >> 12 & 0x3F).\chr(0x80 | $code >> 6 & 0x3F).\chr(0x80 | $code & 0x3F);
183183
}
184184

185-
if ('UTF-8' !== $encoding) {
185+
if ('UTF-8' !== $encoding = $encoding ?? mb_internal_encoding()) {
186186
$s = mb_convert_encoding($s, $encoding, 'UTF-8');
187187
}
188188

tests/Compiler.php

+19
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,25 @@ public static function unicodeMaps($out_dir)
139139
}
140140
fclose($h);
141141

142+
$h = fopen(self::getFile('SpecialCasing.txt'), 'r');
143+
while (false !== $m = fgets($h)) {
144+
if ('#' === $m[0] || 5 !== \count($m = explode('; ', $m))) {
145+
continue;
146+
}
147+
148+
$k = self::chr(hexdec($m[0]));
149+
$lower = implode('', array_map([__CLASS__, 'chr'], array_map('hexdec', explode(' ', $m[1]))));
150+
$upper = implode('', array_map([__CLASS__, 'chr'], array_map('hexdec', explode(' ', $m[3]))));
151+
152+
if ($lower !== $k) {
153+
$lowerCase[$k] = $lower;
154+
}
155+
if ($upper !== $k) {
156+
$upperCase[$k] = $upper;
157+
}
158+
}
159+
fclose($h);
160+
142161
do {
143162
$m = 0;
144163

0 commit comments

Comments
 (0)