Skip to content

Commit 503779e

Browse files
committed
- [DOC] add IDN support, idn_to_ascii and idn_to_utf8
tests and MFB will follow Wednesday
1 parent 4f3a4e5 commit 503779e

File tree

4 files changed

+205
-3
lines changed

4 files changed

+205
-3
lines changed

ext/intl/config.m4

+3-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ if test "$PHP_INTL" != "no"; then
5151
msgformat/msgformat_helpers.cpp \
5252
msgformat/msgformat_parse.c \
5353
grapheme/grapheme_string.c \
54-
grapheme/grapheme_util.c,$ext_shared,,$ICU_INCS)
54+
grapheme/grapheme_util.c \
55+
idn/idn.c, $ext_shared,,$ICU_INCS)
5556

5657
PHP_ADD_BUILD_DIR($ext_builddir/collator)
5758
PHP_ADD_BUILD_DIR($ext_builddir/common)
@@ -61,4 +62,5 @@ if test "$PHP_INTL" != "no"; then
6162
PHP_ADD_BUILD_DIR($ext_builddir/locale)
6263
PHP_ADD_BUILD_DIR($ext_builddir/msgformat)
6364
PHP_ADD_BUILD_DIR($ext_builddir/grapheme)
65+
PHP_ADD_BUILD_DIR($ext_builddir/idn)
6466
fi

ext/intl/idn/idn.c

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/*
2+
+----------------------------------------------------------------------+
3+
| PHP Version 5 |
4+
+----------------------------------------------------------------------+
5+
| Copyright (c) 2009 The PHP Group |
6+
+----------------------------------------------------------------------+
7+
| This source file is subject to version 3.01 of the PHP license, |
8+
| that is bundled with this package in the file LICENSE, and is |
9+
| available through the world-wide-web at the following url: |
10+
| http://www.php.net/license/3_01.txt |
11+
| If you did not receive a copy of the PHP license and are unable to |
12+
| obtain it through the world-wide-web, please send a note to |
13+
| [email protected] so we can mail you a copy immediately. |
14+
+----------------------------------------------------------------------+
15+
| Author: Pierre A. Joye <[email protected]> |
16+
+----------------------------------------------------------------------+
17+
*/
18+
/* $Id$ */
19+
20+
/* {{{ includes */
21+
#ifdef HAVE_CONFIG_H
22+
#include "config.h"
23+
#endif
24+
25+
#include <php.h>
26+
27+
#include <unicode/uidna.h>
28+
#include <unicode/ustring.h>
29+
#include "ext/standard/php_string.h"
30+
31+
#include "intl_error.h"
32+
#include "intl_convert.h"
33+
/* }}} */
34+
35+
/* {{{ idn_register_constants
36+
* Register the IDNA_* option constants for PHP scripts; each one mirrors the ICU UIDNA_* value of the same name.
37+
*/
38+
void idn_register_constants( INIT_FUNC_ARGS )
39+
{
40+
/* Default option: prohibit processing of unassigned codepoints in the input and
41+
do not check whether the input conforms to STD-3 ASCII rules. */
42+
REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
43+
44+
/* Option to allow processing of unassigned codepoints in the input. */
45+
REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
46+
47+
/* Option to check whether the input conforms to STD-3 ASCII rules. */
48+
REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
49+
}
50+
/* }}} */
51+
52+
/* Conversion direction passed as `mode` to php_intl_idn_to(). */
enum {
53+
INTL_IDN_TO_ASCII = 0, /* convert with uidna_IDNToASCII() */
54+
INTL_IDN_TO_UTF8 /* convert with uidna_IDNToUnicode() */
55+
};
56+
57+
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
58+
{
59+
unsigned char* domain;
60+
int domain_len;
61+
long option = 0;
62+
UChar* ustring = NULL;
63+
int ustring_len = 0;
64+
UErrorCode status;
65+
char *converted_utf8;
66+
int32_t converted_utf8_len;
67+
UChar converted[MAXPATHLEN];
68+
int32_t converted_ret_len;
69+
70+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ll", (char **)&domain, &domain_len, &option, &status) == FAILURE) {
71+
return;
72+
}
73+
74+
if (domain_len < 1) {
75+
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "idn_to_ascii: empty domain name", 0 TSRMLS_CC );
76+
RETURN_FALSE;
77+
}
78+
79+
/* convert the string to UTF-16. */
80+
status = U_ZERO_ERROR;
81+
intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, domain_len, &status );
82+
83+
if (U_FAILURE(status)) {
84+
intl_error_set_code(NULL, status TSRMLS_CC);
85+
86+
/* Set error messages. */
87+
intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 1 TSRMLS_CC );
88+
efree(ustring);
89+
RETURN_FALSE;
90+
} else {
91+
UParseError parse_error;
92+
93+
status = U_ZERO_ERROR;
94+
if (mode == INTL_IDN_TO_ASCII) {
95+
converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
96+
} else {
97+
converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
98+
}
99+
efree(ustring);
100+
101+
if (U_FAILURE(status)) {
102+
intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 TSRMLS_CC );
103+
RETURN_FALSE;
104+
}
105+
106+
status = U_ZERO_ERROR;
107+
intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);
108+
109+
if (U_FAILURE(status)) {
110+
/* Set global error code. */
111+
intl_error_set_code(NULL, status TSRMLS_CC);
112+
113+
/* Set error messages. */
114+
intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 1 TSRMLS_CC );
115+
efree(converted_utf8);
116+
RETURN_FALSE;
117+
}
118+
}
119+
120+
/* return the allocated string, not a duplicate */
121+
RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
122+
}
123+
124+
/* {{{ proto string idn_to_ascii(string domain [, int option [, int status]])
125+
Converts a UTF-8 domain to ASCII, as defined in the IDNA RFC. Returns the converted string, or FALSE on failure */
126+
PHP_FUNCTION(idn_to_ascii)
127+
{
128+
php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
129+
}
130+
/* }}} */
131+
132+
133+
/* {{{ proto string idn_to_utf8(string domain [, int option [, int status]])
134+
Converts an IDNA ASCII domain to its Unicode form, encoded as UTF-8. Returns the converted string, or FALSE on failure */
135+
PHP_FUNCTION(idn_to_utf8)
136+
{
137+
php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
138+
}
139+
/* }}} */
140+
141+
142+
/*
143+
* Local variables:
144+
* tab-width: 4
145+
* c-basic-offset: 4
146+
* End:
147+
* vim600: fdm=marker
148+
* vim: noet sw=4 ts=4
149+
*/

ext/intl/idn/idn.h

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
+----------------------------------------------------------------------+
3+
| PHP Version 5 |
4+
+----------------------------------------------------------------------+
5+
| Copyright (c) 2009 The PHP Group |
6+
+----------------------------------------------------------------------+
7+
| This source file is subject to version 3.01 of the PHP license, |
8+
| that is bundled with this package in the file LICENSE, and is |
9+
| available through the world-wide-web at the following url: |
10+
| http://www.php.net/license/3_01.txt |
11+
| If you did not receive a copy of the PHP license and are unable to |
12+
| obtain it through the world-wide-web, please send a note to |
13+
| [email protected] so we can mail you a copy immediately. |
14+
+----------------------------------------------------------------------+
15+
| Author: Pierre A. Joye <[email protected]> |
16+
+----------------------------------------------------------------------+
17+
*/
18+
/* $Id$ */
19+
20+
#ifndef IDN_IDN_H
21+
#define IDN_IDN_H
22+
23+
#include <php.h>
24+
25+
PHP_FUNCTION(idn_to_ascii);
26+
PHP_FUNCTION(idn_to_utf8);
27+
28+
void idn_register_constants(INIT_FUNC_ARGS);
29+
30+
#endif /* IDN_IDN_H */

ext/intl/php_intl.c

+23-2
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@
6262
#include "dateformat/dateformat_parse.h"
6363
#include "dateformat/dateformat_data.h"
6464

65+
#include "idn/idn.h"
66+
6567
#include "msgformat/msgformat.h"
6668
#include "common/common_error.h"
6769

@@ -316,6 +318,18 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_datefmt_create, 0, 0, 3)
316318
ZEND_ARG_INFO(0, calendar)
317319
ZEND_ARG_INFO(0, pattern)
318320
ZEND_END_ARG_INFO()
321+
322+
/* Argument info for idn_to_ascii(): domain, then option and status (the implementation parses them as "s|ll", i.e. the last two are optional). */
ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_ascii, 0, 0, 1)
323+
ZEND_ARG_INFO(0, domain)
324+
ZEND_ARG_INFO(0, option)
325+
ZEND_ARG_INFO(0, status)
326+
ZEND_END_ARG_INFO()
327+
328+
/* Argument info for idn_to_utf8(): same argument list as arginfo_idn_to_ascii.
   NOTE(review): the intl_functions table registers idn_to_utf8 with arginfo_idn_to_ascii,
   so this definition looks unused - confirm and point that entry here. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_utf8, 0, 0, 1)
329+
ZEND_ARG_INFO(0, domain)
330+
ZEND_ARG_INFO(0, option)
331+
ZEND_ARG_INFO(0, status)
332+
ZEND_END_ARG_INFO()
319333
/* }}} */
320334

321335
/* {{{ intl_functions
@@ -422,6 +436,10 @@ zend_function_entry intl_functions[] = {
422436
PHP_FE( grapheme_stristr, grapheme_strstr_args )
423437
PHP_FE( grapheme_extract, grapheme_extract_args )
424438

439+
/* IDN functions */
440+
PHP_FE(idn_to_ascii, arginfo_idn_to_ascii)
441+
PHP_FE(idn_to_utf8, arginfo_idn_to_ascii)
442+
425443
/* common functions */
426444
PHP_FE( intl_get_error_code, intl_0_args )
427445
PHP_FE( intl_get_error_message, intl_0_args )
@@ -521,12 +539,15 @@ PHP_MINIT_FUNCTION( intl )
521539
/* Expose ICU error codes to PHP scripts. */
522540
intl_expose_icu_error_codes( INIT_FUNC_ARGS_PASSTHRU );
523541

542+
/* Expose IDN constants to PHP scripts. */
543+
idn_register_constants(INIT_FUNC_ARGS_PASSTHRU);
544+
524545
/* Global error handling. */
525546
intl_error_init( NULL TSRMLS_CC );
526547

527548
/* Set the default_locale value */
528-
if( INTL_G(default_locale) == NULL ) {
529-
INTL_G(default_locale) = pestrdup(uloc_getDefault(), 1) ;
549+
if( INTL_G(default_locale) == NULL ) {
550+
INTL_G(default_locale) = pestrdup(uloc_getDefault(), 1) ;
530551
}
531552

532553
return SUCCESS;

0 commit comments

Comments
 (0)