Skip to content

Commit c350069

Browse files
committed
urlapi: CURLU_PUNY2IDN - convert from punycode to IDN name
Assisted-by: Jay Satiro Closes #11655
1 parent 0efe8b2 commit c350069

File tree

7 files changed

+93
-1
lines changed

7 files changed

+93
-1
lines changed

docs/libcurl/curl_url_get.3

+10
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,16 @@ If libcurl is built without IDN capabilities, using this bit will make
9191
anything outside the ASCII range.
9292

9393
(Added in curl 7.88.0)
94+
.IP CURLU_PUNY2IDN
95+
If set and asked to retrieve the \fBCURLUPART_HOST\fP or \fBCURLUPART_URL\fP
96+
parts, libcurl returns the host name in its IDN (International Domain Name)
97+
UTF-8 version if it otherwise is a punycode version.
98+
99+
If libcurl is built without IDN capabilities, using this bit will make
100+
\fIcurl_url_get(3)\fP return \fICURLUE_LACKS_IDN\fP if the host name is using
101+
punycode.
102+
103+
(Added in curl 8.3.0)
94104
.SH PARTS
95105
.IP CURLUPART_URL
96106
When asked to return the full URL, \fIcurl_url_get(3)\fP will return a

docs/libcurl/symbols-in-versions

+1
Original file line numberDiff line numberDiff line change
@@ -1063,6 +1063,7 @@ CURLU_NO_AUTHORITY 7.67.0
10631063
CURLU_NO_DEFAULT_PORT 7.62.0
10641064
CURLU_NON_SUPPORT_SCHEME 7.62.0
10651065
CURLU_PATH_AS_IS 7.62.0
1066+
CURLU_PUNY2IDN 8.3.0
10661067
CURLU_PUNYCODE 7.88.0
10671068
CURLU_URLDECODE 7.62.0
10681069
CURLU_URLENCODE 7.62.0

include/curl/urlapi.h

+1
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ typedef enum {
9797
scheme is unknown. */
9898
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
9999
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
100+
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */
100101

101102
typedef struct Curl_URL CURLU;
102103

lib/idn.c

+49-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out)
7575
wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
7676
if(in_w) {
7777
wchar_t punycode[IDN_MAX_LENGTH];
78-
int chars = IdnToAscii(0, in_w, -1, punycode, IDN_MAX_LENGTH);
78+
int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
79+
IDN_MAX_LENGTH);
7980
curlx_unicodefree(in_w);
8081
if(chars) {
8182
char *mstr = curlx_convert_wchar_to_UTF8(punycode);
@@ -91,6 +92,27 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out)
9192
return success;
9293
}
9394

95+
char *Curl_win32_ascii_to_idn(const char *in)
96+
{
97+
char *out = NULL;
98+
99+
wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
100+
if(in_w) {
101+
WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
102+
int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
103+
IDN_MAX_LENGTH);
104+
if(chars) {
105+
/* 'chars' is "the number of characters retrieved" */
106+
char *mstr = curlx_convert_wchar_to_UTF8(idn);
107+
if(mstr) {
108+
out = strdup(mstr);
109+
curlx_unicodefree(mstr);
110+
}
111+
}
112+
}
113+
return out;
114+
}
115+
94116
#endif /* USE_WIN32_IDN */
95117

96118
/*
@@ -144,6 +166,19 @@ static char *idn_decode(const char *input)
144166
return decoded;
145167
}
146168

169+
/*
 * idn_encode() converts the punycode host name 'puny' using whichever IDN
 * backend this build was compiled with.
 *
 * With libidn2 the result comes from idn2_to_unicode_8z8z(); with the Windows
 * backend it comes from Curl_win32_ascii_to_idn(). Returns NULL when the
 * conversion fails or when neither backend is compiled in.
 */
static char *idn_encode(const char *puny)
{
#ifdef USE_LIBIDN2
  char *unicode = NULL;
  if(idn2_to_unicode_8z8z(puny, &unicode, 0) == IDNA_SUCCESS)
    return unicode;
  /* any non-success return code is reported as a plain NULL */
  return NULL;
#elif defined(USE_WIN32_IDN)
  return Curl_win32_ascii_to_idn(puny);
#else
  return NULL;
#endif
}
181+
147182
char *Curl_idn_decode(const char *input)
148183
{
149184
char *d = idn_decode(input);
@@ -157,6 +192,19 @@ char *Curl_idn_decode(const char *input)
157192
return d;
158193
}
159194

195+
/*
 * Curl_idn_encode() converts the punycode host name 'puny' via idn_encode()
 * and returns the result, or NULL on failure.
 *
 * In libidn2 builds the string from idn_encode() is released with
 * idn2_free(), so a strdup() copy is returned in its place. In other builds
 * the idn_encode() result is returned unchanged.
 */
char *Curl_idn_encode(const char *puny)
{
#ifdef USE_LIBIDN2
  char *fromlib = idn_encode(puny);
  char *copy;

  if(!fromlib)
    return NULL;

  /* duplicate first, then release the libidn2 buffer; a failed strdup()
     simply yields NULL for the caller */
  copy = strdup(fromlib);
  idn2_free(fromlib);
  return copy;
#else
  return idn_encode(puny);
#endif
}
207+
160208
/*
161209
* Frees data allocated by idnconvert_hostname()
162210
*/

lib/idn.h

+2
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,15 @@
2626

2727
#ifdef USE_WIN32_IDN
2828
bool Curl_win32_idn_to_ascii(const char *in, char **out);
29+
char *Curl_win32_ascii_to_idn(const char *in);
2930
#endif /* USE_WIN32_IDN */
3031
bool Curl_is_ASCII_name(const char *hostname);
3132
CURLcode Curl_idnconvert_hostname(struct hostname *host);
3233
#if defined(USE_LIBIDN2) || defined(USE_WIN32_IDN)
3334
#define USE_IDN
3435
void Curl_free_idnconverted_hostname(struct hostname *host);
3536
char *Curl_idn_decode(const char *input);
37+
char *Curl_idn_encode(const char *input);
3638
#ifdef USE_LIBIDN2
3739
#define Curl_idn_free(x) idn2_free(x)
3840
#else

lib/urlapi.c

+27
Original file line numberDiff line numberDiff line change
@@ -1403,6 +1403,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
14031403
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
14041404
bool urlencode = (flags & CURLU_URLENCODE)?1:0;
14051405
bool punycode = FALSE;
1406+
bool depunyfy = FALSE;
14061407
bool plusdecode = FALSE;
14071408
(void)flags;
14081409
if(!u)
@@ -1433,6 +1434,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
14331434
ptr = u->host;
14341435
ifmissing = CURLUE_NO_HOST;
14351436
punycode = (flags & CURLU_PUNYCODE)?1:0;
1437+
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
14361438
break;
14371439
case CURLUPART_ZONEID:
14381440
ptr = u->zoneid;
@@ -1483,6 +1485,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
14831485
char *port = u->port;
14841486
char *allochost = NULL;
14851487
punycode = (flags & CURLU_PUNYCODE)?1:0;
1488+
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
14861489
if(u->scheme && strcasecompare("file", u->scheme)) {
14871490
url = aprintf("file://%s%s%s",
14881491
u->path,
@@ -1548,6 +1551,17 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
15481551
#endif
15491552
}
15501553
}
1554+
else if(depunyfy) {
1555+
if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1556+
#ifndef USE_IDN
1557+
return CURLUE_LACKS_IDN;
1558+
#else
1559+
allochost = Curl_idn_encode(u->host);
1560+
if(!allochost)
1561+
return CURLUE_OUT_OF_MEMORY;
1562+
#endif
1563+
}
1564+
}
15511565

15521566
url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
15531567
scheme,
@@ -1626,6 +1640,19 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
16261640
#endif
16271641
}
16281642
}
1643+
else if(depunyfy) {
1644+
if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1645+
#ifndef USE_IDN
1646+
return CURLUE_LACKS_IDN;
1647+
#else
1648+
char *allochost = Curl_idn_encode(*part);
1649+
if(!allochost)
1650+
return CURLUE_OUT_OF_MEMORY;
1651+
free(*part);
1652+
*part = allochost;
1653+
#endif
1654+
}
1655+
}
16291656

16301657
return CURLUE_OK;
16311658
}

tests/libtest/lib1560.c

+3
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ static const struct testcase get_parts_list[] ={
179179
{"https://räksmörgås.se",
180180
"https | [11] | [12] | [13] | xn--rksmrgs-5wao1o.se | "
181181
"[15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK},
182+
{"https://xn--rksmrgs-5wao1o.se",
183+
"https | [11] | [12] | [13] | räksmörgås.se | "
184+
"[15] | / | [16] | [17]", 0, CURLU_PUNY2IDN, CURLUE_OK},
182185
#else
183186
{"https://räksmörgås.se",
184187
"https | [11] | [12] | [13] | [30] | [15] | / | [16] | [17]",

0 commit comments

Comments
 (0)