Skip to content

Commit db29620

Browse files
committed
Work around Windows locale name with non-ASCII character.
Windows has a locale whose name contains a non-ASCII character: "Norwegian (Bokmål)" (that's an 'a' with a ring on top). That causes trouble; when passing it to setlocale(), it's not clear what encoding the argument should be in. Another problem is that the locale name is stored in the pg_database catalog table, and the encoding used there depends on what server encoding happens to be in use when the database is created. For example, if you issue the CREATE DATABASE when connected to a UTF-8 database, the locale name is stored in pg_database in UTF-8. As long as all locale names are pure ASCII, that's not a problem. To work around that, map the troublesome locale name to a pure-ASCII alias of the same locale, "norwegian-bokmal". Now, this doesn't change the existing values that are already in pg_database and in postgresql.conf. Old clusters will need to be fixed manually. Instructions for that need to be put in the release notes. This fixes bug #11431 reported by Alon Siman-Tov. Backpatch to 9.2; backpatching further would require more work than seems worth it.
1 parent c0c1f6f commit db29620

File tree

1 file changed

+114
-44
lines changed

1 file changed

+114
-44
lines changed

src/port/win32setlocale.c

+114-44
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,26 @@
99
* src/port/win32setlocale.c
1010
*
1111
*
12-
* Windows has a problem with locale names that have a dot in the country
13-
* name. For example:
12+
* The setlocale() function in Windows is broken in two ways. First, it
13+
* has a problem with locale names that have a dot in the country name. For
14+
* example:
1415
*
1516
* "Chinese (Traditional)_Hong Kong S.A.R..950"
1617
*
17-
* For some reason, setlocale() doesn't accept that. Fortunately, Windows'
18-
* setlocale() accepts various alternative names for such countries, so we
19-
* provide a wrapper setlocale() function that maps the troublemaking locale
20-
* names to accepted aliases.
18+
* For some reason, setlocale() doesn't accept that as argument, even though
19+
* setlocale(LC_ALL, NULL) returns exactly that. Fortunately, it accepts
20+
* various alternative names for such countries, so to work around the broken
21+
* setlocale() function, we map the troublemaking locale names to accepted
22+
* aliases, before calling setlocale().
23+
*
24+
* The second problem is that the locale name for "Norwegian (Bokmål)"
25+
* contains a non-ASCII character. That's problematic, because it's not clear
26+
* what encoding the locale name itself is supposed to be in, when you
27+
* haven't yet set a locale. Also, it causes problems when the cluster
28+
* contains databases with different encodings, as the locale name is stored
29+
* in the pg_database system catalog. To work around that, when setlocale()
30+
* returns that locale name, map it to a pure-ASCII alias for the same
31+
* locale.
2132
*-------------------------------------------------------------------------
2233
*/
2334

@@ -27,11 +38,23 @@
2738

2839
struct locale_map
2940
{
30-
const char *locale_name_part; /* string in locale name to replace */
31-
const char *replacement; /* string to replace it with */
41+
/*
42+
* String in locale name to replace. Can be a single string (end is NULL),
43+
* or separate start and end strings. If two strings are given, the
44+
* locale name must contain both of them, and everything between them
45+
* is replaced. This is used for a poor-man's regexp search, allowing
46+
* replacement of "start.*end".
47+
*/
48+
const char *locale_name_start;
49+
const char *locale_name_end;
50+
51+
const char *replacement; /* string to replace the match with */
3252
};
3353

34-
static const struct locale_map locale_map_list[] = {
54+
/*
55+
* Mappings applied before calling setlocale(), to the argument.
56+
*/
57+
static const struct locale_map locale_map_argument[] = {
3558
/*
3659
* "HKG" is listed here:
3760
* http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx
@@ -40,8 +63,8 @@ static const struct locale_map locale_map_list[] = {
4063
* "ARE" is the ISO-3166 three-letter code for U.A.E. It is not on the
4164
* above list, but seems to work anyway.
4265
*/
43-
{"Hong Kong S.A.R.", "HKG"},
44-
{"U.A.E.", "ARE"},
66+
{"Hong Kong S.A.R.", NULL, "HKG"},
67+
{"U.A.E.", NULL, "ARE"},
4568

4669
/*
4770
* The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't
@@ -56,60 +79,107 @@ static const struct locale_map locale_map_list[] = {
5679
*
5780
* Some versions of Windows spell it "Macau", others "Macao".
5881
*/
59-
{"Chinese (Traditional)_Macau S.A.R..950", "ZHM"},
60-
{"Chinese_Macau S.A.R..950", "ZHM"},
61-
{"Chinese (Traditional)_Macao S.A.R..950", "ZHM"},
62-
{"Chinese_Macao S.A.R..950", "ZHM"}
82+
{"Chinese (Traditional)_Macau S.A.R..950", NULL, "ZHM"},
83+
{"Chinese_Macau S.A.R..950", NULL, "ZHM"},
84+
{"Chinese (Traditional)_Macao S.A.R..950", NULL, "ZHM"},
85+
{"Chinese_Macao S.A.R..950", NULL, "ZHM"},
86+
{NULL, NULL, NULL}
6387
};
6488

65-
char *
66-
pgwin32_setlocale(int category, const char *locale)
89+
/*
90+
* Mappings applied after calling setlocale(), to its return value.
91+
*/
92+
static const struct locale_map locale_map_result[] = {
93+
/*
94+
* "Norwegian (Bokmål)" locale name contains the a-ring character.
95+
* Map it to a pure-ASCII alias.
96+
*
97+
* It's not clear what encoding setlocale() uses when it returns the
98+
* locale name, so to play it safe, we search for "Norwegian (Bok*l)".
99+
*/
100+
{"Norwegian (Bokm", "l)", "norwegian-bokmal"},
101+
{NULL, NULL, NULL}
102+
};
103+
104+
#define MAX_LOCALE_NAME_LEN 100
105+
106+
static char *
107+
map_locale(struct locale_map *map, char *locale)
67108
{
68-
char *result;
69-
char *alias;
109+
static char aliasbuf[MAX_LOCALE_NAME_LEN];
70110
int i;
71111

72-
if (locale == NULL)
73-
return setlocale(category, locale);
74-
75112
/* Check if the locale name matches any of the problematic ones. */
76-
alias = NULL;
77-
for (i = 0; i < lengthof(locale_map_list); i++)
113+
for (i = 0; map[i].locale_name_start != NULL; i++)
78114
{
79-
const char *needle = locale_map_list[i].locale_name_part;
80-
const char *replacement = locale_map_list[i].replacement;
115+
const char *needle_start = map[i].locale_name_start;
116+
const char *needle_end = map[i].locale_name_end;
117+
const char *replacement = map[i].replacement;
81118
char *match;
119+
char *match_start = NULL;
120+
char *match_end = NULL;
82121

83-
match = strstr(locale, needle);
84-
if (match != NULL)
122+
match = strstr(locale, needle_start);
123+
if (match)
124+
{
125+
/*
126+
* Found a match for the first part. If this was a two-part
127+
* replacement, find the second part.
128+
*/
129+
match_start = match;
130+
if (needle_end)
131+
{
132+
match = strstr(match_start + strlen(needle_start), needle_end);
133+
if (match)
134+
match_end = match + strlen(needle_end);
135+
else
136+
match_start = NULL;
137+
}
138+
else
139+
match_end = match_start + strlen(needle_start);
140+
}
141+
142+
if (match_start)
85143
{
86144
/* Found a match. Replace the matched string. */
87-
int matchpos = match - locale;
145+
int matchpos = match_start - locale;
88146
int replacementlen = strlen(replacement);
89-
char *rest = match + strlen(needle);
147+
char *rest = match_end;
90148
int restlen = strlen(rest);
91149

92-
alias = malloc(matchpos + replacementlen + restlen + 1);
93-
if (!alias)
150+
/* check that the result fits in the static buffer */
151+
if (matchpos + replacementlen + restlen + 1 > MAX_LOCALE_NAME_LEN)
94152
return NULL;
95153

96-
memcpy(&alias[0], &locale[0], matchpos);
97-
memcpy(&alias[matchpos], replacement, replacementlen);
98-
memcpy(&alias[matchpos + replacementlen], rest, restlen + 1); /* includes null
99-
* terminator */
154+
memcpy(&aliasbuf[0], &locale[0], matchpos);
155+
memcpy(&aliasbuf[matchpos], replacement, replacementlen);
156+
/* includes null terminator */
157+
memcpy(&aliasbuf[matchpos + replacementlen], rest, restlen + 1);
100158

101-
break;
159+
return aliasbuf;
102160
}
103161
}
104162

105-
/* Call the real setlocale() function */
106-
if (alias)
107-
{
108-
result = setlocale(category, alias);
109-
free(alias);
110-
}
163+
/* no match, just return the original string */
164+
return locale;
165+
}
166+
167+
char *
168+
pgwin32_setlocale(int category, const char *locale)
169+
{
170+
char *argument;
171+
char *result;
172+
173+
if (locale == NULL)
174+
argument = NULL;
111175
else
112-
result = setlocale(category, locale);
176+
argument = map_locale(locale_map_argument, locale);
177+
178+
/* Call the real setlocale() function */
179+
result = setlocale(category, argument);
180+
181+
if (result)
182+
result = map_locale(locale_map_result, result);
113183

114184
return result;
115185
}

0 commit comments

Comments
 (0)