9
9
* src/port/win32setlocale.c
10
10
*
11
11
*
12
- * Windows has a problem with locale names that have a dot in the country
13
- * name. For example:
12
+ * The setlocale() function in Windows is broken in two ways. First, it
13
+ * has a problem with locale names that have a dot in the country name. For
14
+ * example:
14
15
*
15
16
* "Chinese (Traditional)_Hong Kong S.A.R..950"
16
17
*
17
- * For some reason, setlocale() doesn't accept that. Fortunately, Windows'
18
- * setlocale() accepts various alternative names for such countries, so we
19
- * provide a wrapper setlocale() function that maps the troublemaking locale
20
- * names to accepted aliases.
18
+ * For some reason, setlocale() doesn't accept that as an argument, even though
19
+ * setlocale(LC_ALL, NULL) returns exactly that. Fortunately, it accepts
20
+ * various alternative names for such countries, so to work around the broken
21
+ * setlocale() function, we map the troublemaking locale names to accepted
22
+ * aliases, before calling setlocale().
23
+ *
24
+ * The second problem is that the locale name for "Norwegian (Bokmål)"
25
+ * contains a non-ASCII character. That's problematic, because it's not clear
26
+ * what encoding the locale name itself is supposed to be in, when you
27
+ * haven't yet set a locale. Also, it causes problems when the cluster
28
+ * contains databases with different encodings, as the locale name is stored
29
+ * in the pg_database system catalog. To work around that, when setlocale()
30
+ * returns that locale name, map it to a pure-ASCII alias for the same
31
+ * locale.
21
32
*-------------------------------------------------------------------------
22
33
*/
23
34
27
38
28
39
struct locale_map
29
40
{
30
- const char * locale_name_part ; /* string in locale name to replace */
31
- const char * replacement ; /* string to replace it with */
41
+ /*
42
+ * String in locale name to replace. Can be a single string (end is NULL),
43
+ * or separate start and end strings. If two strings are given, the
44
+ * locale name must contain both of them, and everything between them
45
+ * is replaced. This is used for a poor-man's regexp search, allowing
46
+ * replacement of "start.*end".
47
+ */
48
+ const char * locale_name_start ;
49
+ const char * locale_name_end ;
50
+
51
+ const char * replacement ; /* string to replace the match with */
32
52
};
33
53
34
- static const struct locale_map locale_map_list [] = {
54
+ /*
55
+ * Mappings applied before calling setlocale(), to the argument.
56
+ */
57
+ static const struct locale_map locale_map_argument [] = {
35
58
/*
36
59
* "HKG" is listed here:
37
60
* http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx
@@ -40,8 +63,8 @@ static const struct locale_map locale_map_list[] = {
40
63
* "ARE" is the ISO-3166 three-letter code for U.A.E. It is not on the
41
64
* above list, but seems to work anyway.
42
65
*/
43
- {"Hong Kong S.A.R." , "HKG" },
44
- {"U.A.E." , "ARE" },
66
+ {"Hong Kong S.A.R." , NULL , "HKG" },
67
+ {"U.A.E." , NULL , "ARE" },
45
68
46
69
/*
47
70
* The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't
@@ -56,60 +79,107 @@ static const struct locale_map locale_map_list[] = {
56
79
*
57
80
* Some versions of Windows spell it "Macau", others "Macao".
58
81
*/
59
- {"Chinese (Traditional)_Macau S.A.R..950" , "ZHM" },
60
- {"Chinese_Macau S.A.R..950" , "ZHM" },
61
- {"Chinese (Traditional)_Macao S.A.R..950" , "ZHM" },
62
- {"Chinese_Macao S.A.R..950" , "ZHM" }
82
+ {"Chinese (Traditional)_Macau S.A.R..950" , NULL , "ZHM" },
83
+ {"Chinese_Macau S.A.R..950" , NULL , "ZHM" },
84
+ {"Chinese (Traditional)_Macao S.A.R..950" , NULL , "ZHM" },
85
+ {"Chinese_Macao S.A.R..950" , NULL , "ZHM" },
86
+ {NULL , NULL , NULL }
63
87
};
64
88
65
- char *
66
- pgwin32_setlocale (int category , const char * locale )
89
+ /*
90
+ * Mappings applied after calling setlocale(), to its return value.
91
+ */
92
+ static const struct locale_map locale_map_result [] = {
93
+ /*
94
+ * "Norwegian (Bokmål)" locale name contains the a-ring character.
95
+ * Map it to a pure-ASCII alias.
96
+ *
97
+ * It's not clear what encoding setlocale() uses when it returns the
98
+ * locale name, so to play it safe, we search for "Norwegian (Bokm*l)".
99
+ */
100
+ {"Norwegian (Bokm" , "l)" , "norwegian-bokmal" },
101
+ {NULL , NULL , NULL }
102
+ };
103
+
104
+ #define MAX_LOCALE_NAME_LEN 100
105
+
106
+ static char *
107
+ map_locale (struct locale_map * map , char * locale )
67
108
{
68
- char * result ;
69
- char * alias ;
109
+ static char aliasbuf [MAX_LOCALE_NAME_LEN ];
70
110
int i ;
71
111
72
- if (locale == NULL )
73
- return setlocale (category , locale );
74
-
75
112
/* Check if the locale name matches any of the problematic ones. */
76
- alias = NULL ;
77
- for (i = 0 ; i < lengthof (locale_map_list ); i ++ )
113
+ for (i = 0 ; map [i ].locale_name_start != NULL ; i ++ )
78
114
{
79
- const char * needle = locale_map_list [i ].locale_name_part ;
80
- const char * replacement = locale_map_list [i ].replacement ;
115
+ const char * needle_start = map [i ].locale_name_start ;
116
+ const char * needle_end = map [i ].locale_name_end ;
117
+ const char * replacement = map [i ].replacement ;
81
118
char * match ;
119
+ char * match_start = NULL ;
120
+ char * match_end = NULL ;
82
121
83
- match = strstr (locale , needle );
84
- if (match != NULL )
122
+ match = strstr (locale , needle_start );
123
+ if (match )
124
+ {
125
+ /*
126
+ * Found a match for the first part. If this was a two-part
127
+ * replacement, find the second part.
128
+ */
129
+ match_start = match ;
130
+ if (needle_end )
131
+ {
132
+ match = strstr (match_start + strlen (needle_start ), needle_end );
133
+ if (match )
134
+ match_end = match + strlen (needle_end );
135
+ else
136
+ match_start = NULL ;
137
+ }
138
+ else
139
+ match_end = match_start + strlen (needle_start );
140
+ }
141
+
142
+ if (match_start )
85
143
{
86
144
/* Found a match. Replace the matched string. */
87
- int matchpos = match - locale ;
145
+ int matchpos = match_start - locale ;
88
146
int replacementlen = strlen (replacement );
89
- char * rest = match + strlen ( needle ) ;
147
+ char * rest = match_end ;
90
148
int restlen = strlen (rest );
91
149
92
- alias = malloc ( matchpos + replacementlen + restlen + 1 );
93
- if (! alias )
150
+ /* check that the result fits in the static buffer */
151
+ if (matchpos + replacementlen + restlen + 1 > MAX_LOCALE_NAME_LEN )
94
152
return NULL ;
95
153
96
- memcpy (& alias [0 ], & locale [0 ], matchpos );
97
- memcpy (& alias [matchpos ], replacement , replacementlen );
98
- memcpy ( & alias [ matchpos + replacementlen ], rest , restlen + 1 ); /* includes null
99
- * terminator */
154
+ memcpy (& aliasbuf [0 ], & locale [0 ], matchpos );
155
+ memcpy (& aliasbuf [matchpos ], replacement , replacementlen );
156
+ /* includes null terminator */
157
+ memcpy ( & aliasbuf [ matchpos + replacementlen ], rest , restlen + 1 );
100
158
101
- break ;
159
+ return aliasbuf ;
102
160
}
103
161
}
104
162
105
- /* Call the real setlocale() function */
106
- if (alias )
107
- {
108
- result = setlocale (category , alias );
109
- free (alias );
110
- }
163
+ /* no match, just return the original string */
164
+ return locale ;
165
+ }
166
+
167
+ char *
168
+ pgwin32_setlocale (int category , const char * locale )
169
+ {
170
+ char * argument ;
171
+ char * result ;
172
+
173
+ if (locale == NULL )
174
+ argument = NULL ;
111
175
else
112
- result = setlocale (category , locale );
176
+ argument = map_locale (locale_map_argument , locale );
177
+
178
+ /* Call the real setlocale() function */
179
+ result = setlocale (category , argument );
180
+
181
+ if (result )
182
+ result = map_locale (locale_map_result , result );
113
183
114
184
return result ;
115
185
}
0 commit comments