Skip to content

Commit 5bc429a

Browse files
committed
Extend collection of Unicode combining characters to beyond the BMP
The former limit was perhaps a carryover from an older hand-coded table. Since commit bab9821 we have enough space in mbinterval to store larger codepoints, so collect all combining characters. Discussion: https://www.postgresql.org/message-id/49ad1fa0-174e-c901-b14c-c484b60907f1%40enterprisedb.com
1 parent bab9821 commit 5bc429a

File tree

2 files changed

+102
-2
lines changed

2 files changed

+102
-2
lines changed

src/common/unicode/generate-unicode_combining_table.pl

-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
my @fields = split ';', $line;
2626
$codepoint = hex $fields[0];
2727

28-
next if $codepoint > 0xFFFF;
29-
3028
if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
3129
{
3230
# combining character, save for start of range

src/include/common/unicode_combining_table.h

+102
Original file line numberDiff line numberDiff line change
@@ -193,4 +193,106 @@ static const struct mbinterval combining[] = {
193193
{0xFB1E, 0xFB1E},
194194
{0xFE00, 0xFE0F},
195195
{0xFE20, 0xFE2F},
196+
{0x101FD, 0x101FD},
197+
{0x102E0, 0x102E0},
198+
{0x10376, 0x1037A},
199+
{0x10A01, 0x10A0F},
200+
{0x10A38, 0x10A3F},
201+
{0x10AE5, 0x10AE6},
202+
{0x10D24, 0x10D27},
203+
{0x10EAB, 0x10EAC},
204+
{0x10F46, 0x10F50},
205+
{0x11001, 0x11001},
206+
{0x11038, 0x11046},
207+
{0x1107F, 0x11081},
208+
{0x110B3, 0x110B6},
209+
{0x110B9, 0x110BA},
210+
{0x11100, 0x11102},
211+
{0x11127, 0x1112B},
212+
{0x1112D, 0x11134},
213+
{0x11173, 0x11173},
214+
{0x11180, 0x11181},
215+
{0x111B6, 0x111BE},
216+
{0x111C9, 0x111CC},
217+
{0x111CF, 0x111CF},
218+
{0x1122F, 0x11231},
219+
{0x11234, 0x11234},
220+
{0x11236, 0x11237},
221+
{0x1123E, 0x1123E},
222+
{0x112DF, 0x112DF},
223+
{0x112E3, 0x112EA},
224+
{0x11300, 0x11301},
225+
{0x1133B, 0x1133C},
226+
{0x11340, 0x11340},
227+
{0x11366, 0x11374},
228+
{0x11438, 0x1143F},
229+
{0x11442, 0x11444},
230+
{0x11446, 0x11446},
231+
{0x1145E, 0x1145E},
232+
{0x114B3, 0x114B8},
233+
{0x114BA, 0x114BA},
234+
{0x114BF, 0x114C0},
235+
{0x114C2, 0x114C3},
236+
{0x115B2, 0x115B5},
237+
{0x115BC, 0x115BD},
238+
{0x115BF, 0x115C0},
239+
{0x115DC, 0x115DD},
240+
{0x11633, 0x1163A},
241+
{0x1163D, 0x1163D},
242+
{0x1163F, 0x11640},
243+
{0x116AB, 0x116AB},
244+
{0x116AD, 0x116AD},
245+
{0x116B0, 0x116B5},
246+
{0x116B7, 0x116B7},
247+
{0x1171D, 0x1171F},
248+
{0x11722, 0x11725},
249+
{0x11727, 0x1172B},
250+
{0x1182F, 0x11837},
251+
{0x11839, 0x1183A},
252+
{0x1193B, 0x1193C},
253+
{0x1193E, 0x1193E},
254+
{0x11943, 0x11943},
255+
{0x119D4, 0x119DB},
256+
{0x119E0, 0x119E0},
257+
{0x11A01, 0x11A0A},
258+
{0x11A33, 0x11A38},
259+
{0x11A3B, 0x11A3E},
260+
{0x11A47, 0x11A47},
261+
{0x11A51, 0x11A56},
262+
{0x11A59, 0x11A5B},
263+
{0x11A8A, 0x11A96},
264+
{0x11A98, 0x11A99},
265+
{0x11C30, 0x11C3D},
266+
{0x11C3F, 0x11C3F},
267+
{0x11C92, 0x11CA7},
268+
{0x11CAA, 0x11CB0},
269+
{0x11CB2, 0x11CB3},
270+
{0x11CB5, 0x11CB6},
271+
{0x11D31, 0x11D45},
272+
{0x11D47, 0x11D47},
273+
{0x11D90, 0x11D91},
274+
{0x11D95, 0x11D95},
275+
{0x11D97, 0x11D97},
276+
{0x11EF3, 0x11EF4},
277+
{0x16AF0, 0x16AF4},
278+
{0x16B30, 0x16B36},
279+
{0x16F4F, 0x16F4F},
280+
{0x16F8F, 0x16F92},
281+
{0x16FE4, 0x16FE4},
282+
{0x1BC9D, 0x1BC9E},
283+
{0x1D167, 0x1D169},
284+
{0x1D17B, 0x1D182},
285+
{0x1D185, 0x1D18B},
286+
{0x1D1AA, 0x1D1AD},
287+
{0x1D242, 0x1D244},
288+
{0x1DA00, 0x1DA36},
289+
{0x1DA3B, 0x1DA6C},
290+
{0x1DA75, 0x1DA75},
291+
{0x1DA84, 0x1DA84},
292+
{0x1DA9B, 0x1E02A},
293+
{0x1E130, 0x1E136},
294+
{0x1E2EC, 0x1E2EF},
295+
{0x1E8D0, 0x1E8D6},
296+
{0x1E944, 0x1E94A},
297+
{0xE0100, 0xE01EF},
196298
};

0 commit comments

Comments
 (0)