Skip to content

Commit 78ab944

Browse files
committed
Change mbbisearch to return the character range
Add a width field to mbinterval and have mbbisearch return a pointer to the found range rather than just a bool for success. A future commit will add another width besides zero, and this will allow that to use the same search. Reviewed by: Jacob Champion. Discussion: https://www.postgresql.org/message-id/CAFBsxsGOCpzV7c-f3a8ADsA1n4uZ%3D8puCctQp%2Bx7W0vgkv%3Dw%2Bg%40mail.gmail.com
1 parent eb0d0d2 commit 78ab944

File tree

3 files changed

+208
-203
lines changed

3 files changed

+208
-203
lines changed

src/common/unicode/generate-unicode_width_table.pl

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
print
1818
"/* generated by src/common/unicode/generate-unicode_width_table.pl, do not edit */\n\n";
1919

20-
print "static const struct mbinterval combining[] = {\n";
20+
print "static const struct mbinterval wcwidth[] = {\n";
2121

2222
foreach my $line (<ARGV>)
2323
{
@@ -40,7 +40,7 @@
4040
# not a combining character, print out previous range if any
4141
if (defined($range_start))
4242
{
43-
printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
43+
printf "\t{0x%04X, 0x%04X, 0},\n", $range_start, $prev_codepoint;
4444
$range_start = undef;
4545
}
4646
}

src/common/wchar.c

+13-8
Original file line numberDiff line numberDiff line change
@@ -585,17 +585,18 @@ struct mbinterval
585585
{
586586
unsigned short first;
587587
unsigned short last;
588+
signed int width;
588589
};
589590

590591
/* auxiliary function for binary search in interval table */
591-
static int
592+
static const struct mbinterval *
592593
mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
593594
{
594595
int min = 0;
595596
int mid;
596597

597598
if (ucs < table[0].first || ucs > table[max].last)
598-
return 0;
599+
return NULL;
599600
while (max >= min)
600601
{
601602
mid = (min + max) / 2;
@@ -604,10 +605,10 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
604605
else if (ucs < table[mid].first)
605606
max = mid - 1;
606607
else
607-
return 1;
608+
return &table[mid];
608609
}
609610

610-
return 0;
611+
return NULL;
611612
}
612613

613614

@@ -646,17 +647,21 @@ ucs_wcwidth(pg_wchar ucs)
646647
{
647648
#include "common/unicode_width_table.h"
648649

650+
const struct mbinterval *range;
651+
649652
/* test for 8-bit control characters */
650653
if (ucs == 0)
651654
return 0;
652655

653656
if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
654657
return -1;
655658

656-
/* binary search in table of non-spacing characters */
657-
if (mbbisearch(ucs, combining,
658-
sizeof(combining) / sizeof(struct mbinterval) - 1))
659-
return 0;
659+
/* binary search in table of character widths */
660+
range = mbbisearch(ucs, wcwidth,
661+
sizeof(wcwidth) / sizeof(struct mbinterval) - 1);
662+
663+
if (range != NULL)
664+
return range->width;
660665

661666
/*
662667
* if we arrive here, ucs is not a combining or C0/C1 control character

0 commit comments

Comments
 (0)