Skip to content

Commit 2d8314b

Browse files
committed
Rename utf2ucs() to utf8_to_unicode(), and export it so it can be used elsewhere.
Similarly, rename the version in mbprint.c, not because this affects anything, but just to keep the two copies in exact sync. There was some discussion of having only one copy in src/port/ instead, but this function is so small and unlikely to change that doing so seems like overkill. This is a slightly editorialized version of a patch by Joseph Adams. (The bug-fix aspect of his patch was applied separately and back-patched.)
1 parent b5565bc commit 2d8314b

File tree

3 files changed

+22
-17
lines changed

3 files changed

+22
-17
lines changed

src/backend/utils/mb/wchar.c

+11-9
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* conversion functions between pg_wchar and multibyte streams.
33
* Tatsuo Ishii
4-
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.74 2010/01/04 20:38:31 adunstan Exp $
4+
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.75 2010/08/18 19:54:01 tgl Exp $
55
*
66
*/
77
/* can be used in either frontend or backend */
@@ -462,7 +462,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
462462
* We return "1" for any leading byte that is either flat-out illegal or
463463
* indicates a length larger than we support.
464464
*
465-
* pg_utf2wchar_with_len(), utf2ucs(), pg_utf8_islegal(), and perhaps
465+
* pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps
466466
* other places would need to be fixed to change this.
467467
*/
468468
int
@@ -632,13 +632,15 @@ ucs_wcwidth(pg_wchar ucs)
632632
(ucs >= 0x20000 && ucs <= 0x2ffff)));
633633
}
634634

635-
static pg_wchar
636-
utf2ucs(const unsigned char *c)
635+
/*
636+
* Convert a UTF-8 character to a Unicode code point.
637+
* This is a one-character version of pg_utf2wchar_with_len.
638+
*
639+
* No error checks here, c must point to a long-enough string.
640+
*/
641+
pg_wchar
642+
utf8_to_unicode(const unsigned char *c)
637643
{
638-
/*
639-
* one char version of pg_utf2wchar_with_len. no control here, c must
640-
* point to a large enough string
641-
*/
642644
if ((*c & 0x80) == 0)
643645
return (pg_wchar) c[0];
644646
else if ((*c & 0xe0) == 0xc0)
@@ -661,7 +663,7 @@ utf2ucs(const unsigned char *c)
661663
static int
662664
pg_utf_dsplen(const unsigned char *s)
663665
{
664-
return ucs_wcwidth(utf2ucs(s));
666+
return ucs_wcwidth(utf8_to_unicode(s));
665667
}
666668

667669
/*

src/bin/psql/mbprint.c

+9-7
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
*
44
* Copyright (c) 2000-2010, PostgreSQL Global Development Group
55
*
6-
* $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.39 2010/08/16 00:06:18 tgl Exp $
6+
* $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.40 2010/08/18 19:54:01 tgl Exp $
77
*
88
* XXX this file does not really belong in psql/. Perhaps move to libpq?
99
* It also seems that the mbvalidate function is redundant with existing
@@ -43,13 +43,15 @@ pg_get_utf8_id(void)
4343
#define PG_UTF8 pg_get_utf8_id()
4444

4545

46+
/*
47+
* Convert a UTF-8 character to a Unicode code point.
48+
* This is a one-character version of pg_utf2wchar_with_len.
49+
*
50+
* No error checks here, c must point to a long-enough string.
51+
*/
4652
static pg_wchar
47-
utf2ucs(const unsigned char *c)
53+
utf8_to_unicode(const unsigned char *c)
4854
{
49-
/*
50-
* one char version of pg_utf2wchar_with_len. no control here, c must
51-
* point to a large enough string
52-
*/
5355
if ((*c & 0x80) == 0)
5456
return (pg_wchar) c[0];
5557
else if ((*c & 0xe0) == 0xc0)
@@ -346,7 +348,7 @@ pg_wcsformat(unsigned char *pwcs, size_t len, int encoding,
346348
else if (w < 0) /* Non-ascii control char */
347349
{
348350
if (encoding == PG_UTF8)
349-
sprintf((char *) ptr, "\\u%04X", utf2ucs(pwcs));
351+
sprintf((char *) ptr, "\\u%04X", utf8_to_unicode(pwcs));
350352
else
351353
{
352354
/*

src/include/mb/pg_wchar.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.94 2010/02/26 02:01:25 momjian Exp $
9+
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.95 2010/08/18 19:54:01 tgl Exp $
1010
*
1111
* NOTES
1212
* This is used both by the backend and by libpq, but should not be
@@ -412,6 +412,7 @@ extern int pg_valid_client_encoding(const char *name);
412412
extern int pg_valid_server_encoding(const char *name);
413413

414414
extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string);
415+
extern pg_wchar utf8_to_unicode(const unsigned char *c);
415416
extern int pg_utf_mblen(const unsigned char *);
416417
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
417418
int src_encoding,

0 commit comments

Comments
 (0)