PostgreSQL Source Code git master
mbutils.c File Reference
#include "postgres.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "mb/pg_wchar.h"
#include "utils/fmgrprotos.h"
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "varatt.h"
Include dependency graph for mbutils.c:

Go to the source code of this file.

Data Structures

struct  ConvProcInfo
 

Typedefs

typedef struct ConvProcInfo ConvProcInfo
 

Functions

static char * perform_default_encoding_conversion (const char *src, int len, bool is_client_to_server)
 
static int cliplen (const char *str, int len, int limit)
 
int PrepareClientEncoding (int encoding)
 
int SetClientEncoding (int encoding)
 
void InitializeClientEncoding (void)
 
int pg_get_client_encoding (void)
 
const char * pg_get_client_encoding_name (void)
 
unsigned char * pg_do_encoding_conversion (unsigned char *src, int len, int src_encoding, int dest_encoding)
 
int pg_do_encoding_conversion_buf (Oid proc, int src_encoding, int dest_encoding, unsigned char *src, int srclen, unsigned char *dest, int destlen, bool noError)
 
Datum pg_convert_to (PG_FUNCTION_ARGS)
 
Datum pg_convert_from (PG_FUNCTION_ARGS)
 
Datum pg_convert (PG_FUNCTION_ARGS)
 
Datum length_in_encoding (PG_FUNCTION_ARGS)
 
Datum pg_encoding_max_length_sql (PG_FUNCTION_ARGS)
 
char * pg_client_to_server (const char *s, int len)
 
char * pg_any_to_server (const char *s, int len, int encoding)
 
char * pg_server_to_client (const char *s, int len)
 
char * pg_server_to_any (const char *s, int len, int encoding)
 
void pg_unicode_to_server (pg_wchar c, unsigned char *s)
 
bool pg_unicode_to_server_noerror (pg_wchar c, unsigned char *s)
 
int pg_mb2wchar (const char *from, pg_wchar *to)
 
int pg_mb2wchar_with_len (const char *from, pg_wchar *to, int len)
 
int pg_encoding_mb2wchar_with_len (int encoding, const char *from, pg_wchar *to, int len)
 
int pg_wchar2mb (const pg_wchar *from, char *to)
 
int pg_wchar2mb_with_len (const pg_wchar *from, char *to, int len)
 
int pg_encoding_wchar2mb_with_len (int encoding, const pg_wchar *from, char *to, int len)
 
int pg_mblen (const char *mbstr)
 
int pg_dsplen (const char *mbstr)
 
int pg_mbstrlen (const char *mbstr)
 
int pg_mbstrlen_with_len (const char *mbstr, int limit)
 
int pg_mbcliplen (const char *mbstr, int len, int limit)
 
int pg_encoding_mbcliplen (int encoding, const char *mbstr, int len, int limit)
 
int pg_mbcharcliplen (const char *mbstr, int len, int limit)
 
void SetDatabaseEncoding (int encoding)
 
void SetMessageEncoding (int encoding)
 
int GetDatabaseEncoding (void)
 
const char * GetDatabaseEncodingName (void)
 
Datum getdatabaseencoding (PG_FUNCTION_ARGS)
 
Datum pg_client_encoding (PG_FUNCTION_ARGS)
 
Datum PG_char_to_encoding (PG_FUNCTION_ARGS)
 
Datum PG_encoding_to_char (PG_FUNCTION_ARGS)
 
int GetMessageEncoding (void)
 
static bool pg_generic_charinc (unsigned char *charptr, int len)
 
static bool pg_utf8_increment (unsigned char *charptr, int length)
 
static bool pg_eucjp_increment (unsigned char *charptr, int length)
 
mbcharacter_incrementer pg_database_encoding_character_incrementer (void)
 
int pg_database_encoding_max_length (void)
 
bool pg_verifymbstr (const char *mbstr, int len, bool noError)
 
bool pg_verify_mbstr (int encoding, const char *mbstr, int len, bool noError)
 
int pg_verify_mbstr_len (int encoding, const char *mbstr, int len, bool noError)
 
void check_encoding_conversion_args (int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding)
 
void report_invalid_encoding (int encoding, const char *mbstr, int len)
 
void report_untranslatable_char (int src_encoding, int dest_encoding, const char *mbstr, int len)
 

Variables

static List * ConvProcList = NIL
 
static FmgrInfo * ToServerConvProc = NULL
 
static FmgrInfo * ToClientConvProc = NULL
 
static FmgrInfo * Utf8ToServerConvProc = NULL
 
static const pg_enc2name * ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static const pg_enc2name * DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static const pg_enc2name * MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
 
static bool backend_startup_complete = false
 
static int pending_client_encoding = PG_SQL_ASCII
 

Typedef Documentation

◆ ConvProcInfo

typedef struct ConvProcInfo ConvProcInfo

Function Documentation

◆ check_encoding_conversion_args()

void check_encoding_conversion_args ( int  src_encoding,
int  dest_encoding,
int  len,
int  expected_src_encoding,
int  expected_dest_encoding 
)

Definition at line 1670 of file mbutils.c.

1675{
1676 if (!PG_VALID_ENCODING(src_encoding))
1677 elog(ERROR, "invalid source encoding ID: %d", src_encoding);
1678 if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
1679 elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
1680 pg_enc2name_tbl[expected_src_encoding].name,
1681 pg_enc2name_tbl[src_encoding].name);
1682 if (!PG_VALID_ENCODING(dest_encoding))
1683 elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
1684 if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
1685 elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
1686 pg_enc2name_tbl[expected_dest_encoding].name,
1687 pg_enc2name_tbl[dest_encoding].name);
1688 if (len < 0)
1689 elog(ERROR, "encoding conversion length must not be negative");
1690}
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
const pg_enc2name pg_enc2name_tbl[]
Definition: encnames.c:308
const void size_t len
#define PG_VALID_ENCODING(_enc)
Definition: pg_wchar.h:287
const char * name

References elog, ERROR, len, name, pg_enc2name_tbl, and PG_VALID_ENCODING.

◆ cliplen()

static int cliplen ( const char *  str,
int  len,
int  limit 
)
static

Definition at line 1151 of file mbutils.c.

1152{
1153 int l = 0;
1154
1155 len = Min(len, limit);
1156 while (l < len && str[l])
1157 l++;
1158 return l;
1159}
#define Min(x, y)
Definition: c.h:975
const char * str

References len, Min, and str.

Referenced by pg_encoding_mbcliplen(), pg_mbcharcliplen(), and pgstat_clip_activity().

◆ getdatabaseencoding()

Datum getdatabaseencoding ( PG_FUNCTION_ARGS  )

Definition at line 1274 of file mbutils.c.

1275{
1276 return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
1277}
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:682
static const pg_enc2name * DatabaseEncoding
Definition: mbutils.c:82
Datum namein(PG_FUNCTION_ARGS)
Definition: name.c:48
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:355
const char * name
Definition: pg_wchar.h:341

References CStringGetDatum(), DatabaseEncoding, DirectFunctionCall1, pg_enc2name::name, and namein().

◆ GetDatabaseEncoding()

◆ GetDatabaseEncodingName()

◆ GetMessageEncoding()

int GetMessageEncoding ( void  )

Definition at line 1309 of file mbutils.c.

1310{
1311 return MessageEncoding->encoding;
1312}
static const pg_enc2name * MessageEncoding
Definition: mbutils.c:83

References pg_enc2name::encoding, and MessageEncoding.

◆ InitializeClientEncoding()

void InitializeClientEncoding ( void  )

Definition at line 282 of file mbutils.c.

283{
284 int current_server_encoding;
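285
286 Assert(!backend_startup_complete);
287 backend_startup_complete = true;
288
289 if (PrepareClientEncoding(pending_client_encoding) < 0 ||
290 SetClientEncoding(pending_client_encoding) < 0)
291 {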
292 /*
293 * Oops, the requested conversion is not available. We couldn't fail
294 * before, but we can now.
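295 */
296 ereport(FATAL,
297 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
298 errmsg("conversion between %s and %s is not supported",
299 pg_enc2name_tbl[pending_client_encoding].name,
300 GetDatabaseEncodingName())));
301 }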
302
303 /*
304 * Also look up the UTF8-to-server conversion function if needed. Since
305 * the server encoding is fixed within any one backend process, we don't
306 * have to do this more than once.
307 */
308 current_server_encoding = GetDatabaseEncoding();
309 if (current_server_encoding != PG_UTF8 &&
310 current_server_encoding != PG_SQL_ASCII)
311 {
312 Oid utf8_to_server_proc;
313
314 AssertCouldGetRelation();
315 utf8_to_server_proc =
316 FindDefaultConversionProc(PG_UTF8,
317 current_server_encoding);
318 /* If there's no such conversion, just leave the pointer as NULL */
319 if (OidIsValid(utf8_to_server_proc))
320 {
321 FmgrInfo *finfo;
322
323 finfo = (FmgrInfo *) MemoryContextAlloc(TopMemoryContext,
324 sizeof(FmgrInfo));
325 fmgr_info_cxt(utf8_to_server_proc, finfo,
326 TopMemoryContext);
327 /* Set Utf8ToServerConvProc only after data is fully valid */
328 Utf8ToServerConvProc = finfo;
329 }
330 }
331}
#define OidIsValid(objectId)
Definition: c.h:746
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define FATAL
Definition: elog.h:41
#define ereport(elevel,...)
Definition: elog.h:149
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:137
Assert(PointerIsAligned(start, uint64))
int GetDatabaseEncoding(void)
Definition: mbutils.c:1262
static FmgrInfo * Utf8ToServerConvProc
Definition: mbutils.c:76
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1268
int SetClientEncoding(int encoding)
Definition: mbutils.c:209
int PrepareClientEncoding(int encoding)
Definition: mbutils.c:111
static bool backend_startup_complete
Definition: mbutils.c:91
static int pending_client_encoding
Definition: mbutils.c:92
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1185
MemoryContext TopMemoryContext
Definition: mcxt.c:149
Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding)
Definition: namespace.c:4080
@ PG_SQL_ASCII
Definition: pg_wchar.h:226
@ PG_UTF8
Definition: pg_wchar.h:232
unsigned int Oid
Definition: postgres_ext.h:30
static void AssertCouldGetRelation(void)
Definition: relcache.h:44
Definition: fmgr.h:57

References Assert(), AssertCouldGetRelation(), backend_startup_complete, ereport, errcode(), errmsg(), FATAL, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), GetDatabaseEncodingName(), MemoryContextAlloc(), name, OidIsValid, pending_client_encoding, pg_enc2name_tbl, PG_SQL_ASCII, PG_UTF8, PrepareClientEncoding(), SetClientEncoding(), TopMemoryContext, and Utf8ToServerConvProc.

Referenced by InitPostgres().

◆ length_in_encoding()

Datum length_in_encoding ( PG_FUNCTION_ARGS  )

Definition at line 616 of file mbutils.c.

617{
618 bytea *string = PG_GETARG_BYTEA_PP(0);
619 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
620 int src_encoding = pg_char_to_encoding(src_encoding_name);
621 const char *src_str;
622 int len;
623 int retval;
624
625 if (src_encoding < 0)
626 ereport(ERROR,
627 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
628 errmsg("invalid encoding name \"%s\"",
629 src_encoding_name)));
630
631 len = VARSIZE_ANY_EXHDR(string);
632 src_str = VARDATA_ANY(string);
633
634 retval = pg_verify_mbstr_len(src_encoding, src_str, len, false);
635
636 PG_RETURN_INT32(retval);
637}
#define NameStr(name)
Definition: c.h:717
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
int pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1598
#define pg_char_to_encoding
Definition: pg_wchar.h:629
Definition: c.h:658
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References ereport, errcode(), errmsg(), ERROR, len, NameStr, pg_char_to_encoding, PG_GETARG_BYTEA_PP, PG_GETARG_NAME, PG_RETURN_INT32, pg_verify_mbstr_len(), VARDATA_ANY, and VARSIZE_ANY_EXHDR.

◆ perform_default_encoding_conversion()

static char * perform_default_encoding_conversion ( const char *  src,
int  len,
bool  is_client_to_server 
)
static

Definition at line 784 of file mbutils.c.

786{
787 char *result;
788 int src_encoding,
789 dest_encoding;
790 FmgrInfo *flinfo;
791
792 if (is_client_to_server)
793 {
794 src_encoding = ClientEncoding->encoding;
795 dest_encoding = DatabaseEncoding->encoding;
796 flinfo = ToServerConvProc;
797 }
798 else
799 {
800 src_encoding = DatabaseEncoding->encoding;
801 dest_encoding = ClientEncoding->encoding;
802 flinfo = ToClientConvProc;
803 }
804
805 if (flinfo == NULL)
806 return unconstify(char *, src);
807
808 /*
809 * Allocate space for conversion result, being wary of integer overflow.
810 * See comments in pg_do_encoding_conversion.
811 */
812 if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
813 ereport(ERROR,
814 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
815 errmsg("out of memory"),
816 errdetail("String of %d bytes is too long for encoding conversion.",
817 len)));
818
819 result = (char *)
820 MemoryContextAllocHuge(CurrentMemoryContext,
821 (Size) len * MAX_CONVERSION_GROWTH + 1);
822
823 FunctionCall6(flinfo,
824 Int32GetDatum(src_encoding),
825 Int32GetDatum(dest_encoding),
826 CStringGetDatum(src),
827 CStringGetDatum(result),
828 Int32GetDatum(len),
829 BoolGetDatum(false));
830
831 /*
832 * Release extra space if there might be a lot --- see comments in
833 * pg_do_encoding_conversion.
834 */
835 if (len > 1000000)
836 {
837 Size resultlen = strlen(result);
838
839 if (resultlen >= MaxAllocSize)
840 ereport(ERROR,
841 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
842 errmsg("out of memory"),
843 errdetail("String of %d bytes is too long for encoding conversion.",
844 len)));
845
846 result = (char *) repalloc(result, resultlen + 1);
847 }
848
849 return result;
850}
#define unconstify(underlying_type, expr)
Definition: c.h:1216
size_t Size
Definition: c.h:576
int errdetail(const char *fmt,...)
Definition: elog.c:1204
#define MaxAllocSize
Definition: fe_memutils.h:22
#define FunctionCall6(flinfo, arg1, arg2, arg3, arg4, arg5, arg6)
Definition: fmgr.h:710
static const pg_enc2name * ClientEncoding
Definition: mbutils.c:81
static FmgrInfo * ToServerConvProc
Definition: mbutils.c:68
static FmgrInfo * ToClientConvProc
Definition: mbutils.c:69
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1548
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void * MemoryContextAllocHuge(MemoryContext context, Size size)
Definition: mcxt.c:1647
#define MaxAllocHugeSize
Definition: memutils.h:45
#define MAX_CONVERSION_GROWTH
Definition: pg_wchar.h:302
static Datum BoolGetDatum(bool X)
Definition: postgres.h:107
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:217

References BoolGetDatum(), ClientEncoding, CStringGetDatum(), CurrentMemoryContext, DatabaseEncoding, pg_enc2name::encoding, ereport, errcode(), errdetail(), errmsg(), ERROR, FunctionCall6, Int32GetDatum(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), repalloc(), ToClientConvProc, ToServerConvProc, and unconstify.

Referenced by pg_any_to_server(), and pg_server_to_any().

◆ pg_any_to_server()

char * pg_any_to_server ( const char *  s,
int  len,
int  encoding 
)

Definition at line 677 of file mbutils.c.

678{
679 if (len <= 0)
680 return unconstify(char *, s); /* empty string is always valid */
681
682 if (encoding == DatabaseEncoding->encoding ||
683 encoding == PG_SQL_ASCII)
684 {
685 /*
686 * No conversion is needed, but we must still validate the data.
687 */
688 (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
689 return unconstify(char *, s);
690 }
691
692 if (DatabaseEncoding->encoding == PG_SQL_ASCII)
693 {
694 /*
695 * No conversion is possible, but we must still validate the data,
696 * because the client-side code might have done string escaping using
697 * the selected client_encoding. If the client encoding is ASCII-safe
698 * then we just do a straight validation under that encoding. For an
699 * ASCII-unsafe encoding we have a problem: we dare not pass such data
700 * to the parser but we have no way to convert it. We compromise by
701 * rejecting the data if it contains any non-ASCII characters.
702 */
703 if (PG_VALID_BE_ENCODING(encoding))
704 (void) pg_verify_mbstr(encoding, s, len, false);
705 else
706 {
707 int i;
708
709 for (i = 0; i < len; i++)
710 {
711 if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
712 ereport(ERROR,
713 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
714 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
715 pg_enc2name_tbl[PG_SQL_ASCII].name,
716 (unsigned char) s[i])));
717 }
718 }
719 return unconstify(char *, s);
720 }
721
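722 /* Fast path if we can use cached conversion function */
723 if (encoding == ClientEncoding->encoding)
724 return perform_default_encoding_conversion(s, len, true);
725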
726 /* General case ... will not work outside transactions */
727 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
728 len,
729 encoding,
730 DatabaseEncoding->encoding);
731}
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
int i
Definition: isn.c:77
unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding, int dest_encoding)
Definition: mbutils.c:357
bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
Definition: mbutils.c:1567
static char * perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
Definition: mbutils.c:784
int32 encoding
Definition: pg_database.h:41
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:281

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, ereport, errcode(), errmsg(), ERROR, i, IS_HIGHBIT_SET, len, name, perform_default_encoding_conversion(), pg_do_encoding_conversion(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_BE_ENCODING, pg_verify_mbstr(), and unconstify.

Referenced by ASN1_STRING_to_text(), cache_single_string(), db_encoding_convert(), dsnowball_lexize(), pg_client_to_server(), pg_stat_statements_internal(), pgp_armor_headers(), PLyUnicode_Bytes(), read_extension_script_file(), tsearch_readline(), utf_u2e(), X509_NAME_to_cstring(), and xml_recv().

◆ PG_char_to_encoding()

Datum PG_char_to_encoding ( PG_FUNCTION_ARGS  )

Definition at line 1286 of file mbutils.c.

1287{
1288 Name s = PG_GETARG_NAME(0);
1289
1290 PG_RETURN_INT32(pg_char_to_encoding(NameStr(*s)));
1291}
Definition: c.h:712

References NameStr, pg_char_to_encoding, PG_GETARG_NAME, and PG_RETURN_INT32.

◆ pg_client_encoding()

Datum pg_client_encoding ( PG_FUNCTION_ARGS  )

◆ pg_client_to_server()

char * pg_client_to_server ( const char *  s,
int  len 
)

Definition at line 661 of file mbutils.c.

662{
663 return pg_any_to_server(s, len, ClientEncoding->encoding);
664}
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:677

References ClientEncoding, pg_enc2name::encoding, len, and pg_any_to_server().

Referenced by exec_bind_message(), parse_fcall_arguments(), pq_getmsgstring(), and pq_getmsgtext().

◆ pg_convert()

Datum pg_convert ( PG_FUNCTION_ARGS  )

Definition at line 554 of file mbutils.c.

555{
556 bytea *string = PG_GETARG_BYTEA_PP(0);
557 char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
558 int src_encoding = pg_char_to_encoding(src_encoding_name);
559 char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
560 int dest_encoding = pg_char_to_encoding(dest_encoding_name);
561 const char *src_str;
562 char *dest_str;
563 bytea *retval;
564 int len;
565
566 if (src_encoding < 0)
567 ereport(ERROR,
568 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
569 errmsg("invalid source encoding name \"%s\"",
570 src_encoding_name)));
571 if (dest_encoding < 0)
572 ereport(ERROR,
573 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
574 errmsg("invalid destination encoding name \"%s\"",
575 dest_encoding_name)));
576
577 /* make sure that source string is valid */
578 len = VARSIZE_ANY_EXHDR(string);
579 src_str = VARDATA_ANY(string);
580 (void) pg_verify_mbstr(src_encoding, src_str, len, false);
581
582 /* perform conversion */
583 dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
584 len,
585 src_encoding,
586 dest_encoding);
587
588
589 /* return source string if no conversion happened */
590 if (dest_str == src_str)
591 PG_RETURN_BYTEA_P(string);
592
593 /*
594 * build bytea data type structure.
595 */
596 len = strlen(dest_str);
597 retval = (bytea *) palloc(len + VARHDRSZ);
598 SET_VARSIZE(retval, len + VARHDRSZ);
599 memcpy(VARDATA(retval), dest_str, len);
600 pfree(dest_str);
601
602 /* free memory if allocated by the toaster */
603 PG_FREE_IF_COPY(string, 0);
604
605 PG_RETURN_BYTEA_P(retval);
606}
#define VARHDRSZ
Definition: c.h:663
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
void pfree(void *pointer)
Definition: mcxt.c:1528
void * palloc(Size size)
Definition: mcxt.c:1321
#define VARDATA(PTR)
Definition: varatt.h:278
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

References ereport, errcode(), errmsg(), ERROR, len, NameStr, palloc(), pfree(), pg_char_to_encoding, pg_do_encoding_conversion(), PG_FREE_IF_COPY, PG_GETARG_BYTEA_PP, PG_GETARG_NAME, PG_RETURN_BYTEA_P, pg_verify_mbstr(), SET_VARSIZE, unconstify, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by pg_convert_from(), and pg_convert_to().

◆ pg_convert_from()

Datum pg_convert_from ( PG_FUNCTION_ARGS  )

Definition at line 527 of file mbutils.c.

528{
529 Datum string = PG_GETARG_DATUM(0);
530 Datum src_encoding_name = PG_GETARG_DATUM(1);
531 Datum dest_encoding_name = DirectFunctionCall1(namein,
532 CStringGetDatum(DatabaseEncoding->name));
533 Datum result;
534
535 result = DirectFunctionCall3(pg_convert, string,
536 src_encoding_name, dest_encoding_name);
537
538 /*
539 * pg_convert returns a bytea, which we in turn return as text, relying on
540 * the fact that they are both in fact varlena types, and thus
541 * structurally identical. Although not all bytea values are valid text,
542 * in this case it will be because we've told pg_convert to return one
543 * that is valid as text in the current database encoding.
544 */
545 PG_RETURN_DATUM(result);
546}
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:686
Datum pg_convert(PG_FUNCTION_ARGS)
Definition: mbutils.c:554
uintptr_t Datum
Definition: postgres.h:69

References CStringGetDatum(), DatabaseEncoding, DirectFunctionCall1, DirectFunctionCall3, pg_enc2name::name, namein(), pg_convert(), PG_GETARG_DATUM, and PG_RETURN_DATUM.

◆ pg_convert_to()

Datum pg_convert_to ( PG_FUNCTION_ARGS  )

Definition at line 502 of file mbutils.c.

503{
504 Datum string = PG_GETARG_DATUM(0);
505 Datum dest_encoding_name = PG_GETARG_DATUM(1);
506 Datum src_encoding_name = DirectFunctionCall1(namein,
507 CStringGetDatum(DatabaseEncoding->name));
508 Datum result;
509
510 /*
511 * pg_convert expects a bytea as its first argument. We're passing it a
512 * text argument here, relying on the fact that they are both in fact
513 * varlena types, and thus structurally identical.
514 */
515 result = DirectFunctionCall3(pg_convert, string,
516 src_encoding_name, dest_encoding_name);
517
518 PG_RETURN_DATUM(result);
519}

References CStringGetDatum(), DatabaseEncoding, DirectFunctionCall1, DirectFunctionCall3, pg_enc2name::name, namein(), pg_convert(), PG_GETARG_DATUM, and PG_RETURN_DATUM.

◆ pg_database_encoding_character_incrementer()

mbcharacter_incrementer pg_database_encoding_character_incrementer ( void  )

Definition at line 1524 of file mbutils.c.

1525{
1526 /*
1527 * Eventually it might be best to add a field to pg_wchar_table[], but for
1528 * now we just use a switch.
1529 */
1530 switch (GetDatabaseEncoding())
1531 {
1532 case PG_UTF8:
1533 return pg_utf8_increment;
1534
1535 case PG_EUC_JP:
1536 return pg_eucjp_increment;
1537
1538 default:
1539 return pg_generic_charinc;
1540 }
1541}
static bool pg_generic_charinc(unsigned char *charptr, int len)
Definition: mbutils.c:1326
static bool pg_utf8_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1360
static bool pg_eucjp_increment(unsigned char *charptr, int length)
Definition: mbutils.c:1438
@ PG_EUC_JP
Definition: pg_wchar.h:227

References GetDatabaseEncoding(), PG_EUC_JP, pg_eucjp_increment(), pg_generic_charinc(), PG_UTF8, and pg_utf8_increment().

Referenced by make_greater_string().

◆ pg_database_encoding_max_length()

◆ pg_do_encoding_conversion()

unsigned char * pg_do_encoding_conversion ( unsigned char *  src,
int  len,
int  src_encoding,
int  dest_encoding 
)

Definition at line 357 of file mbutils.c.

359{
360 unsigned char *result;
361 Oid proc;
362
363 if (len <= 0)
364 return src; /* empty string is always valid */
365
366 if (src_encoding == dest_encoding)
367 return src; /* no conversion required, assume valid */
368
369 if (dest_encoding == PG_SQL_ASCII)
370 return src; /* any string is valid in SQL_ASCII */
371
372 if (src_encoding == PG_SQL_ASCII)
373 {
374 /* No conversion is possible, but we must validate the result */
375 (void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
376 return src;
377 }
378
379 if (!IsTransactionState()) /* shouldn't happen */
380 elog(ERROR, "cannot perform encoding conversion outside a transaction");
381
382 proc = FindDefaultConversionProc(src_encoding, dest_encoding);
383 if (!OidIsValid(proc))
384 ereport(ERROR,
385 (errcode(ERRCODE_UNDEFINED_FUNCTION),
386 errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
387 pg_encoding_to_char(src_encoding),
388 pg_encoding_to_char(dest_encoding))));
389
390 /*
391 * Allocate space for conversion result, being wary of integer overflow.
392 *
393 * len * MAX_CONVERSION_GROWTH is typically a vast overestimate of the
394 * required space, so it might exceed MaxAllocSize even though the result
395 * would actually fit. We do not want to hand back a result string that
396 * exceeds MaxAllocSize, because callers might not cope gracefully --- but
397 * if we just allocate more than that, and don't use it, that's fine.
398 */
399 if ((Size) len >= (MaxAllocHugeSize / (Size) MAX_CONVERSION_GROWTH))
400 ereport(ERROR,
401 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
402 errmsg("out of memory"),
403 errdetail("String of %d bytes is too long for encoding conversion.",
404 len)));
405
406 result = (unsigned char *)
407 MemoryContextAllocHuge(CurrentMemoryContext,
408 (Size) len * MAX_CONVERSION_GROWTH + 1);
409
410 (void) OidFunctionCall6(proc,
411 Int32GetDatum(src_encoding),
412 Int32GetDatum(dest_encoding),
413 CStringGetDatum((char *) src),
414 CStringGetDatum((char *) result),
415 Int32GetDatum(len),
416 BoolGetDatum(false));
417
418 /*
419 * If the result is large, it's worth repalloc'ing to release any extra
420 * space we asked for. The cutoff here is somewhat arbitrary, but we
421 * *must* check when len * MAX_CONVERSION_GROWTH exceeds MaxAllocSize.
422 */
423 if (len > 1000000)
424 {
425 Size resultlen = strlen((char *) result);
426
427 if (resultlen >= MaxAllocSize)
428 ereport(ERROR,
429 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
430 errmsg("out of memory"),
431 errdetail("String of %d bytes is too long for encoding conversion.",
432 len)));
433
434 result = (unsigned char *) repalloc(result, resultlen + 1);
435 }
436
437 return result;
438}
#define OidFunctionCall6(functionId, arg1, arg2, arg3, arg4, arg5, arg6)
Definition: fmgr.h:730
#define pg_encoding_to_char
Definition: pg_wchar.h:630
bool IsTransactionState(void)
Definition: xact.c:387

References BoolGetDatum(), CStringGetDatum(), CurrentMemoryContext, elog, ereport, errcode(), errdetail(), errmsg(), ERROR, FindDefaultConversionProc(), Int32GetDatum(), IsTransactionState(), len, MAX_CONVERSION_GROWTH, MaxAllocHugeSize, MaxAllocSize, MemoryContextAllocHuge(), OidFunctionCall6, OidIsValid, pg_encoding_to_char, PG_SQL_ASCII, pg_verify_mbstr(), and repalloc().

Referenced by convert_charset(), pg_any_to_server(), pg_convert(), and pg_server_to_any().

◆ pg_do_encoding_conversion_buf()

int pg_do_encoding_conversion_buf ( Oid  proc,
int  src_encoding,
int  dest_encoding,
unsigned char *  src,
int  srclen,
unsigned char *  dest,
int  destlen,
bool  noError 
)

Definition at line 470 of file mbutils.c.

476{
477 Datum result;
478
479 /*
480 * If the destination buffer is not large enough to hold the result in the
481 * worst case, limit the input size passed to the conversion function.
482 */
483 if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH))
484 srclen = ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH);
485
486 result = OidFunctionCall6(proc,
487 Int32GetDatum(src_encoding),
488 Int32GetDatum(dest_encoding),
489 CStringGetDatum((char *) src),
490 CStringGetDatum((char *) dest),
491 Int32GetDatum(srclen),
492 BoolGetDatum(noError));
493 return DatumGetInt32(result);
494}
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:207

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), generate_unaccent_rules::dest, Int32GetDatum(), MAX_CONVERSION_GROWTH, and OidFunctionCall6.

Referenced by CopyConversionError(), CopyConvertBuf(), and test_enc_conversion().

◆ pg_dsplen()

int pg_dsplen ( const char *  mbstr)

Definition at line 1031 of file mbutils.c.

1032{
1033 return pg_wchar_table[DatabaseEncoding->encoding].dsplen((const unsigned char *) mbstr);
1034}
mbdisplaylen_converter dsplen
Definition: pg_wchar.h:383

References DatabaseEncoding, pg_wchar_tbl::dsplen, pg_enc2name::encoding, and pg_wchar_table.

Referenced by p_isspecial().

◆ pg_encoding_max_length_sql()

Datum pg_encoding_max_length_sql ( PG_FUNCTION_ARGS  )

Definition at line 645 of file mbutils.c.

646{
647 int encoding = PG_GETARG_INT32(0);
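648
649 if (PG_VALID_ENCODING(encoding))
650 PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
651 else
652 PG_RETURN_NULL();
653}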
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269

References encoding, PG_GETARG_INT32, PG_RETURN_INT32, PG_RETURN_NULL, PG_VALID_ENCODING, and pg_wchar_table.

◆ pg_encoding_mb2wchar_with_len()

int pg_encoding_mb2wchar_with_len ( int  encoding,
const char *  from,
pg_wchar *  to,
int  len 
)

Definition at line 994 of file mbutils.c.

996{
997 return pg_wchar_table[encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
998}
mb2wchar_with_len_converter mb2wchar_with_len
Definition: pg_wchar.h:378

References encoding, len, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

◆ pg_encoding_mbcliplen()

int pg_encoding_mbcliplen ( int  encoding,
const char *  mbstr,
int  len,
int  limit 
)

Definition at line 1094 of file mbutils.c.

1096{
1097 mblen_converter mblen_fn;
1098 int clen = 0;
1099 int l;
1100
1101 /* optimization for single byte encoding */
1102 if (pg_encoding_max_length(encoding) == 1)
1103 return cliplen(mbstr, len, limit);
1104
1105 mblen_fn = pg_wchar_table[encoding].mblen;
1106
1107 while (len > 0 && *mbstr)
1108 {
1109 l = (*mblen_fn) ((const unsigned char *) mbstr);
1110 if ((clen + l) > limit)
1111 break;
1112 clen += l;
1113 if (clen == limit)
1114 break;
1115 len -= l;
1116 mbstr += l;
1117 }
1118 return clen;
1119}
static int cliplen(const char *str, int len, int limit)
Definition: mbutils.c:1151
int(* mblen_converter)(const unsigned char *mbstr)
Definition: pg_wchar.h:366
mblen_converter mblen
Definition: pg_wchar.h:382
int pg_encoding_max_length(int encoding)
Definition: wchar.c:2213

References cliplen(), encoding, len, pg_wchar_tbl::mblen, pg_encoding_max_length(), and pg_wchar_table.

Referenced by pg_mbcliplen().

◆ PG_encoding_to_char()

Datum PG_encoding_to_char ( PG_FUNCTION_ARGS  )

Definition at line 1294 of file mbutils.c.

1295{
1296 int32 encoding = PG_GETARG_INT32(0);
1297 const char *encoding_name = pg_encoding_to_char(encoding);
1298
1299 return DirectFunctionCall1(namein, CStringGetDatum(encoding_name));
1300}
int32_t int32
Definition: c.h:498

References CStringGetDatum(), DirectFunctionCall1, encoding, namein(), pg_encoding_to_char, and PG_GETARG_INT32.

◆ pg_encoding_wchar2mb_with_len()

int pg_encoding_wchar2mb_with_len ( int  encoding,
const pg_wchar *  from,
char *  to,
int  len 
)

Definition at line 1016 of file mbutils.c.

1018{
1019 return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
1020}
wchar2mb_with_len_converter wchar2mb_with_len
Definition: pg_wchar.h:380

References encoding, len, pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

◆ pg_eucjp_increment()

static bool pg_eucjp_increment ( unsigned char *  charptr,
int  length 
)
static

Definition at line 1438 of file mbutils.c.

1439{
1440 unsigned char c1,
1441 c2;
1442 int i;
1443
1444 c1 = *charptr;
1445
1446 switch (c1)
1447 {
1448 case SS2: /* JIS X 0201 */
1449 if (length != 2)
1450 return false;
1451
1452 c2 = charptr[1];
1453
1454 if (c2 >= 0xdf)
1455 charptr[0] = charptr[1] = 0xa1;
1456 else if (c2 < 0xa1)
1457 charptr[1] = 0xa1;
1458 else
1459 charptr[1]++;
1460 break;
1461
1462 case SS3: /* JIS X 0212 */
1463 if (length != 3)
1464 return false;
1465
1466 for (i = 2; i > 0; i--)
1467 {
1468 c2 = charptr[i];
1469 if (c2 < 0xa1)
1470 {
1471 charptr[i] = 0xa1;
1472 return true;
1473 }
1474 else if (c2 < 0xfe)
1475 {
1476 charptr[i]++;
1477 return true;
1478 }
1479 }
1480
1481 /* Out of 3-byte code region */
1482 return false;
1483
1484 default:
1485 if (IS_HIGHBIT_SET(c1)) /* JIS X 0208? */
1486 {
1487 if (length != 2)
1488 return false;
1489
1490 for (i = 1; i >= 0; i--)
1491 {
1492 c2 = charptr[i];
1493 if (c2 < 0xa1)
1494 {
1495 charptr[i] = 0xa1;
1496 return true;
1497 }
1498 else if (c2 < 0xfe)
1499 {
1500 charptr[i]++;
1501 return true;
1502 }
1503 }
1504
1505 /* Out of 2 byte code region */
1506 return false;
1507 }
1508 else
1509 { /* ASCII, single byte */
1510 if (c1 > 0x7e)
1511 return false;
1512 (*charptr)++;
1513 }
1514 break;
1515 }
1516
1517 return true;
1518}
#define SS2
Definition: pg_wchar.h:38
#define SS3
Definition: pg_wchar.h:39

References i, IS_HIGHBIT_SET, SS2, and SS3.

Referenced by pg_database_encoding_character_incrementer().

◆ pg_generic_charinc()

static bool pg_generic_charinc ( unsigned char *  charptr,
int  len 
)
static

Definition at line 1326 of file mbutils.c.

1327{
1328 unsigned char *lastbyte = charptr + len - 1;
1329 mbchar_verifier mbverify;
1330
1331 /* We can just invoke the character verifier directly. */
1332 mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
1333
1334 while (*lastbyte < (unsigned char) 255)
1335 {
1336 (*lastbyte)++;
1337 if ((*mbverify) (charptr, len) == len)
1338 return true;
1339 }
1340
1341 return false;
1342}
int(* mbchar_verifier)(const unsigned char *mbstr, int len)
Definition: pg_wchar.h:372
mbchar_verifier mbverifychar
Definition: pg_wchar.h:384

References GetDatabaseEncoding(), len, pg_wchar_tbl::mbverifychar, and pg_wchar_table.

Referenced by pg_database_encoding_character_incrementer().

◆ pg_get_client_encoding()

int pg_get_client_encoding ( void  )

Definition at line 337 of file mbutils.c.

338{
339 return ClientEncoding->encoding;
340}

References ClientEncoding, and pg_enc2name::encoding.

Referenced by BeginCopyFrom(), BeginCopyTo(), and xml_send().

◆ pg_get_client_encoding_name()

const char * pg_get_client_encoding_name ( void  )

Definition at line 346 of file mbutils.c.

347{
348 return ClientEncoding->name;
349}

References ClientEncoding, and pg_enc2name::name.

◆ pg_mb2wchar()

int pg_mb2wchar ( const char *  from,
pg_wchar *  to 
)

Definition at line 980 of file mbutils.c.

981{
982 return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, strlen(from));
983}

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mb2wchar_with_len, and pg_wchar_table.

◆ pg_mb2wchar_with_len()

◆ pg_mbcharcliplen()

int pg_mbcharcliplen ( const char *  mbstr,
int  len,
int  limit 
)

Definition at line 1126 of file mbutils.c.

1127{
1128 int clen = 0;
1129 int nch = 0;
1130 int l;
1131
1132 /* optimization for single byte encoding */
1133 if (pg_database_encoding_max_length() == 1)
1134 return cliplen(mbstr, len, limit);
1135
1136 while (len > 0 && *mbstr)
1137 {
1138 l = pg_mblen(mbstr);
1139 nch++;
1140 if (nch > limit)
1141 break;
1142 clen += l;
1143 len -= l;
1144 mbstr += l;
1145 }
1146 return clen;
1147}
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1547
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1024

References cliplen(), len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), text_left(), text_right(), varchar(), and varchar_input().

◆ pg_mbcliplen()

◆ pg_mblen()

int pg_mblen ( const char *  mbstr)

Definition at line 1024 of file mbutils.c.

1025{
1026 return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
1027}

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_tbl::mblen, and pg_wchar_table.

Referenced by addCompoundAffixFlagValue(), bit_in(), charlen_to_bytelen(), DCH_from_char(), dotrim(), find_word(), findchar(), findchar2(), findwrd(), gbt_var_node_cp_len(), get_modifiers(), get_nextfield(), get_wildcard_part(), getlexeme(), getNextFlagFromString(), gettoken_query(), gettoken_query_standard(), gettoken_query_websearch(), gettoken_tsvector(), hex_decode_safe(), infix(), initTrie(), lpad(), make_trigrams(), map_sql_identifier_to_xml_name(), map_xml_name_to_sql_identifier(), match_prosrc_to_literal(), mb_strchr(), NIImportAffixes(), NIImportDictionary(), NIImportOOAffixes(), NUM_eat_non_data_chars(), NUM_processor(), parse_affentry(), parse_format(), parse_lquery(), parse_ltree(), parse_or_operator(), parse_re_flags(), parse_test_flags(), pg_base64_decode(), pg_mbcharcliplen(), pg_mbstrlen(), pg_mbstrlen_with_len(), prssyntaxerror(), px_crypt_shacrypt(), readstoplist(), report_json_context(), rpad(), RS_compile(), RS_execute(), RS_isRegis(), similar_escape_internal(), split_text(), t_isalnum(), t_isalpha(), text_format(), text_position_next(), text_position_next_internal(), text_reverse(), text_substring(), text_to_bits(), textregexreplace(), thesaurusRead(), TParserGet(), translate(), ts_stat_sql(), tsvectorout(), unaccent_lexize(), varbit_in(), varstr_levenshtein(), and wchareq().

◆ pg_mbstrlen()

int pg_mbstrlen ( const char *  mbstr)

Definition at line 1038 of file mbutils.c.

1039{
1040 int len = 0;
1041
1042 /* optimization for single byte encoding */
1043 if (pg_database_encoding_max_length() == 1)
1044 return strlen(mbstr);
1045
1046 while (*mbstr)
1047 {
1048 mbstr += pg_mblen(mbstr);
1049 len++;
1050 }
1051 return len;
1052}

References len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by NUM_processor(), and text_format_append_string().

◆ pg_mbstrlen_with_len()

int pg_mbstrlen_with_len ( const char *  mbstr,
int  limit 
)

Definition at line 1058 of file mbutils.c.

1059{
1060 int len = 0;
1061
1062 /* optimization for single byte encoding */
1063 if (pg_database_encoding_max_length() == 1)
1064 return limit;
1065
1066 while (limit > 0 && *mbstr)
1067 {
1068 int l = pg_mblen(mbstr);
1069
1070 limit -= l;
1071 mbstr += l;
1072 len++;
1073 }
1074 return len;
1075}

References len, pg_database_encoding_max_length(), and pg_mblen().

Referenced by bpchar(), bpchar_input(), bpcharlen(), executor_errposition(), lpad(), match_prosrc_to_query(), parser_errposition(), plpgsql_scanner_errposition(), rpad(), scanner_errposition(), similar_escape_internal(), text_left(), text_length(), text_position_get_match_pos(), text_right(), text_substring(), unicode_assigned(), unicode_is_normalized(), unicode_normalize_func(), and varstr_levenshtein().

◆ pg_server_to_any()

char * pg_server_to_any ( const char *  s,
int  len,
int  encoding 
)

Definition at line 750 of file mbutils.c.

751{
752 if (len <= 0)
753 return unconstify(char *, s); /* empty string is always valid */
754
755 if (encoding == DatabaseEncoding->encoding ||
756 encoding == PG_SQL_ASCII)
757 return unconstify(char *, s); /* assume data is valid */
758
759 if (DatabaseEncoding->encoding == PG_SQL_ASCII)
760 {
761 /* No conversion is possible, but we must validate the result */
762 (void) pg_verify_mbstr(encoding, s, len, false);
763 return unconstify(char *, s);
764 }
765
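766 /* Fast path if we can use cached conversion function */
767 if (encoding == ClientEncoding->encoding)
768 return perform_default_encoding_conversion(s, len, false);
769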
770 /* General case ... will not work outside transactions */
771 return (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, s),
772 len,
773 DatabaseEncoding->encoding,
774 encoding);
775}

References ClientEncoding, DatabaseEncoding, encoding, pg_enc2name::encoding, len, perform_default_encoding_conversion(), pg_do_encoding_conversion(), PG_SQL_ASCII, pg_verify_mbstr(), and unconstify.

Referenced by compareStrings(), CopyAttributeOutCSV(), CopyAttributeOutText(), CopyToTextLikeStart(), daitch_mokotoff(), dsnowball_lexize(), hv_fetch_string(), hv_store_string(), pg_server_to_client(), PLyUnicode_FromStringAndSize(), and utf_e2u().

◆ pg_server_to_client()

char * pg_server_to_client ( const char *  s,
int  len 
)

Definition at line 739 of file mbutils.c.

740{
741 return pg_server_to_any(s, len, ClientEncoding->encoding);
742}
char * pg_server_to_any(const char *s, int len, int encoding)
Definition: mbutils.c:750

References ClientEncoding, pg_enc2name::encoding, len, and pg_server_to_any().

Referenced by pq_puttextmessage(), pq_sendcountedtext(), pq_sendstring(), pq_sendtext(), and pq_writestring().

◆ pg_unicode_to_server()

void pg_unicode_to_server ( pg_wchar  c,
unsigned char *  s 
)

Definition at line 865 of file mbutils.c.

866{
867 unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
868 int c_as_utf8_len;
869 int server_encoding;
870
871 /*
872 * Complain if invalid Unicode code point. The choice of errcode here is
873 * debatable, but really our caller should have checked this anyway.
874 */
875 if (!is_valid_unicode_codepoint(c))
876 ereport(ERROR,
877 (errcode(ERRCODE_SYNTAX_ERROR),
878 errmsg("invalid Unicode code point")));
879
880 /* Otherwise, if it's in ASCII range, conversion is trivial */
881 if (c <= 0x7F)
882 {
883 s[0] = (unsigned char) c;
884 s[1] = '\0';
885 return;
886 }
887
888 /* If the server encoding is UTF-8, we just need to reformat the code */
889 server_encoding = GetDatabaseEncoding();
890 if (server_encoding == PG_UTF8)
891 {
892 unicode_to_utf8(c, s);
893 s[pg_utf_mblen(s)] = '\0';
894 return;
895 }
896
897 /* For all other cases, we must have a conversion function available */
898 if (Utf8ToServerConvProc == NULL)
899 ereport(ERROR,
900 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
901 errmsg("conversion between %s and %s is not supported",
902 pg_enc2name_tbl[PG_UTF8].name,
903 GetDatabaseEncodingName())));
904
905 /* Construct UTF-8 source string */
906 unicode_to_utf8(c, c_as_utf8);
907 c_as_utf8_len = pg_utf_mblen(c_as_utf8);
908 c_as_utf8[c_as_utf8_len] = '\0';
909
910 /* Convert, or throw error if we can't */
911 FunctionCall6(Utf8ToServerConvProc,
912 Int32GetDatum(PG_UTF8),
913 Int32GetDatum(server_encoding),
914 CStringGetDatum((char *) c_as_utf8),
915 CStringGetDatum((char *) s),
916 Int32GetDatum(c_as_utf8_len),
917 BoolGetDatum(false));
918}
#define MAX_MULTIBYTE_CHAR_LEN
Definition: pg_wchar.h:33
#define pg_utf_mblen
Definition: pg_wchar.h:633
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575
static bool is_valid_unicode_codepoint(pg_wchar c)
Definition: pg_wchar.h:519
char * c

References BoolGetDatum(), CStringGetDatum(), ereport, errcode(), errmsg(), ERROR, FunctionCall6, GetDatabaseEncoding(), GetDatabaseEncodingName(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, name, pg_enc2name_tbl, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addunicode(), addUnicodeChar(), map_xml_name_to_sql_identifier(), str_udeescape(), and unistr().

◆ pg_unicode_to_server_noerror()

bool pg_unicode_to_server_noerror ( pg_wchar  c,
unsigned char *  s 
)

Definition at line 927 of file mbutils.c.

928{
929 unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
930 int c_as_utf8_len;
931 int converted_len;
932 int server_encoding;
933
934 /* Fail if invalid Unicode code point */
935 if (!is_valid_unicode_codepoint(c))
936 return false;
937
938 /* Otherwise, if it's in ASCII range, conversion is trivial */
939 if (c <= 0x7F)
940 {
941 s[0] = (unsigned char) c;
942 s[1] = '\0';
943 return true;
944 }
945
946 /* If the server encoding is UTF-8, we just need to reformat the code */
947 server_encoding = GetDatabaseEncoding();
948 if (server_encoding == PG_UTF8)
949 {
950 unicode_to_utf8(c, s);
951 s[pg_utf_mblen(s)] = '\0';
952 return true;
953 }
954
955 /* For all other cases, we must have a conversion function available */
956 if (Utf8ToServerConvProc == NULL)
957 return false;
958
959 /* Construct UTF-8 source string */
960 unicode_to_utf8(c, c_as_utf8);
961 c_as_utf8_len = pg_utf_mblen(c_as_utf8);
962 c_as_utf8[c_as_utf8_len] = '\0';
963
964 /* Convert, but without throwing error if we can't */
965 converted_len = DatumGetInt32(FunctionCall6(Utf8ToServerConvProc,
966 Int32GetDatum(PG_UTF8),
967 Int32GetDatum(server_encoding),
968 CStringGetDatum((char *) c_as_utf8),
969 CStringGetDatum((char *) s),
970 Int32GetDatum(c_as_utf8_len),
971 BoolGetDatum(true)));
972
973 /* Conversion was successful iff it consumed the whole input */
974 return (converted_len == c_as_utf8_len);
975}

References BoolGetDatum(), CStringGetDatum(), DatumGetInt32(), FunctionCall6, GetDatabaseEncoding(), Int32GetDatum(), is_valid_unicode_codepoint(), MAX_MULTIBYTE_CHAR_LEN, PG_UTF8, pg_utf_mblen, unicode_to_utf8(), and Utf8ToServerConvProc.

Referenced by addUnicodeChar(), and json_lex_string().

◆ pg_utf8_increment()

static bool pg_utf8_increment ( unsigned char *  charptr,
int  length 
)
static

Definition at line 1360 of file mbutils.c.

1361{
1362 unsigned char a;
1363 unsigned char limit;
1364
1365 switch (length)
1366 {
1367 default:
1368 /* reject lengths 5 and 6 for now */
1369 return false;
1370 case 4:
1371 a = charptr[3];
1372 if (a < 0xBF)
1373 {
1374 charptr[3]++;
1375 break;
1376 }
1377 /* FALL THRU */
1378 case 3:
1379 a = charptr[2];
1380 if (a < 0xBF)
1381 {
1382 charptr[2]++;
1383 break;
1384 }
1385 /* FALL THRU */
1386 case 2:
1387 a = charptr[1];
1388 switch (*charptr)
1389 {
1390 case 0xED:
1391 limit = 0x9F;
1392 break;
1393 case 0xF4:
1394 limit = 0x8F;
1395 break;
1396 default:
1397 limit = 0xBF;
1398 break;
1399 }
1400 if (a < limit)
1401 {
1402 charptr[1]++;
1403 break;
1404 }
1405 /* FALL THRU */
1406 case 1:
1407 a = *charptr;
1408 if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
1409 return false;
1410 charptr[0]++;
1411 break;
1412 }
1413
1414 return true;
1415}
int a
Definition: isn.c:73

References a.

Referenced by pg_database_encoding_character_incrementer().

◆ pg_verify_mbstr()

bool pg_verify_mbstr ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1567 of file mbutils.c.

1568{
1569 int oklen;
1570
1571 Assert(PG_VALID_ENCODING(encoding));
1572
1573 oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
1574 if (oklen != len)
1575 {
1576 if (noError)
1577 return false;
1578 report_invalid_encoding(encoding, mbstr + oklen, len - oklen);
1579 }
1580 return true;
1581}
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1699
mbstr_verifier mbverifystr
Definition: pg_wchar.h:385

References Assert(), encoding, len, pg_wchar_tbl::mbverifystr, PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by AddFileToBackupManifest(), LogicalOutputWrite(), pg_any_to_server(), pg_convert(), pg_do_encoding_conversion(), pg_server_to_any(), pg_verifymbstr(), and read_extension_script_file().

◆ pg_verify_mbstr_len()

int pg_verify_mbstr_len ( int  encoding,
const char *  mbstr,
int  len,
bool  noError 
)

Definition at line 1598 of file mbutils.c.

1599{
1600 mbchar_verifier mbverifychar;
1601 int mb_len;
1602
1603 Assert(PG_VALID_ENCODING(encoding));
1604
1605 /*
1606 * In single-byte encodings, we need only reject nulls (\0).
1607 */
1608 if (pg_encoding_max_length(encoding) <= 1)
1609 {
1610 const char *nullpos = memchr(mbstr, 0, len);
1611
1612 if (nullpos == NULL)
1613 return len;
1614 if (noError)
1615 return -1;
1616 report_invalid_encoding(encoding, nullpos, 1);
1617 }
1618
1619 /* fetch function pointer just once */
1620 mbverifychar = pg_wchar_table[encoding].mbverifychar;
1621
1622 mb_len = 0;
1623
1624 while (len > 0)
1625 {
1626 int l;
1627
1628 /* fast path for ASCII-subset characters */
1629 if (!IS_HIGHBIT_SET(*mbstr))
1630 {
1631 if (*mbstr != '\0')
1632 {
1633 mb_len++;
1634 mbstr++;
1635 len--;
1636 continue;
1637 }
1638 if (noError)
1639 return -1;
1640 report_invalid_encoding(encoding, mbstr, len);
1641 }
1642
1643 l = (*mbverifychar) ((const unsigned char *) mbstr, len);
1644
1645 if (l < 0)
1646 {
1647 if (noError)
1648 return -1;
1649 report_invalid_encoding(encoding, mbstr, len);
1650 }
1651
1652 mbstr += l;
1653 len -= l;
1654 mb_len++;
1655 }
1656 return mb_len;
1657}

References Assert(), encoding, IS_HIGHBIT_SET, len, pg_wchar_tbl::mbverifychar, pg_encoding_max_length(), PG_VALID_ENCODING, pg_wchar_table, and report_invalid_encoding().

Referenced by length_in_encoding().

◆ pg_verifymbstr()

bool pg_verifymbstr ( const char *  mbstr,
int  len,
bool  noError 
)

◆ pg_wchar2mb()

int pg_wchar2mb ( const pg_wchar from,
char *  to 
)

Definition at line 1002 of file mbutils.c.

1003{
1004 return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, pg_wchar_strlen(from));
1005}
size_t pg_wchar_strlen(const pg_wchar *str)
Definition: wstrncmp.c:70

References DatabaseEncoding, pg_enc2name::encoding, pg_wchar_strlen(), pg_wchar_table, and pg_wchar_tbl::wchar2mb_with_len.

◆ pg_wchar2mb_with_len()

int pg_wchar2mb_with_len ( const pg_wchar from,
char *  to,
int  len 
)

◆ PrepareClientEncoding()

int PrepareClientEncoding ( int  encoding)

Definition at line 111 of file mbutils.c.

112{
113 int current_server_encoding;
114 ListCell *lc;
115
116 if (!PG_VALID_FE_ENCODING(encoding))
117 return -1;
118
119 /* Can't do anything during startup, per notes above */
120 if (!backend_startup_complete)
121 return 0;
122
123 current_server_encoding = GetDatabaseEncoding();
124
125 /*
126 * Check for cases that require no conversion function.
127 */
128 if (current_server_encoding == encoding ||
129 current_server_encoding == PG_SQL_ASCII ||
130 encoding == PG_SQL_ASCII)
131 return 0;
132
133 if (IsTransactionState())
134 {
135 /*
136 * If we're in a live transaction, it's safe to access the catalogs,
137 * so look up the functions. We repeat the lookup even if the info is
138 * already cached, so that we can react to changes in the contents of
139 * pg_conversion.
140 */
141 Oid to_server_proc,
142 to_client_proc;
143 ConvProcInfo *convinfo;
144 MemoryContext oldcontext;
145
146 to_server_proc = FindDefaultConversionProc(encoding,
147 current_server_encoding);
148 if (!OidIsValid(to_server_proc))
149 return -1;
150 to_client_proc = FindDefaultConversionProc(current_server_encoding,
151 encoding);
152 if (!OidIsValid(to_client_proc))
153 return -1;
154
155 /*
156 * Load the fmgr info into TopMemoryContext (could still fail here)
157 */
158 convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
159 sizeof(ConvProcInfo));
160 convinfo->s_encoding = current_server_encoding;
161 convinfo->c_encoding = encoding;
162 fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
163 TopMemoryContext);
164 fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
165 TopMemoryContext);
166
167 /* Attach new info to head of list */
168 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
169 ConvProcList = lcons(convinfo, ConvProcList);
170 MemoryContextSwitchTo(oldcontext);
171
172 /*
173 * We cannot yet remove any older entry for the same encoding pair,
174 * since it could still be in use. SetClientEncoding will clean up.
175 */
176
177 return 0; /* success */
178 }
179 else
180 {
181 /*
182 * If we're not in a live transaction, the only thing we can do is
183 * restore a previous setting using the cache. This covers all
184 * transaction-rollback cases. The only case it might not work for is
185 * trying to change client_encoding on the fly by editing
186 * postgresql.conf and SIGHUP'ing. Which would probably be a stupid
187 * thing to do anyway.
188 */
189 foreach(lc, ConvProcList)
190 {
191 ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
192
193 if (oldinfo->s_encoding == current_server_encoding &&
194 oldinfo->c_encoding == encoding)
195 return 0;
196 }
197
198 return -1; /* it's not cached, so fail */
199 }
200}
List * lcons(void *datum, List *list)
Definition: list.c:495
static List * ConvProcList
Definition: mbutils.c:62
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define lfirst(lc)
Definition: pg_list.h:172
#define PG_VALID_FE_ENCODING(_enc)
Definition: pg_wchar.h:291
int s_encoding
Definition: mbutils.c:56
FmgrInfo to_client_info
Definition: mbutils.c:59
int c_encoding
Definition: mbutils.c:57
FmgrInfo to_server_info
Definition: mbutils.c:58

References backend_startup_complete, ConvProcInfo::c_encoding, ConvProcList, encoding, FindDefaultConversionProc(), fmgr_info_cxt(), GetDatabaseEncoding(), IsTransactionState(), lcons(), lfirst, MemoryContextAlloc(), MemoryContextSwitchTo(), OidIsValid, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, and TopMemoryContext.

Referenced by check_client_encoding(), and InitializeClientEncoding().

◆ report_invalid_encoding()

void report_invalid_encoding ( int  encoding,
const char *  mbstr,
int  len 
)

Definition at line 1699 of file mbutils.c.

1700{
1701 int l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
1702 char buf[8 * 5 + 1];
1703 char *p = buf;
1704 int j,
1705 jlimit;
1706
1707 jlimit = Min(l, len);
1708 jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1709
1710 for (j = 0; j < jlimit; j++)
1711 {
1712 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1713 if (j < jlimit - 1)
1714 p += sprintf(p, " ");
1715 }
1716
1717 ereport(ERROR,
1718 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1719 errmsg("invalid byte sequence for encoding \"%s\": %s",
1720 pg_enc2name_tbl[encoding].name,
1721 buf)));
1722}
int j
Definition: isn.c:78
static char * buf
Definition: pg_test_fsync.c:72
#define sprintf
Definition: port.h:241
int pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr, size_t remaining)
Definition: wchar.c:2147

References buf, encoding, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.

Referenced by big52euc_tw(), big52mic(), CopyConversionError(), euc_cn2mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), iso8859_1_to_utf8(), latin2mic(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), pg_verify_mbstr(), pg_verify_mbstr_len(), shift_jis_20042euc_jis_2004(), sjis2euc_jp(), sjis2mic(), test_enc_conversion(), utf8_to_iso8859_1(), and UtfToLocal().

◆ report_untranslatable_char()

void report_untranslatable_char ( int  src_encoding,
int  dest_encoding,
const char *  mbstr,
int  len 
)

Definition at line 1731 of file mbutils.c.

1733{
1734 int l;
1735 char buf[8 * 5 + 1];
1736 char *p = buf;
1737 int j,
1738 jlimit;
1739
1740 /*
1741 * We probably could use plain pg_encoding_mblen(), because
1742 * gb18030_to_utf8() verifies before it converts. All conversions should.
1743 * For src_encoding!=GB18030, len>0 meets pg_encoding_mblen() needs. Even
1744 * so, be defensive, since a buggy conversion might pass invalid data.
1745 * This is not a performance-critical path.
1746 */
1747 l = pg_encoding_mblen_or_incomplete(src_encoding, mbstr, len);
1748 jlimit = Min(l, len);
1749 jlimit = Min(jlimit, 8); /* prevent buffer overrun */
1750
1751 for (j = 0; j < jlimit; j++)
1752 {
1753 p += sprintf(p, "0x%02x", (unsigned char) mbstr[j]);
1754 if (j < jlimit - 1)
1755 p += sprintf(p, " ");
1756 }
1757
1758 ereport(ERROR,
1759 (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
1760 errmsg("character with byte sequence %s in encoding \"%s\" has no equivalent in encoding \"%s\"",
1761 buf,
1762 pg_enc2name_tbl[src_encoding].name,
1763 pg_enc2name_tbl[dest_encoding].name)));
1764}

References buf, ereport, errcode(), errmsg(), ERROR, j, len, Min, name, pg_enc2name_tbl, pg_encoding_mblen_or_incomplete(), and sprintf.

Referenced by big52euc_tw(), big52mic(), euc_tw2big5(), latin2mic_with_table(), local2local(), LocalToUtf(), mic2big5(), mic2euc_cn(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2latin(), mic2latin_with_table(), mic2sjis(), utf8_to_iso8859_1(), and UtfToLocal().

◆ SetClientEncoding()

int SetClientEncoding ( int  encoding)

Definition at line 209 of file mbutils.c.

210{
211 int current_server_encoding;
212 bool found;
213 ListCell *lc;
214
215 if (!PG_VALID_FE_ENCODING(encoding))
216 return -1;
217
218 /* Can't do anything during startup, per notes above */
219 if (!backend_startup_complete)
220 {
221 pending_client_encoding = encoding;
222 return 0;
223 }
224
225 current_server_encoding = GetDatabaseEncoding();
226
227 /*
228 * Check for cases that require no conversion function.
229 */
230 if (current_server_encoding == encoding ||
231 current_server_encoding == PG_SQL_ASCII ||
232 encoding == PG_SQL_ASCII)
233 {
234 ClientEncoding = &pg_enc2name_tbl[encoding];
235 ToServerConvProc = NULL;
236 ToClientConvProc = NULL;
237 return 0;
238 }
239
240 /*
241 * Search the cache for the entry previously prepared by
242 * PrepareClientEncoding; if there isn't one, we lose. While at it,
243 * release any duplicate entries so that repeated Prepare/Set cycles don't
244 * leak memory.
245 */
246 found = false;
247 foreach(lc, ConvProcList)
248 {
249 ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
250
251 if (convinfo->s_encoding == current_server_encoding &&
252 convinfo->c_encoding == encoding)
253 {
254 if (!found)
255 {
256 /* Found newest entry, so set up */
257 ClientEncoding = &pg_enc2name_tbl[encoding];
258 ToServerConvProc = &convinfo->to_server_info;
259 ToClientConvProc = &convinfo->to_client_info;
260 found = true;
261 }
262 else
263 {
264 /* Duplicate entry, release it */
265 ConvProcList = foreach_delete_current(ConvProcList, lc);
266 pfree(convinfo);
267 }
268 }
269 }
270
271 if (found)
272 return 0; /* success */
273 else
274 return -1; /* it's not cached, so fail */
275}
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391

References backend_startup_complete, ConvProcInfo::c_encoding, ClientEncoding, ConvProcList, encoding, foreach_delete_current, GetDatabaseEncoding(), lfirst, pending_client_encoding, pfree(), pg_enc2name_tbl, PG_SQL_ASCII, PG_VALID_FE_ENCODING, ConvProcInfo::s_encoding, ConvProcInfo::to_client_info, ConvProcInfo::to_server_info, ToClientConvProc, and ToServerConvProc.

Referenced by assign_client_encoding(), InitializeClientEncoding(), and ParallelWorkerMain().

◆ SetDatabaseEncoding()

void SetDatabaseEncoding ( int  encoding)

Definition at line 1162 of file mbutils.c.

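1163{
1164 if (!PG_VALID_BE_ENCODING(encoding))
1165 elog(ERROR, "invalid database encoding: %d", encoding);
1166
1167 DatabaseEncoding = &pg_enc2name_tbl[encoding];
1168 Assert(DatabaseEncoding->encoding == encoding);
1169}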

References Assert(), DatabaseEncoding, elog, encoding, pg_enc2name::encoding, ERROR, pg_enc2name_tbl, and PG_VALID_BE_ENCODING.

Referenced by CheckMyDatabase().

◆ SetMessageEncoding()

void SetMessageEncoding ( int  encoding)

Definition at line 1172 of file mbutils.c.

1173{
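1174 /* Some calls happen before we can elog()! */
1175 Assert(PG_VALID_ENCODING(encoding));
1176
1177 MessageEncoding = &pg_enc2name_tbl[encoding];
1178 Assert(MessageEncoding->encoding == encoding);
1179}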

References Assert(), encoding, pg_enc2name::encoding, MessageEncoding, pg_enc2name_tbl, and PG_VALID_ENCODING.

Referenced by pg_perm_setlocale().

Variable Documentation

◆ backend_startup_complete

bool backend_startup_complete = false
static

Definition at line 91 of file mbutils.c.

Referenced by InitializeClientEncoding(), PrepareClientEncoding(), and SetClientEncoding().

◆ ClientEncoding

◆ ConvProcList

List* ConvProcList = NIL
static

Definition at line 62 of file mbutils.c.

Referenced by PrepareClientEncoding(), and SetClientEncoding().

◆ DatabaseEncoding

◆ MessageEncoding

const pg_enc2name* MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]
static

Definition at line 83 of file mbutils.c.

Referenced by GetMessageEncoding(), and SetMessageEncoding().

◆ pending_client_encoding

int pending_client_encoding = PG_SQL_ASCII
static

Definition at line 92 of file mbutils.c.

Referenced by InitializeClientEncoding(), and SetClientEncoding().

◆ ToClientConvProc

FmgrInfo* ToClientConvProc = NULL
static

Definition at line 69 of file mbutils.c.

Referenced by perform_default_encoding_conversion(), and SetClientEncoding().

◆ ToServerConvProc

FmgrInfo* ToServerConvProc = NULL
static

Definition at line 68 of file mbutils.c.

Referenced by perform_default_encoding_conversion(), and SetClientEncoding().

◆ Utf8ToServerConvProc

FmgrInfo* Utf8ToServerConvProc = NULL
static