/*-------------------------------------------------------------------------
 *
 * util.c
 *	  General purpose routines used by JSON data type support.
 *
 * Copyright (c) 2010, PostgreSQL Global Development Group
 * Written by Joey Adams .
 *
 *-------------------------------------------------------------------------
 */

#include "util.h"

#include "catalog/namespace.h"
#include "catalog/pg_enum.h"
#include "utils/syscache.h"

#include "compat.h"

/*
 * getTypeInfo
 *	  Retrieve information about a type, along with either its
 *	  input, output, binary receive, or binary send procedure.
 *
 * which_func should be one of:
 *	  IOFunc_input
 *	  IOFunc_output
 *	  IOFunc_receive
 *	  IOFunc_send
 *
 * mcxt is the memory context the IO function selected will use to store
 * subsidiary data.  The memory context should live at least as long as
 * the TypeInfo structure you specify.
 *
 * On return, *d holds the arguments passed in (type, which_func, mcxt),
 * the values reported by get_type_io_data(), the function lookup info
 * filled in by fmgr_info_cxt(), and the type's category/preferred flag.
 */
void
getTypeInfo(TypeInfo *d, Oid type, IOFuncSelector which_func, MemoryContext mcxt)
{
	d->type = type;
	d->which_func = which_func;
	d->mcxt = mcxt;

	get_type_io_data(type, which_func,
					 &d->typlen, &d->typbyval, &d->typalign,
					 &d->typdelim, &d->typioparam, &d->typiofunc);
	fmgr_info_cxt(d->typiofunc, &d->proc, d->mcxt);

	get_type_category_preferred(type, &d->typcategory, &d->typispreferred);
}

/*
 * enum_label_cmp
 *	  qsort/bsearch comparator ordering EnumLabel items by label text
 *	  (plain strcmp order).  The index field is not examined.
 */
static int
enum_label_cmp(const void *left, const void *right)
{
	const char *l = ((EnumLabel *) left)->label;
	const char *r = ((EnumLabel *) right)->label;

	return strcmp(l, r);
}

/*
 * getEnumLabelOids
 *	  Look up the OIDs of enum labels.  Enum label OIDs are needed to
 *	  return values of a custom enum type from a C function.
 *
 * Callers should typically cache the OIDs produced by this function
 * using fn_extra, as retrieving enum label OIDs is somewhat expensive.
 *
 * Every labels[i].index must be in the range 0 .. count - 1, and oid_out
 * must be allocated to hold count items.  Note that getEnumLabelOids
 * sorts the labels[] array passed to it.
 *
 * Any labels not found in the enum will have their corresponding
 * oid_out entries set to InvalidOid.
 *
 * Sample usage:
 *
 * -- SQL --
 * CREATE TYPE colors AS ENUM ('red', 'green', 'blue');
 *
 * -- C --
 * enum Colors {RED, GREEN, BLUE, COLOR_COUNT};
 *
 * static EnumLabel enum_labels[COLOR_COUNT] =
 * {
 *	   {RED,   "red"},
 *	   {GREEN, "green"},
 *	   {BLUE,  "blue"}
 * };
 *
 * Oid *label_oids = palloc(COLOR_COUNT * sizeof(Oid));
 * getEnumLabelOids("colors", enum_labels, label_oids, COLOR_COUNT);
 *
 * PG_RETURN_OID(label_oids[GREEN]);
 */
void
getEnumLabelOids(const char *typname, EnumLabel labels[], Oid oid_out[], int count)
{
	CatCList   *list;
	Oid			enumtypoid;
	int			total;
	int			i;
	EnumLabel	key;
	EnumLabel  *found;

	enumtypoid = TypenameGetTypid(typname);
	Assert(OidIsValid(enumtypoid));

	/* Sort by label text so that bsearch() can be used below. */
	qsort(labels, count, sizeof(EnumLabel), enum_label_cmp);

	for (i = 0; i < count; i++)
	{
		/* Initialize oid_out items to InvalidOid. */
		oid_out[i] = InvalidOid;

		/* Make sure EnumLabel indices are in range. */
		Assert(labels[i].index >= 0 && labels[i].index < count);
	}

	list = SearchSysCacheList1(ENUMTYPOIDNAME,
							   ObjectIdGetDatum(enumtypoid));
	total = list->n_members;

	/* Walk every label of the enum type and match it against labels[]. */
	for (i = 0; i < total; i++)
	{
		HeapTuple	tup = &list->members[i]->tuple;
		Oid			oid = HeapTupleGetOid(tup);
		Form_pg_enum en = (Form_pg_enum) GETSTRUCT(tup);

		/* Only the label field of the key is consulted by the comparator. */
		key.label = NameStr(en->enumlabel);
		found = bsearch(&key, labels, count, sizeof(EnumLabel), enum_label_cmp);

		/*
		 * The range test repeats the Assert above at run time, so that a
		 * build without assertions never writes outside oid_out[].
		 */
		if (found != NULL && found->index >= 0 && found->index < count)
			oid_out[found->index] = oid;
	}

	ReleaseCatCacheList(list);
}

/*
 * utf8_substring
 *	  Find substring bounds in a UTF-8-encoded string.
 *
 * @src and @srcbytes are the start and byte length of the input string.
 * @start and @length are the start and number of characters requested.
 *
 * Writes the bounds of the substring to
 * *out_start (start) and *out_bytes (byte length).
 * Returns the number of characters (not bytes) in the substring, which
 * is less than @length when the input ends first.
 *
 * Example:
 *	  const char *out_start;
 *	  size_t	  out_bytes;
 *	  size_t	  out_chars;
 *
 *	  out_chars =
 *		  utf8_substring("⁰¹²³", 9,
 *						 1, 100,
 *						 &out_start, &out_bytes);
 *
 * out_chars will be 3.
 * out_start will point to the "¹".
 * out_bytes will be 6.
 */
size_t
utf8_substring(
			   const char *src, size_t srcbytes,
			   size_t start, size_t length,
			   const char **out_start, size_t *out_bytes)
{
	const char *e = src + srcbytes;
	const char *sub_start;
	const char *sub_end;
	size_t		sub_length;

	/* Skip the first @start characters (or stop at end of input). */
	sub_start = src;
	while (start > 0 && sub_start < e)
	{
		sub_start += pg_utf_mblen((const unsigned char *) sub_start);
		start--;
	}

	/* Advance over up to @length characters, counting them. */
	sub_end = sub_start;
	sub_length = 0;
	while (sub_length < length && sub_end < e)
	{
		sub_end += pg_utf_mblen((const unsigned char *) sub_end);
		sub_length++;
	}

	/* Make sure the input didn't have a clipped UTF-8 character */
	if (sub_start > e)
	{
		Assert(false);
		sub_start = sub_end = e;
	}
	else if (sub_end > e)
	{
		Assert(false);
		sub_end = e;
	}

	*out_start = sub_start;
	*out_bytes = sub_end - sub_start;
	return sub_length;
}

/* NOTE(review): not referenced by any code visible in this file. */
static const bool utf8_allow_surrogates = false;

/*
 * utf8_decode_char
 *	  Decodes a UTF-8 character, advancing *sp to point to the end of it.
 *	  Returns the Unicode code point of the character.
 *
 * No validation is performed: the sequence length is taken from the lead
 * byte alone and that many continuation bytes are read unconditionally,
 * so the caller must pass well-formed, complete UTF-8.
 *
 * This function will go away when a utf8_to_unicode
 * function becomes available.
 */
pg_wchar
utf8_decode_char(const char **sp)
{
	/* Payload mask for the lead byte, indexed by continuation-byte count. */
	static const unsigned char sf[4] = {0xFF, 0x1F, 0xF, 0x7};

	const unsigned char *s = (const unsigned char *) *sp;
	unsigned char c = *s++;
	unsigned int len;
	pg_wchar	uc;

	/* len is the number of continuation bytes that follow the lead byte. */
	if (c < 0x80)
		len = 0;
	else if (c < 0xE0)
		len = 1;
	else if (c < 0xF0)
		len = 2;
	else
		len = 3;

	uc = c & sf[len];
	while (len--)
	{
		/* Each continuation byte contributes its low six bits. */
		uc <<= 6;
		uc |= *s++ & 0x3F;
	}

	*sp = (const char *) s;
	return uc;
}

/*
 * utf8_validate
 *	  Essentially a variant of pg_verify_mbstr(PG_UTF8, str, length, true)
 *	  that allows '\0' characters.
 *
 * Returns true if the @length bytes at @str form valid UTF-8, false if a
 * multibyte sequence is illegal or runs past the end of the buffer.
 */
bool
utf8_validate(const char *str, size_t length)
{
	const unsigned char *s = (const unsigned char *) str;
	const unsigned char *e = s + length;
	int			len;

	while (s < e)
	{
		/* Bytes without the high bit (including '\0') are always accepted. */
		if (!IS_HIGHBIT_SET(*s))
		{
			s++;
			continue;
		}

		len = pg_utf_mblen(s);

		/* Reject a sequence that would extend past the end of the input. */
		if (s + len > e)
			return false;

		if (!pg_utf8_islegal(s, len))
			return false;

		s += len;
	}

	return true;
}

/*
 * server_to_utf8
 *	  Convert @len bytes at @str from the database encoding to UTF-8
 *	  by way of pg_do_encoding_conversion().
 *
 * Callers in this file compare the returned pointer against the input to
 * detect that no new string was produced.
 */
char *
server_to_utf8(const char *str, int len)
{
	return (char *) pg_do_encoding_conversion(
							  (unsigned char *) str, len,
							  GetDatabaseEncoding(), PG_UTF8);
}

/*
 * utf8_to_server
 *	  Convert @len bytes at @str from UTF-8 to the database encoding
 *	  by way of pg_do_encoding_conversion().
 *
 * Callers in this file compare the returned pointer against the input to
 * detect that no new string was produced.
 */
char *
utf8_to_server(const char *str, int len)
{
	return (char *) pg_do_encoding_conversion(
							  (unsigned char *) str, len,
							  PG_UTF8, GetDatabaseEncoding());
}

/*
 * text_to_utf8_cstring
 *	  Just like text_to_cstring, but yields a C string
 *	  encoded in UTF-8 instead of the server encoding.
 */
char *
text_to_utf8_cstring(const text *t)
{
	/* must cast away the const, just like in text_to_cstring */
	text	   *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
	const char *data = VARDATA_ANY(tunpacked);
	int			len = VARSIZE_ANY_EXHDR(tunpacked);
	char	   *result;

	result = server_to_utf8(data, len);

	/*
	 * If the conversion handed back the input pointer, make a separate,
	 * NUL-terminated copy so the result never aliases the text datum.
	 */
	if (result == data)
		result = pnstrdup(data, len);

	/* Release the detoasted copy, if one was made. */
	if (tunpacked != t)
		pfree(tunpacked);

	return result;
}

/*
 * utf8_cstring_to_text
 *	  Just like cstring_to_text, but takes a C string
 *	  encoded in UTF-8 instead of the server encoding.
 */
text *
utf8_cstring_to_text(const char *s)
{
	return utf8_cstring_to_text_with_len(s, strlen(s));
}

/*
 * utf8_cstring_to_text_with_len
 *	  Just like cstring_to_text_with_len, but takes a C string
 *	  encoded in UTF-8 instead of the server encoding.
 *
 * The input string should not contain null characters, because the length
 * of a converted string is measured with strlen().
 */
text *
utf8_cstring_to_text_with_len(const char *s, int len)
{
	char	   *cstring;
	int			cstring_len;
	text	   *result;

	cstring = utf8_to_server(s, len);
	if (cstring == s)
		cstring_len = len;
	else
		cstring_len = strlen(cstring);

	/*
	 * Size the result by the converted length, not the UTF-8 input length:
	 * the two differ whenever the server encoding is not UTF-8.
	 */
	result = (text *) palloc(cstring_len + VARHDRSZ);

	SET_VARSIZE(result, cstring_len + VARHDRSZ);
	memcpy(VARDATA(result), cstring, cstring_len);

	if (cstring != s)
		pfree(cstring);

	return result;
}