/*-------------------------------------------------------------------------
 *
 * util.c
 *	  General purpose routines used by JSON data type support.
 *
 * Copyright (c) 2010, PostgreSQL Global Development Group
 * Written by Joey Adams <joeyadams3.14159@gmail.com>.
 *
 *-------------------------------------------------------------------------
 */

#include "util.h"

#include "catalog/namespace.h"
#include "catalog/pg_enum.h"
#include "utils/syscache.h"

#include "compat.h"

/*
 * getTypeInfo
 *	  Fill in *d with information about a type, along with a lookup of
 *	  one of its I/O procedures: input, output, binary receive, or
 *	  binary send.
 *
 *	  which_func selects the procedure and should be one of:
 *		  IOFunc_input
 *		  IOFunc_output
 *		  IOFunc_receive
 *		  IOFunc_send
 *
 *	  mcxt is the memory context the selected I/O function will use to
 *	  store subsidiary data.  It should live at least as long as the
 *	  TypeInfo structure being filled in.
 */
void
getTypeInfo(TypeInfo *d, Oid type, IOFuncSelector which_func, MemoryContext mcxt)
{
	/* Record the request itself next to the data looked up for it. */
	d->mcxt = mcxt;
	d->which_func = which_func;
	d->type = type;

	/* Storage properties, plus the OID of the requested I/O procedure. */
	get_type_io_data(type, which_func,
					 &d->typlen,
					 &d->typbyval,
					 &d->typalign,
					 &d->typdelim,
					 &d->typioparam,
					 &d->typiofunc);

	/* Set up the call info for that procedure, tied to mcxt. */
	fmgr_info_cxt(d->typiofunc, &d->proc, mcxt);

	get_type_category_preferred(type, &d->typcategory, &d->typispreferred);
}

static int
enum_label_cmp(const void *left, const void *right)
{
	const char *l = ((EnumLabel *) left)->label;
	const char *r = ((EnumLabel *) right)->label;

	return strcmp(l, r);
}

/*
 * getEnumLabelOids
 *	  Look up the OIDs of enum labels.	Enum label OIDs are needed to
 *	  return values of a custom enum type from a C function.
 *
 *	  Callers should typically cache the OIDs produced by this function
 *	  using fn_extra, as retrieving enum label OIDs is somewhat expensive.
 *
 *	  Every labels[i].index must satisfy 0 <= index < count, and oid_out
 *	  must be allocated to hold count items; the OID of labels[i] is
 *	  stored at oid_out[labels[i].index].  Note that getEnumLabelOids
 *	  sorts the labels[] array passed to it.
 *
 *	  Any labels not found in the enum will have their corresponding
 *	  oid_out entries set to InvalidOid.
 *
 *	  Sample usage:
 *
 *	  -- SQL --
 *	  CREATE TYPE colors AS ENUM ('red', 'green', 'blue');
 *
 *	  -- C --
 *	  enum Colors {RED, GREEN, BLUE, COLOR_COUNT};
 *
 *	  static EnumLabel enum_labels[COLOR_COUNT] =
 *	  {
 *		  {RED,   "red"},
 *		  {GREEN, "green"},
 *		  {BLUE,  "blue"}
 *	  };
 *
 *	  Oid *label_oids = palloc(COLOR_COUNT * sizeof(Oid));
 *	  getEnumLabelOids("colors", enum_labels, label_oids, COLOR_COUNT);
 *
 *	  PG_RETURN_OID(label_oids[GREEN]);
 */
void
getEnumLabelOids(const char *typname, EnumLabel labels[], Oid oid_out[], int count)
{
	Oid			typoid;
	CatCList   *members;
	EnumLabel	probe;
	int			nmembers;
	int			n;

	typoid = TypenameGetTypid(typname);
	Assert(OidIsValid(typoid));

	/* The bsearch() below needs the caller's labels in strcmp() order. */
	qsort(labels, count, sizeof(EnumLabel), enum_label_cmp);

	for (n = 0; n < count; n++)
	{
		/* Anything we fail to match below stays InvalidOid. */
		oid_out[n] = InvalidOid;

		/* Indices address oid_out[], so they have to fit inside it. */
		Assert(labels[n].index >= 0 && labels[n].index < count);
	}

	/* Fetch the enum's catalog entries ... */
	members = SearchSysCacheList1(ENUMTYPOIDNAME, ObjectIdGetDatum(typoid));
	nmembers = members->n_members;

	for (n = 0; n < nmembers; n++)
	{
		HeapTuple	enumtup = &members->members[n]->tuple;
		Oid			labeloid = HeapTupleGetOid(enumtup);
		Form_pg_enum enumform = (Form_pg_enum) GETSTRUCT(enumtup);
		EnumLabel  *match;

		/* ... and check each one against the labels the caller asked for. */
		probe.label = NameStr(enumform->enumlabel);
		match = bsearch(&probe, labels, count, sizeof(EnumLabel), enum_label_cmp);
		if (match == NULL)
			continue;

		oid_out[match->index] = labeloid;
	}

	ReleaseCatCacheList(members);
}

/*
 * utf8_substring
 *	  Find substring bounds in a UTF-8-encoded string.
 *
 *	  @src and @srcbytes are the start and byte length of the input string.
 *	  @start and @length are the start and number of characters requested.
 *
 *	  Writes the bounds of the substring to
 *	  *out_start (start) and *out_bytes (byte length).
 *	  Returns the number of characters (not bytes) in the substring.
 *
 *	  If @start lies at or beyond the end of the input, the substring is
 *	  empty and *out_start points at the end of the input.  If the input
 *	  ends in the middle of a multibyte character, Assert(false) is hit
 *	  and the bounds are clamped to the end of the input.
 *
 *	  Example:
 *		 const char *out_start;
 *		 size_t		 out_bytes;
 *		 size_t		 out_chars;
 *
 *		 out_chars =
 *			 utf8_substring("⁰¹²³", 9,
 *							1, 100,
 *							&out_start, &out_bytes);
 *
 *	  out_chars will be 3.
 *	  out_start will point to the "¹".
 *	  out_bytes will be 6.
 */
size_t
utf8_substring(
			   const char *src, size_t srcbytes,
			   size_t start, size_t length,
			   const char **out_start, size_t *out_bytes)
{
	const char *const src_end = src + srcbytes;
	const char *first;
	const char *last;
	size_t		nchars;

	/* Step over the first @start characters, stopping at end of input. */
	for (first = src; start > 0 && first < src_end; start--)
		first += pg_utf_mblen((const unsigned char *) first);

	/* From there, take up to @length characters. */
	last = first;
	for (nchars = 0; nchars < length && last < src_end; nchars++)
		last += pg_utf_mblen((const unsigned char *) last);

	/*
	 * A character whose lead byte promises more bytes than the input
	 * holds pushes a cursor past the end; pull it back.
	 */
	if (first > src_end)
	{
		Assert(false);
		first = last = src_end;
	}
	else if (last > src_end)
	{
		Assert(false);
		last = src_end;
	}

	*out_start = first;
	*out_bytes = last - first;
	return nchars;
}

/*
 * Switch for whether surrogates are to be allowed in UTF-8 handling.
 * NOTE(review): presumably this refers to the UTF-16 surrogate range
 * (U+D800..U+DFFF); no code visible in this file reads the flag --
 * confirm whether it is still needed before relying on it.
 */
static const bool utf8_allow_surrogates = false;

/*
 * utf8_decode_char
 *	  Decodes a UTF-8 character, advancing *sp to point to the end of it.
 *	  Returns the Unicode code point of the character.
 *
 *	  The sequence length is taken from the lead byte alone and no
 *	  validation is performed, so the input must already be known to be
 *	  well-formed UTF-8.
 *
 *	  This function will go away when a utf8_to_unicode
 *	  function becomes available.
 */
pg_wchar
utf8_decode_char(const char **sp)
{
	const unsigned char *p = (const unsigned char *) *sp;
	unsigned char lead = *p++;
	unsigned int ntrail;
	pg_wchar	cp;

	/*
	 * Classify the lead byte: how many continuation bytes follow, and
	 * which of its low bits carry payload.
	 */
	if (lead < 0x80)
	{
		ntrail = 0;
		cp = lead;
	}
	else if (lead < 0xE0)
	{
		ntrail = 1;
		cp = lead & 0x1F;
	}
	else if (lead < 0xF0)
	{
		ntrail = 2;
		cp = lead & 0x0F;
	}
	else
	{
		ntrail = 3;
		cp = lead & 0x07;
	}

	/* Each continuation byte contributes its low six bits. */
	for (; ntrail > 0; ntrail--)
		cp = (cp << 6) | (*p++ & 0x3F);

	*sp = (const char *) p;
	return cp;
}

/*
 * utf8_validate
 *	  Essentially a variant of pg_verify_mbstr(PG_UTF8, str, length, true)
 *	  that allows '\0' characters.
 *
 *	  Returns true if the @length bytes at @str consist only of bytes
 *	  without the high bit set and multibyte sequences that fit inside
 *	  the buffer and are accepted by pg_utf8_islegal(); false otherwise.
 */
bool
utf8_validate(const char *str, size_t length)
{
	const unsigned char *cur = (const unsigned char *) str;
	const unsigned char *const end = cur + length;

	while (cur < end)
	{
		int			mblen;

		if (IS_HIGHBIT_SET(*cur))
		{
			/* Multibyte sequence: must fit in the buffer and be legal. */
			mblen = pg_utf_mblen(cur);
			if (cur + mblen > end || !pg_utf8_islegal(cur, mblen))
				return false;

			cur += mblen;
		}
		else
		{
			/* Single byte without the high bit, '\0' included. */
			cur++;
		}
	}

	return true;
}

/*
 * server_to_utf8
 *	  Convert @len bytes at @str from the database encoding to UTF-8
 *	  using pg_do_encoding_conversion.
 *
 *	  Callers in this file compare the result against @str to detect
 *	  that no conversion took place.
 */
char *
server_to_utf8(const char *str, int len)
{
	unsigned char *src = (unsigned char *) str;
	unsigned char *converted;

	converted = pg_do_encoding_conversion(src, len,
										  GetDatabaseEncoding(), PG_UTF8);

	return (char *) converted;
}

/*
 * utf8_to_server
 *	  Convert @len bytes at @str from UTF-8 to the database encoding
 *	  using pg_do_encoding_conversion.
 *
 *	  Callers in this file compare the result against @str to detect
 *	  that no conversion took place.
 */
char *
utf8_to_server(const char *str, int len)
{
	unsigned char *src = (unsigned char *) str;
	unsigned char *converted;

	converted = pg_do_encoding_conversion(src, len,
										  PG_UTF8, GetDatabaseEncoding());

	return (char *) converted;
}

/*
 * text_to_utf8_cstring
 *	  Just like text_to_cstring, but yields a C string
 *	  encoded in UTF-8 instead of the server encoding.
 *
 *	  The result is always a separate string from the text datum's own
 *	  storage: either the converted string or a copy of the data.
 */
char *
text_to_utf8_cstring(const text *t)
{
	/* must cast away the const, just like in text_to_cstring */
	text	   *detoasted = pg_detoast_datum_packed((struct varlena *) t);
	int			nbytes = VARSIZE_ANY_EXHDR(detoasted);
	const char *payload = VARDATA_ANY(detoasted);
	char	   *utf8;

	utf8 = server_to_utf8(payload, nbytes);

	/*
	 * Getting the input pointer back means nothing was converted, so
	 * make our own copy of the datum's bytes.
	 */
	if (utf8 == payload)
		utf8 = pnstrdup(payload, nbytes);

	/* Free the detoasted copy if one had to be made. */
	if (detoasted != t)
		pfree(detoasted);

	return utf8;
}

/*
 * utf8_cstring_to_text
 *	  Just like cstring_to_text, but takes a C string
 *	  encoded in UTF-8 instead of the server encoding.
 *
 *	  @s must be null-terminated; its length is measured with strlen.
 */
text *
utf8_cstring_to_text(const char *s)
{
	int			len = strlen(s);

	return utf8_cstring_to_text_with_len(s, len);
}

/*
 * utf8_cstring_to_text_with_len
 *	  Just like cstring_to_text_with_len, but takes a C string
 *	  encoded in UTF-8 instead of the server encoding.
 *
 *	  @s points to @len bytes of UTF-8 text.  The returned text datum is
 *	  palloc'd and holds the string converted to the server encoding;
 *	  its size is that of the converted string, which may differ from
 *	  @len.
 *
 *	  The input string should not contain null characters: when a
 *	  conversion takes place, the length of the converted string is
 *	  measured with strlen.
 */
text *
utf8_cstring_to_text_with_len(const char *s, int len)
{
	char	   *cstring;
	int			cstring_len;
	text	   *result;

	cstring = utf8_to_server(s, len);

	/* Getting the input pointer back means no conversion took place. */
	if (cstring == s)
		cstring_len = len;
	else
		cstring_len = strlen(cstring);

	/*
	 * Size the datum by the converted length, not the input length.
	 * Conversion can change the byte count, and sizing by len would
	 * overrun the allocation or leave uninitialized trailing bytes.
	 */
	result = (text *) palloc(cstring_len + VARHDRSZ);

	SET_VARSIZE(result, cstring_len + VARHDRSZ);
	memcpy(VARDATA(result), cstring, cstring_len);

	if (cstring != s)
		pfree(cstring);

	return result;
}