diff options
author | Tom Lane | 2009-08-04 16:08:37 +0000 |
---|---|---|
committer | Tom Lane | 2009-08-04 16:08:37 +0000 |
commit | 99c597aa45500cfe8f8e43f4126d60e6898301d9 (patch) | |
tree | c1bb24609c6651c6f519e1faeab110148202f93b | |
parent | f964177099f239a406c2030ed3005c665eaa8462 (diff) |
Support hex-string input and output for type BYTEA.
Both hex format and the traditional "escape" format are automatically
handled on input. The output format is selected by the new GUC variable
bytea_output.
As committed, bytea_output defaults to HEX, which is an *incompatible
change*. We will keep it this way for awhile for testing purposes, but
should consider whether to switch to the more backwards-compatible
default of ESCAPE before 8.5 is released.
Peter Eisentraut
-rw-r--r-- | doc/src/sgml/config.sgml | 17 | ||||
-rw-r--r-- | doc/src/sgml/datatype.sgml | 90 | ||||
-rw-r--r-- | src/backend/catalog/pg_largeobject.c | 2 | ||||
-rw-r--r-- | src/backend/commands/trigger.c | 1 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 1 | ||||
-rw-r--r-- | src/backend/utils/adt/encode.c | 8 | ||||
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 1 | ||||
-rw-r--r-- | src/backend/utils/adt/varlena.c | 72 | ||||
-rw-r--r-- | src/backend/utils/misc/guc.c | 16 | ||||
-rw-r--r-- | src/backend/utils/misc/postgresql.conf.sample | 1 | ||||
-rw-r--r-- | src/bin/pg_dump/pg_dump.c | 58 | ||||
-rw-r--r-- | src/include/utils/builtins.h | 28 | ||||
-rw-r--r-- | src/include/utils/bytea.h | 50 | ||||
-rw-r--r-- | src/interfaces/libpq/fe-exec.c | 58 | ||||
-rw-r--r-- | src/test/regress/expected/conversion.out | 2 | ||||
-rw-r--r-- | src/test/regress/expected/strings.out | 93 | ||||
-rw-r--r-- | src/test/regress/input/largeobject.source | 3 | ||||
-rw-r--r-- | src/test/regress/output/largeobject.source | 2 | ||||
-rw-r--r-- | src/test/regress/output/largeobject_1.source | 2 | ||||
-rw-r--r-- | src/test/regress/sql/conversion.sql | 3 | ||||
-rw-r--r-- | src/test/regress/sql/strings.sql | 21 |
21 files changed, 430 insertions, 99 deletions
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 89208127b6..2034fdc90d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4060,6 +4060,23 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-bytea-output" xreflabel="bytea_output"> + <term><varname>bytea_output</varname> (<type>enum</type>)</term> + <indexterm> + <primary><varname>bytea_output</> configuration parameter</primary> + </indexterm> + <listitem> + <para> + Sets the output format for values of type <type>bytea</type>. + Valid values are <literal>hex</literal> (the default) + and <literal>escape</literal> (the traditional PostgreSQL + format). See <xref linkend="datatype-binary"> for more + information. The <type>bytea</type> type always + accepts both formats on input, regardless of this setting. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-xmlbinary" xreflabel="xmlbinary"> <term><varname>xmlbinary</varname> (<type>enum</type>)</term> <indexterm> diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index f7ee8e9f22..56f7a697f1 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1177,7 +1177,7 @@ SELECT b, char_length(b) FROM test2; <para> A binary string is a sequence of octets (or bytes). Binary strings are distinguished from character strings in two - ways: First, binary strings specifically allow storing + ways. First, binary strings specifically allow storing octets of value zero and other <quote>non-printable</quote> octets (usually, octets outside the range 32 to 126). Character strings disallow zero octets, and also disallow any @@ -1191,13 +1191,82 @@ SELECT b, char_length(b) FROM test2; </para> <para> - When entering <type>bytea</type> values, octets of certain - values <emphasis>must</emphasis> be escaped (but all octet - values <emphasis>can</emphasis> be escaped) when used as part - of a string literal in an <acronym>SQL</acronym> statement. In + The <type>bytea</type> type supports two external formats for + input and output: <productname>PostgreSQL</productname>'s historical + <quote>escape</quote> format, and <quote>hex</quote> format. Both + of these are always accepted on input. The output format depends + on the configuration parameter <xref linkend="guc-bytea-output">; + the default is hex. (Note that the hex format was introduced in + <productname>PostgreSQL</productname> 8.5; earlier versions and some + tools don't understand it.) + </para> + + <para> + The <acronym>SQL</acronym> standard defines a different binary + string type, called <type>BLOB</type> or <type>BINARY LARGE + OBJECT</type>. The input format is different from + <type>bytea</type>, but the provided functions and operators are + mostly the same. + </para> + + <sect2> + <title><type>bytea</> hex format</title> + + <para> + The <quote>hex</> format encodes binary data as 2 hexadecimal digits + per byte, most significant nibble first. The entire string is + preceded by the sequence <literal>\x</literal> (to distinguish it + from the escape format). In some contexts, the initial backslash may + need to be escaped by doubling it, in the same cases in which backslashes + have to be doubled in escape format; details appear below. + The hexadecimal digits can + be either upper or lower case, and whitespace is permitted between + digit pairs (but not within a digit pair nor in the starting + <literal>\x</literal> sequence). + The hex format is compatible with a wide + range of external applications and protocols, and it tends to be + faster to convert than the escape format, so its use is preferred. + </para> + + <para> + Example: +<programlisting> +SELECT E'\\xDEADBEEF'; +</programlisting> + </para> + </sect2> + + <sect2> + <title><type>bytea</> escape format</title> + + <para> + The <quote>escape</quote> format is the traditional + <productname>PostgreSQL</productname> format for the <type>bytea</type> + type. It + takes the approach of representing a binary string as a sequence + of ASCII characters, while converting those bytes that cannot be + represented as an ASCII character into special escape sequences. + If, from the point of view of the application, representing bytes + as characters makes sense, then this representation can be + convenient. But in practice it is usually confusing becauses it + fuzzes up the distinction between binary strings and character + strings, and also the particular escape mechanism that was chosen is + somewhat unwieldy. So this format should probably be avoided + for most new applications. + </para> + + <para> + When entering <type>bytea</type> values in escape format, + octets of certain + values <emphasis>must</emphasis> be escaped, while all octet + values <emphasis>can</emphasis> be escaped. In general, to escape an octet, convert it into its three-digit octal value and precede it - by two backslashes. <xref linkend="datatype-binary-sqlesc"> + by a backslash (or two backslashes, if writing the value as a + literal using escape string syntax). + Backslash itself (octet value 92) can alternatively be represented by + double backslashes. + <xref linkend="datatype-binary-sqlesc"> shows the characters that must be escaped, and gives the alternative escape sequences where applicable. </para> @@ -1343,14 +1412,7 @@ SELECT b, char_length(b) FROM test2; have to escape line feeds and carriage returns if your interface automatically translates these. </para> - - <para> - The <acronym>SQL</acronym> standard defines a different binary - string type, called <type>BLOB</type> or <type>BINARY LARGE - OBJECT</type>. The input format is different from - <type>bytea</type>, but the provided functions and operators are - mostly the same. - </para> + </sect2> </sect1> diff --git a/src/backend/catalog/pg_largeobject.c b/src/backend/catalog/pg_largeobject.c index c92ab024e5..08a0b06111 100644 --- a/src/backend/catalog/pg_largeobject.c +++ b/src/backend/catalog/pg_largeobject.c @@ -18,7 +18,7 @@ #include "access/heapam.h" #include "catalog/indexing.h" #include "catalog/pg_largeobject.h" -#include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/rel.h" #include "utils/tqual.h" diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 3024f40486..6022ad0d86 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -37,6 +37,7 @@ #include "tcop/utility.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index c3b4b36860..52cadddeba 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -31,6 +31,7 @@ #include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" #include "utils/selfuncs.h" diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index eed799a0f4..b2042e78b0 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -109,7 +109,7 @@ binary_decode(PG_FUNCTION_ARGS) * HEX */ -static const char *hextbl = "0123456789abcdef"; +static const char hextbl[] = "0123456789abcdef"; static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -122,7 +122,7 @@ static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; -static unsigned +unsigned hex_encode(const char *src, unsigned len, char *dst) { const char *end = src + len; @@ -136,7 +136,7 @@ hex_encode(const char *src, unsigned len, char *dst) return len * 2; } -static char +static inline char get_hex(char c) { int res = -1; @@ -152,7 +152,7 @@ get_hex(char c) return (char) res; } -static unsigned +unsigned hex_decode(const char *src, unsigned len, char *dst) { const char *s, diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 30b4b4763d..1748f1a7d4 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -109,6 +109,7 @@ #include "parser/parse_coerce.h" #include "parser/parsetree.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/date.h" #include "utils/datum.h" #include "utils/fmgroids.h" diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 4434c97b69..f89483fcd2 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -24,10 +24,14 @@ #include "parser/scansup.h" #include "regex/regex.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" +/* GUC variable */ +int bytea_output = BYTEA_OUTPUT_HEX; + typedef struct varlena unknown; typedef struct @@ -186,10 +190,24 @@ byteain(PG_FUNCTION_ARGS) char *inputText = PG_GETARG_CSTRING(0); char *tp; char *rp; - int byte; + int bc; bytea *result; - for (byte = 0, tp = inputText; *tp != '\0'; byte ++) + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2)/2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode(inputText + 2, len - 2, VARDATA(result)); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) { if (tp[0] != '\\') tp++; @@ -204,7 +222,7 @@ byteain(PG_FUNCTION_ARGS) else { /* - * one backslash, not followed by 0 or ### valid octal + * one backslash, not followed by another or ### valid octal */ ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), @@ -212,10 +230,10 @@ byteain(PG_FUNCTION_ARGS) } } - byte +=VARHDRSZ; + bc += VARHDRSZ; - result = (bytea *) palloc(byte); - SET_VARSIZE(result, byte); + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); tp = inputText; rp = VARDATA(result); @@ -228,11 +246,11 @@ byteain(PG_FUNCTION_ARGS) (tp[2] >= '0' && tp[2] <= '7') && (tp[3] >= '0' && tp[3] <= '7')) { - byte = VAL(tp[1]); - byte <<=3; - byte +=VAL(tp[2]); - byte <<=3; - *rp++ = byte +VAL(tp[3]); + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); tp += 4; } @@ -259,21 +277,30 @@ byteain(PG_FUNCTION_ARGS) /* * byteaout - converts to printable representation of byte array * - * Non-printable characters are inserted as '\nnn' (octal) and '\' as - * '\\'. - * - * NULL vlena should be an error--returning string with NULL for now. + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. */ Datum byteaout(PG_FUNCTION_ARGS) { bytea *vlena = PG_GETARG_BYTEA_PP(0); char *result; - char *vp; char *rp; - int val; /* holds unprintable chars */ - int i; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; int len; + int i; len = 1; /* empty string has 1 char */ vp = VARDATA_ANY(vlena); @@ -297,6 +324,8 @@ byteaout(PG_FUNCTION_ARGS) } else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) { + int val; /* holds unprintable chars */ + val = *vp; rp[0] = '\\'; rp[3] = DIG(val & 07); @@ -309,6 +338,13 @@ byteaout(PG_FUNCTION_ARGS) else *rp++ = *vp; } + } + else + { + elog(ERROR, "unrecognized bytea_output setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } *rp = '\0'; PG_RETURN_CSTRING(result); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 54ffc3aa59..b7bf6e27dc 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -61,6 +61,7 @@ #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/guc_tables.h" #include "utils/memutils.h" #include "utils/pg_locale.h" @@ -180,6 +181,12 @@ static char *config_enum_get_options(struct config_enum * record, * NOTE! Option values may not contain double quotes! */ +static const struct config_enum_entry bytea_output_options[] = { + {"escape", BYTEA_OUTPUT_ESCAPE, false}, + {"hex", BYTEA_OUTPUT_HEX, false}, + {NULL, 0, false} +}; + /* * We have different sets for client and server message level options because * they sort slightly different (see "log" level) @@ -2541,6 +2548,15 @@ static struct config_enum ConfigureNamesEnum[] = }, { + {"bytea_output", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the output format for bytea."), + NULL + }, + &bytea_output, + BYTEA_OUTPUT_HEX, bytea_output_options, NULL, NULL + }, + + { {"client_min_messages", PGC_USERSET, LOGGING_WHEN, gettext_noop("Sets the message levels that are sent to the client."), gettext_noop("Each level includes all the levels that follow it. The later" diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index e50d7a44f7..41488e264f 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -424,6 +424,7 @@ #statement_timeout = 0 # in milliseconds, 0 is disabled #vacuum_freeze_min_age = 50000000 #vacuum_freeze_table_age = 150000000 +#bytea_output = 'hex' # hex, escape #xmlbinary = 'base64' #xmloption = 'content' diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 1294b53857..1e83ad283c 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -11008,6 +11008,8 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo) TableInfo *tbinfo = tginfo->tgtable; PQExpBuffer query; PQExpBuffer delqry; + char *tgargs; + size_t lentgargs; const char *p; int findx; @@ -11109,53 +11111,29 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo) appendPQExpBuffer(query, "EXECUTE PROCEDURE %s(", fmtId(tginfo->tgfname)); - p = tginfo->tgargs; + tgargs = (char *) PQunescapeBytea(tginfo->tgargs, &lentgargs); + p = tgargs; for (findx = 0; findx < tginfo->tgnargs; findx++) { - const char *s = p; + /* find the embedded null that terminates this trigger argument */ + size_t tlen = strlen(p); - /* Set 'p' to end of arg string. marked by '\000' */ - for (;;) + if (p + tlen >= tgargs + lentgargs) { - p = strchr(p, '\\'); - if (p == NULL) - { - write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", - tginfo->tgargs, - tginfo->dobj.name, - tbinfo->dobj.name); - exit_nicely(); - } - p++; - if (*p == '\\') /* is it '\\'? */ - { - p++; - continue; - } - if (p[0] == '0' && p[1] == '0' && p[2] == '0') /* is it '\000'? */ - break; + /* hm, not found before end of bytea value... */ + write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", + tginfo->tgargs, + tginfo->dobj.name, + tbinfo->dobj.name); + exit_nicely(); } - p--; - - appendPQExpBufferChar(query, '\''); - while (s < p) - { - if (*s == '\'') - appendPQExpBufferChar(query, '\''); - /* - * bytea unconditionally doubles backslashes, so we suppress the - * doubling for standard_conforming_strings. - */ - if (fout->std_strings && *s == '\\' && s[1] == '\\') - s++; - appendPQExpBufferChar(query, *s++); - } - appendPQExpBufferChar(query, '\''); - appendPQExpBuffer(query, - (findx < tginfo->tgnargs - 1) ? ", " : ""); - p = p + 4; + if (findx > 0) + appendPQExpBuffer(query, ", "); + appendStringLiteralAH(query, p, fout); + p += tlen + 1; } + free(tgargs); appendPQExpBuffer(query, ");\n"); if (tginfo->tgenabled != 't' && tginfo->tgenabled != 'O') diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 8180e39c11..9b0a2b7262 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -138,6 +138,12 @@ extern Datum char_text(PG_FUNCTION_ARGS); extern Datum domain_in(PG_FUNCTION_ARGS); extern Datum domain_recv(PG_FUNCTION_ARGS); +/* encode.c */ +extern Datum binary_encode(PG_FUNCTION_ARGS); +extern Datum binary_decode(PG_FUNCTION_ARGS); +extern unsigned hex_encode(const char *src, unsigned len, char *dst); +extern unsigned hex_decode(const char *src, unsigned len, char *dst); + /* enum.c */ extern Datum enum_in(PG_FUNCTION_ARGS); extern Datum enum_out(PG_FUNCTION_ARGS); @@ -711,28 +717,6 @@ extern Datum unknownout(PG_FUNCTION_ARGS); extern Datum unknownrecv(PG_FUNCTION_ARGS); extern Datum unknownsend(PG_FUNCTION_ARGS); -extern Datum byteain(PG_FUNCTION_ARGS); -extern Datum byteaout(PG_FUNCTION_ARGS); -extern Datum bytearecv(PG_FUNCTION_ARGS); -extern Datum byteasend(PG_FUNCTION_ARGS); -extern Datum byteaoctetlen(PG_FUNCTION_ARGS); -extern Datum byteaGetByte(PG_FUNCTION_ARGS); -extern Datum byteaGetBit(PG_FUNCTION_ARGS); -extern Datum byteaSetByte(PG_FUNCTION_ARGS); -extern Datum byteaSetBit(PG_FUNCTION_ARGS); -extern Datum binary_encode(PG_FUNCTION_ARGS); -extern Datum binary_decode(PG_FUNCTION_ARGS); -extern Datum byteaeq(PG_FUNCTION_ARGS); -extern Datum byteane(PG_FUNCTION_ARGS); -extern Datum bytealt(PG_FUNCTION_ARGS); -extern Datum byteale(PG_FUNCTION_ARGS); -extern Datum byteagt(PG_FUNCTION_ARGS); -extern Datum byteage(PG_FUNCTION_ARGS); -extern Datum byteacmp(PG_FUNCTION_ARGS); -extern Datum byteacat(PG_FUNCTION_ARGS); -extern Datum byteapos(PG_FUNCTION_ARGS); -extern Datum bytea_substr(PG_FUNCTION_ARGS); -extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); extern Datum pg_column_size(PG_FUNCTION_ARGS); /* version.c */ diff --git a/src/include/utils/bytea.h b/src/include/utils/bytea.h new file mode 100644 index 0000000000..ba8285209c --- /dev/null +++ b/src/include/utils/bytea.h @@ -0,0 +1,50 @@ +/*------------------------------------------------------------------------- + * + * bytea.h + * Declarations for BYTEA data type support. + * + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ +#ifndef BYTEA_H +#define BYTEA_H + +#include "fmgr.h" + + +typedef enum +{ + BYTEA_OUTPUT_ESCAPE, + BYTEA_OUTPUT_HEX +} ByteaOutputType; + +extern int bytea_output; /* ByteaOutputType, but int for GUC enum */ + +/* functions are in utils/adt/varlena.c */ +extern Datum byteain(PG_FUNCTION_ARGS); +extern Datum byteaout(PG_FUNCTION_ARGS); +extern Datum bytearecv(PG_FUNCTION_ARGS); +extern Datum byteasend(PG_FUNCTION_ARGS); +extern Datum byteaoctetlen(PG_FUNCTION_ARGS); +extern Datum byteaGetByte(PG_FUNCTION_ARGS); +extern Datum byteaGetBit(PG_FUNCTION_ARGS); +extern Datum byteaSetByte(PG_FUNCTION_ARGS); +extern Datum byteaSetBit(PG_FUNCTION_ARGS); +extern Datum byteaeq(PG_FUNCTION_ARGS); +extern Datum byteane(PG_FUNCTION_ARGS); +extern Datum bytealt(PG_FUNCTION_ARGS); +extern Datum byteale(PG_FUNCTION_ARGS); +extern Datum byteagt(PG_FUNCTION_ARGS); +extern Datum byteage(PG_FUNCTION_ARGS); +extern Datum byteacmp(PG_FUNCTION_ARGS); +extern Datum byteacat(PG_FUNCTION_ARGS); +extern Datum byteapos(PG_FUNCTION_ARGS); +extern Datum bytea_substr(PG_FUNCTION_ARGS); +extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); + +#endif /* BYTEA_H */ diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index bb36121394..ba4ff66b7c 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -3167,6 +3167,29 @@ PQescapeBytea(const unsigned char *from, size_t from_length, size_t *to_length) } +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static inline char +get_hex(char c) +{ + int res = -1; + + if (c > 0 && c < 127) + res = hexlookup[(unsigned char) c]; + + return (char) res; +} + + #define ISFIRSTOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '3') #define ISOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '7') #define OCTVAL(CH) ((CH) - '0') @@ -3198,6 +3221,40 @@ PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen) strtextlen = strlen((const char *) strtext); + if (strtext[0] == '\\' && strtext[1] == 'x') + { + const unsigned char *s; + unsigned char *p; + + buflen = (strtextlen - 2)/2; + /* Avoid unportable malloc(0) */ + buffer = (unsigned char *) malloc(buflen > 0 ? buflen : 1); + if (buffer == NULL) + return NULL; + + s = strtext + 2; + p = buffer; + while (*s) + { + char v1, + v2; + + /* + * Bad input is silently ignored. Note that this includes + * whitespace between hex pairs, which is allowed by byteain. + */ + v1 = get_hex(*s++); + if (!*s || v1 == (char) -1) + continue; + v2 = get_hex(*s++); + if (v2 != (char) -1) + *p++ = (v1 << 4) | v2; + } + + buflen = p - buffer; + } + else + { /* * Length of input is max length of output, but add one to avoid * unportable malloc(0) if input is zero-length. @@ -3244,6 +3301,7 @@ PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen) } } buflen = j; /* buflen is the length of the dequoted data */ + } /* Shrink the buffer to be no larger than necessary */ /* +1 avoids unportable behavior when buflen==0 */ diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out index e80e1a4534..82eca262f0 100644 --- a/src/test/regress/expected/conversion.out +++ b/src/test/regress/expected/conversion.out @@ -1,3 +1,5 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- -- create user defined conversion -- diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 1241a2ace6..392f48ef8c 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -97,6 +97,99 @@ LINE 1: SELECT U&'wrong: +0061' UESCAPE '+'; ^ DETAIL: String constants with Unicode escapes cannot be used when standard_conforming_strings is off. RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\xDeAdBeE'::bytea; +ERROR: invalid hexadecimal data: odd number of digits +LINE 1: SELECT E'\\xDeAdBeE'::bytea; + ^ +SELECT E'\\xDeAdBeEx'::bytea; +ERROR: invalid hexadecimal digit: "x" +LINE 1: SELECT E'\\xDeAdBeEx'::bytea; + ^ +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------ + \xde00beef +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +-------------------- + \x4465416442654566 +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +-------------------- + \x4465006442654566 +(1 row) + +SELECT E'De\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\678dBeEf'::bytea; +ERROR: invalid input syntax for type bytea +LINE 1: SELECT E'De\\678dBeEf'::bytea; + ^ +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------------ + \336\000\276\357 +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +---------- + DeAdBeEf +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +------------- + De\000dBeEf +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +---------- + DeSdBeEf +(1 row) + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types diff --git a/src/test/regress/input/largeobject.source b/src/test/regress/input/largeobject.source index 46ba9261ac..807cfd7cc4 100644 --- a/src/test/regress/input/largeobject.source +++ b/src/test/regress/input/largeobject.source @@ -2,6 +2,9 @@ -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; + -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject.source b/src/test/regress/output/largeobject.source index 9d69f6c913..d7468bb513 100644 --- a/src/test/regress/output/largeobject.source +++ b/src/test/regress/output/largeobject.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject_1.source b/src/test/regress/output/largeobject_1.source index 1fbc29c251..84e916fea4 100644 --- a/src/test/regress/output/largeobject_1.source +++ b/src/test/regress/output/largeobject_1.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql index 99a9178315..be194eec1f 100644 --- a/src/test/regress/sql/conversion.sql +++ b/src/test/regress/sql/conversion.sql @@ -1,3 +1,6 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; + -- -- create user defined conversion -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 681a0e1e62..63df9402ed 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -43,6 +43,27 @@ SELECT U&'wrong: +0061' UESCAPE '+'; RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDeAdBeE'::bytea; +SELECT E'\\xDeAdBeEx'::bytea; +SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\123dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; +SELECT E'De\\678dBeEf'::bytea; + +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types |