diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 99d25d7..9a543bc 100644
*** a/doc/src/sgml/config.sgml
--- b/doc/src/sgml/config.sgml
*************** COPY postgres_log FROM '/full/path/to/lo
*** 3735,3740 ****
--- 3735,3757 ----
Statement Behavior
+
+ bytea_output (enum)
+
+ bytea_output configuration parameter
+
+
+
+ Sets the output format for values of type bytea.
+ Valid values are hex (the default)
+ and escape (the traditional PostgreSQL
+ format). See the documentation of the bytea type for more
+ information. Note that the bytea type always
+ accepts both formats on input.
+
+
+
+
search_path (string)
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index f7ee8e9..8576419 100644
*** a/doc/src/sgml/datatype.sgml
--- b/doc/src/sgml/datatype.sgml
***************
*** 1,4 ****
!
Data Types
--- 1,4 ----
!
Data Types
*************** SELECT b, char_length(b) FROM test2;
*** 1191,1196 ****
--- 1191,1256 ----
+ The bytea type supports two external formats for
+ input and output: the escape format that is
+ particular to PostgreSQL, and the hex format. Both
+ of these are always accepted on input. The output format depends
+ on the configuration parameter bytea_output;
+ the default is hex. (Note that the hex format was introduced in
+ PostgreSQL 8.5; earlier versions and some tools don't understand
+ it.)
+
+
+
+ The SQL standard defines a different binary
+ string type, called BLOB or BINARY LARGE
+ OBJECT. The input format is different from
+ bytea, but the provided functions and operators are
+ mostly the same.
+
+
+
+ The Hex Format
+
+
+ The hex format encodes the binary data as 2 hexadecimal digits per
+ byte, most significant nibble first. The entire string is
+ preceded by the sequence \x (to distinguish it
+ from the escape format). In SQL literals, the backslash may need
+ to be escaped, but it is one logical backslash as far as the
+ bytea type is concerned. The hex format is compatible with a wide
+ range of external applications and protocols, and it tends to be
+ faster than the traditional escape format, so its use is
+ somewhat preferable.
+
+
+
+ Example:
+
+ SELECT E'\\xDEADBEEF';
+
+
+
+
+
+ The Escape Format
+
+
+ The escape format is the traditional
+ PostgreSQL-specific format for the bytea type. It
+ takes the approach of representing a binary string as a sequence
+ of ASCII characters and escaping those bytes that cannot be
+ represented as an ASCII character by a special escape sequence.
+ If, from the point of view of the application, representing bytes
+ as characters makes sense, then this representation can be
+ convenient, but in practice it is usually confusing because it
+ fuzzes up the distinction between binary strings and character
+ strings, and the particular escape mechanism that was chosen is
+ also somewhat unwieldy. So this format should probably not be
+ used for most new applications.
+
+
+
When entering bytea values, octets of certain
values must be escaped (but all octet
values can be escaped) when used as part
*************** SELECT b, char_length(b) FROM test2;
*** 1343,1356 ****
have to escape line feeds and carriage returns if your interface
automatically translates these.
!
!
! The SQL standard defines a different binary
! string type, called BLOB or BINARY LARGE
! OBJECT. The input format is different from
! bytea, but the provided functions and operators are
! mostly the same.
!
--- 1403,1409 ----
have to escape line feeds and carriage returns if your interface
automatically translates these.
!
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index eed799a..4f47c8c 100644
*** a/src/backend/utils/adt/encode.c
--- b/src/backend/utils/adt/encode.c
*************** static const int8 hexlookup[128] = {
*** 122,128 ****
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
! static unsigned
hex_encode(const char *src, unsigned len, char *dst)
{
const char *end = src + len;
--- 122,128 ----
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
! unsigned
hex_encode(const char *src, unsigned len, char *dst)
{
const char *end = src + len;
*************** get_hex(char c)
*** 152,158 ****
return (char) res;
}
! static unsigned
hex_decode(const char *src, unsigned len, char *dst)
{
const char *s,
--- 152,158 ----
return (char) res;
}
! unsigned
hex_decode(const char *src, unsigned len, char *dst)
{
const char *s,
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4434c97..9c8ea9d 100644
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
*************** static text *text_substring(Datum str,
*** 62,67 ****
--- 62,69 ----
bool length_not_specified);
static void appendStringInfoText(StringInfo str, const text *t);
+ int bytea_output;
+
/*****************************************************************************
* CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
*************** byteain(PG_FUNCTION_ARGS)
*** 189,194 ****
--- 191,207 ----
int byte;
bytea *result;
+ if (inputText[0] == '\\' && inputText[1] == 'x')
+ {
+ size_t len = strlen(inputText);
+ byte = (len - 2)/2 + VARHDRSZ;
+ result = palloc(byte);
+ SET_VARSIZE(result, byte);
+ hex_decode(inputText + 2, len - 2, VARDATA(result));
+
+ PG_RETURN_BYTEA_P(result);
+ }
+
for (byte = 0, tp = inputText; *tp != '\0'; byte ++)
{
if (tp[0] != '\\')
*************** Datum
*** 268,278 ****
byteaout(PG_FUNCTION_ARGS)
{
bytea *vlena = PG_GETARG_BYTEA_PP(0);
! char *result;
char *vp;
! char *rp;
! int val; /* holds unprintable chars */
int i;
int len;
len = 1; /* empty string has 1 char */
--- 281,300 ----
byteaout(PG_FUNCTION_ARGS)
{
bytea *vlena = PG_GETARG_BYTEA_PP(0);
! char *result = NULL;
char *vp;
! char *rp = NULL;
int i;
+
+ if (bytea_output == BYTEA_OUTPUT_HEX)
+ {
+ rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
+ *rp++ = '\\';
+ *rp++ = 'x';
+ rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
+ }
+ else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
+ {
int len;
len = 1; /* empty string has 1 char */
*************** byteaout(PG_FUNCTION_ARGS)
*** 297,302 ****
--- 319,326 ----
}
else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
{
+ int val; /* holds unprintable chars */
+
val = *vp;
rp[0] = '\\';
rp[3] = DIG(val & 07);
*************** byteaout(PG_FUNCTION_ARGS)
*** 309,314 ****
--- 333,341 ----
else
*rp++ = *vp;
}
+ }
+ else
+ elog(ERROR, "unrecognized bytea_output setting: %d", bytea_output);
*rp = '\0';
PG_RETURN_CSTRING(result);
}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 11ebac8..0f6ef6c 100644
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
*************** static char *config_enum_get_options(str
*** 180,185 ****
--- 180,191 ----
* NOTE! Option values may not contain double quotes!
*/
+ static const struct config_enum_entry bytea_output_options[] = {	/* value names accepted for the bytea_output enum setting */
+ {"escape", BYTEA_OUTPUT_ESCAPE, false},
+ {"hex", BYTEA_OUTPUT_HEX, false},
+ {NULL, 0, false}	/* NOTE(review): looks like the end-of-list sentinel -- confirm against config_enum_entry */
+ };
+
/*
* We have different sets for client and server message level options because
* they sort slightly different (see "log" level)
*************** static struct config_enum ConfigureNames
*** 2533,2538 ****
--- 2539,2553 ----
},
{
+ {"bytea_output", PGC_USERSET, CLIENT_CONN_OTHER,
+ gettext_noop("Sets the bytea output format to the hex format."),
+ NULL
+ },
+ &bytea_output,
+ BYTEA_OUTPUT_HEX, bytea_output_options, NULL, NULL
+ },
+
+ {
{"client_min_messages", PGC_USERSET, LOGGING_WHEN,
gettext_noop("Sets the message levels that are sent to the client."),
gettext_noop("Each level includes all the levels that follow it. The later"
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 13fd41a..8635416 100644
*** a/src/include/utils/builtins.h
--- b/src/include/utils/builtins.h
*************** extern Datum unknownout(PG_FUNCTION_ARGS
*** 704,709 ****
--- 704,716 ----
extern Datum unknownrecv(PG_FUNCTION_ARGS);
extern Datum unknownsend(PG_FUNCTION_ARGS);
+ typedef enum {	/* values stored in the bytea_output variable */
+ BYTEA_OUTPUT_ESCAPE,	/* selected by the setting value "escape" */
+ BYTEA_OUTPUT_HEX	/* selected by the setting value "hex" */
+ } ByteaOutputType;
+
+ extern int bytea_output; /* ByteaOutputType, but int for guc enum */
+
extern Datum byteain(PG_FUNCTION_ARGS);
extern Datum byteaout(PG_FUNCTION_ARGS);
extern Datum bytearecv(PG_FUNCTION_ARGS);
*************** extern Datum bytea_substr(PG_FUNCTION_AR
*** 728,733 ****
--- 735,743 ----
extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS);
extern Datum pg_column_size(PG_FUNCTION_ARGS);
+ extern unsigned hex_encode(const char *src, unsigned len, char *dst);
+ extern unsigned hex_decode(const char *src, unsigned len, char *dst);
+
/* version.c */
extern Datum pgsql_version(PG_FUNCTION_ARGS);
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index bb36121..504ad2f 100644
*** a/src/interfaces/libpq/fe-exec.c
--- b/src/interfaces/libpq/fe-exec.c
*************** PQescapeBytea(const unsigned char *from,
*** 3167,3172 ****
--- 3167,3195 ----
}
+ static const int8 hexlookup[128] = {	/* indexed by ASCII code: hex digit value, or -1 if not a hex digit */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,	/* codes 48..63: '0'..'9' */
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* codes 64..79: 'A'..'F' */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* codes 96..111: 'a'..'f' */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ };
+
+ static char
+ get_hex(char c)	/* returns the value of hex digit c, or (char) -1 when c is not a hex digit */
+ {
+ int res = -1;	/* stays -1 unless the table lookup below replaces it */
+
+ if (c > 0 && c < 127)	/* only codes 1..126 are looked up; everything else keeps -1 */
+ res = hexlookup[(unsigned char) c];
+
+ return (char) res;	/* NOTE(review): the hex branch of PQunescapeBytea uses this result without testing for -1 */
+ }
+
+
#define ISFIRSTOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '3')
#define ISOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '7')
#define OCTVAL(CH) ((CH) - '0')
*************** PQunescapeBytea(const unsigned char *str
*** 3198,3203 ****
--- 3221,3257 ----
strtextlen = strlen((const char *) strtext);
+ if (strtext[0] == '\\' && strtext[1] == 'x')
+ {
+ const unsigned char *s;
+ unsigned char *p;
+
+ *retbuflen = (strtextlen - 2)/2;
+ /* Avoid unportable malloc(0) */
+ buffer = malloc(*retbuflen > 0 ? *retbuflen : 1);
+ if (buffer == NULL)
+ return NULL;
+
+ s = strtext + 2;
+ p = buffer;
+ while (*s)
+ {
+ char v1, v2;
+
+ v1 = get_hex(*s++) << 4;
+ if (!*s)
+ {
+ *retbuflen = -1;
+ return NULL;
+ }
+
+ v2 = get_hex(*s++);
+ *p++ = v1 | v2;
+ }
+
+ return buffer;
+ }
+ else {
/*
* Length of input is max length of output, but add one to avoid
* unportable malloc(0) if input is zero-length.
*************** PQunescapeBytea(const unsigned char *str
*** 3259,3261 ****
--- 3313,3316 ----
*retbuflen = buflen;
return tmpbuf;
}
+ }
diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out
index e80e1a4..8f08706 100644
*** a/src/test/regress/expected/conversion.out
--- b/src/test/regress/expected/conversion.out
***************
*** 1,3 ****
--- 1,4 ----
+ SET bytea_output TO escape;
--
-- create user defined conversion
--
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 1241a2a..feaa574 100644
*** a/src/test/regress/expected/strings.out
--- b/src/test/regress/expected/strings.out
*************** LINE 1: SELECT U&'wrong: +0061' UESCAPE
*** 97,102 ****
--- 97,183 ----
^
DETAIL: String constants with Unicode escapes cannot be used when standard_conforming_strings is off.
RESET standard_conforming_strings;
+ -- bytea
+ SET bytea_output TO hex;
+ SELECT E'\\xDeAdBeEf'::bytea;
+ bytea
+ ------------
+ \xdeadbeef
+ (1 row)
+
+ SELECT E'\\xDeAdBeE'::bytea;
+ ERROR: invalid hexadecimal data: odd number of digits
+ LINE 1: SELECT E'\\xDeAdBeE'::bytea;
+ ^
+ SELECT E'\\xDeAdBeEx'::bytea;
+ ERROR: invalid hexadecimal digit: "x"
+ LINE 1: SELECT E'\\xDeAdBeEx'::bytea;
+ ^
+ SELECT E'\\xDe00BeEf'::bytea;
+ bytea
+ ------------
+ \xde00beef
+ (1 row)
+
+ SELECT E'DeAdBeEf'::bytea;
+ bytea
+ --------------------
+ \x4465416442654566
+ (1 row)
+
+ SELECT E'De\\000dBeEf'::bytea;
+ bytea
+ --------------------
+ \x4465006442654566
+ (1 row)
+
+ SELECT E'De\123dBeEf'::bytea;
+ bytea
+ --------------------
+ \x4465536442654566
+ (1 row)
+
+ SELECT E'De\\123dBeEf'::bytea;
+ bytea
+ --------------------
+ \x4465536442654566
+ (1 row)
+
+ SELECT E'De\\678dBeEf'::bytea;
+ ERROR: invalid input syntax for type bytea
+ LINE 1: SELECT E'De\\678dBeEf'::bytea;
+ ^
+ SET bytea_output TO escape;
+ SELECT E'\\xDeAdBeEf'::bytea;
+ bytea
+ ------------------
+ \336\255\276\357
+ (1 row)
+
+ SELECT E'\\xDe00BeEf'::bytea;
+ bytea
+ ------------------
+ \336\000\276\357
+ (1 row)
+
+ SELECT E'DeAdBeEf'::bytea;
+ bytea
+ ----------
+ DeAdBeEf
+ (1 row)
+
+ SELECT E'De\\000dBeEf'::bytea;
+ bytea
+ -------------
+ De\000dBeEf
+ (1 row)
+
+ SELECT E'De\\123dBeEf'::bytea;
+ bytea
+ ----------
+ DeSdBeEf
+ (1 row)
+
--
-- test conversions between various string types
-- E021-10 implicit casting among the character data types
diff --git a/src/test/regress/input/largeobject.source b/src/test/regress/input/largeobject.source
index 46ba926..5bfba18 100644
*** a/src/test/regress/input/largeobject.source
--- b/src/test/regress/input/largeobject.source
***************
*** 2,7 ****
--- 2,9 ----
-- Test large object support
--
+ SET bytea_output TO escape;
+
-- Load a file
CREATE TABLE lotest_stash_values (loid oid, fd integer);
-- lo_creat(mode integer) returns oid
diff --git a/src/test/regress/output/largeobject.source b/src/test/regress/output/largeobject.source
index 9d69f6c..5ff2e3b 100644
*** a/src/test/regress/output/largeobject.source
--- b/src/test/regress/output/largeobject.source
***************
*** 1,6 ****
--- 1,7 ----
--
-- Test large object support
--
+ SET bytea_output TO escape;
-- Load a file
CREATE TABLE lotest_stash_values (loid oid, fd integer);
-- lo_creat(mode integer) returns oid
diff --git a/src/test/regress/output/largeobject_1.source b/src/test/regress/output/largeobject_1.source
index 1fbc29c..0ece752 100644
*** a/src/test/regress/output/largeobject_1.source
--- b/src/test/regress/output/largeobject_1.source
***************
*** 1,6 ****
--- 1,7 ----
--
-- Test large object support
--
+ SET bytea_output TO escape;
-- Load a file
CREATE TABLE lotest_stash_values (loid oid, fd integer);
-- lo_creat(mode integer) returns oid
diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql
index 99a9178..c4bb583 100644
*** a/src/test/regress/sql/conversion.sql
--- b/src/test/regress/sql/conversion.sql
***************
*** 1,3 ****
--- 1,5 ----
+ SET bytea_output TO escape;
+
--
-- create user defined conversion
--
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 681a0e1..de067df 100644
*** a/src/test/regress/sql/strings.sql
--- b/src/test/regress/sql/strings.sql
*************** SELECT U&'wrong: +0061' UESCAPE '+';
*** 43,48 ****
--- 43,67 ----
RESET standard_conforming_strings;
+ -- bytea
+ SET bytea_output TO hex;
+ SELECT E'\\xDeAdBeEf'::bytea;
+ SELECT E'\\xDeAdBeE'::bytea;
+ SELECT E'\\xDeAdBeEx'::bytea;
+ SELECT E'\\xDe00BeEf'::bytea;
+ SELECT E'DeAdBeEf'::bytea;
+ SELECT E'De\\000dBeEf'::bytea;
+ SELECT E'De\123dBeEf'::bytea;
+ SELECT E'De\\123dBeEf'::bytea;
+ SELECT E'De\\678dBeEf'::bytea;
+
+ SET bytea_output TO escape;
+ SELECT E'\\xDeAdBeEf'::bytea;
+ SELECT E'\\xDe00BeEf'::bytea;
+ SELECT E'DeAdBeEf'::bytea;
+ SELECT E'De\\000dBeEf'::bytea;
+ SELECT E'De\\123dBeEf'::bytea;
+
--
-- test conversions between various string types
-- E021-10 implicit casting among the character data types