diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 99d25d7..9a543bc 100644 *** a/doc/src/sgml/config.sgml --- b/doc/src/sgml/config.sgml *************** COPY postgres_log FROM '/full/path/to/lo *** 3735,3740 **** --- 3735,3757 ---- Statement Behavior + + bytea_output (enum) + + bytea_output configuration parameter + + + + Sets the output format for values of type bytea. + Valid values are hex (the default) + and escape (the traditional PostgreSQL + format). See the description of the bytea data type for more + information. Note that the bytea type always + accepts both formats on input. + + + + search_path (string) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index f7ee8e9..8576419 100644 *** a/doc/src/sgml/datatype.sgml --- b/doc/src/sgml/datatype.sgml *************** *** 1,4 **** ! Data Types --- 1,4 ---- ! Data Types *************** SELECT b, char_length(b) FROM test2; *** 1191,1196 **** --- 1191,1256 ---- + The bytea type supports two external formats for + input and output: the escape format that is + particular to PostgreSQL, and the hex format. Both + of these are always accepted on input. The output format depends + on the configuration parameter bytea_output; + the default is hex. (Note that the hex format was introduced in + PostgreSQL 8.5, so earlier versions and some tools don't understand + it.) + + + + The SQL standard defines a different binary + string type, called BLOB or BINARY LARGE + OBJECT. The input format is different from + bytea, but the provided functions and operators are + mostly the same. + + + + The Hex Format + + + The hex format encodes the binary data as 2 hexadecimal digits per + byte, most significant nibble first. The entire string is + preceded by the sequence \x (to distinguish it + from the escape format). In SQL literals, the backslash may need + to be escaped, but it is one logical backslash as far as the + bytea type is concerned. 
The hex format is compatible with a wide + range of external applications and protocols, and it tends to be + faster than the traditional escape format, so its use is + somewhat preferable. + + + + Example: + + SELECT E'\\xDEADBEEF'; + + + + + + The Escape Format + + + The escape format is the traditional + PostgreSQL-specific format for the bytea type. It + takes the approach of representing a binary string as a sequence + of ASCII characters and escaping those bytes that cannot be + represented as an ASCII character by a special escape sequence. + If, from the point of view of the application, representing bytes + as characters makes sense, then this representation can be + convenient, but in practice it is usually confusing because it + blurs the distinction between binary strings and character + strings, and the particular escape mechanism that was chosen is + also somewhat unwieldy. So this format should probably not be + used for most new applications. + + + When entering bytea values, octets of certain values must be escaped (but all octet values can be escaped) when used as part *************** SELECT b, char_length(b) FROM test2; *** 1343,1356 **** have to escape line feeds and carriage returns if your interface automatically translates these. ! ! ! The SQL standard defines a different binary ! string type, called BLOB or BINARY LARGE ! OBJECT. The input format is different from ! bytea, but the provided functions and operators are ! mostly the same. ! --- 1403,1409 ---- have to escape line feeds and carriage returns if your interface automatically translates these. ! diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index eed799a..4f47c8c 100644 *** a/src/backend/utils/adt/encode.c --- b/src/backend/utils/adt/encode.c *************** static const int8 hexlookup[128] = { *** 122,128 **** -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; ! 
static unsigned hex_encode(const char *src, unsigned len, char *dst) { const char *end = src + len; --- 122,128 ---- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; ! unsigned hex_encode(const char *src, unsigned len, char *dst) { const char *end = src + len; *************** get_hex(char c) *** 152,158 **** return (char) res; } ! static unsigned hex_decode(const char *src, unsigned len, char *dst) { const char *s, --- 152,158 ---- return (char) res; } ! unsigned hex_decode(const char *src, unsigned len, char *dst) { const char *s, diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 4434c97..9c8ea9d 100644 *** a/src/backend/utils/adt/varlena.c --- b/src/backend/utils/adt/varlena.c *************** static text *text_substring(Datum str, *** 62,67 **** --- 62,69 ---- bool length_not_specified); static void appendStringInfoText(StringInfo str, const text *t); + int bytea_output; + /***************************************************************************** * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE * *************** byteain(PG_FUNCTION_ARGS) *** 189,194 **** --- 191,207 ---- int byte; bytea *result; + if (inputText[0] == '\\' && inputText[1] == 'x') /* hex format: "\x" followed by hex digits */ + { + size_t len = strlen(inputText); + byte = (len - 2)/2 + VARHDRSZ; /* upper bound only: two input chars per output byte */ + result = palloc(byte); + byte = hex_decode(inputText + 2, len - 2, VARDATA(result)); /* return value taken as the decoded byte count; may be below the bound if the decoder skips characters -- confirm in encode.c */ + SET_VARSIZE(result, byte + VARHDRSZ); /* size from what was actually decoded, so no uninitialized tail */ + + PG_RETURN_BYTEA_P(result); + } + for (byte = 0, tp = inputText; *tp != '\0'; byte ++) { if (tp[0] != '\\') *************** Datum *** 268,278 **** byteaout(PG_FUNCTION_ARGS) { bytea *vlena = PG_GETARG_BYTEA_PP(0); ! char *result; char *vp; ! char *rp; ! int val; /* holds unprintable chars */ int i; int len; len = 1; /* empty string has 1 char */ --- 281,300 ---- byteaout(PG_FUNCTION_ARGS) { bytea *vlena = PG_GETARG_BYTEA_PP(0); ! char *result = NULL; char *vp; ! 
char *rp = NULL; int i; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); /* 2 hex digits per byte + "\x" prefix + trailing NUL */ + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { int len; len = 1; /* empty string has 1 char */ *************** byteaout(PG_FUNCTION_ARGS) *** 297,302 **** --- 319,326 ---- } else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) { + int val; /* holds unprintable chars */ + val = *vp; rp[0] = '\\'; rp[3] = DIG(val & 07); *************** byteaout(PG_FUNCTION_ARGS) *** 309,314 **** --- 333,341 ---- else *rp++ = *vp; } + } + else + elog(ERROR, "unrecognized bytea_output setting: %d", bytea_output); *rp = '\0'; PG_RETURN_CSTRING(result); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 11ebac8..0f6ef6c 100644 *** a/src/backend/utils/misc/guc.c --- b/src/backend/utils/misc/guc.c *************** static char *config_enum_get_options(str *** 180,185 **** --- 180,191 ---- * NOTE! Option values may not contain double quotes! */ + static const struct config_enum_entry bytea_output_options[] = { + {"escape", BYTEA_OUTPUT_ESCAPE, false}, + {"hex", BYTEA_OUTPUT_HEX, false}, + {NULL, 0, false} + }; + /* * We have different sets for client and server message level options because * they sort slightly different (see "log" level) *************** static struct config_enum ConfigureNames *** 2533,2538 **** --- 2539,2553 ---- }, { + {"bytea_output", PGC_USERSET, CLIENT_CONN_STATEMENT, /* documented under "Statement Behavior" */ + gettext_noop("Sets the output format for bytea."), + NULL + }, + &bytea_output, + BYTEA_OUTPUT_HEX, bytea_output_options, NULL, NULL + }, + + { {"client_min_messages", PGC_USERSET, LOGGING_WHEN, gettext_noop("Sets the message levels that are sent to the client."), gettext_noop("Each level includes all the levels that follow it. 
The later" diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 13fd41a..8635416 100644 *** a/src/include/utils/builtins.h --- b/src/include/utils/builtins.h *************** extern Datum unknownout(PG_FUNCTION_ARGS *** 704,709 **** --- 704,716 ---- extern Datum unknownrecv(PG_FUNCTION_ARGS); extern Datum unknownsend(PG_FUNCTION_ARGS); + typedef enum { + BYTEA_OUTPUT_ESCAPE, + BYTEA_OUTPUT_HEX + } ByteaOutputType; + + extern int bytea_output; /* ByteaOutputType, but int for guc enum */ + extern Datum byteain(PG_FUNCTION_ARGS); extern Datum byteaout(PG_FUNCTION_ARGS); extern Datum bytearecv(PG_FUNCTION_ARGS); *************** extern Datum bytea_substr(PG_FUNCTION_AR *** 728,733 **** --- 735,743 ---- extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); extern Datum pg_column_size(PG_FUNCTION_ARGS); + extern unsigned hex_encode(const char *src, unsigned len, char *dst); + extern unsigned hex_decode(const char *src, unsigned len, char *dst); + /* version.c */ extern Datum pgsql_version(PG_FUNCTION_ARGS); diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index bb36121..504ad2f 100644 *** a/src/interfaces/libpq/fe-exec.c --- b/src/interfaces/libpq/fe-exec.c *************** PQescapeBytea(const unsigned char *from, *** 3167,3172 **** --- 3167,3195 ---- } + static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }; + + static char + get_hex(char c) /* value 0..15 of hex digit c, or (char) -1 if c is not a hex digit */ + { + int res = -1; + + if (c > 0 && c < 127) + res = hexlookup[(unsigned char) c]; + 
+ return (char) res; + } + + #define ISFIRSTOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '3') #define ISOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '7') #define OCTVAL(CH) ((CH) - '0') *************** PQunescapeBytea(const unsigned char *str *** 3198,3203 **** --- 3221,3257 ---- strtextlen = strlen((const char *) strtext); + if (strtext[0] == '\\' && strtext[1] == 'x') /* hex format: "\x" followed by digit pairs */ + { + const unsigned char *s; + unsigned char *p; + + *retbuflen = (strtextlen - 2)/2; + /* Avoid unportable malloc(0) */ + buffer = malloc(*retbuflen > 0 ? *retbuflen : 1); + if (buffer == NULL) + return NULL; + + s = strtext + 2; + p = buffer; + while (*s) + { + char v1, v2; + + v1 = get_hex(*s++); + v2 = *s ? get_hex(*s++) : (char) -1; + /* fail on a dangling nibble or a non-hex digit, releasing the buffer instead of leaking it */ + if (v1 == (char) -1 || v2 == (char) -1) + { + free(buffer); + return NULL; + } + *p++ = (v1 << 4) | v2; /* both nibbles validated, so no shift of a negative value */ + } + + return buffer; + } + else { /* * Length of input is max length of output, but add one to avoid * unportable malloc(0) if input is zero-length. *************** PQunescapeBytea(const unsigned char *str *** 3259,3261 **** --- 3313,3316 ---- *retbuflen = buflen; return tmpbuf; } + } diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out index e80e1a4..8f08706 100644 *** a/src/test/regress/expected/conversion.out --- b/src/test/regress/expected/conversion.out *************** *** 1,3 **** --- 1,4 ---- + SET bytea_output TO escape; -- -- create user defined conversion -- diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 1241a2a..feaa574 100644 *** a/src/test/regress/expected/strings.out --- b/src/test/regress/expected/strings.out *************** LINE 1: SELECT U&'wrong: +0061' UESCAPE *** 97,102 **** --- 97,183 ---- ^ DETAIL: String constants with Unicode escapes cannot be used when standard_conforming_strings is off. 
RESET standard_conforming_strings; + -- bytea + SET bytea_output TO hex; + SELECT E'\\xDeAdBeEf'::bytea; + bytea + ------------ + \xdeadbeef + (1 row) + + SELECT E'\\xDeAdBeE'::bytea; + ERROR: invalid hexadecimal data: odd number of digits + LINE 1: SELECT E'\\xDeAdBeE'::bytea; + ^ + SELECT E'\\xDeAdBeEx'::bytea; + ERROR: invalid hexadecimal digit: "x" + LINE 1: SELECT E'\\xDeAdBeEx'::bytea; + ^ + SELECT E'\\xDe00BeEf'::bytea; + bytea + ------------ + \xde00beef + (1 row) + + SELECT E'DeAdBeEf'::bytea; + bytea + -------------------- + \x4465416442654566 + (1 row) + + SELECT E'De\\000dBeEf'::bytea; + bytea + -------------------- + \x4465006442654566 + (1 row) + + SELECT E'De\123dBeEf'::bytea; + bytea + -------------------- + \x4465536442654566 + (1 row) + + SELECT E'De\\123dBeEf'::bytea; + bytea + -------------------- + \x4465536442654566 + (1 row) + + SELECT E'De\\678dBeEf'::bytea; + ERROR: invalid input syntax for type bytea + LINE 1: SELECT E'De\\678dBeEf'::bytea; + ^ + SET bytea_output TO escape; + SELECT E'\\xDeAdBeEf'::bytea; + bytea + ------------------ + \336\255\276\357 + (1 row) + + SELECT E'\\xDe00BeEf'::bytea; + bytea + ------------------ + \336\000\276\357 + (1 row) + + SELECT E'DeAdBeEf'::bytea; + bytea + ---------- + DeAdBeEf + (1 row) + + SELECT E'De\\000dBeEf'::bytea; + bytea + ------------- + De\000dBeEf + (1 row) + + SELECT E'De\\123dBeEf'::bytea; + bytea + ---------- + DeSdBeEf + (1 row) + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types diff --git a/src/test/regress/input/largeobject.source b/src/test/regress/input/largeobject.source index 46ba926..5bfba18 100644 *** a/src/test/regress/input/largeobject.source --- b/src/test/regress/input/largeobject.source *************** *** 2,7 **** --- 2,9 ---- -- Test large object support -- + SET bytea_output TO escape; + -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git 
a/src/test/regress/output/largeobject.source b/src/test/regress/output/largeobject.source index 9d69f6c..5ff2e3b 100644 *** a/src/test/regress/output/largeobject.source --- b/src/test/regress/output/largeobject.source *************** *** 1,6 **** --- 1,7 ---- -- -- Test large object support -- + SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject_1.source b/src/test/regress/output/largeobject_1.source index 1fbc29c..0ece752 100644 *** a/src/test/regress/output/largeobject_1.source --- b/src/test/regress/output/largeobject_1.source *************** *** 1,6 **** --- 1,7 ---- -- -- Test large object support -- + SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql index 99a9178..c4bb583 100644 *** a/src/test/regress/sql/conversion.sql --- b/src/test/regress/sql/conversion.sql *************** *** 1,3 **** --- 1,5 ---- + SET bytea_output TO escape; + -- -- create user defined conversion -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 681a0e1..de067df 100644 *** a/src/test/regress/sql/strings.sql --- b/src/test/regress/sql/strings.sql *************** SELECT U&'wrong: +0061' UESCAPE '+'; *** 43,48 **** --- 43,67 ---- RESET standard_conforming_strings; + -- bytea + SET bytea_output TO hex; + SELECT E'\\xDeAdBeEf'::bytea; + SELECT E'\\xDeAdBeE'::bytea; + SELECT E'\\xDeAdBeEx'::bytea; + SELECT E'\\xDe00BeEf'::bytea; + SELECT E'DeAdBeEf'::bytea; + SELECT E'De\\000dBeEf'::bytea; + SELECT E'De\123dBeEf'::bytea; + SELECT E'De\\123dBeEf'::bytea; + SELECT E'De\\678dBeEf'::bytea; + + SET bytea_output TO escape; + SELECT E'\\xDeAdBeEf'::bytea; + SELECT E'\\xDe00BeEf'::bytea; + SELECT E'DeAdBeEf'::bytea; + SELECT E'De\\000dBeEf'::bytea; + 
SELECT E'De\\123dBeEf'::bytea; + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types