Index: src/backend/commands/copy.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/commands/copy.c,v retrieving revision 1.244 diff -u -r1.244 copy.c --- src/backend/commands/copy.c 7 May 2005 02:22:46 -0000 1.244 +++ src/backend/commands/copy.c 13 May 2005 22:21:00 -0000 @@ -84,6 +84,16 @@ EOL_CRNL } EolType; +/* + * Represents the format of the file to be read or written + */ +typedef enum CopyFmt +{ + FMT_TXT, + FMT_BIN, + FMT_CSV, + FMT_XML +} CopyFmt; static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"; @@ -129,14 +139,14 @@ static bool line_buf_converted; /* non-export function prototypes */ -static void DoCopyTo(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print, bool csv_mode, char *quote, +static void DoCopyTo(Relation rel, List *attnumlist, CopyFmt fmt, bool oids, + char *delim, char *null_print, char *quote, char *escape, List *force_quote_atts, bool header_line, bool fe_copy); -static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print, bool csv_mode, char *quote, char *escape, +static void CopyTo(Relation rel, List *attnumlist, CopyFmt fmt, bool oids, + char *delim, char *null_print, char *quote, char *escape, List *force_quote_atts, bool header_line); -static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print, bool csv_mode, char *quote, char *escape, +static void CopyFrom(Relation rel, List *attnumlist, CopyFmt fmt, bool oids, + char *delim, char *null_print, char *quote, char *escape, List *force_notnull_atts, bool header_line); static bool CopyReadLine(char * quote, char * escape); static char *CopyReadAttribute(const char *delim, const char *null_print, @@ -171,6 +181,11 @@ static void CopySendInt16(int16 val); static int16 CopyGetInt16(void); +static int GetDecimalFromHex(char hex); + +static void CopyAttributeOutXML (char *colname, char 
*string); +static void CopySendStringXML(char *string); +static char *CopyGetXMLEntity(char c, char *buf); /* * Send copy start/stop messages for frontend copies. These have changed @@ -692,10 +707,9 @@ List *attnamelist = stmt->attlist; List *attnumlist; bool fe_copy = false; - bool binary = false; bool oids = false; - bool csv_mode = false; - bool header_line = false; + bool header_line = false; + CopyFmt fmt = FMT_TXT; char *delim = NULL; char *quote = NULL; char *escape = NULL; @@ -715,11 +729,11 @@ if (strcmp(defel->defname, "binary") == 0) { - if (binary) + if (fmt != FMT_TXT) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); - binary = intVal(defel->arg); + fmt = FMT_BIN; } else if (strcmp(defel->defname, "oids") == 0) { @@ -747,11 +761,19 @@ } else if (strcmp(defel->defname, "csv") == 0) { - if (csv_mode) + if (fmt != FMT_TXT) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); - csv_mode = intVal(defel->arg); + fmt = FMT_CSV; + } + else if (strcmp(defel->defname, "xml") == 0) + { + if (fmt != FMT_TXT) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + fmt = FMT_XML; } else if (strcmp(defel->defname, "header") == 0) { @@ -798,29 +820,39 @@ defel->defname); } - if (binary && delim) + if (fmt == FMT_BIN && delim) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("cannot specify DELIMITER in BINARY mode"))); - if (binary && csv_mode) + if (fmt == FMT_BIN && null_print) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("cannot specify CSV in BINARY mode"))); + errmsg("cannot specify NULL in BINARY mode"))); - if (binary && null_print) + if (fmt == FMT_XML && is_from) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("cannot specify NULL in BINARY mode"))); + errmsg("XML mode is not available in COPY FROM"))); + + if (fmt == FMT_XML && delim) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot 
specify DELIMITER in XML mode"))); + + if (fmt == FMT_XML && null_print) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify NULL in XML mode"))); /* Set defaults */ if (!delim) - delim = csv_mode ? "," : "\t"; + delim = (fmt == FMT_CSV) ? "," : "\t"; if (!null_print) - null_print = csv_mode ? "" : "\\N"; + null_print = (fmt == FMT_CSV) ? "" : "\\N"; - if (csv_mode) + if (fmt == FMT_CSV) { if (!quote) quote = "\""; @@ -835,35 +867,35 @@ errmsg("COPY delimiter must be a single character"))); /* Check header */ - if (!csv_mode && header_line) + if (fmt != FMT_CSV && header_line) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY HEADER available only in CSV mode"))); /* Check quote */ - if (!csv_mode && quote != NULL) + if (fmt != FMT_CSV && quote != NULL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY quote available only in CSV mode"))); - if (csv_mode && strlen(quote) != 1) + if (fmt == FMT_CSV && strlen(quote) != 1) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY quote must be a single character"))); /* Check escape */ - if (!csv_mode && escape != NULL) + if (fmt != FMT_CSV && escape != NULL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY escape available only in CSV mode"))); - if (csv_mode && strlen(escape) != 1) + if (fmt == FMT_CSV && strlen(escape) != 1) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY escape must be a single character"))); /* Check force_quote */ - if (!csv_mode && force_quote != NIL) + if (fmt != FMT_CSV && force_quote != NIL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY force quote available only in CSV mode"))); @@ -873,7 +905,7 @@ errmsg("COPY force quote only available using COPY TO"))); /* Check force_notnull */ - if (!csv_mode && force_notnull != NIL) + if (fmt != FMT_CSV && force_notnull != NIL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY force not null 
available only in CSV mode"))); @@ -889,7 +921,7 @@ errmsg("COPY delimiter must not appear in the NULL specification"))); /* Don't allow the csv quote char to appear in the null string. */ - if (csv_mode && strchr(null_print, quote[0]) != NULL) + if (fmt == FMT_CSV && strchr(null_print, quote[0]) != NULL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("CSV quote character must not appear in the NULL specification"))); @@ -1004,7 +1036,7 @@ if (pipe) { if (whereToSendOutput == Remote) - ReceiveCopyBegin(binary, list_length(attnumlist)); + ReceiveCopyBegin(fmt == FMT_BIN, list_length(attnumlist)); else copy_file = stdin; } @@ -1029,7 +1061,7 @@ errmsg("\"%s\" is a directory", filename))); } } - CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode, + CopyFrom(rel, attnumlist, fmt, oids, delim, null_print, quote, escape, force_notnull_atts, header_line); } else @@ -1093,7 +1125,7 @@ } } - DoCopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode, + DoCopyTo(rel, attnumlist, fmt, oids, delim, null_print, quote, escape, force_quote_atts, header_line, fe_copy); } @@ -1124,20 +1156,20 @@ * so we don't need to plaster a lot of variables with "volatile". */ static void -DoCopyTo(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print, bool csv_mode, char *quote, +DoCopyTo(Relation rel, List *attnumlist, CopyFmt fmt, bool oids, + char *delim, char *null_print, char *quote, char *escape, List *force_quote_atts, bool header_line, bool fe_copy) { PG_TRY(); { if (fe_copy) - SendCopyBegin(binary, list_length(attnumlist)); + SendCopyBegin(fmt == FMT_BIN, list_length(attnumlist)); - CopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode, + CopyTo(rel, attnumlist, fmt, oids, delim, null_print, quote, escape, force_quote_atts, header_line); if (fe_copy) - SendCopyEnd(binary); + SendCopyEnd(fmt == FMT_BIN); } PG_CATCH(); { @@ -1156,8 +1188,8 @@ * Copy from relation TO file. 
*/ static void -CopyTo(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print, bool csv_mode, char *quote, +CopyTo(Relation rel, List *attnumlist, CopyFmt fmt, bool oids, + char *delim, char *null_print, char *quote, char *escape, List *force_quote_atts, bool header_line) { HeapTuple tuple; @@ -1187,7 +1219,7 @@ Oid out_func_oid; bool isvarlena; - if (binary) + if (fmt == FMT_BIN) getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid, &out_func_oid, &isvarlena); @@ -1215,7 +1247,7 @@ ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); - if (binary) + if (fmt == FMT_BIN) { /* Generate header for a binary copy */ int32 tmp; @@ -1233,6 +1265,14 @@ } else { + if (fmt == FMT_XML) + { + CopySendString(""); + CopySendEndOfRow(false); + CopySendString(""); + CopySendEndOfRow(false); + } + /* * For non-binary copy, we need to convert null_print to client * encoding, because it will be sent directly with CopySendString. @@ -1262,7 +1302,7 @@ strcmp(colname, null_print) == 0); } - CopySendEndOfRow(binary); + CopySendEndOfRow(fmt == FMT_BIN); } } @@ -1278,7 +1318,7 @@ MemoryContextReset(mycontext); oldcontext = MemoryContextSwitchTo(mycontext); - if (binary) + if (fmt == FMT_BIN) { /* Binary per-tuple header */ CopySendInt16(attr_count); @@ -1294,25 +1334,34 @@ } else { + if (fmt == FMT_XML) + CopySendString(""); + /* Text format has no per-tuple header, but send OID if wanted */ if (oids) { string = DatumGetCString(DirectFunctionCall1(oidout, ObjectIdGetDatum(HeapTupleGetOid(tuple)))); - CopySendString(string); + + if (fmt == FMT_XML) + CopyAttributeOutXML("oid", string); + else + CopySendString(string); + need_delim = true; } } foreach(cur, attnumlist) { - int attnum = lfirst_int(cur); + int attnum = lfirst_int(cur); Datum value; bool isnull; + char *colname = NameStr(attr[attnum - 1]->attname); value = heap_getattr(tuple, attnum, tupDesc, &isnull); - if (!binary) + if (fmt == FMT_TXT || fmt == FMT_CSV) { if (need_delim) 
CopySendChar(delim[0]); @@ -1321,53 +1370,71 @@ if (isnull) { - if (!binary) - CopySendString(null_print); /* null indicator */ - else - CopySendInt32(-1); /* null marker */ + switch (fmt) + { + case FMT_BIN: + CopySendInt32(-1); /* null marker */ + break; + case FMT_XML: + CopyAttributeOutXML(colname, NULL); /* null entity */ + break; + default: + CopySendString(null_print); /* null indicator */ + break; + } + } else { - if (!binary) + if (fmt == FMT_BIN) { - string = DatumGetCString(FunctionCall1(&out_functions[attnum - 1], - value)); - if (csv_mode) - { - CopyAttributeOutCSV(string, delim, quote, escape, - (strcmp(string, null_print) == 0 || - force_quote[attnum - 1])); - } - else - CopyAttributeOut(string, delim); - + bytea *outputbytes = + DatumGetByteaP(FunctionCall1(&out_functions[attnum - 1], value)); + /* We assume the result will not have been toasted */ + CopySendInt32(VARSIZE(outputbytes) - VARHDRSZ); + CopySendData(VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ); } else { - bytea *outputbytes; - - outputbytes = DatumGetByteaP(FunctionCall1(&out_functions[attnum - 1], - value)); - /* We assume the result will not have been toasted */ - CopySendInt32(VARSIZE(outputbytes) - VARHDRSZ); - CopySendData(VARDATA(outputbytes), - VARSIZE(outputbytes) - VARHDRSZ); + string = DatumGetCString(FunctionCall1(&out_functions[attnum - 1], value)); + switch (fmt) + { + case FMT_CSV: + CopyAttributeOutCSV(string, delim, quote, escape, + (strcmp(string, null_print) == 0 + || force_quote[attnum - 1])); + break; + case FMT_XML: + CopyAttributeOutXML(colname, string); + break; + default: + CopyAttributeOut(string, delim); + break; + } } } } - CopySendEndOfRow(binary); + if (fmt == FMT_XML) + CopySendString(""); + + CopySendEndOfRow(fmt == FMT_BIN); MemoryContextSwitchTo(oldcontext); } heap_endscan(scandesc); - if (binary) + if (fmt == FMT_BIN) { /* Generate trailer for a binary copy */ CopySendInt16(-1); } + else if (fmt == FMT_XML) + { + CopySendString("
"); + CopySendEndOfRow(false); + } MemoryContextDelete(mycontext); @@ -1464,8 +1531,8 @@ * Copy FROM file to relation. */ static void -CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids, - char *delim, char *null_print, bool csv_mode, char *quote, +CopyFrom(Relation rel, List *attnumlist, CopyFmt fmt, bool oids, + char *delim, char *null_print, char *quote, char *escape, List *force_notnull_atts, bool header_line) { HeapTuple tuple; @@ -1549,7 +1616,7 @@ continue; /* Fetch the input function and typioparam info */ - if (binary) + if (fmt == FMT_BIN) getTypeBinaryInputInfo(attr[attnum - 1]->atttypid, &in_func_oid, &typioparams[attnum - 1]); else @@ -1620,7 +1687,7 @@ */ ExecBSInsertTriggers(estate, resultRelInfo); - if (!binary) + if (fmt != FMT_BIN) file_has_oids = oids; /* must rely on user to tell us this... */ else { @@ -1663,7 +1730,7 @@ } } - if (file_has_oids && binary) + if (file_has_oids && fmt == FMT_BIN) { getTypeBinaryInputInfo(OIDOID, &in_func_oid, &oid_typioparam); @@ -1681,7 +1748,7 @@ /* Initialize static variables */ fe_eof = false; eol_type = EOL_UNKNOWN; - copy_binary = binary; + copy_binary = (fmt == FMT_BIN); copy_relname = RelationGetRelationName(rel); copy_lineno = 0; copy_attname = NULL; @@ -1718,14 +1785,14 @@ MemSet(values, 0, num_phys_attrs * sizeof(Datum)); MemSet(nulls, 'n', num_phys_attrs * sizeof(char)); - if (!binary) + if (fmt != FMT_BIN) { CopyReadResult result = NORMAL_ATTR; char *string; ListCell *cur; /* Actually read the line into memory here */ - done = csv_mode ? + done = (fmt == FMT_CSV) ? 
CopyReadLine(quote, escape) : CopyReadLine(NULL, NULL);
 
 			/*
@@ -1776,7 +1843,7 @@
 							 errmsg("missing data for column \"%s\"",
 									NameStr(attr[m]->attname))));
 
-				if (csv_mode)
+				if (fmt == FMT_CSV)
 				{
 					string = CopyReadAttributeCSV(delim, null_print, quote,
 												  escape, &result, &isnull);
@@ -1789,7 +1856,7 @@
 					string = CopyReadAttribute(delim, null_print,
 											   &result, &isnull);
 
-				if (csv_mode && isnull && force_notnull[m])
+				if (fmt == FMT_CSV && isnull && force_notnull[m])
 				{
 					string = null_print;	/* set to NULL string */
 					isnull = false;
@@ -2275,6 +2342,27 @@
 }
 
 /*----------
+ * Returns the numeric value (0..15) of a single hexadecimal digit, or -1
+ * if hex is not a hexadecimal digit.  The caller added by this patch checks
+ * the character with isxdigit() before calling.
+ *----------
+ */
+static int
+GetDecimalFromHex(char hex)
+{
+	/* <ctype.h> functions require an unsigned char value, not a plain char */
+	if (isdigit((unsigned char) hex))
+		return hex - '0';
+	if (hex >= 'a' && hex <= 'f')
+		return hex - 'a' + 10;
+	if (hex >= 'A' && hex <= 'F')
+		return hex - 'A' + 10;
+
+	/* Not a hexadecimal digit */
+	return -1;
+}
+
+/*----------
  * Read the value of a single attribute, performing de-escaping as needed.
  *
  * delim is the column delimiter string (must be just one byte for now).
@@ -2378,6 +2466,29 @@
 					case 'v':
 						c = '\v';
 						break;
+					case 'x':
+					case 'X':
+						if (line_buf.cursor < line_buf.len)
+						{
+							char hexchar = line_buf.data[line_buf.cursor];
+							if (isxdigit((unsigned char) hexchar))
+							{
+								int val = GetDecimalFromHex(hexchar);
+								line_buf.cursor++;
+								if (line_buf.cursor < line_buf.len)
+								{
+									hexchar = line_buf.data[line_buf.cursor];
+									if (isxdigit((unsigned char) hexchar))
+									{
+										line_buf.cursor++;
+										val = (val << 4) + GetDecimalFromHex(hexchar);
+									}
+								}
+								/* one or two hex digits were consumed; keep the low byte */
+								c = val & 0xff;
+							}
+						}
+						break;
 
 					/*
 					 * in all other cases, take the char after '\'
@@ -2760,3 +2871,84 @@
 
 	return attnums;
 }
+
+/*
+ * Send one attribute as an XML element named after the column: the value is
+ * entity-escaped, and a NULL value is marked with a null="true" attribute.
+ * NOTE(review): the tag literals were stripped from this patch text, so the
+ * markup here is a reconstruction; colname is sent unescaped -- confirm both.
+ */
+static void
+CopyAttributeOutXML (char *colname, char *string)
+{
+	CopySendString("<");
+	CopySendString(colname);
+	CopySendString(string != NULL ? ">" : " null=\"true\">");
+
+	if (string != NULL)
+		CopySendStringXML(string);
+	CopySendString("</");
+	CopySendString(colname);
+	CopySendString(">");
+}
+
+/*
+ * Sends a string with entity escaping.
+ */
+
+static void
+CopySendStringXML (char *string)
+{
+	char *csr;
+	for (csr = string; *csr; ++csr)
+	{
+		char buf[10];
+		char *entity = CopyGetXMLEntity(*csr, buf);
+		if (entity)
+			CopySendString(entity);
+		else
+			CopySendChar(*csr);
+	}
+}
+
+/*
+ * Returns the XML replacement text for c, or NULL if c can be sent as-is.
+ * Markup characters map to the predefined entities; ASCII control
+ * characters become a hexadecimal character reference written into buf,
+ * which must have room for 7 bytes ("&#xNN;" plus the terminator).
+ */
+static char *
+CopyGetXMLEntity (char c, char *buf)
+{
+	unsigned char uc = (unsigned char) c;
+
+	switch (c)
+	{
+		case '<':
+			return "&lt;";
+		case '>':
+			return "&gt;";
+		case '&':
+			return "&amp;";
+		case '\'':
+			return "&apos;";
+		case '"':
+			return "&quot;";
+		default:
+			break;
+	}
+
+	/*
+	 * Bytes with the high bit set are passed through untouched: they may
+	 * belong to a multibyte character, and classifying them with isgraph()
+	 * would make the output depend on the server locale.
+	 */
+	if (uc < 0x20 || uc == 0x7f)
+	{
+		/* "&#x" introduces a hexadecimal reference; plain "&#" means decimal */
+		sprintf(buf, "&#x%02x;", uc);
+		return buf;
+	}
+
+	return NULL;
+}
Index: src/backend/parser/gram.y
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/parser/gram.y,v
retrieving revision 2.491
diff -u -r2.491 gram.y
--- src/backend/parser/gram.y	7 May 2005 02:22:46 -0000	2.491
+++ src/backend/parser/gram.y	13 May 2005 22:21:01 -0000
@@ -413,6 +413,8 @@
 
 	WHEN WHERE WITH WITHOUT WORK WRITE
 
+	XML
+
 	YEAR_P
 
 	ZONE
@@ -1448,6 +1450,10 @@
 				{
 					$$ = makeDefElem("header", (Node *)makeInteger(TRUE));
 				}
+			| XML
+				{
+					$$ = makeDefElem("xml", (Node *)makeInteger(TRUE));
+				}
 			| QUOTE opt_as Sconst
 				{
 					$$ = makeDefElem("quote", (Node *)makeString($3));
Index: src/backend/parser/keywords.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/parser/keywords.c,v
retrieving revision 1.155
diff -u -r1.155 keywords.c
--- src/backend/parser/keywords.c	7 May 2005 02:22:47 -0000	1.155
+++ src/backend/parser/keywords.c	13 May 2005 22:21:01 -0000
@@ -342,6 +342,7 @@
 	{"without", WITHOUT},
 	{"work", WORK},
 	{"write", WRITE},
+	{"xml", XML},
 	{"year", YEAR_P},
 	{"zone", ZONE},
 };
Index:
src/test/regress/expected/copy2.out =================================================================== RCS file: /projects/cvsroot/pgsql/src/test/regress/expected/copy2.out,v retrieving revision 1.21 diff -u -r1.21 copy2.out --- src/test/regress/expected/copy2.out 13 May 2005 06:33:40 -0000 1.21 +++ src/test/regress/expected/copy2.out 13 May 2005 22:21:01 -0000 @@ -194,6 +194,28 @@ --test that we read consecutive LFs properly CREATE TEMP TABLE testnl (a int, b text, c int); COPY testnl FROM stdin CSV; -DROP TABLE x, y; +CREATE TABLE z ( + col1 text, + col2 text +); +COPY z from stdin; +COPY z TO stdout; +Jackson, Sam \\h +ABC \\\\\t +It is "perfect". \t + NULL +COPY z TO stdout WITH CSV; +"Jackson, Sam",\h +ABC,\\ +"It is ""perfect"".", +"",NULL +COPY y TO stdout WITH XML; + + +Jackson, Sam\h +It is "perfect". + +
+DROP TABLE x, y, z; DROP FUNCTION fn_x_before(); DROP FUNCTION fn_x_after(); Index: src/test/regress/sql/copy2.sql =================================================================== RCS file: /projects/cvsroot/pgsql/src/test/regress/sql/copy2.sql,v retrieving revision 1.12 diff -u -r1.12 copy2.sql --- src/test/regress/sql/copy2.sql 13 May 2005 06:33:40 -0000 1.12 +++ src/test/regress/sql/copy2.sql 13 May 2005 22:21:01 -0000 @@ -139,7 +139,22 @@ inside",2 \. +CREATE TABLE z ( + col1 text, + col2 text +); -DROP TABLE x, y; +COPY z from stdin; +Jackson, Sam \\h +\x41\x42\x43\xa0\x1 \x5c\x5c\x9 +It is "perfect". \t + NULL +\. + +COPY z TO stdout; +COPY z TO stdout WITH CSV; +COPY y TO stdout WITH XML; + +DROP TABLE x, y, z; DROP FUNCTION fn_x_before(); DROP FUNCTION fn_x_after();