I have applied the attached patch that complete TODO item:
o -Allow dump/load of CSV format
This adds new keywords to COPY and \copy:
CSV - enable CSV mode
QUOTE - specify quote character
ESCAPE - specify escape character
FORCE - force quoting of specified columns
LITERAL - prevent NULL checks for specific columns
If folks have better ideas for naming of those keywords, I am all ears!
--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 359-1001
+ If your life is a hard drive, | 13 Roberts Road
+ Christ can be your backup. | Newtown Square, Pennsylvania 19073
Index: doc/src/sgml/ref/copy.sgml
===================================================================
RCS file: /cvsroot/pgsql-server/doc/src/sgml/ref/copy.sgml,v
retrieving revision 1.55
diff -c -c -r1.55 copy.sgml
*** doc/src/sgml/ref/copy.sgml 13 Dec 2003 23:59:07 -0000 1.55
--- doc/src/sgml/ref/copy.sgml 19 Apr 2004 17:07:42 -0000
***************
*** 26,32 ****
[ BINARY ]
[ OIDS ]
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
! [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable>
[,...] ) ]
TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
--- 26,35 ----
[ BINARY ]
[ OIDS ]
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
! [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
! [ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
! [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
! [ LITERAL <replaceable class="parameter">column</replaceable> [, ...] ]
COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable>
[,...] ) ]
TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
***************
*** 34,40 ****
[ BINARY ]
[ OIDS ]
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
! [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
</synopsis>
</refsynopsisdiv>
--- 37,46 ----
[ BINARY ]
[ OIDS ]
[ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
! [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
! [ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
! [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
! [ FORCE <replaceable class="parameter">column</replaceable> [, ...] ]
</synopsis>
</refsynopsisdiv>
***************
*** 146,152 ****
<listitem>
<para>
The single character that separates columns within each row
! (line) of the file. The default is a tab character.
</para>
</listitem>
</varlistentry>
--- 152,159 ----
<listitem>
<para>
The single character that separates columns within each row
! (line) of the file. The default is a tab character in text mode,
! a comma in <literal>CSV</> mode.
</para>
</listitem>
</varlistentry>
***************
*** 156,175 ****
<listitem>
<para>
The string that represents a null value. The default is
! <literal>\N</literal> (backslash-N). You might prefer an empty
! string, for example.
</para>
<note>
<para>
! On a <command>COPY FROM</command>, any data item that matches
this string will be stored as a null value, so you should make
sure that you use the same string as you used with
<command>COPY TO</command>.
</para>
</note>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
--- 163,248 ----
<listitem>
<para>
The string that represents a null value. The default is
! <literal>\N</literal> (backslash-N) in text mode, and a empty
! value with no quotes in <literal>CSV</> mode. You might prefer an
! empty string even in text mode for cases where you don't want to
! distinguish nulls from empty strings.
</para>
<note>
<para>
! When using <command>COPY FROM</command>, any data item that matches
this string will be stored as a null value, so you should make
sure that you use the same string as you used with
<command>COPY TO</command>.
</para>
</note>
+
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><literal>CSV</literal></term>
+ <listitem>
+ <para>
+ Enables Comma Separated Variable (<literal>CSV</>) mode. (Also called
+ Comma Separated Value). It sets the default <literal>DELIMITER</> to
+ comma, and <literal>QUOTE</> and <literal>ESCAPE</> values to
+ double-quote.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="parameter">quote</replaceable></term>
+ <listitem>
+ <para>
+ Specifies the quotation character in <literal>CSV</> mode.
+ The default is double-quote.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="parameter">escape</replaceable></term>
+ <listitem>
+ <para>
+ Specifies the character that should appear before a <literal>QUOTE</>
+ data character value in <literal>CSV</> mode. The default is the
+ <literal>QUOTE</> value (usually double-quote).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>FORCE</></term>
+ <listitem>
+ <para>
+ In <literal>CSV</> <command>COPY TO</> mode, forces quoting
+ to be used for all non-<literal>NULL</> values in each specified
+ column. <literal>NULL</> output is never quoted.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>LITERAL</></term>
+ <listitem>
+ <para>
+ In <literal>CSV</> <command>COPY FROM</> mode, for each column specified,
+ do not do a <literal>null string</> comparison; instead load the value
+ literally. <literal>QUOTE</> and <literal>ESCAPE</> processing are still
+ performed.
+ </para>
+ <para>
+ If the <literal>null string</> is <literal>''</> (the default
+ in <literal>CSV</> mode), a missing input value (<literal>delimiter,
+ delimiter</>), will load as a zero-length string. <literal>Delimiter, quote,
+ quote, delimiter</> is always treated as a zero-length string on input.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</refsect1>
***************
*** 234,239 ****
--- 307,323 ----
</para>
<para>
+ <command>COPY</command> input and output is affected by
+ <varname>DateStyle </varname>. For portability with other
+ <productname>PostgreSQL</productname> installations which might use
+ non-default <varname>DateStyle</varname> settings,
+ <varname>DateStyle</varname> should be set to <literal>ISO</> before
+ using <command>COPY</>. In <literal>CSV</> mode, use <literal>ISO</>
+ or a <varname>DateStyle</varname> setting appropriate for the
+ external application.
+ </para>
+
+ <para>
<command>COPY</command> stops operation at the first error. This
should not lead to problems in the event of a <command>COPY
TO</command>, but the target table will already have received
***************
*** 253,259 ****
<para>
When <command>COPY</command> is used without the <literal>BINARY</literal> option,
! the data read or written is a text file with one line per table row.
Columns in a row are separated by the delimiter character.
The column values themselves are strings generated by the
output function, or acceptable to the input function, of each
--- 337,344 ----
<para>
When <command>COPY</command> is used without the <literal>BINARY</literal> option,
! the data read or written is a text file with one line per table row,
! unless <literal>CSV</> mode is used.
Columns in a row are separated by the delimiter character.
The column values themselves are strings generated by the
output function, or acceptable to the input function, of each
***************
*** 377,382 ****
--- 462,524 ----
meant as data, <command>COPY FROM</command> will complain if the line
endings in the input are not all alike.
</para>
+ </refsect2>
+
+ <refsect2>
+ <title>CSV Format</title>
+
+ <para>
+ This format is used for importing and exporting the Comma
+ Separated Variable (<literal>CSV</>) file format used by many other
+ programs, such as spreadsheets. Instead of the escaping used by
+ <productname>PostgreSQL</productname>'s standard text mode, it
+ produces and recognises the common CSV escaping mechanism.
+ </para>
+
+ <para>
+ The values in each record are separated by the <literal>DELIMITER</>
+ character. If the value contains the delimiter character, the
+ <literal>QUOTE</> character, the <literal>NULL</> string, a carriage
+ return, or line feed character, then the whole value is prefixed and
+ suffixed by the <literal>QUOTE</> character, and any occurrence
+ within the value of a <literal>QUOTE</> character or the
+ <literal>ESCAPE</> character is preceded by the escape character.
+ You can also use <literal>FORCE</> to force quotes when outputting
+ non-<literal>NULL</> values in specific columns.
+ </para>
+
+ <para>
+ In general, the <literal>CSV</> format has no way to distinguish a
+ <literal>NULL</> from an empty string.
+ <productname>PostgreSQL</productname>'s COPY handles this by
+ quoting. A <literal>NULL</> is output as the <literal>NULL</> string
+ and is not quoted, while a data value matching the <literal>NULL</> string
+ is quoted. Therefore, using the default settings, a <literal>NULL</> is
+ written as an unquoted empty string, while an empty string is
+ written with double quotes (<literal>""</>). Reading values follows
+ similar rules. You can use <literal>LITERAL</> to prevent <literal>NULL</>
+ input comparisons for specific columns.
+ </para>
+
+ <note>
+ <para>
+ CSV mode will both recognize and produce CSV files with quoted
+ values containing embedded carriage returns and line feeds. Thus
+ the files are not strictly one line per table row like text-mode
+ files.
+ </para>
+ </note>
+
+ <note>
+ <para>
+ Many programs produce strange and occasionally perverse CSV files,
+ so the file format is more a convention than a standard. Thus you
+ might encounter some files that cannot be imported using this
+ mechanism, and <command>COPY</> might produce files that other
+ programs can not process.
+ </para>
+ </note>
+
</refsect2>
<refsect2>
Index: doc/src/sgml/ref/psql-ref.sgml
===================================================================
RCS file: /cvsroot/pgsql-server/doc/src/sgml/ref/psql-ref.sgml,v
retrieving revision 1.110
diff -c -c -r1.110 psql-ref.sgml
*** doc/src/sgml/ref/psql-ref.sgml 12 Apr 2004 15:58:52 -0000 1.110
--- doc/src/sgml/ref/psql-ref.sgml 19 Apr 2004 17:07:44 -0000
***************
*** 711,716 ****
--- 711,720 ----
[ <literal>oids</literal> ]
[ <literal>delimiter [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
[ <literal>null [as] </literal> '<replaceable class="parameter">string</replaceable>' ]</literal>
+ [ <literal>csv [ quote [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
+ [ <literal>escape [as] </literal> '<replaceable class="parameter">character</replaceable>'
]
+ [ <literal>force</> <replaceable class="parameter">column_list</replaceable> ]
+ [ <literal>literal</> <replaceable class="parameter">column_list</replaceable> ] ]
</term>
<listitem>
Index: src/backend/commands/copy.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/commands/copy.c,v
retrieving revision 1.220
diff -c -c -r1.220 copy.c
*** src/backend/commands/copy.c 15 Apr 2004 22:36:03 -0000 1.220
--- src/backend/commands/copy.c 19 Apr 2004 17:07:46 -0000
***************
*** 70,76 ****
typedef enum CopyReadResult
{
NORMAL_ATTR,
! END_OF_LINE
} CopyReadResult;
/*
--- 70,77 ----
typedef enum CopyReadResult
{
NORMAL_ATTR,
! END_OF_LINE,
! UNTERMINATED_FIELD
} CopyReadResult;
/*
***************
*** 130,144 ****
/* non-export function prototypes */
static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
! char *delim, char *null_print);
static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
! char *delim, char *null_print);
static bool CopyReadLine(void);
static char *CopyReadAttribute(const char *delim, const char *null_print,
CopyReadResult *result, bool *isnull);
static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
Oid typelem, bool *isnull);
static void CopyAttributeOut(char *string, char *delim);
static List *CopyGetAttnums(Relation rel, List *attnamelist);
static void limit_printout_length(StringInfo buf);
--- 131,152 ----
/* non-export function prototypes */
static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
! char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
! List *force_atts);
static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
! char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
! List *literal_atts);
static bool CopyReadLine(void);
static char *CopyReadAttribute(const char *delim, const char *null_print,
CopyReadResult *result, bool *isnull);
+ static char *CopyReadAttributeCSV(const char *delim, const char *null_print,
+ char *quote, char *escape,
+ CopyReadResult *result, bool *isnull);
static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
Oid typelem, bool *isnull);
static void CopyAttributeOut(char *string, char *delim);
+ static void CopyAttributeOutCSV(char *string, char *delim, char *quote,
+ char *escape, bool force_quote);
static List *CopyGetAttnums(Relation rel, List *attnamelist);
static void limit_printout_length(StringInfo buf);
***************
*** 682,689 ****
--- 690,704 ----
List *attnumlist;
bool binary = false;
bool oids = false;
+ bool csv_mode = false;
char *delim = NULL;
+ char *quote = NULL;
+ char *escape = NULL;
char *null_print = NULL;
+ List *force = NIL;
+ List *literal = NIL;
+ List *force_atts = NIL;
+ List *literal_atts = NIL;
Relation rel;
AclMode required_access = (is_from ? ACL_INSERT : ACL_SELECT);
AclResult aclresult;
***************
*** 725,730 ****
--- 740,785 ----
errmsg("conflicting or redundant options")));
null_print = strVal(defel->arg);
}
+ else if (strcmp(defel->defname, "csv") == 0)
+ {
+ if (csv_mode)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ csv_mode = intVal(defel->arg);
+ }
+ else if (strcmp(defel->defname, "quote") == 0)
+ {
+ if (quote)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ quote = strVal(defel->arg);
+ }
+ else if (strcmp(defel->defname, "escape") == 0)
+ {
+ if (escape)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ escape = strVal(defel->arg);
+ }
+ else if (strcmp(defel->defname, "force") == 0)
+ {
+ if (force)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ force = (List *)defel->arg;
+ }
+ else if (strcmp(defel->defname, "literal") == 0)
+ {
+ if (literal)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ literal = (List *)defel->arg;
+ }
else
elog(ERROR, "option \"%s\" not recognized",
defel->defname);
***************
*** 735,740 ****
--- 790,800 ----
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify DELIMITER in BINARY mode")));
+ if (binary && csv_mode)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot specify CSV in BINARY mode")));
+
if (binary && null_print)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
***************
*** 742,751 ****
/* Set defaults */
if (!delim)
! delim = "\t";
!
if (!null_print)
! null_print = "\\N";
/*
* Open and lock the relation, using the appropriate lock type.
--- 802,893 ----
/* Set defaults */
if (!delim)
! delim = csv_mode ? "," : "\t";
!
if (!null_print)
! null_print = csv_mode ? "" : "\\N";
!
! if (csv_mode)
! {
! if (!quote)
! quote = "\"";
! if (!escape)
! escape = quote;
! }
!
! /*
! * Only single-character delimiter strings are supported.
! */
! if (strlen(delim) != 1)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY delimiter must be a single character")));
!
! /*
! * Check quote
! */
! if (!csv_mode && quote != NULL)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY quote available only in CSV mode")));
!
! if (csv_mode && strlen(quote) != 1)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY quote must be a single character")));
!
! /*
! * Check escape
! */
! if (!csv_mode && escape != NULL)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY escape available only in CSV mode")));
!
! if (csv_mode && strlen(escape) != 1)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY escape must be a single character")));
!
! /*
! * Check force
! */
! if (!csv_mode && force != NIL)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY force available only in CSV mode")));
! if (force != NIL && is_from)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY force only available using COPY TO")));
!
! /*
! * Check literal
! */
! if (!csv_mode && literal != NIL)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY literal available only in CSV mode")));
! if (literal != NIL && !is_from)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY literal only available using COPY FROM")));
!
! /*
! * Don't allow the delimiter to appear in the null string.
! */
! if (strchr(null_print, delim[0]) != NULL)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("COPY delimiter must not appear in the NULL specification")));
!
! /*
! * Don't allow the csv quote char to appear in the null string.
! */
! if (csv_mode && strchr(null_print, quote[0]) != NULL)
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("CSV quote character must not appear in the NULL specification")));
/*
* Open and lock the relation, using the appropriate lock type.
***************
*** 772,793 ****
"psql's \\copy command also works for anyone.")));
/*
- * Presently, only single-character delimiter strings are supported.
- */
- if (strlen(delim) != 1)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("COPY delimiter must be a single character")));
-
- /*
- * Don't allow the delimiter to appear in the null string.
- */
- if (strchr(null_print, delim[0]) != NULL)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("COPY delimiter must not appear in the NULL specification")));
-
- /*
* Don't allow COPY w/ OIDs to or from a table without them
*/
if (oids && !rel->rd_rel->relhasoids)
--- 914,919 ----
***************
*** 802,807 ****
--- 928,979 ----
attnumlist = CopyGetAttnums(rel, attnamelist);
/*
+ * Check that FORCE references valid COPY columns
+ */
+ if (force)
+ {
+ TupleDesc tupDesc = RelationGetDescr(rel);
+ Form_pg_attribute *attr = tupDesc->attrs;
+ List *cur;
+
+ force_atts = CopyGetAttnums(rel, force);
+
+ foreach(cur, force_atts)
+ {
+ int attnum = lfirsti(cur);
+
+ if (!intMember(attnum, attnumlist))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("FORCE column \"%s\" not referenced by COPY",
+ NameStr(attr[attnum - 1]->attname))));
+ }
+ }
+
+ /*
+ * Check that LITERAL references valid COPY columns
+ */
+ if (literal)
+ {
+ List *cur;
+ TupleDesc tupDesc = RelationGetDescr(rel);
+ Form_pg_attribute *attr = tupDesc->attrs;
+
+ literal_atts = CopyGetAttnums(rel, literal);
+
+ foreach(cur, literal_atts)
+ {
+ int attnum = lfirsti(cur);
+
+ if (!intMember(attnum, attnumlist))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("LITERAL column \"%s\" not referenced by COPY",
+ NameStr(attr[attnum - 1]->attname))));
+ }
+ }
+
+ /*
* Set up variables to avoid per-attribute overhead.
*/
initStringInfo(&attribute_buf);
***************
*** 864,870 ****
errmsg("\"%s\" is a directory", filename)));
}
}
! CopyFrom(rel, attnumlist, binary, oids, delim, null_print);
}
else
{ /* copy from database to file */
--- 1036,1043 ----
errmsg("\"%s\" is a directory", filename)));
}
}
! CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
! quote, escape, literal_atts);
}
else
{ /* copy from database to file */
***************
*** 926,932 ****
errmsg("\"%s\" is a directory", filename)));
}
}
! CopyTo(rel, attnumlist, binary, oids, delim, null_print);
}
if (!pipe)
--- 1099,1106 ----
errmsg("\"%s\" is a directory", filename)));
}
}
! CopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
! quote, escape, force_atts);
}
if (!pipe)
***************
*** 958,964 ****
*/
static void
CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
! char *delim, char *null_print)
{
HeapTuple tuple;
TupleDesc tupDesc;
--- 1132,1139 ----
*/
static void
CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
! char *delim, char *null_print, bool csv_mode, char *quote,
! char *escape, List *force_atts)
{
HeapTuple tuple;
TupleDesc tupDesc;
***************
*** 967,972 ****
--- 1142,1148 ----
int attr_count;
Form_pg_attribute *attr;
FmgrInfo *out_functions;
+ bool *force_quote;
Oid *elements;
bool *isvarlena;
char *string;
***************
*** 988,998 ****
out_functions = (FmgrInfo *) palloc((num_phys_attrs + 1) * sizeof(FmgrInfo));
elements = (Oid *) palloc((num_phys_attrs + 1) * sizeof(Oid));
isvarlena = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
foreach(cur, attnumlist)
{
int attnum = lfirsti(cur);
Oid out_func_oid;
!
if (binary)
getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
&out_func_oid, &elements[attnum - 1],
--- 1164,1175 ----
out_functions = (FmgrInfo *) palloc((num_phys_attrs + 1) * sizeof(FmgrInfo));
elements = (Oid *) palloc((num_phys_attrs + 1) * sizeof(Oid));
isvarlena = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
+ force_quote = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
foreach(cur, attnumlist)
{
int attnum = lfirsti(cur);
Oid out_func_oid;
!
if (binary)
getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
&out_func_oid, &elements[attnum - 1],
***************
*** 1002,1007 ****
--- 1179,1189 ----
&out_func_oid, &elements[attnum - 1],
&isvarlena[attnum - 1]);
fmgr_info(out_func_oid, &out_functions[attnum - 1]);
+
+ if (intMember(attnum, force_atts))
+ force_quote[attnum - 1] = true;
+ else
+ force_quote[attnum - 1] = false;
}
/*
***************
*** 1051,1057 ****
while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
{
bool need_delim = false;
-
CHECK_FOR_INTERRUPTS();
MemoryContextReset(mycontext);
--- 1233,1238 ----
***************
*** 1113,1119 ****
value,
ObjectIdGetDatum(elements[attnum - 1]),
Int32GetDatum(attr[attnum - 1]->atttypmod)));
! CopyAttributeOut(string, delim);
}
else
{
--- 1294,1308 ----
value,
ObjectIdGetDatum(elements[attnum - 1]),
Int32GetDatum(attr[attnum - 1]->atttypmod)));
! if (csv_mode)
! {
! CopyAttributeOutCSV(string, delim, quote, escape,
! (strcmp(string, null_print) == 0 ||
! force_quote[attnum - 1]));
! }
! else
! CopyAttributeOut(string, delim);
!
}
else
{
***************
*** 1148,1153 ****
--- 1337,1343 ----
pfree(out_functions);
pfree(elements);
pfree(isvarlena);
+ pfree(force_quote);
}
***************
*** 1243,1249 ****
*/
static void
CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
! char *delim, char *null_print)
{
HeapTuple tuple;
TupleDesc tupDesc;
--- 1433,1440 ----
*/
static void
CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
! char *delim, char *null_print, bool csv_mode, char *quote,
! char *escape, List *literal_atts)
{
HeapTuple tuple;
TupleDesc tupDesc;
***************
*** 1256,1264 ****
Oid *elements;
Oid oid_in_element;
ExprState **constraintexprs;
bool hasConstraints = false;
- int i;
int attnum;
List *cur;
Oid in_func_oid;
Datum *values;
--- 1447,1456 ----
Oid *elements;
Oid oid_in_element;
ExprState **constraintexprs;
+ bool *literal_nullstr;
bool hasConstraints = false;
int attnum;
+ int i;
List *cur;
Oid in_func_oid;
Datum *values;
***************
*** 1317,1322 ****
--- 1509,1515 ----
defmap = (int *) palloc((num_phys_attrs + 1) * sizeof(int));
defexprs = (ExprState **) palloc((num_phys_attrs + 1) * sizeof(ExprState *));
constraintexprs = (ExprState **) palloc0((num_phys_attrs + 1) * sizeof(ExprState *));
+ literal_nullstr = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
for (attnum = 1; attnum <= num_phys_attrs; attnum++)
{
***************
*** 1333,1338 ****
--- 1526,1536 ----
&in_func_oid, &elements[attnum - 1]);
fmgr_info(in_func_oid, &in_functions[attnum - 1]);
+ if (intMember(attnum, literal_atts))
+ literal_nullstr[attnum - 1] = true;
+ else
+ literal_nullstr[attnum - 1] = false;
+
/* Get default info if needed */
if (!intMember(attnum, attnumlist))
{
***************
*** 1389,1397 ****
ExecBSInsertTriggers(estate, resultRelInfo);
if (!binary)
- {
file_has_oids = oids; /* must rely on user to tell us this... */
- }
else
{
/* Read and verify binary header */
--- 1587,1593 ----
***************
*** 1500,1505 ****
--- 1696,1702 ----
if (file_has_oids)
{
+ /* can't be in CSV mode here */
string = CopyReadAttribute(delim, null_print,
&result, &isnull);
***************
*** 1538,1551 ****
errmsg("missing data for column \"%s\"",
NameStr(attr[m]->attname))));
! string = CopyReadAttribute(delim, null_print,
! &result, &isnull);
!
! if (isnull)
{
! /* we read an SQL NULL, no need to do anything */
}
else
{
copy_attname = NameStr(attr[m]->attname);
values[m] = FunctionCall3(&in_functions[m],
--- 1735,1761 ----
errmsg("missing data for column \"%s\"",
NameStr(attr[m]->attname))));
! if (csv_mode)
{
! string = CopyReadAttributeCSV(delim, null_print, quote,
! escape, &result, &isnull);
! if (result == UNTERMINATED_FIELD)
! ereport(ERROR,
! (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
! errmsg("unterminated CSV quoted field")));
}
else
+ string = CopyReadAttribute(delim, null_print,
+ &result, &isnull);
+
+ if (csv_mode && isnull && literal_nullstr[m])
+ {
+ string = null_print; /* set to NULL string */
+ isnull = false;
+ }
+
+ /* we read an SQL NULL, no need to do anything */
+ if (!isnull)
{
copy_attname = NameStr(attr[m]->attname);
values[m] = FunctionCall3(&in_functions[m],
***************
*** 1732,1742 ****
pfree(values);
pfree(nulls);
! if (!binary)
! {
! pfree(in_functions);
! pfree(elements);
! }
ExecDropTupleTable(tupleTable, true);
--- 1942,1953 ----
pfree(values);
pfree(nulls);
! pfree(in_functions);
! pfree(elements);
! pfree(defmap);
! pfree(defexprs);
! pfree(constraintexprs);
! pfree(literal_nullstr);
ExecDropTupleTable(tupleTable, true);
***************
*** 2070,2075 ****
--- 2281,2432 ----
return attribute_buf.data;
}
+
+ /*
+ * Read the value of a single attribute in CSV mode,
+ * performing de-escaping as needed. Escaping does not follow the normal
+ * PostgreSQL text mode, but instead "standard" (i.e. common) CSV usage.
+ *
+ * Quoted fields can span lines, in which case the line end is embedded
+ * in the returned string.
+ *
+ * null_print is the null marker string. Note that this is compared to
+ * the pre-de-escaped input string (thus if it is quoted it is not a NULL).
+ *
+ * *result is set to indicate what terminated the read:
+ * NORMAL_ATTR: column delimiter
+ * END_OF_LINE: end of line
+ * UNTERMINATED_FIELD no quote detected at end of a quoted field
+ *
+ * In any case, the string read up to the terminator (or end of file)
+ * is returned.
+ *
+ * *isnull is set true or false depending on whether the input matched
+ * the null marker. Note that the caller cannot check this since the
+ * returned string will be the post-de-escaping equivalent, which may
+ * look the same as some valid data string.
+ *----------
+ */
+
+ static char *
+ CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote,
+ char *escape, CopyReadResult *result, bool *isnull)
+ {
+ char delimc = delim[0];
+ char quotec = quote[0];
+ char escapec = escape[0];
+ char c;
+ int start_cursor = line_buf.cursor;
+ int end_cursor = start_cursor;
+ int input_len;
+ bool in_quote = false;
+ bool saw_quote = false;
+
+ /* reset attribute_buf to empty */
+ attribute_buf.len = 0;
+ attribute_buf.data[0] = '\0';
+
+ /* set default status */
+ *result = END_OF_LINE;
+
+ for (;;)
+ {
+ /* handle multiline quoted fields */
+ if (in_quote && line_buf.cursor >= line_buf.len)
+ {
+ bool done;
+
+ switch(eol_type)
+ {
+ case EOL_NL:
+ appendStringInfoString(&attribute_buf,"\n");
+ break;
+ case EOL_CR:
+ appendStringInfoString(&attribute_buf,"\r");
+ break;
+ case EOL_CRNL:
+ appendStringInfoString(&attribute_buf,"\r\n");
+ break;
+ case EOL_UNKNOWN:
+ /* shouldn't happen - just keep going */
+ break;
+ }
+
+ copy_lineno++;
+ done = CopyReadLine();
+ if (done && line_buf.len == 0)
+ break;
+ start_cursor = line_buf.cursor;
+ }
+
+ end_cursor = line_buf.cursor;
+ if (line_buf.cursor >= line_buf.len)
+ break;
+ c = line_buf.data[line_buf.cursor++];
+ /*
+ * unquoted field delimiter
+ */
+ if (!in_quote && c == delimc)
+ {
+ *result = NORMAL_ATTR;
+ break;
+ }
+ /*
+ * start of quoted field (or part of field)
+ */
+ if (!in_quote && c == quotec)
+ {
+ saw_quote = true;
+ in_quote = true;
+ continue;
+ }
+ /*
+ * escape within a quoted field
+ */
+ if (in_quote && c == escapec)
+ {
+ /*
+ * peek at the next char if available, and escape it if it
+ * is an escape char or a quote char
+ */
+ if (line_buf.cursor <= line_buf.len)
+ {
+ char nextc = line_buf.data[line_buf.cursor];
+ if (nextc == escapec || nextc == quotec)
+ {
+ appendStringInfoCharMacro(&attribute_buf, nextc);
+ line_buf.cursor++;
+ continue;
+ }
+ }
+ }
+ /*
+ * end of quoted field.
+ * Must do this test after testing for escape in case quote char
+ * and escape char are the same (which is the common case).
+ */
+ if (in_quote && c == quotec)
+ {
+ in_quote = false;
+ continue;
+ }
+ appendStringInfoCharMacro(&attribute_buf, c);
+ }
+
+ if (in_quote)
+ *result = UNTERMINATED_FIELD;
+
+ /* check whether raw input matched null marker */
+ input_len = end_cursor - start_cursor;
+ if (!saw_quote && input_len == strlen(null_print) &&
+ strncmp(&line_buf.data[start_cursor], null_print, input_len) == 0)
+ *isnull = true;
+ else
+ *isnull = false;
+
+ return attribute_buf.data;
+ }
+
/*
* Read a binary attribute
*/
***************
*** 2193,2198 ****
--- 2550,2622 ----
break;
}
}
+ }
+
+ /*
+ * Send CSV representation of one attribute, with conversion and
+ * CSV type escaping
+ */
+ static void
+ CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
+ char *escape, bool force_quote)
+ {
+ char *string;
+ char c;
+ char delimc = delim[0];
+ char quotec = quote[0];
+ char escapec = escape[0];
+ bool need_quote = force_quote;
+ char *test_string;
+ bool same_encoding;
+ int mblen;
+ int i;
+
+ same_encoding = (server_encoding == client_encoding);
+ if (!same_encoding)
+ string = (char *) pg_server_to_client((unsigned char *) server_string,
+ strlen(server_string));
+ else
+ string = server_string;
+
+ /* have to run through the string twice,
+ * first time to see if it needs quoting, second to actually send it
+ */
+
+ for(test_string = string;
+ !need_quote && (c = *test_string) != '\0';
+ test_string += mblen)
+ {
+ if (c == delimc || c == quotec || c == '\n' || c == '\r')
+ need_quote = true;
+ if (!same_encoding)
+ mblen = pg_encoding_mblen(client_encoding, test_string);
+ else
+ mblen = 1;
+ }
+
+ if (need_quote)
+ CopySendChar(quotec);
+
+ for (; (c = *string) != '\0'; string += mblen)
+ {
+ if (c == quotec || c == escapec)
+ CopySendChar(escapec);
+
+ CopySendChar(c);
+
+ if (!same_encoding)
+ {
+ /* send additional bytes of the char, if any */
+ mblen = pg_encoding_mblen(client_encoding, string);
+ for (i = 1; i < mblen; i++)
+ CopySendChar(string[i]);
+ }
+ else
+ mblen = 1;
+ }
+
+ if (need_quote)
+ CopySendChar(quotec);
}
/*
Index: src/backend/parser/gram.y
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/parser/gram.y,v
retrieving revision 2.450
diff -c -c -r2.450 gram.y
*** src/backend/parser/gram.y 5 Apr 2004 03:07:26 -0000 2.450
--- src/backend/parser/gram.y 19 Apr 2004 17:07:50 -0000
***************
*** 343,349 ****
CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT
COMMITTED CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB
! CREATEUSER CROSS CURRENT_DATE CURRENT_TIME
CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
--- 343,349 ----
CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT
COMMITTED CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB
! CREATEUSER CROSS CSV CURRENT_DATE CURRENT_TIME
CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
***************
*** 370,376 ****
KEY
LANCOMPILER LANGUAGE LARGE_P LAST_P LEADING LEFT LEVEL LIKE LIMIT
! LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
LOCK_P
MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
--- 370,376 ----
KEY
LANCOMPILER LANGUAGE LARGE_P LAST_P LEADING LEFT LEVEL LIKE LIMIT
! LISTEN LITERAL LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
LOCK_P
MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
***************
*** 386,391 ****
--- 386,393 ----
PRECISION PRESERVE PREPARE PRIMARY
PRIOR PRIVILEGES PROCEDURAL PROCEDURE
+ QUOTE
+
READ REAL RECHECK REFERENCES REINDEX RELATIVE_P RENAME REPEATABLE REPLACE
RESET RESTART RESTRICT RETURNS REVOKE RIGHT ROLLBACK ROW ROWS
RULE
***************
*** 1360,1365 ****
--- 1362,1387 ----
{
$$ = makeDefElem("null", (Node *)makeString($3));
}
+ | CSV
+ {
+ $$ = makeDefElem("csv", (Node *)makeInteger(TRUE));
+ }
+ | QUOTE opt_as Sconst
+ {
+ $$ = makeDefElem("quote", (Node *)makeString($3));
+ }
+ | ESCAPE opt_as Sconst
+ {
+ $$ = makeDefElem("escape", (Node *)makeString($3));
+ }
+ | FORCE columnList
+ {
+ $$ = makeDefElem("force", (Node *)$2);
+ }
+ | LITERAL columnList
+ {
+ $$ = makeDefElem("literal", (Node *)$2);
+ }
;
/* The following exist for backward compatibility */
***************
*** 7420,7425 ****
--- 7442,7448 ----
| COPY
| CREATEDB
| CREATEUSER
+ | CSV
| CURSOR
| CYCLE
| DATABASE
***************
*** 7473,7478 ****
--- 7496,7502 ----
| LAST_P
| LEVEL
| LISTEN
+ | LITERAL
| LOAD
| LOCAL
| LOCATION
***************
*** 7507,7512 ****
--- 7531,7537 ----
| PRIVILEGES
| PROCEDURAL
| PROCEDURE
+ | QUOTE
| READ
| RECHECK
| REINDEX
Index: src/backend/parser/keywords.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/parser/keywords.c,v
retrieving revision 1.147
diff -c -c -r1.147 keywords.c
*** src/backend/parser/keywords.c 11 Mar 2004 01:47:40 -0000 1.147
--- src/backend/parser/keywords.c 19 Apr 2004 17:07:50 -0000
***************
*** 90,95 ****
--- 90,96 ----
{"createdb", CREATEDB},
{"createuser", CREATEUSER},
{"cross", CROSS},
+ {"csv", CSV},
{"current_date", CURRENT_DATE},
{"current_time", CURRENT_TIME},
{"current_timestamp", CURRENT_TIMESTAMP},
***************
*** 186,191 ****
--- 187,193 ----
{"like", LIKE},
{"limit", LIMIT},
{"listen", LISTEN},
+ {"literal", LITERAL},
{"load", LOAD},
{"local", LOCAL},
{"localtime", LOCALTIME},
***************
*** 248,253 ****
--- 250,256 ----
{"privileges", PRIVILEGES},
{"procedural", PROCEDURAL},
{"procedure", PROCEDURE},
+ {"quote", QUOTE},
{"read", READ},
{"real", REAL},
{"recheck", RECHECK},
Index: src/backend/tcop/fastpath.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/tcop/fastpath.c,v
retrieving revision 1.71
diff -c -c -r1.71 fastpath.c
*** src/backend/tcop/fastpath.c 7 Jan 2004 18:56:27 -0000 1.71
--- src/backend/tcop/fastpath.c 19 Apr 2004 17:07:51 -0000
***************
*** 154,161 ****
bool typisvarlena;
char *outputstr;
! getTypeOutputInfo(rettype,
! &typoutput, &typelem, &typisvarlena);
outputstr = DatumGetCString(OidFunctionCall3(typoutput,
retval,
ObjectIdGetDatum(typelem),
--- 154,160 ----
bool typisvarlena;
char *outputstr;
! getTypeOutputInfo(rettype, &typoutput, &typelem, &typisvarlena);
outputstr = DatumGetCString(OidFunctionCall3(typoutput,
retval,
ObjectIdGetDatum(typelem),
Index: src/bin/psql/copy.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/bin/psql/copy.c,v
retrieving revision 1.43
diff -c -c -r1.43 copy.c
*** src/bin/psql/copy.c 12 Apr 2004 15:58:52 -0000 1.43
--- src/bin/psql/copy.c 19 Apr 2004 17:07:51 -0000
***************
*** 66,73 ****
--- 66,78 ----
bool from;
bool binary;
bool oids;
+ bool csv_mode;
char *delim;
char *null;
+ char *quote;
+ char *escape;
+ char *force_list;
+ char *literal_list;
};
***************
*** 81,86 ****
--- 86,95 ----
free(ptr->file);
free(ptr->delim);
free(ptr->null);
+ free(ptr->quote);
+ free(ptr->escape);
+ free(ptr->force_list);
+ free(ptr->literal_list);
free(ptr);
}
***************
*** 272,282 ****
--- 281,299 ----
while (token)
{
+ bool fetch_next;
+
+ fetch_next = true;
+
/* someday allow BINARY here */
if (strcasecmp(token, "oids") == 0)
{
result->oids = true;
}
+ else if (strcasecmp(token, "csv") == 0)
+ {
+ result->csv_mode = true;
+ }
else if (strcasecmp(token, "delimiter") == 0)
{
token = strtokx(NULL, whitespace, NULL, "'",
***************
*** 301,311 ****
else
goto error;
}
else
goto error;
! token = strtokx(NULL, whitespace, NULL, NULL,
! 0, false, pset.encoding);
}
}
--- 318,395 ----
else
goto error;
}
+ else if (strcasecmp(token, "quote") == 0)
+ {
+ token = strtokx(NULL, whitespace, NULL, "'",
+ '\\', false, pset.encoding);
+ if (token && strcasecmp(token, "as") == 0)
+ token = strtokx(NULL, whitespace, NULL, "'",
+ '\\', false, pset.encoding);
+ if (token)
+ result->quote = pg_strdup(token);
+ else
+ goto error;
+ }
+ else if (strcasecmp(token, "escape") == 0)
+ {
+ token = strtokx(NULL, whitespace, NULL, "'",
+ '\\', false, pset.encoding);
+ if (token && strcasecmp(token, "as") == 0)
+ token = strtokx(NULL, whitespace, NULL, "'",
+ '\\', false, pset.encoding);
+ if (token)
+ result->escape = pg_strdup(token);
+ else
+ goto error;
+ }
+ else if (strcasecmp(token, "force") == 0)
+ {
+ /* handle column list */
+ fetch_next = false;
+ for (;;)
+ {
+ token = strtokx(NULL, whitespace, ",", "\"",
+ 0, false, pset.encoding);
+ if (!token || strchr(",", token[0]))
+ goto error;
+ if (!result->force_list)
+ result->force_list = pg_strdup(token);
+ else
+ xstrcat(&result->force_list, token);
+ token = strtokx(NULL, whitespace, ",", "\"",
+ 0, false, pset.encoding);
+ if (!token || token[0] != ',')
+ break;
+ xstrcat(&result->force_list, token);
+ }
+ }
+ else if (strcasecmp(token, "literal") == 0)
+ {
+ /* handle column list */
+ fetch_next = false;
+ for (;;)
+ {
+ token = strtokx(NULL, whitespace, ",", "\"",
+ 0, false, pset.encoding);
+ if (!token || strchr(",", token[0]))
+ goto error;
+ if (!result->literal_list)
+ result->literal_list = pg_strdup(token);
+ else
+ xstrcat(&result->literal_list, token);
+ token = strtokx(NULL, whitespace, ",", "\"",
+ 0, false, pset.encoding);
+ if (!token || token[0] != ',')
+ break;
+ xstrcat(&result->literal_list, token);
+ }
+ }
else
goto error;
! if (fetch_next)
! token = strtokx(NULL, whitespace, NULL, NULL,
! 0, false, pset.encoding);
}
}
***************
*** 340,346 ****
PGresult *result;
bool success;
struct stat st;
!
/* parse options */
options = parse_slash_copy(args);
--- 424,430 ----
PGresult *result;
bool success;
struct stat st;
!
/* parse options */
options = parse_slash_copy(args);
***************
*** 379,390 ****
--- 463,506 ----
options->delim);
}
+ /* There is no backward-compatible CSV syntax */
if (options->null)
{
if (options->null[0] == '\'')
appendPQExpBuffer(&query, " WITH NULL AS %s", options->null);
else
appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null);
+ }
+
+ if (options->csv_mode)
+ {
+ appendPQExpBuffer(&query, " CSV");
+ }
+
+ if (options->quote)
+ {
+ if (options->quote[0] == '\'')
+ appendPQExpBuffer(&query, " QUOTE AS %s", options->quote);
+ else
+ appendPQExpBuffer(&query, " QUOTE AS '%s'", options->quote);
+ }
+
+ if (options->escape)
+ {
+ if (options->escape[0] == '\'')
+ appendPQExpBuffer(&query, " ESCAPE AS %s", options->escape);
+ else
+ appendPQExpBuffer(&query, " ESCAPE AS '%s'", options->escape);
+ }
+
+ if (options->force_list)
+ {
+ appendPQExpBuffer(&query, " FORCE %s", options->force_list);
+ }
+
+ if (options->literal_list)
+ {
+ appendPQExpBuffer(&query, " LITERAL %s", options->literal_list);
}
if (options->from)