From 739d3af4d255c2ccc285bcf75f61cc215ece8c59 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Thu, 15 Jan 2026 16:19:30 +0900 Subject: [PATCH v2] file_fdw: Support multi-line HEADER option. Commit bc2f348 introduced multi-line HEADER support for COPY. This commit extends support for this option to file_fdw. Similar to the HEADER option in COPY, this option allows multiple header lines to be skipped. Since the CREATE/ALTER FOREIGN TABLE commands require foreign table options to be single-quoted, this commit updates defGetCopyHeaderOption() to also handle string values. Author: Shinya Kato Reviewed-by: Fujii Masao Reviewed-by: songjinzhou Reviewed-by: Japin Li Reviewed-by: Chao Li Discussion: https://postgr.es/m/CAOzEurT+iwC47VHPMS+uJ4WSzvOLPsZ2F2_wopm8M7O+CZa3Xw@mail.gmail.com --- contrib/file_fdw/data/multiline_header.csv | 4 + contrib/file_fdw/expected/file_fdw.out | 29 ++++- contrib/file_fdw/sql/file_fdw.sql | 13 ++ doc/src/sgml/file-fdw.sgml | 9 +- src/backend/commands/copy.c | 133 ++++++++++++--------- 5 files changed, 127 insertions(+), 61 deletions(-) create mode 100644 contrib/file_fdw/data/multiline_header.csv diff --git a/contrib/file_fdw/data/multiline_header.csv b/contrib/file_fdw/data/multiline_header.csv new file mode 100644 index 00000000000..0d5e482a1e4 --- /dev/null +++ b/contrib/file_fdw/data/multiline_header.csv @@ -0,0 +1,4 @@ +first header line +second header line +1,alpha +2,beta diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out index 5121e27dce5..bd0fe8b0c2e 100644 --- a/contrib/file_fdw/expected/file_fdw.out +++ b/contrib/file_fdw/expected/file_fdw.out @@ -104,6 +104,12 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (on_error 'ignore', reject_limit '0'); -- ERROR ERROR: REJECT_LIMIT (0) must be greater than zero +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '-1'); -- ERROR +ERROR: a negative integer value cannot be specified for header +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '2.5'); -- ERROR +ERROR: header requires a Boolean value, a non-negative integer, or the string "match" +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header 'unsupported'); -- ERROR +ERROR: header requires a Boolean value, a non-negative integer, or the string "match" CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR ERROR: either filename or program is required for file_fdw foreign tables \set filename :abs_srcdir '/data/agg.data' @@ -142,6 +148,25 @@ OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match'); SELECT * FROM header_doesnt_match; -- ERROR ERROR: column name mismatch in header line field 1: got "1", expected "a" CONTEXT: COPY header_doesnt_match, line 1: "1,foo" +-- test multi-line header +\set filename :abs_srcdir '/data/multiline_header.csv' +CREATE FOREIGN TABLE multi_header (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '2'); +SELECT * FROM multi_header ORDER BY a; + a | b +---+------- + 1 | alpha + 2 | beta +(2 rows) + +CREATE FOREIGN TABLE multi_header_skip (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '5'); +SELECT count(*) FROM multi_header_skip; + count +------- + 0 +(1 row) + -- per-column options tests \set filename :abs_srcdir '/data/text.csv' CREATE FOREIGN TABLE text_csv ( @@ -543,7 +568,7 @@ SET ROLE regress_file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 9 other objects +NOTICE: drop cascades to 11 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for regress_file_fdw_superuser on server file_server drop cascades to user mapping for regress_no_priv_user on server file_server @@ -552,5 +577,7 @@ drop cascades to foreign table agg_csv drop cascades to foreign table agg_bad drop cascades to foreign table header_match drop cascades to foreign table header_doesnt_match +drop cascades to foreign table multi_header +drop cascades to foreign table multi_header_skip drop cascades to foreign table text_csv DROP ROLE regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user; diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql index 1a397ad4bd1..408affcf87f 100644 --- a/contrib/file_fdw/sql/file_fdw.sql +++ b/contrib/file_fdw/sql/file_fdw.sql @@ -84,6 +84,9 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', on_erro CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (log_verbosity 'unsupported'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (on_error 'ignore', reject_limit '0'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '-1'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '2.5'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header 'unsupported'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR \set filename :abs_srcdir '/data/agg.data' @@ -119,6 +122,16 @@ CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match'); SELECT * FROM header_doesnt_match; -- ERROR +-- test multi-line header +\set filename :abs_srcdir '/data/multiline_header.csv' +CREATE FOREIGN TABLE multi_header (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '2'); +SELECT * FROM multi_header ORDER BY a; + +CREATE FOREIGN TABLE multi_header_skip (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '5'); +SELECT count(*) FROM multi_header_skip; + -- per-column options tests \set filename :abs_srcdir '/data/text.csv' CREATE FOREIGN TABLE text_csv ( diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index 882d9a76d21..ff9435720c2 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -65,7 +65,7 @@ - Specifies whether the data has a header line, + Specifies whether to skip a header line, or how many header lines to skip, the same as COPY's HEADER option. @@ -166,9 +166,10 @@ Note that while COPY allows options such as HEADER to be specified without a corresponding value, the foreign table option - syntax requires a value to be present in all cases. To activate - COPY options typically written without a value, you can pass - the value TRUE, since all such options are Booleans. + syntax requires a value to be present in all cases. Use the appropriate + explicit value (for example TRUE, match, + or a non-negative integer line count for HEADER) to enable + the desired behavior. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 9c51384ab92..523f7817b87 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -28,6 +28,7 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/miscnodes.h" #include "optimizer/optimizer.h" #include "parser/parse_coerce.h" #include "parser/parse_collate.h" @@ -364,6 +365,76 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, table_close(rel, NoLock); } +/* + * Validate the HEADER option integer for COPY and return the validated + * non-negative line count (number of header lines to skip). + */ +static int +defCheckCopyHeaderInteger(DefElem *def, int header, bool is_from) +{ + if (header < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("a negative integer value cannot be " + "specified for %s", def->defname))); + + if (!is_from && header > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use multi-line header in COPY TO"))); + + return header; +} + +/* + * Validate the HEADER option string for COPY and return either a + * non-negative line count (number of header lines to skip) or one of + * the COPY_HEADER_* sentinel values + */ +static int +defCheckCopyHeaderString(DefElem *def, char *header, bool is_from) +{ + int ival; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + /* + * The set of strings accepted here should match up with the + * grammar's opt_boolean_or_string production. + */ + if (pg_strcasecmp(header, "true") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(header, "on") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(header, "false") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(header, "off") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(header, "match") == 0) + { + if (!is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use \"%s\" with HEADER in COPY TO", + header))); + return COPY_HEADER_MATCH; + } + + /* Check if the header is a valid integer */ + ival = pg_strtoint32_safe(header, (Node *)&escontext); + if (escontext.error_occurred) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a Boolean value, a non-negative integer, " + "or the string \"match\"", + def->defname))); + + /* + * If it's not a recognized keyword, treat it as a numeric value to + * preserve the existing error reporting for invalid integers. + */ + return defCheckCopyHeaderInteger(def, ival, is_from); +} + /* * Extract the CopyFormatOptions.header_line value from a DefElem. * @@ -380,63 +451,13 @@ defGetCopyHeaderOption(DefElem *def, bool is_from) return COPY_HEADER_TRUE; /* - * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or - * "match". + * Allow 0, 1, "true", "false", "on", "off", a non-negative integer (also + * as a string, to support file_fdw options), or "match". */ - switch (nodeTag(def->arg)) - { - case T_Integer: - { - int ival = intVal(def->arg); - - if (ival < 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("a negative integer value cannot be " - "specified for %s", def->defname))); - - if (!is_from && ival > 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot use multi-line header in COPY TO"))); - - return ival; - } - break; - default: - { - char *sval = defGetString(def); - - /* - * The set of strings accepted here should match up with the - * grammar's opt_boolean_or_string production. - */ - if (pg_strcasecmp(sval, "true") == 0) - return COPY_HEADER_TRUE; - if (pg_strcasecmp(sval, "false") == 0) - return COPY_HEADER_FALSE; - if (pg_strcasecmp(sval, "on") == 0) - return COPY_HEADER_TRUE; - if (pg_strcasecmp(sval, "off") == 0) - return COPY_HEADER_FALSE; - if (pg_strcasecmp(sval, "match") == 0) - { - if (!is_from) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot use \"%s\" with HEADER in COPY TO", - sval))); - return COPY_HEADER_MATCH; - } - } - break; - } - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s requires a Boolean value, a non-negative integer, " - "or the string \"match\"", - def->defname))); - return COPY_HEADER_FALSE; /* keep compiler quiet */ + if (IsA(def->arg, Integer)) + return defCheckCopyHeaderInteger(def, intVal(def->arg), is_from); + else + return defCheckCopyHeaderString(def, defGetString(def), is_from); } /* -- 2.47.3