From 7b6fd050469cf26afa4ede08120b123b620a551a Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy Date: Mon, 25 Mar 2024 11:00:59 +0000 Subject: [PATCH v9 2/2] Add detailed info when COPY skips soft errors This commit emits individual info like line number and column name when COPY skips soft errors. Because, the summary containing the total rows skipped isn't enough for the users to know what exactly are the malformed rows in the input data. Author: Bharath Rupireddy Reviewed-by: Michael Paquier, Masahiko Sawada Reviewed-by: Atsushi Torikoshi Discussion: https://www.postgresql.org/message-id/CALj2ACUk700cYhx1ATRQyRw-fBM%2BaRo6auRAitKGff7XNmYfqQ%40mail.gmail.com --- doc/src/sgml/ref/copy.sgml | 12 ++++++++-- src/backend/commands/copyfrom.c | 4 +--- src/backend/commands/copyfromparse.c | 35 ++++++++++++++++++++++++++++ src/include/commands/copy.h | 1 + src/test/regress/expected/copy2.out | 33 +++++++++++++++++++++++++- src/test/regress/sql/copy2.sql | 22 ++++++++++++++++- 6 files changed, 100 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 4c307efb54..ecbbf5f94a 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -401,8 +401,12 @@ COPY { table_name [ ( FORMAT is text or csv. - A NOTICE message containing the ignored row count is emitted at the end - of the COPY FROM if at least one row was discarded. + A NOTICE message containing the ignored row count is + emitted at the end of the COPY FROM if at least one + row was discarded. When LOG_VERBOSITY option is set to + verbose, a NOTICE message + containing the line of the input file and the column name whose input + conversion has failed is emitted for each discarded row. @@ -429,6 +433,10 @@ COPY { table_name [ ( verbose can be used to emit more informative messages. default will not log any additional messages. + + This is currently used in COPY FROM command when + ON_ERROR is set to ignore. + diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 8908a440e1..fc5bc86ac7 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -101,8 +101,6 @@ typedef struct CopyMultiInsertInfo /* non-export function prototypes */ -static char *limit_printout_length(const char *str); - static void ClosePipeFromProgram(CopyFromState cstate); /* @@ -189,7 +187,7 @@ CopyFromErrorCallback(void *arg) * * Returns a pstrdup'd copy of the input. */ -static char * +char * limit_printout_length(const char *str) { #define MAX_COPY_DATA_DISPLAY 100 diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 5682d5d054..01ab1de9bd 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -967,7 +967,42 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, (Node *) cstate->escontext, &values[m])) { + Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP); + cstate->num_errors++; + + if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE) + { + /* + * Since we emit line number and column info in the below + * notice message, we suppress error context information + * other than the relation name. + */ + Assert(!cstate->relname_only); + cstate->relname_only = true; + + if (cstate->cur_attval) + { + char *attval; + + attval = limit_printout_length(cstate->cur_attval); + ereport(NOTICE, + errmsg("data type incompatibility at line %llu for column %s: \"%s\"", + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname, + attval)); + pfree(attval); + } + else + ereport(NOTICE, + errmsg("data type incompatibility at line %llu for column %s: null input", + (unsigned long long) cstate->cur_lineno, + cstate->cur_attname)); + + /* reset relname_only */ + cstate->relname_only = false; + } + return true; } diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 99d183fa4d..9c539772a5 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -107,6 +107,7 @@ extern bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext, extern bool NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields); extern void CopyFromErrorCallback(void *arg); +extern char *limit_printout_length(const char *str); extern uint64 CopyFrom(CopyFromState cstate); diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index bb37a2ac70..d80d45dce4 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -737,8 +737,31 @@ CREATE TABLE check_ign_err (n int, m int[], k int); COPY check_ign_err FROM STDIN WITH (on_error stop); ERROR: invalid input syntax for type integer: "a" CONTEXT: COPY check_ign_err, line 2, column n: "a" -COPY check_ign_err FROM STDIN WITH (on_error ignore); +-- want context for notices +\set SHOW_CONTEXT always +COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose); +NOTICE: data type incompatibility at line 2 for column n: "a" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 3 for column k: "3333333333" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 4 for column m: "{a, 4}" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 5 for column n: "" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 7 for column m: "a" +CONTEXT: COPY check_ign_err +NOTICE: data type incompatibility at line 8 for column k: "a" +CONTEXT: COPY check_ign_err NOTICE: 6 rows were skipped due to data type incompatibility +-- tests for on_error option with log_verbosity and null constraint via domain +CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; +CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); +COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose); +NOTICE: data type incompatibility at line 2 for column l: null input +CONTEXT: COPY check_ign_err2 +NOTICE: 1 row was skipped due to data type incompatibility +-- reset context choice +\set SHOW_CONTEXT errors SELECT * FROM check_ign_err; n | m | k ---+-----+--- @@ -747,6 +770,12 @@ SELECT * FROM check_ign_err; 8 | {8} | 8 (3 rows) +SELECT * FROM check_ign_err2; + n | m | k | l +---+-----+---+------- + 1 | {1} | 1 | 'foo' +(1 row) + -- test datatype error that can't be handled as soft: should fail CREATE TABLE hard_err(foo widget); COPY hard_err FROM STDIN WITH (on_error ignore); @@ -775,6 +804,8 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE check_ign_err2; +DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err; -- -- COPY FROM ... DEFAULT diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 4cd3ae577d..624d531fd6 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -510,7 +510,11 @@ a {2} 2 5 {5} 5 \. -COPY check_ign_err FROM STDIN WITH (on_error ignore); + +-- want context for notices +\set SHOW_CONTEXT always + +COPY check_ign_err FROM STDIN WITH (on_error ignore, log_verbosity verbose); 1 {1} 1 a {2} 2 3 {3} 3333333333 @@ -521,8 +525,22 @@ a {2} 2 7 {7} a 8 {8} 8 \. + +-- tests for on_error option with log_verbosity and null constraint via domain +CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL; +CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2); +COPY check_ign_err2 FROM STDIN WITH (on_error ignore, log_verbosity verbose); +1 {1} 1 'foo' +2 {2} 2 \N +\. + +-- reset context choice +\set SHOW_CONTEXT errors + SELECT * FROM check_ign_err; +SELECT * FROM check_ign_err2; + -- test datatype error that can't be handled as soft: should fail CREATE TABLE hard_err(foo widget); COPY hard_err FROM STDIN WITH (on_error ignore); @@ -554,6 +572,8 @@ DROP VIEW instead_of_insert_tbl_view; DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; +DROP TABLE check_ign_err2; +DROP DOMAIN dcheck_ign_err2; DROP TABLE hard_err; -- -- 2.34.1