Re: Error-safe user functions - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Error-safe user functions |
Date | |
Msg-id | 3496391.1670276406@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Error-safe user functions (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses | Re: Error-safe user functions |
List | pgsql-hackers |
I wrote: > Seems like everybody's okay with errsave. I'll make a v2 in a > little bit. I'd like to try updating array_in and/or record_in > just to verify that indirection cases work okay, before we consider > the design to be set. v2 as promised, incorporating the discussed renamings as well as some follow-on ones (ErrorReturnContext -> ErrorSaveContext, notably). I also tried moving the struct into a new header file, miscnodes.h, after Andrew's suggestion upthread. That seems at least marginally cleaner than putting it in nodes.h, although I'm not wedded to this choice. I was really glad that I took the trouble to update some less-trivial input functions, because I learned two things: * It's better if InputFunctionCallSafe will tolerate the case of not being passed an ErrorSaveContext. In the COPY hack it felt worthwhile to have completely separate code paths calling InputFunctionCallSafe or InputFunctionCall, but that's less appetizing elsewhere. * There's a crying need for a macro that wraps up errsave() with an immediate return. Hence, ereturn() is reborn from the ashes. I hope Robert won't object to that name if it *does* do a return. I feel pretty good about this version; it seems committable if there are no objections. Not sure if we should commit 0003 like this, though. regards, tom lane diff --git a/doc/src/sgml/ref/create_type.sgml b/doc/src/sgml/ref/create_type.sgml index 693423e524..53b8d15f97 100644 --- a/doc/src/sgml/ref/create_type.sgml +++ b/doc/src/sgml/ref/create_type.sgml @@ -900,6 +900,15 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> function is written in C. </para> + <para> + In <productname>PostgreSQL</productname> version 16 and later, it is + desirable for base types' input functions to return <quote>safe</quote> + errors using the new <function>errsave()</function> mechanism, rather + than throwing <function>ereport()</function> exceptions as in previous + versions. 
See <filename>src/backend/utils/fmgr/README</filename> for + more information. + </para> + </refsect1> <refsect1> diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile index 4368c30fdb..7c594be583 100644 --- a/src/backend/nodes/Makefile +++ b/src/backend/nodes/Makefile @@ -56,6 +56,7 @@ node_headers = \ nodes/bitmapset.h \ nodes/extensible.h \ nodes/lockoptions.h \ + nodes/miscnodes.h \ nodes/replnodes.h \ nodes/supportnodes.h \ nodes/value.h \ diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 7212bc486f..08992dfd47 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -68,6 +68,7 @@ my @all_input_files = qw( nodes/bitmapset.h nodes/extensible.h nodes/lockoptions.h + nodes/miscnodes.h nodes/replnodes.h nodes/supportnodes.h nodes/value.h @@ -89,6 +90,7 @@ my @nodetag_only_files = qw( executor/tuptable.h foreign/fdwapi.h nodes/lockoptions.h + nodes/miscnodes.h nodes/replnodes.h nodes/supportnodes.h ); diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 2585e24845..81727ecb28 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -71,6 +71,7 @@ #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" #include "miscadmin.h" #include "pgstat.h" #include "postmaster/bgworker.h" @@ -686,6 +687,154 @@ errfinish(const char *filename, int lineno, const char *funcname) } +/* + * errsave_start --- begin a "safe" error-reporting cycle + * + * If "context" isn't an ErrorSaveContext node, this behaves as + * errstart(ERROR, domain), and the errsave() macro ends up acting + * exactly like ereport(ERROR, ...). 
+ * + * If "context" is an ErrorSaveContext node, but the node creator only wants + * notification of the fact of a safe error without any details, just set + * the error_occurred flag in the ErrorSaveContext node and return false, + * which will cause us to skip the remaining error processing steps. + * + * Otherwise, create and initialize error stack entry and return true. + * Subsequently, errmsg() and perhaps other routines will be called to further + * populate the stack entry. Finally, errsave_finish() will be called to + * tidy up. + */ +bool +errsave_start(void *context, const char *domain) +{ + ErrorSaveContext *escontext; + ErrorData *edata; + + /* + * Do we have a context for safe error reporting? If not, just punt to + * errstart(). + */ + if (context == NULL || !IsA(context, ErrorSaveContext)) + return errstart(ERROR, domain); + + /* Report that an error was detected */ + escontext = (ErrorSaveContext *) context; + escontext->error_occurred = true; + + /* Nothing else to do if caller wants no further details */ + if (!escontext->details_wanted) + return false; + + /* + * Okay, crank up a stack entry to store the info in. + */ + + recursion_depth++; + if (++errordata_stack_depth >= ERRORDATA_STACK_SIZE) + { + /* + * Wups, stack not big enough. We treat this as a PANIC condition + * because it suggests an infinite loop of errors during error + * recovery. + */ + errordata_stack_depth = -1; /* make room on stack */ + ereport(PANIC, (errmsg_internal("ERRORDATA_STACK_SIZE exceeded"))); + } + + /* Initialize data for this error frame */ + edata = &errordata[errordata_stack_depth]; + MemSet(edata, 0, sizeof(ErrorData)); + edata->elevel = LOG; /* signal all is well to errsave_finish */ + /* the default text domain is the backend's */ + edata->domain = domain ? 
domain : PG_TEXTDOMAIN("postgres"); + /* initialize context_domain the same way (see set_errcontext_domain()) */ + edata->context_domain = edata->domain; + /* Select default errcode based on the assumed elevel of ERROR */ + edata->sqlerrcode = ERRCODE_INTERNAL_ERROR; + /* errno is saved here so that error parameter eval can't change it */ + edata->saved_errno = errno; + + /* + * Any allocations for this error state level should go into the caller's + * context. We don't need to pollute ErrorContext, or even require it to + * exist, in this code path. + */ + edata->assoc_context = CurrentMemoryContext; + + recursion_depth--; + return true; +} + +/* + * errsave_finish --- end a "safe" error-reporting cycle + * + * If errsave_start() decided this was a regular error, behave as + * errfinish(). Otherwise, package up the error details and save + * them in the ErrorSaveContext node. + */ +void +errsave_finish(void *context, const char *filename, int lineno, + const char *funcname) +{ + ErrorSaveContext *escontext = (ErrorSaveContext *) context; + ErrorData *edata = &errordata[errordata_stack_depth]; + + /* verify stack depth before accessing *edata */ + CHECK_STACK_DEPTH(); + + /* + * If errsave_start punted to errstart, then elevel will be ERROR or + * perhaps even PANIC. Punt likewise to errfinish. + */ + if (edata->elevel >= ERROR) + errfinish(filename, lineno, funcname); + + /* + * Else, we should package up the stack entry contents and deliver them to + * the caller. 
+ */ + recursion_depth++; + + /* Save the last few bits of error state into the stack entry */ + if (filename) + { + const char *slash; + + /* keep only base name, useful especially for vpath builds */ + slash = strrchr(filename, '/'); + if (slash) + filename = slash + 1; + /* Some Windows compilers use backslashes in __FILE__ strings */ + slash = strrchr(filename, '\\'); + if (slash) + filename = slash + 1; + } + + edata->filename = filename; + edata->lineno = lineno; + edata->funcname = funcname; + edata->elevel = ERROR; /* hide the LOG value used above */ + + /* + * We skip calling backtrace and context functions, which are more likely + * to cause trouble than provide useful context; they might act on the + * assumption that a transaction abort is about to occur. + */ + + /* + * Make a copy of the error info for the caller. All the subsidiary + * strings are already in the caller's context, so it's sufficient to + * flat-copy the stack entry. + */ + escontext->error_data = palloc_object(ErrorData); + memcpy(escontext->error_data, edata, sizeof(ErrorData)); + + /* Exit error-handling context */ + errordata_stack_depth--; + recursion_depth--; +} + + /* * errcode --- add SQLSTATE error code to the current error * diff --git a/src/backend/utils/fmgr/README b/src/backend/utils/fmgr/README index 49845f67ac..aff8f6fb3e 100644 --- a/src/backend/utils/fmgr/README +++ b/src/backend/utils/fmgr/README @@ -267,6 +267,70 @@ See windowapi.h for more information. information about the context of the CALL statement, particularly whether it is within an "atomic" execution context. +* Some callers of datatype input functions (and in future perhaps +other classes of functions) pass an instance of ErrorSaveContext. +This indicates that the caller wishes to handle "safe" errors without +a transaction-terminating exception being thrown: instead, the callee +should store information about the error cause in the ErrorSaveContext +struct and return a dummy result value. 
Further details appear in +"Handling Non-Exception Errors" below. + + +Handling Non-Exception Errors +----------------------------- + +Postgres' standard mechanism for reporting errors (ereport() or elog()) +is used for all sorts of error conditions. This means that throwing +an exception via ereport(ERROR) requires an expensive transaction or +subtransaction abort and cleanup, since the exception catcher dare not +make many assumptions about what has gone wrong. There are situations +where we would rather have a lighter-weight mechanism for dealing +with errors that are known to be safe to recover from without a full +transaction cleanup. SQL-callable functions can support this need +using the ErrorSaveContext context mechanism. + +To report a "safe" error, a SQL-callable function should call + errsave(fcinfo->context, ...) +where it would previously have done + ereport(ERROR, ...) +If the passed "context" is NULL or is not an ErrorSaveContext node, +then errsave behaves precisely as ereport(ERROR): the exception is +thrown via longjmp, so that control does not return. If "context" +is an ErrorSaveContext node, then the error information included in +errsave's subsidiary reporting calls is stored into the context node +and control returns normally. The function should then return a dummy +value to its caller. (SQL NULL is recommendable as the dummy value; +but anything will do, since the caller is expected to ignore the +function's return value once it sees that an error has been reported +in the ErrorSaveContext node.) + +If there is nothing to do except return after calling errsave(), use + ereturn(fcinfo->context, dummy_value, ...) +to perform errsave() and then "return dummy_value". + +Considering datatype input functions as examples, typical "safe" error +conditions include input syntax errors and out-of-range values. An input +function typically detects such cases with simple if-tests and can easily +change the following ereport call to errsave. 
Error conditions that +should NOT be handled this way include out-of-memory, internal errors, and +anything where there is any question about our ability to continue normal +processing of the transaction. Those should still be thrown with ereport. +Because of this restriction, it's typically not necessary to pass the +ErrorSaveContext pointer down very far, as errors reported by palloc or +other low-level functions are typically reasonable to consider internal. + +Because no transaction cleanup will occur, a function that is exiting +after errsave() returns still bears responsibility for resource cleanup. +It is not necessary to be concerned about small leakages of palloc'd +memory, since the caller should be running the function in a short-lived +memory context. However, resources such as locks, open files, or buffer +pins must be closed out cleanly, as they would be in the non-error code +path. + +Conventions for callers that use the ErrorSaveContext mechanism +to trap errors are discussed with the declaration of that struct, +in nodes/miscnodes.h. + Functions Accepting or Returning Sets ------------------------------------- diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index 3c210297aa..8ef50781ec 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -23,6 +23,7 @@ #include "lib/stringinfo.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/miscnodes.h" #include "nodes/nodeFuncs.h" #include "pgstat.h" #include "utils/acl.h" @@ -1548,6 +1549,61 @@ InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod) return result; } +/* + * Call a previously-looked-up datatype input function, with non-exception + * handling of "safe" errors. + * + * This is the same as InputFunctionCall, but the caller may also pass a + * previously-initialized ErrorSaveContext node. (We declare that as + * "void *" to avoid including miscnodes.h in fmgr.h.) 
If escontext points + * to an ErrorSaveContext, any "safe" errors detected by the input function + * will be reported by filling the escontext struct. The caller must + * check escontext->error_occurred before assuming that the function result + * is meaningful. + */ +Datum +InputFunctionCallSafe(FmgrInfo *flinfo, char *str, + Oid typioparam, int32 typmod, + void *escontext) +{ + LOCAL_FCINFO(fcinfo, 3); + Datum result; + + if (str == NULL && flinfo->fn_strict) + return (Datum) 0; /* just return null result */ + + InitFunctionCallInfoData(*fcinfo, flinfo, 3, InvalidOid, escontext, NULL); + + fcinfo->args[0].value = CStringGetDatum(str); + fcinfo->args[0].isnull = false; + fcinfo->args[1].value = ObjectIdGetDatum(typioparam); + fcinfo->args[1].isnull = false; + fcinfo->args[2].value = Int32GetDatum(typmod); + fcinfo->args[2].isnull = false; + + result = FunctionCallInvoke(fcinfo); + + /* Result value is garbage, and could be null, if an error was reported */ + if (SAFE_ERROR_OCCURRED(escontext)) + return (Datum) 0; + + /* Otherwise, should get null result if and only if str is NULL */ + if (str == NULL) + { + if (!fcinfo->isnull) + elog(ERROR, "input function %u returned non-NULL", + flinfo->fn_oid); + } + else + { + if (fcinfo->isnull) + elog(ERROR, "input function %u returned NULL", + flinfo->fn_oid); + } + + return result; +} + /* * Call a previously-looked-up datatype output function. * diff --git a/src/include/fmgr.h b/src/include/fmgr.h index 380a82b9de..27f98a4413 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -700,6 +700,9 @@ extern Datum OidFunctionCall9Coll(Oid functionId, Oid collation, /* Special cases for convenient invocation of datatype I/O functions. 
*/ extern Datum InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod); +extern Datum InputFunctionCallSafe(FmgrInfo *flinfo, char *str, + Oid typioparam, int32 typmod, + void *escontext); extern Datum OidInputFunctionCall(Oid functionId, char *str, Oid typioparam, int32 typmod); extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val); diff --git a/src/include/nodes/meson.build b/src/include/nodes/meson.build index e63881086e..f0e60935b6 100644 --- a/src/include/nodes/meson.build +++ b/src/include/nodes/meson.build @@ -16,6 +16,7 @@ node_support_input_i = [ 'nodes/bitmapset.h', 'nodes/extensible.h', 'nodes/lockoptions.h', + 'nodes/miscnodes.h', 'nodes/replnodes.h', 'nodes/supportnodes.h', 'nodes/value.h', diff --git a/src/include/nodes/miscnodes.h b/src/include/nodes/miscnodes.h new file mode 100644 index 0000000000..893c49e02f --- /dev/null +++ b/src/include/nodes/miscnodes.h @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------- + * + * miscnodes.h + * Definitions for hard-to-classify node types. + * + * Node types declared here are not part of parse trees, plan trees, + * or execution state trees. We only assign them NodeTag values because + * IsA() tests provide a convenient way to disambiguate what kind of + * structure is being passed through assorted APIs, such as function + * "context" pointers. + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/nodes/miscnodes.h + * + *------------------------------------------------------------------------- + */ +#ifndef MISCNODES_H +#define MISCNODES_H + +#include "nodes/nodes.h" + +/* + * ErrorSaveContext - + * function call context node for handling of "safe" errors + * + * A caller wishing to trap "safe" errors must initialize a struct like this + * with all fields zero/NULL except for the NodeTag. 
Optionally, set + * details_wanted = true if more than the bare knowledge that a "safe" error + * occurred is required. After calling code that might report an error this + * way, check error_occurred to see if an error happened. If so, and if + * details_wanted is true, error_data has been filled with error details + * (stored in the callee's memory context!). FreeErrorData() can be called + * to release error_data, although this step is typically not necessary + * if the called code was run in a short-lived context. + */ +typedef struct ErrorSaveContext +{ + NodeTag type; + bool error_occurred; /* set to true if we detect a "safe" error */ + bool details_wanted; /* does caller want more info than that? */ + ErrorData *error_data; /* details of error, if so */ +} ErrorSaveContext; + +/* Often-useful macro for checking if a safe error was reported */ +#define SAFE_ERROR_OCCURRED(escontext) \ + ((escontext) != NULL && IsA(escontext, ErrorSaveContext) && \ + ((ErrorSaveContext *) (escontext))->error_occurred) + +#endif /* MISCNODES_H */ diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index f107a818e8..9d292ea6fd 100644 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -235,6 +235,62 @@ extern int getinternalerrposition(void); ereport(elevel, errmsg_internal(__VA_ARGS__)) +/*---------- + * Support for reporting "safe" errors that don't require a full transaction + * abort to clean up. This is to be used in this way: + * errsave(context, + * errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + * errmsg("invalid input syntax for type %s: \"%s\"", + * "boolean", in_str), + * ... other errxxx() fields as needed ...); + * + * "context" is a node pointer or NULL, and the remaining auxiliary calls + * provide the same error details as in ereport(). If context is not a + * pointer to an ErrorSaveContext node, then errsave(context, ...) + * behaves identically to ereport(ERROR, ...). 
If context is a pointer + * to an ErrorSaveContext node, then the information provided by the + * auxiliary calls is stored in the context node and control returns + * normally. The caller of errsave() must then do any required cleanup + * and return control back to its caller. That caller must check the + * ErrorSaveContext node to see whether an error occurred before + * it can trust the function's result to be meaningful. + * + * errsave_domain() allows a message domain to be specified; it is + * precisely analogous to ereport_domain(). + *---------- + */ +#define errsave_domain(context, domain, ...) \ + do { \ + void *context_ = (context); \ + pg_prevent_errno_in_scope(); \ + if (errsave_start(context_, domain)) \ + __VA_ARGS__, errsave_finish(context_, __FILE__, __LINE__, __func__); \ + } while(0) + +#define errsave(context, ...) \ + errsave_domain(context, TEXTDOMAIN, __VA_ARGS__) + +/* + * "ereturn(context, dummy_value, ...);" is exactly the same as + * "errsave(context, ...); return dummy_value;". This saves a bit + * of typing in the common case where a function has no cleanup + * actions to take after reporting a safe error. "dummy_value" + * can be empty if the function returns void. + */ +#define ereturn_domain(context, dummy_value, domain, ...) \ + do { \ + errsave_domain(context, domain, __VA_ARGS__); \ + return dummy_value; \ + } while(0) + +#define ereturn(context, dummy_value, ...) 
\ + ereturn_domain(context, dummy_value, TEXTDOMAIN, __VA_ARGS__) + +extern bool errsave_start(void *context, const char *domain); +extern void errsave_finish(void *context, const char *filename, int lineno, + const char *funcname); + + /* Support for constructing error strings separately from ereport() calls */ extern void pre_format_elog_string(int errnumber, const char *domain); diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 495e449a9e..245ea5ba09 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -21,6 +21,7 @@ #include "catalog/pg_type.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/miscnodes.h" #include "nodes/nodeFuncs.h" #include "nodes/supportnodes.h" #include "optimizer/optimizer.h" @@ -90,14 +91,15 @@ typedef struct ArrayIteratorData } ArrayIteratorData; static bool array_isspace(char ch); -static int ArrayCount(const char *str, int *dim, char typdelim); +static int ArrayCount(const char *str, int *dim, char typdelim, + void *escontext); static void ReadArrayStr(char *arrayStr, const char *origStr, int nitems, int ndim, int *dim, FmgrInfo *inputproc, Oid typioparam, int32 typmod, char typdelim, int typlen, bool typbyval, char typalign, Datum *values, bool *nulls, - bool *hasnulls, int32 *nbytes); + bool *hasnulls, int32 *nbytes, void *escontext); static void ReadArrayBinary(StringInfo buf, int nitems, FmgrInfo *receiveproc, Oid typioparam, int32 typmod, int typlen, bool typbyval, char typalign, @@ -177,6 +179,7 @@ array_in(PG_FUNCTION_ARGS) Oid element_type = PG_GETARG_OID(1); /* type of an array * element */ int32 typmod = PG_GETARG_INT32(2); /* typmod for array elements */ + void *escontext = fcinfo->context; int typlen; bool typbyval; char typalign; @@ -188,8 +191,8 @@ array_in(PG_FUNCTION_ARGS) nitems; Datum *dataPtr; bool *nullsPtr; - bool hasnulls; - int32 nbytes; + bool hasnulls = false; + int32 nbytes = 0; int32 dataoffset; ArrayType *retval; 
int ndim, @@ -258,7 +261,7 @@ array_in(PG_FUNCTION_ARGS) break; /* no more dimension items */ p++; if (ndim >= MAXDIM) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", ndim + 1, MAXDIM))); @@ -266,7 +269,7 @@ array_in(PG_FUNCTION_ARGS) for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++) /* skip */ ; if (q == p) /* no digits? */ - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("\"[\" must introduce explicitly-specified array dimensions."))); @@ -280,7 +283,7 @@ array_in(PG_FUNCTION_ARGS) for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++) /* skip */ ; if (q == p) /* no digits? */ - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Missing array dimension value."))); @@ -291,7 +294,7 @@ array_in(PG_FUNCTION_ARGS) lBound[ndim] = 1; } if (*q != ']') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Missing \"%s\" after array dimensions.", @@ -301,7 +304,7 @@ array_in(PG_FUNCTION_ARGS) ub = atoi(p); p = q + 1; if (ub < lBound[ndim]) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), errmsg("upper bound cannot be less than lower bound"))); @@ -313,11 +316,13 @@ array_in(PG_FUNCTION_ARGS) { /* No array dimensions, so intuit dimensions from brace structure */ if (*p != '{') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Array value must start with \"{\" or dimension information."))); - ndim = ArrayCount(p, dim, typdelim); + ndim = ArrayCount(p, 
dim, typdelim, escontext); + if (ndim < 0) + PG_RETURN_NULL(); for (i = 0; i < ndim; i++) lBound[i] = 1; } @@ -328,7 +333,7 @@ array_in(PG_FUNCTION_ARGS) /* If array dimensions are given, expect '=' operator */ if (strncmp(p, ASSGN, strlen(ASSGN)) != 0) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Missing \"%s\" after array dimensions.", @@ -342,20 +347,22 @@ array_in(PG_FUNCTION_ARGS) * were given */ if (*p != '{') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Array contents must start with \"{\"."))); - ndim_braces = ArrayCount(p, dim_braces, typdelim); + ndim_braces = ArrayCount(p, dim_braces, typdelim, escontext); + if (ndim_braces < 0) + PG_RETURN_NULL(); if (ndim_braces != ndim) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Specified array dimensions do not match array contents."))); for (i = 0; i < ndim; ++i) { if (dim[i] != dim_braces[i]) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Specified array dimensions do not match array contents."))); @@ -372,8 +379,10 @@ array_in(PG_FUNCTION_ARGS) #endif /* This checks for overflow of the array dimensions */ - nitems = ArrayGetNItems(ndim, dim); - ArrayCheckBounds(ndim, dim, lBound); + nitems = ArrayGetNItemsSafe(ndim, dim, escontext); + ArrayCheckBoundsSafe(ndim, dim, lBound, escontext); + if (SAFE_ERROR_OCCURRED(escontext)) + PG_RETURN_NULL(); /* Empty array? 
*/ if (nitems == 0) @@ -387,7 +396,9 @@ array_in(PG_FUNCTION_ARGS) typdelim, typlen, typbyval, typalign, dataPtr, nullsPtr, - &hasnulls, &nbytes); + &hasnulls, &nbytes, escontext); + if (SAFE_ERROR_OCCURRED(escontext)) + PG_RETURN_NULL(); if (hasnulls) { dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems); @@ -451,9 +462,11 @@ array_isspace(char ch) * * Returns number of dimensions as function result. The axis lengths are * returned in dim[], which must be of size MAXDIM. + * + * If we detect an error, fill *escontext with error details and return -1. */ static int -ArrayCount(const char *str, int *dim, char typdelim) +ArrayCount(const char *str, int *dim, char typdelim, void *escontext) { int nest_level = 0, i; @@ -488,11 +501,10 @@ ArrayCount(const char *str, int *dim, char typdelim) { case '\0': /* Signal a premature end of the string */ - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected end of input."))); - break; case '\\': /* @@ -504,7 +516,7 @@ ArrayCount(const char *str, int *dim, char typdelim) parse_state != ARRAY_ELEM_STARTED && parse_state != ARRAY_QUOTED_ELEM_STARTED && parse_state != ARRAY_ELEM_DELIMITED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", @@ -515,7 +527,7 @@ ArrayCount(const char *str, int *dim, char typdelim) if (*(ptr + 1)) ptr++; else - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected end of input."))); @@ -530,7 +542,7 @@ ArrayCount(const char *str, int *dim, char typdelim) if (parse_state != ARRAY_LEVEL_STARTED && parse_state != ARRAY_QUOTED_ELEM_STARTED && parse_state != ARRAY_ELEM_DELIMITED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 
errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected array element."))); @@ -551,14 +563,14 @@ ArrayCount(const char *str, int *dim, char typdelim) if (parse_state != ARRAY_NO_LEVEL && parse_state != ARRAY_LEVEL_STARTED && parse_state != ARRAY_LEVEL_DELIMITED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", '{'))); parse_state = ARRAY_LEVEL_STARTED; if (nest_level >= MAXDIM) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", nest_level + 1, MAXDIM))); @@ -581,14 +593,14 @@ ArrayCount(const char *str, int *dim, char typdelim) parse_state != ARRAY_QUOTED_ELEM_COMPLETED && parse_state != ARRAY_LEVEL_COMPLETED && !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED)) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", '}'))); parse_state = ARRAY_LEVEL_COMPLETED; if (nest_level == 0) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unmatched \"%c\" character.", '}'))); @@ -596,7 +608,7 @@ ArrayCount(const char *str, int *dim, char typdelim) if (nelems_last[nest_level] != 0 && nelems[nest_level] != nelems_last[nest_level]) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Multidimensional arrays must have " @@ -630,7 +642,7 @@ ArrayCount(const char *str, int *dim, char typdelim) parse_state != ARRAY_ELEM_COMPLETED && parse_state != ARRAY_QUOTED_ELEM_COMPLETED && parse_state != ARRAY_LEVEL_COMPLETED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 
errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", @@ -653,7 +665,7 @@ ArrayCount(const char *str, int *dim, char typdelim) if (parse_state != ARRAY_LEVEL_STARTED && parse_state != ARRAY_ELEM_STARTED && parse_state != ARRAY_ELEM_DELIMITED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected array element."))); @@ -673,7 +685,7 @@ ArrayCount(const char *str, int *dim, char typdelim) while (*ptr) { if (!array_isspace(*ptr++)) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Junk after closing right brace."))); @@ -713,9 +725,14 @@ ArrayCount(const char *str, int *dim, char typdelim) * *hasnulls: set true iff there are any null elements. * *nbytes: set to total size of data area needed (including alignment * padding but not including array header overhead). + * *escontext: if this points to an ErrorSaveContext, details of + * any error are reported there. * * Note that values[] and nulls[] are allocated by the caller, and must have * nitems elements. + * + * If escontext isn't NULL, caller must check for "safe" errors by + * examining the escontext. */ static void ReadArrayStr(char *arrayStr, @@ -733,7 +750,8 @@ ReadArrayStr(char *arrayStr, Datum *values, bool *nulls, bool *hasnulls, - int32 *nbytes) + int32 *nbytes, + void *escontext) { int i, nest_level = 0; @@ -784,7 +802,7 @@ ReadArrayStr(char *arrayStr, { case '\0': /* Signal a premature end of the string */ - ereport(ERROR, + ereturn(escontext,, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -793,7 +811,7 @@ ReadArrayStr(char *arrayStr, /* Skip backslash, copy next character as-is. 
*/ srcptr++; if (*srcptr == '\0') - ereport(ERROR, + ereturn(escontext,, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -823,7 +841,7 @@ ReadArrayStr(char *arrayStr, if (!in_quotes) { if (nest_level >= ndim) - ereport(ERROR, + ereturn(escontext,, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -838,7 +856,7 @@ ReadArrayStr(char *arrayStr, if (!in_quotes) { if (nest_level == 0) - ereport(ERROR, + ereturn(escontext,, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -891,7 +909,7 @@ ReadArrayStr(char *arrayStr, *dstendptr = '\0'; if (i < 0 || i >= nitems) - ereport(ERROR, + ereturn(escontext,, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -900,16 +918,20 @@ ReadArrayStr(char *arrayStr, pg_strcasecmp(itemstart, "NULL") == 0) { /* it's a NULL item */ - values[i] = InputFunctionCall(inputproc, NULL, - typioparam, typmod); + values[i] = InputFunctionCallSafe(inputproc, NULL, + typioparam, typmod, + escontext); nulls[i] = true; } else { - values[i] = InputFunctionCall(inputproc, itemstart, - typioparam, typmod); + values[i] = InputFunctionCallSafe(inputproc, itemstart, + typioparam, typmod, + escontext); nulls[i] = false; } + if (SAFE_ERROR_OCCURRED(escontext)) + return; } /* @@ -930,7 +952,7 @@ ReadArrayStr(char *arrayStr, totbytes = att_align_nominal(totbytes, typalign); /* check for overflow of total request */ if (!AllocSizeIsValid(totbytes)) - ereport(ERROR, + ereturn(escontext,, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array size exceeds the maximum allowed (%d)", (int) MaxAllocSize))); diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c index 051169a149..2868f4ef10 100644 --- a/src/backend/utils/adt/arrayutils.c +++ b/src/backend/utils/adt/arrayutils.c @@ -74,6 +74,16 @@ ArrayGetOffset0(int n, const int 
*tup, const int *scale) */ int ArrayGetNItems(int ndim, const int *dims) +{ + return ArrayGetNItemsSafe(ndim, dims, NULL); +} + +/* + * This entry point can return the error into an ErrorSaveContext + * instead of throwing an exception. -1 is returned after an error. + */ +int +ArrayGetNItemsSafe(int ndim, const int *dims, void *escontext) { int32 ret; int i; @@ -89,7 +99,7 @@ ArrayGetNItems(int ndim, const int *dims) /* A negative dimension implies that UB-LB overflowed ... */ if (dims[i] < 0) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array size exceeds the maximum allowed (%d)", (int) MaxArraySize))); @@ -98,14 +108,14 @@ ArrayGetNItems(int ndim, const int *dims) ret = (int32) prod; if ((int64) ret != prod) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array size exceeds the maximum allowed (%d)", (int) MaxArraySize))); } Assert(ret >= 0); if ((Size) ret > MaxArraySize) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array size exceeds the maximum allowed (%d)", (int) MaxArraySize))); @@ -126,6 +136,17 @@ ArrayGetNItems(int ndim, const int *dims) */ void ArrayCheckBounds(int ndim, const int *dims, const int *lb) +{ + ArrayCheckBoundsSafe(ndim, dims, lb, NULL); +} + +/* + * This entry point can return the error into an ErrorSaveContext + * instead of throwing an exception. 
+ */ +void +ArrayCheckBoundsSafe(int ndim, const int *dims, const int *lb, + void *escontext) { int i; @@ -135,7 +156,7 @@ ArrayCheckBounds(int ndim, const int *dims, const int *lb) int32 sum PG_USED_FOR_ASSERTS_ONLY; if (pg_add_s32_overflow(dims[i], lb[i], &sum)) - ereport(ERROR, + ereturn(escontext,, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array lower bound is too large: %d", lb[i]))); diff --git a/src/backend/utils/adt/bool.c b/src/backend/utils/adt/bool.c index cd7335287f..e291672ae4 100644 --- a/src/backend/utils/adt/bool.c +++ b/src/backend/utils/adt/bool.c @@ -148,13 +148,10 @@ boolin(PG_FUNCTION_ARGS) if (parse_bool_with_len(str, len, &result)) PG_RETURN_BOOL(result); - ereport(ERROR, + ereturn(fcinfo->context, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s: \"%s\"", "boolean", in_str))); - - /* not reached */ - PG_RETURN_BOOL(false); } /* diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index 42ddae99ef..e1837bee71 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -291,7 +291,7 @@ int4in(PG_FUNCTION_ARGS) { char *num = PG_GETARG_CSTRING(0); - PG_RETURN_INT32(pg_strtoint32(num)); + PG_RETURN_INT32(pg_strtoint32_safe(num, fcinfo->context)); } /* diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index a64422c8d0..0de0bed0e8 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -166,8 +166,11 @@ invalid_syntax: /* * Convert input string to a signed 32 bit integer. * - * Allows any number of leading or trailing whitespace characters. Will throw - * ereport() upon bad input format or overflow. + * Allows any number of leading or trailing whitespace characters. + * + * pg_strtoint32() will throw ereport() upon bad input format or overflow; + * while pg_strtoint32_safe() instead returns such complaints in *escontext, + * if it's an ErrorSaveContext. 
* * NB: Accumulate input as an unsigned number, to deal with two's complement * representation of the most negative number, which can't be represented as a @@ -175,6 +178,12 @@ invalid_syntax: */ int32 pg_strtoint32(const char *s) +{ + return pg_strtoint32_safe(s, NULL); +} + +int32 +pg_strtoint32_safe(const char *s, Node *escontext) { const char *ptr = s; uint32 tmp = 0; @@ -227,18 +236,16 @@ pg_strtoint32(const char *s) return (int32) tmp; out_of_range: - ereport(ERROR, + ereturn(escontext, 0, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value \"%s\" is out of range for type %s", s, "integer"))); invalid_syntax: - ereport(ERROR, + ereturn(escontext, 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s: \"%s\"", "integer", s))); - - return 0; /* keep compiler quiet */ } /* diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index db843a0fbf..221362ddb8 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -23,6 +23,7 @@ #include "funcapi.h" #include "libpq/pqformat.h" #include "miscadmin.h" +#include "nodes/miscnodes.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/lsyscache.h" @@ -77,6 +78,7 @@ record_in(PG_FUNCTION_ARGS) char *string = PG_GETARG_CSTRING(0); Oid tupType = PG_GETARG_OID(1); int32 tupTypmod = PG_GETARG_INT32(2); + void *escontext = fcinfo->context; HeapTupleHeader result; TupleDesc tupdesc; HeapTuple tuple; @@ -100,7 +102,7 @@ record_in(PG_FUNCTION_ARGS) * supply a valid typmod, and then we can do something useful for RECORD. 
*/ if (tupType == RECORDOID && tupTypmod < 0) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("input of anonymous composite types is not implemented"))); @@ -152,7 +154,7 @@ record_in(PG_FUNCTION_ARGS) while (*ptr && isspace((unsigned char) *ptr)) ptr++; if (*ptr++ != '(') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), errdetail("Missing left parenthesis."))); @@ -181,7 +183,7 @@ record_in(PG_FUNCTION_ARGS) ptr++; else /* *ptr must be ')' */ - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), errdetail("Too few columns."))); @@ -204,7 +206,7 @@ record_in(PG_FUNCTION_ARGS) char ch = *ptr++; if (ch == '\0') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), @@ -212,7 +214,7 @@ record_in(PG_FUNCTION_ARGS) if (ch == '\\') { if (*ptr == '\0') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), @@ -252,10 +254,13 @@ record_in(PG_FUNCTION_ARGS) column_info->column_type = column_type; } - values[i] = InputFunctionCall(&column_info->proc, - column_data, - column_info->typioparam, - att->atttypmod); + values[i] = InputFunctionCallSafe(&column_info->proc, + column_data, + column_info->typioparam, + att->atttypmod, + escontext); + if (SAFE_ERROR_OCCURRED(escontext)) + PG_RETURN_NULL(); /* * Prep for next column @@ -264,7 +269,7 @@ record_in(PG_FUNCTION_ARGS) } if (*ptr++ != ')') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), errdetail("Too many columns."))); @@ -272,7 +277,7 @@ record_in(PG_FUNCTION_ARGS) while (*ptr && isspace((unsigned char) 
*ptr)) ptr++; if (*ptr) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), errdetail("Junk after right parenthesis."))); diff --git a/src/include/utils/array.h b/src/include/utils/array.h index 2f794d1168..c56b6937b0 100644 --- a/src/include/utils/array.h +++ b/src/include/utils/array.h @@ -447,7 +447,10 @@ extern void array_free_iterator(ArrayIterator iterator); extern int ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx); extern int ArrayGetOffset0(int n, const int *tup, const int *scale); extern int ArrayGetNItems(int ndim, const int *dims); +extern int ArrayGetNItemsSafe(int ndim, const int *dims, void *escontext); extern void ArrayCheckBounds(int ndim, const int *dims, const int *lb); +extern void ArrayCheckBoundsSafe(int ndim, const int *dims, const int *lb, + void *escontext); extern void mda_get_range(int n, int *span, const int *st, const int *endp); extern void mda_get_prod(int n, const int *range, int *prod); extern void mda_get_offset_values(int n, int *dist, const int *prod, const int *span); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 81631f1645..fbfd8375e3 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -45,6 +45,7 @@ extern int namestrcmp(Name name, const char *str); /* numutils.c */ extern int16 pg_strtoint16(const char *s); extern int32 pg_strtoint32(const char *s); +extern int32 pg_strtoint32_safe(const char *s, Node *escontext); extern int64 pg_strtoint64(const char *s); extern int pg_itoa(int16 i, char *a); extern int pg_ultoa_n(uint32 value, char *a); diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index c25b52d0cb..462e4d338b 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -42,6 +42,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable FORCE_QUOTE { ( <replaceable 
class="parameter">column_name</replaceable> [, ...] ) | * } FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] ) + NULL_ON_ERROR [ <replaceable class="parameter">boolean</replaceable> ] + WARN_ON_ERROR [ <replaceable class="parameter">boolean</replaceable> ] ENCODING '<replaceable class="parameter">encoding_name</replaceable>' </synopsis> </refsynopsisdiv> @@ -356,6 +358,27 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable </listitem> </varlistentry> + <varlistentry> + <term><literal>NULL_ON_ERROR</literal></term> + <listitem> + <para> + Requests silently replacing any erroneous input values with + <literal>NULL</literal>. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>WARN_ON_ERROR</literal></term> + <listitem> + <para> + Requests replacing any erroneous input values with + <literal>NULL</literal>, and emitting a warning message instead of + the usual error. 
+ </para> + </listitem> + </varlistentry> + <varlistentry> <term><literal>ENCODING</literal></term> <listitem> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index db4c9dbc23..d224167111 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -409,6 +409,7 @@ ProcessCopyOptions(ParseState *pstate, bool format_specified = false; bool freeze_specified = false; bool header_specified = false; + bool on_error_specified = false; ListCell *option; /* Support external use for option sanity checking */ @@ -520,6 +521,20 @@ ProcessCopyOptions(ParseState *pstate, defel->defname), parser_errposition(pstate, defel->location))); } + else if (strcmp(defel->defname, "null_on_error") == 0) + { + if (on_error_specified) + errorConflictingDefElem(defel, pstate); + on_error_specified = true; + opts_out->null_on_error = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "warn_on_error") == 0) + { + if (on_error_specified) + errorConflictingDefElem(defel, pstate); + on_error_specified = true; + opts_out->warn_on_error = defGetBoolean(defel); + } else if (strcmp(defel->defname, "convert_selectively") == 0) { /* @@ -701,6 +716,30 @@ ProcessCopyOptions(ParseState *pstate, ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("CSV quote character must not appear in the NULL specification"))); + + /* + * The XXX_ON_ERROR options are only supported for input, and only in text + * modes. We could in future extend safe-errors support to datatype + * receive functions, but it'd take a lot more work. Moreover, it's not + * clear that receive functions can detect errors very well, so the + * feature likely wouldn't work terribly well. 
+ */ + if (opts_out->null_on_error && !is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY NULL_ON_ERROR only available using COPY FROM"))); + if (opts_out->null_on_error && opts_out->binary) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify NULL_ON_ERROR in BINARY mode"))); + if (opts_out->warn_on_error && !is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COPY WARN_ON_ERROR only available using COPY FROM"))); + if (opts_out->warn_on_error && opts_out->binary) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify WARN_ON_ERROR in BINARY mode"))); } /* diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 504afcb811..16b01b6598 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1599,6 +1599,15 @@ BeginCopyFrom(ParseState *pstate, } } + /* For the XXX_ON_ERROR options, we'll need an ErrorSaveContext */ + if (cstate->opts.null_on_error || + cstate->opts.warn_on_error) + { + cstate->es_context = makeNode(ErrorSaveContext); + /* Error details are only needed for warnings */ + if (cstate->opts.warn_on_error) + cstate->es_context->details_wanted = true; + } /* initialize progress */ pgstat_progress_start_command(PROGRESS_COMMAND_COPY, diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 097414ef12..9cf7d31dd2 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -876,6 +876,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, char **field_strings; ListCell *cur; int fldct; + bool safe_mode; int fieldno; char *string; @@ -889,6 +890,8 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), errmsg("extra data after last expected column"))); + safe_mode = cstate->opts.null_on_error || cstate->opts.warn_on_error; + fieldno = 0; /* Loop to read the 
user attributes on the line. */ @@ -938,12 +941,50 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, cstate->cur_attname = NameStr(att->attname); cstate->cur_attval = string; - values[m] = InputFunctionCall(&in_functions[m], - string, - typioparams[m], - att->atttypmod); - if (string != NULL) - nulls[m] = false; + + if (safe_mode) + { + ErrorSaveContext *es_context = cstate->es_context; + + /* Must reset the error_occurred flag each time */ + es_context->error_occurred = false; + + values[m] = InputFunctionCallSafe(&in_functions[m], + string, + typioparams[m], + att->atttypmod, + es_context); + if (es_context->error_occurred) + { + /* nulls[m] is already true */ + if (cstate->opts.warn_on_error) + { + ErrorData *edata = es_context->error_data; + + /* Note that our errcontext callback wasn't used */ + ereport(WARNING, + errcode(edata->sqlerrcode), + errmsg_internal("invalid input for column %s: %s", + cstate->cur_attname, + edata->message), + errcontext("COPY %s, line %llu", + cstate->cur_relname, + (unsigned long long) cstate->cur_lineno)); + } + } + else if (string != NULL) + nulls[m] = false; + } + else + { + values[m] = InputFunctionCall(&in_functions[m], + string, + typioparams[m], + att->atttypmod); + if (string != NULL) + nulls[m] = false; + } + cstate->cur_attname = NULL; cstate->cur_attval = NULL; } diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index b77b935005..ee38bd0e28 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -57,6 +57,8 @@ typedef struct CopyFormatOptions bool *force_notnull_flags; /* per-column CSV FNN flags */ List *force_null; /* list of column names */ bool *force_null_flags; /* per-column CSV FN flags */ + bool null_on_error; /* replace erroneous inputs with NULL? */ + bool warn_on_error; /* ... and warn about it? */ bool convert_selectively; /* do selective binary conversion? 
*/ List *convert_select; /* list of column names (can be NIL) */ } CopyFormatOptions; diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index 8d9cc5accd..ee6a11306d 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -16,6 +16,7 @@ #include "commands/copy.h" #include "commands/trigger.h" +#include "nodes/miscnodes.h" /* * Represents the different source cases we need to worry about at @@ -97,6 +98,7 @@ typedef struct CopyFromStateData int *defmap; /* array of default att numbers */ ExprState **defexprs; /* array of default att expressions */ bool volatile_defexprs; /* is any of defexprs volatile? */ + ErrorSaveContext *es_context; /* used for XXX_ON_ERROR options */ List *range_table; ExprState *qualexpr; diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 3fad1c52d1..f848ce124d 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -240,3 +240,27 @@ SELECT * FROM header_copytest ORDER BY a; (5 rows) drop table header_copytest; +-- "safe" error handling +create table on_error_copytest(i int, b bool, ai int[]); +copy on_error_copytest from stdin with (null_on_error); +copy on_error_copytest from stdin with (warn_on_error); +WARNING: invalid input for column b: invalid input syntax for type boolean: "b" +WARNING: invalid input for column ai: malformed array literal: "[0:1000]={3,4}" +WARNING: invalid input for column i: invalid input syntax for type integer: "err" +WARNING: invalid input for column i: invalid input syntax for type integer: "bad" +WARNING: invalid input for column b: invalid input syntax for type boolean: "z" +WARNING: invalid input for column ai: invalid input syntax for type integer: "zed" +select * from on_error_copytest; + i | b | ai +---+---+------------- + 1 | | + | t | + 2 | f | {3,4} + | | + 3 | f | [3:4]={3,4} + 4 | | + | t | {} + | | +(8 rows) + +drop table 
on_error_copytest; diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 285022e07c..ff77d27cfc 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -268,3 +268,23 @@ a c b SELECT * FROM header_copytest ORDER BY a; drop table header_copytest; + +-- "safe" error handling +create table on_error_copytest(i int, b bool, ai int[]); + +copy on_error_copytest from stdin with (null_on_error); +1 a {1,} +err 1 {x} +2 f {3,4} +bad x {, +\. + +copy on_error_copytest from stdin with (warn_on_error); +3 0 [3:4]={3,4} +4 b [0:1000]={3,4} +err t {} +bad z {"zed"} +\. + +select * from on_error_copytest; +drop table on_error_copytest;
pgsql-hackers by date: