Re: Error-safe user functions - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Error-safe user functions |
Date | |
Msg-id | 3211.1670452341@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Error-safe user functions (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses | Re: Error-safe user functions; Re: Error-safe user functions |
List | pgsql-hackers |
OK, here's a v4 that I think is possibly committable. I've changed all the comments and docs to use the "soft error" terminology, but since using "soft" in the actual function names didn't seem that appealing, they still use "safe". I already pushed the 0000 elog-refactoring patch, since that seemed uncontroversial. 0001 attached covers the same territory as before, but I regrouped the rest so that 0002 installs the new test support functions, then 0003 adds both the per-datatype changes and corresponding test cases for bool, int4, arrays, and records. The idea here is that 0003 can be pointed to as a sample of what has to be done to datatype input functions, while the preceding patches can be cited as relevant documentation. (I've not decided whether to squash 0001 and 0002 together or commit them separately. Does it make sense to break 0003 into 4 separate commits, or is that overkill?) Thoughts? regards, tom lane diff --git a/doc/src/sgml/ref/create_type.sgml b/doc/src/sgml/ref/create_type.sgml index 693423e524..994dfc6526 100644 --- a/doc/src/sgml/ref/create_type.sgml +++ b/doc/src/sgml/ref/create_type.sgml @@ -900,6 +900,17 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> function is written in C. </para> + <para> + In <productname>PostgreSQL</productname> version 16 and later, + it is desirable for base types' input functions to + return <quote>soft</quote> errors using the + new <function>errsave()</function>/<function>ereturn()</function> + mechanism, rather than throwing <function>ereport()</function> + exceptions as in previous versions. + See <filename>src/backend/utils/fmgr/README</filename> for more + information. 
+ </para> + </refsect1> <refsect1> diff --git a/src/backend/nodes/Makefile b/src/backend/nodes/Makefile index 4368c30fdb..7c594be583 100644 --- a/src/backend/nodes/Makefile +++ b/src/backend/nodes/Makefile @@ -56,6 +56,7 @@ node_headers = \ nodes/bitmapset.h \ nodes/extensible.h \ nodes/lockoptions.h \ + nodes/miscnodes.h \ nodes/replnodes.h \ nodes/supportnodes.h \ nodes/value.h \ diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 7212bc486f..08992dfd47 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -68,6 +68,7 @@ my @all_input_files = qw( nodes/bitmapset.h nodes/extensible.h nodes/lockoptions.h + nodes/miscnodes.h nodes/replnodes.h nodes/supportnodes.h nodes/value.h @@ -89,6 +90,7 @@ my @nodetag_only_files = qw( executor/tuptable.h foreign/fdwapi.h nodes/lockoptions.h + nodes/miscnodes.h nodes/replnodes.h nodes/supportnodes.h ); diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index f5cd1b7493..a36aeb832e 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -71,6 +71,7 @@ #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" #include "miscadmin.h" #include "pgstat.h" #include "postmaster/bgworker.h" @@ -611,6 +612,128 @@ errfinish(const char *filename, int lineno, const char *funcname) CHECK_FOR_INTERRUPTS(); } + +/* + * errsave_start --- begin a "soft" error-reporting cycle + * + * If "context" isn't an ErrorSaveContext node, this behaves as + * errstart(ERROR, domain), and the errsave() macro ends up acting + * exactly like ereport(ERROR, ...). + * + * If "context" is an ErrorSaveContext node, but the node creator only wants + * notification of the fact of a soft error without any details, just set + * the error_occurred flag in the ErrorSaveContext node and return false, + * which will cause us to skip the remaining error processing steps. 
+ * + * Otherwise, create and initialize error stack entry and return true. + * Subsequently, errmsg() and perhaps other routines will be called to further + * populate the stack entry. Finally, errsave_finish() will be called to + * tidy up. + */ +bool +errsave_start(NodePtr context, const char *domain) +{ + ErrorSaveContext *escontext; + ErrorData *edata; + + /* + * Do we have a context for soft error reporting? If not, just punt to + * errstart(). + */ + if (context == NULL || !IsA(context, ErrorSaveContext)) + return errstart(ERROR, domain); + + /* Report that a soft error was detected */ + escontext = (ErrorSaveContext *) context; + escontext->error_occurred = true; + + /* Nothing else to do if caller wants no further details */ + if (!escontext->details_wanted) + return false; + + /* + * Okay, crank up a stack entry to store the info in. + */ + + recursion_depth++; + + /* Initialize data for this error frame */ + edata = get_error_stack_entry(); + edata->elevel = LOG; /* signal all is well to errsave_finish */ + set_stack_entry_domain(edata, domain); + /* Select default errcode based on the assumed elevel of ERROR */ + edata->sqlerrcode = ERRCODE_INTERNAL_ERROR; + + /* + * Any allocations for this error state level should go into the caller's + * context. We don't need to pollute ErrorContext, or even require it to + * exist, in this code path. + */ + edata->assoc_context = CurrentMemoryContext; + + recursion_depth--; + return true; +} + +/* + * errsave_finish --- end a "soft" error-reporting cycle + * + * If errsave_start() decided this was a regular error, behave as + * errfinish(). Otherwise, package up the error details and save + * them in the ErrorSaveContext node. 
+ */ +void +errsave_finish(NodePtr context, const char *filename, int lineno, + const char *funcname) +{ + ErrorSaveContext *escontext = (ErrorSaveContext *) context; + ErrorData *edata = &errordata[errordata_stack_depth]; + + /* verify stack depth before accessing *edata */ + CHECK_STACK_DEPTH(); + + /* + * If errsave_start punted to errstart, then elevel will be ERROR or + * perhaps even PANIC. Punt likewise to errfinish. + */ + if (edata->elevel >= ERROR) + { + errfinish(filename, lineno, funcname); + pg_unreachable(); + } + + /* + * Else, we should package up the stack entry contents and deliver them to + * the caller. + */ + recursion_depth++; + + /* Save the last few bits of error state into the stack entry */ + set_stack_entry_location(edata, filename, lineno, funcname); + + /* Replace the LOG value that errsave_start inserted */ + edata->elevel = ERROR; + + /* + * We skip calling backtrace and context functions, which are more likely + * to cause trouble than provide useful context; they might act on the + * assumption that a transaction abort is about to occur. + */ + + /* + * Make a copy of the error info for the caller. All the subsidiary + * strings are already in the caller's context, so it's sufficient to + * flat-copy the stack entry. + */ + escontext->error_data = palloc_object(ErrorData); + memcpy(escontext->error_data, edata, sizeof(ErrorData)); + + /* Exit error-handling context */ + errordata_stack_depth--; + recursion_depth--; +} + + /* * get_error_stack_entry --- allocate and initialize a new stack entry * diff --git a/src/backend/utils/fmgr/README b/src/backend/utils/fmgr/README index 49845f67ac..9958d38992 100644 --- a/src/backend/utils/fmgr/README +++ b/src/backend/utils/fmgr/README @@ -267,6 +267,78 @@ See windowapi.h for more information. information about the context of the CALL statement, particularly whether it is within an "atomic" execution context. 
+* Some callers of datatype input functions (and in future perhaps +other classes of functions) pass an instance of ErrorSaveContext. +This indicates that the caller wishes to handle "soft" errors without +a transaction-terminating exception being thrown: instead, the callee +should store information about the error cause in the ErrorSaveContext +struct and return a dummy result value. Further details appear in +"Handling Soft Errors" below. + + +Handling Soft Errors +-------------------- + +Postgres' standard mechanism for reporting errors (ereport() or elog()) +is used for all sorts of error conditions. This means that throwing +an exception via ereport(ERROR) requires an expensive transaction or +subtransaction abort and cleanup, since the exception catcher dare not +make many assumptions about what has gone wrong. There are situations +where we would rather have a lighter-weight mechanism for dealing +with errors that are known to be safe to recover from without a full +transaction cleanup. SQL-callable functions can support this need +using the ErrorSaveContext context mechanism. + +To report a "soft" error, a SQL-callable function should call + errsave(fcinfo->context, ...) +where it would previously have done + ereport(ERROR, ...) +If the passed "context" is NULL or is not an ErrorSaveContext node, +then errsave behaves precisely as ereport(ERROR): the exception is +thrown via longjmp, so that control does not return. If "context" +is an ErrorSaveContext node, then the error information included in +errsave's subsidiary reporting calls is stored into the context node +and control returns from errsave normally. The function should then +return a dummy value to its caller. (SQL NULL is recommendable as +the dummy value; but anything will do, since the caller is expected +to ignore the function's return value once it sees that an error has +been reported in the ErrorSaveContext node.) 
+ +If there is nothing to do except return after calling errsave(), +you can save a line or two by writing + ereturn(fcinfo->context, dummy_value, ...) +to perform errsave() and then "return dummy_value". + +An error reported "softly" must be safe, in the sense that there is +no question about our ability to continue normal processing of the +transaction. Error conditions that should NOT be handled this way +include out-of-memory, unexpected internal errors, or anything that +cannot easily be cleaned up after. Such cases should still be thrown +with ereport, as they have been in the past. + +Considering datatype input functions as examples, typical "soft" error +conditions include input syntax errors and out-of-range values. An +input function typically detects such cases with simple if-tests and +can easily change the ensuing ereport call to an errsave or ereturn. +Because of this restriction, it's typically not necessary to pass +the ErrorSaveContext pointer down very far, as errors reported by +low-level functions are typically reasonable to consider internal. +(Another way to frame the distinction is that input functions should +report all invalid-input conditions softly, but internal problems are +hard errors.) + +Because no transaction cleanup will occur, a function that is exiting +after errsave() returns will bear responsibility for resource cleanup. +It is not necessary to be concerned about small leakages of palloc'd +memory, since the caller should be running the function in a short-lived +memory context. However, resources such as locks, open files, or buffer +pins must be closed out cleanly, as they would be in the non-error code +path. + +Conventions for callers that use the ErrorSaveContext mechanism +to trap errors are discussed with the declaration of that struct, +in nodes/miscnodes.h. 
+ Functions Accepting or Returning Sets ------------------------------------- diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index 3c210297aa..493e893ada 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -23,6 +23,7 @@ #include "lib/stringinfo.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/miscnodes.h" #include "nodes/nodeFuncs.h" #include "pgstat.h" #include "utils/acl.h" @@ -1548,6 +1549,70 @@ InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod) return result; } +/* + * Call a previously-looked-up datatype input function, with non-exception + * handling of "soft" errors. + * + * This is basically like InputFunctionCall, but the converted Datum is + * returned into *result while the function result is true for success or + * false for failure. Also, the caller may pass an ErrorSaveContext node. + * (We declare that as "NodePtr" to avoid including nodes.h in fmgr.h.) + * + * If escontext points to an ErrorSaveContext, any "soft" errors detected by + * the input function will be reported by filling the escontext struct and + * returning false. (The caller can choose to test SOFT_ERROR_OCCURRED(), + * but checking the function result instead is usually cheaper.) + * + * If escontext does not point to an ErrorSaveContext, errors are reported + * via ereport(ERROR), so that there is no functional difference from + * InputFunctionCall; the result will always be true if control returns. 
+ */ +bool +InputFunctionCallSafe(FmgrInfo *flinfo, char *str, + Oid typioparam, int32 typmod, + NodePtr escontext, + Datum *result) +{ + LOCAL_FCINFO(fcinfo, 3); + + if (str == NULL && flinfo->fn_strict) + { + *result = (Datum) 0; /* just return null result */ + return true; + } + + InitFunctionCallInfoData(*fcinfo, flinfo, 3, InvalidOid, escontext, NULL); + + fcinfo->args[0].value = CStringGetDatum(str); + fcinfo->args[0].isnull = false; + fcinfo->args[1].value = ObjectIdGetDatum(typioparam); + fcinfo->args[1].isnull = false; + fcinfo->args[2].value = Int32GetDatum(typmod); + fcinfo->args[2].isnull = false; + + *result = FunctionCallInvoke(fcinfo); + + /* Result value is garbage, and could be null, if an error was reported */ + if (SOFT_ERROR_OCCURRED(escontext)) + return false; + + /* Otherwise, should get null result if and only if str is NULL */ + if (str == NULL) + { + if (!fcinfo->isnull) + elog(ERROR, "input function %u returned non-NULL", + flinfo->fn_oid); + } + else + { + if (fcinfo->isnull) + elog(ERROR, "input function %u returned NULL", + flinfo->fn_oid); + } + + return true; +} + /* * Call a previously-looked-up datatype output function. 
* diff --git a/src/include/fmgr.h b/src/include/fmgr.h index 380a82b9de..d739f3dbd9 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -18,8 +18,7 @@ #ifndef FMGR_H #define FMGR_H -/* We don't want to include primnodes.h here, so make some stub references */ -typedef struct Node *fmNodePtr; +/* We don't want to include primnodes.h here, so make a stub reference */ typedef struct Aggref *fmAggrefPtr; /* Likewise, avoid including execnodes.h here */ @@ -63,7 +62,7 @@ typedef struct FmgrInfo unsigned char fn_stats; /* collect stats if track_functions > this */ void *fn_extra; /* extra space for use by handler */ MemoryContext fn_mcxt; /* memory context to store fn_extra in */ - fmNodePtr fn_expr; /* expression parse tree for call, or NULL */ + NodePtr fn_expr; /* expression parse tree for call, or NULL */ } FmgrInfo; /* @@ -85,8 +84,8 @@ typedef struct FmgrInfo typedef struct FunctionCallInfoBaseData { FmgrInfo *flinfo; /* ptr to lookup info used for this call */ - fmNodePtr context; /* pass info about context of call */ - fmNodePtr resultinfo; /* pass or return extra info about result */ + NodePtr context; /* pass info about context of call */ + NodePtr resultinfo; /* pass or return extra info about result */ Oid fncollation; /* collation for function to use */ #define FIELDNO_FUNCTIONCALLINFODATA_ISNULL 4 bool isnull; /* function must set true if result is NULL */ @@ -700,6 +699,10 @@ extern Datum OidFunctionCall9Coll(Oid functionId, Oid collation, /* Special cases for convenient invocation of datatype I/O functions. 
*/ extern Datum InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod); +extern bool InputFunctionCallSafe(FmgrInfo *flinfo, char *str, + Oid typioparam, int32 typmod, + NodePtr escontext, + Datum *result); extern Datum OidInputFunctionCall(Oid functionId, char *str, Oid typioparam, int32 typmod); extern char *OutputFunctionCall(FmgrInfo *flinfo, Datum val); @@ -719,9 +722,9 @@ extern const Pg_finfo_record *fetch_finfo_record(void *filehandle, const char *f extern Oid fmgr_internal_function(const char *proname); extern Oid get_fn_expr_rettype(FmgrInfo *flinfo); extern Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum); -extern Oid get_call_expr_argtype(fmNodePtr expr, int argnum); +extern Oid get_call_expr_argtype(NodePtr expr, int argnum); extern bool get_fn_expr_arg_stable(FmgrInfo *flinfo, int argnum); -extern bool get_call_expr_arg_stable(fmNodePtr expr, int argnum); +extern bool get_call_expr_arg_stable(NodePtr expr, int argnum); extern bool get_fn_expr_variadic(FmgrInfo *flinfo); extern bytea *get_fn_opclass_options(FmgrInfo *flinfo); extern bool has_fn_opclass_options(FmgrInfo *flinfo); diff --git a/src/include/nodes/meson.build b/src/include/nodes/meson.build index e63881086e..f0e60935b6 100644 --- a/src/include/nodes/meson.build +++ b/src/include/nodes/meson.build @@ -16,6 +16,7 @@ node_support_input_i = [ 'nodes/bitmapset.h', 'nodes/extensible.h', 'nodes/lockoptions.h', + 'nodes/miscnodes.h', 'nodes/replnodes.h', 'nodes/supportnodes.h', 'nodes/value.h', diff --git a/src/include/nodes/miscnodes.h b/src/include/nodes/miscnodes.h new file mode 100644 index 0000000000..b50ee60352 --- /dev/null +++ b/src/include/nodes/miscnodes.h @@ -0,0 +1,56 @@ +/*------------------------------------------------------------------------- + * + * miscnodes.h + * Definitions for hard-to-classify node types. + * + * Node types declared here are not part of parse trees, plan trees, + * or execution state trees. 
We only assign them NodeTag values because + * IsA() tests provide a convenient way to disambiguate what kind of + * structure is being passed through assorted APIs, such as function + * "context" pointers. + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/nodes/miscnodes.h + * + *------------------------------------------------------------------------- + */ +#ifndef MISCNODES_H +#define MISCNODES_H + +#include "nodes/nodes.h" + +/* + * ErrorSaveContext - + * function call context node for handling of "soft" errors + * + * A caller wishing to trap soft errors must initialize a struct like this + * with all fields zero/NULL except for the NodeTag. Optionally, set + * details_wanted = true if more than the bare knowledge that a soft error + * occurred is required. The struct is then passed to a SQL-callable function + * via the FunctionCallInfo.context field; or below the level of SQL calls, + * it could be passed to a subroutine directly. + * + * After calling code that might report an error this way, check + * error_occurred to see if an error happened. If so, and if details_wanted + * is true, error_data has been filled with error details (stored in the + * callee's memory context!). FreeErrorData() can be called to release + * error_data, although that step is typically not necessary if the called + * code was run in a short-lived context. + */ +typedef struct ErrorSaveContext +{ + NodeTag type; + bool error_occurred; /* set to true if we detect a soft error */ + bool details_wanted; /* does caller want more info than that? 
*/ + ErrorData *error_data; /* details of error, if so */ +} ErrorSaveContext; + +/* Often-useful macro for checking if a soft error was reported */ +#define SOFT_ERROR_OCCURRED(escontext) \ + ((escontext) != NULL && IsA(escontext, ErrorSaveContext) && \ + ((ErrorSaveContext *) (escontext))->error_occurred) + +#endif /* MISCNODES_H */ diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index f107a818e8..607c62b17c 100644 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -18,6 +18,13 @@ #include "lib/stringinfo.h" +/* + * We cannot include nodes.h yet, so make a stub reference. (This is also + * used by fmgr.h, which doesn't want to depend on nodes.h either.) + */ +typedef struct Node *NodePtr; + + /* Error level codes */ #define DEBUG5 10 /* Debugging messages, in categories of * decreasing detail. */ @@ -235,6 +242,63 @@ extern int getinternalerrposition(void); ereport(elevel, errmsg_internal(__VA_ARGS__)) +/*---------- + * Support for reporting "soft" errors that don't require a full transaction + * abort to clean up. This is to be used in this way: + * errsave(context, + * errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + * errmsg("invalid input syntax for type %s: \"%s\"", + * "boolean", in_str), + * ... other errxxx() fields as needed ...); + * + * "context" is a node pointer or NULL, and the remaining auxiliary calls + * provide the same error details as in ereport(). If context is not a + * pointer to an ErrorSaveContext node, then errsave(context, ...) + * behaves identically to ereport(ERROR, ...). If context is a pointer + * to an ErrorSaveContext node, then the information provided by the + * auxiliary calls is stored in the context node and control returns + * normally. The caller of errsave() must then do any required cleanup + * and return control back to its caller. That caller must check the + * ErrorSaveContext node to see whether an error occurred before + * it can trust the function's result to be meaningful. 
+ * + * errsave_domain() allows a message domain to be specified; it is + * precisely analogous to ereport_domain(). + *---------- + */ +#define errsave_domain(context, domain, ...) \ + do { \ + NodePtr context_ = (context); \ + pg_prevent_errno_in_scope(); \ + if (errsave_start(context_, domain)) \ + __VA_ARGS__, errsave_finish(context_, __FILE__, __LINE__, __func__); \ + } while(0) + +#define errsave(context, ...) \ + errsave_domain(context, TEXTDOMAIN, __VA_ARGS__) + +/* + * "ereturn(context, dummy_value, ...);" is exactly the same as + * "errsave(context, ...); return dummy_value;". This saves a bit + * of typing in the common case where a function has no cleanup + * actions to take after reporting a soft error. "dummy_value" + * can be empty if the function returns void. + */ +#define ereturn_domain(context, dummy_value, domain, ...) \ + do { \ + errsave_domain(context, domain, __VA_ARGS__); \ + return dummy_value; \ + } while(0) + +#define ereturn(context, dummy_value, ...) \ + ereturn_domain(context, dummy_value, TEXTDOMAIN, __VA_ARGS__) + +extern bool errsave_start(NodePtr context, const char *domain); +extern void errsave_finish(NodePtr context, + const char *filename, int lineno, + const char *funcname); + + /* Support for constructing error strings separately from ereport() calls */ extern void pre_format_elog_string(int errnumber, const char *domain); diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index e57ffce971..4fdd692e8e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -24683,6 +24683,105 @@ SELECT collation for ('foo' COLLATE "de_DE"); </sect2> + <sect2 id="functions-info-validity"> + <title>Data Validity Checking Functions</title> + + <para> + The functions shown in <xref linkend="functions-info-validity-table"/> + can be helpful for checking validity of proposed input data. 
+ </para> + + <table id="functions-info-validity-table"> + <title>Data Validity Checking Functions</title> + <tgroup cols="1"> + <thead> + <row> + <entry role="func_table_entry"><para role="func_signature"> + Function + </para> + <para> + Description + </para> + <para> + Example(s) + </para></entry> + </row> + </thead> + + <tbody> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>pg_input_is_valid</primary> + </indexterm> + <function>pg_input_is_valid</function> ( + <parameter>string</parameter> <type>text</type>, + <parameter>type</parameter> <type>regtype</type> + <optional>, <parameter>typmod</parameter> <type>integer</type> </optional> + ) + <returnvalue>boolean</returnvalue> + </para> + <para> + Tests whether the given <parameter>string</parameter> is valid + input for the specified data type, returning true or false. + Since the data type is named by a <type>regtype</type> parameter, + it is possible to just write the type name in single quotes. An + encoded type modifier can also be supplied, if the data type pays + attention to that. + </para> + <para> + This function will only work as desired if the data type's input + function has been updated to report invalid input as + a <quote>soft</quote> error. Otherwise, invalid input will abort + the transaction, just as if the string had been cast to the type + directly. 
+ </para> + <para> + <literal>pg_input_is_valid('42', 'integer')</literal> + <returnvalue>t</returnvalue> + </para> + <para> + <literal>pg_input_is_valid('42000000000', 'integer')</literal> + <returnvalue>f</returnvalue> + </para></entry> + </row> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>pg_input_error_message</primary> + </indexterm> + <function>pg_input_error_message</function> ( + <parameter>string</parameter> <type>text</type>, + <parameter>type</parameter> <type>regtype</type> + <optional>, <parameter>typmod</parameter> <type>integer</type> </optional> + ) + <returnvalue>text</returnvalue> + </para> + <para> + Tests whether the given <parameter>string</parameter> is valid + input for the specified data type; if not, return the error + message that would have been thrown. If the input is valid, the + result is NULL. The inputs are the same as + for <function>pg_input_is_valid</function>. + </para> + <para> + This function will only work as desired if the data type's input + function has been updated to report invalid input as + a <quote>soft</quote> error. Otherwise, invalid input will abort + the transaction, just as if the string had been cast to the type + directly. 
+ </para> + <para> + <literal>pg_input_error_message('42000000000', 'integer')</literal> + <returnvalue>value "42000000000" is out of range for type integer</returnvalue> + </para></entry> + </row> + </tbody> + </tgroup> + </table> + + </sect2> + <sect2 id="functions-info-snapshot"> <title>Transaction ID and Snapshot Information Functions</title> diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index 9c13251231..09fae48658 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -32,6 +32,7 @@ #include "common/keywords.h" #include "funcapi.h" #include "miscadmin.h" +#include "nodes/miscnodes.h" #include "parser/scansup.h" #include "pgstat.h" #include "postmaster/syslogger.h" @@ -45,6 +46,22 @@ #include "utils/ruleutils.h" #include "utils/timestamp.h" +/* + * structure to cache metadata needed in pg_input_is_valid_common + */ +typedef struct BasicIOData +{ + Oid typoid; + Oid typiofunc; + Oid typioparam; + FmgrInfo proc; +} BasicIOData; + +static bool pg_input_is_valid_common(FunctionCallInfo fcinfo, + text *txt, Oid typoid, int32 typmod, + ErrorSaveContext *escontext); + + /* * Common subroutine for num_nulls() and num_nonnulls(). * Returns true if successful, false if function should return NULL. @@ -640,6 +657,146 @@ pg_column_is_updatable(PG_FUNCTION_ARGS) } +/* + * pg_input_is_valid - test whether string is valid input for datatype. + * + * Returns true if OK, false if not. + * + * This will only work usefully if the datatype's input function has been + * updated to return "soft" errors via errsave/ereturn. 
+ */ +Datum +pg_input_is_valid(PG_FUNCTION_ARGS) +{ + text *txt = PG_GETARG_TEXT_PP(0); + Oid typoid = PG_GETARG_OID(1); + ErrorSaveContext escontext; + + /* Set up empty ErrorSaveContext */ + memset(&escontext, 0, sizeof(escontext)); + escontext.type = T_ErrorSaveContext; + + PG_RETURN_BOOL(pg_input_is_valid_common(fcinfo, txt, typoid, -1, + &escontext)); +} + +/* Same, with non-default typmod */ +Datum +pg_input_is_valid_mod(PG_FUNCTION_ARGS) +{ + text *txt = PG_GETARG_TEXT_PP(0); + Oid typoid = PG_GETARG_OID(1); + int32 typmod = PG_GETARG_INT32(2); + ErrorSaveContext escontext; + + /* Set up empty ErrorSaveContext */ + memset(&escontext, 0, sizeof(escontext)); + escontext.type = T_ErrorSaveContext; + + PG_RETURN_BOOL(pg_input_is_valid_common(fcinfo, txt, typoid, typmod, + &escontext)); +} + +/* + * pg_input_error_message - test whether string is valid input for datatype. + * + * Returns NULL if OK, else the primary message string from the error. + * + * This will only work usefully if the datatype's input function has been + * updated to return "soft" errors via errsave/ereturn. 
+ */ +Datum +pg_input_error_message(PG_FUNCTION_ARGS) +{ + text *txt = PG_GETARG_TEXT_PP(0); + Oid typoid = PG_GETARG_OID(1); + ErrorSaveContext escontext; + + /* Set up empty ErrorSaveContext, but enable details_wanted */ + memset(&escontext, 0, sizeof(escontext)); + escontext.type = T_ErrorSaveContext; + escontext.details_wanted = true; + + if (pg_input_is_valid_common(fcinfo, txt, typoid, -1, + &escontext)) + PG_RETURN_NULL(); + + Assert(escontext.error_occurred); + Assert(escontext.error_data != NULL); + Assert(escontext.error_data->message != NULL); + + PG_RETURN_TEXT_P(cstring_to_text(escontext.error_data->message)); +} + +/* Same, with non-default typmod */ +Datum +pg_input_error_message_mod(PG_FUNCTION_ARGS) +{ + text *txt = PG_GETARG_TEXT_PP(0); + Oid typoid = PG_GETARG_OID(1); + int32 typmod = PG_GETARG_INT32(2); + ErrorSaveContext escontext; + + /* Set up empty ErrorSaveContext, but enable details_wanted */ + memset(&escontext, 0, sizeof(escontext)); + escontext.type = T_ErrorSaveContext; + escontext.details_wanted = true; + + if (pg_input_is_valid_common(fcinfo, txt, typoid, typmod, + &escontext)) + PG_RETURN_NULL(); + + Assert(escontext.error_occurred); + Assert(escontext.error_data != NULL); + Assert(escontext.error_data->message != NULL); + + PG_RETURN_TEXT_P(cstring_to_text(escontext.error_data->message)); +} + +/* Common subroutine for the above */ +static bool +pg_input_is_valid_common(FunctionCallInfo fcinfo, + text *txt, Oid typoid, int32 typmod, + ErrorSaveContext *escontext) +{ + char *str = text_to_cstring(txt); + BasicIOData *my_extra; + Datum converted; + + /* + * We arrange to look up the needed I/O info just once per series of + * calls, assuming the data type doesn't change underneath us. 
+ */ + my_extra = (BasicIOData *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = + MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(BasicIOData)); + my_extra = (BasicIOData *) fcinfo->flinfo->fn_extra; + my_extra->typoid = InvalidOid; + } + + if (my_extra->typoid != typoid) + { + getTypeInputInfo(typoid, + &my_extra->typiofunc, + &my_extra->typioparam); + fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc, + fcinfo->flinfo->fn_mcxt); + my_extra->typoid = typoid; + } + + /* Now we can try to perform the conversion */ + return InputFunctionCallSafe(&my_extra->proc, + str, + my_extra->typioparam, + typmod, + (Node *) escontext, + &converted); +} + + /* * Is character a valid identifier start? * Must match scan.l's {ident_start} character class. diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index f9301b2627..1593e43e24 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -7060,6 +7060,21 @@ prorettype => 'regnamespace', proargtypes => 'text', prosrc => 'to_regnamespace' }, +{ oid => '8050', descr => 'test whether string is valid input for data type', + proname => 'pg_input_is_valid', provolatile => 's', prorettype => 'bool', + proargtypes => 'text regtype', prosrc => 'pg_input_is_valid' }, +{ oid => '8051', descr => 'test whether string is valid input for data type', + proname => 'pg_input_is_valid', provolatile => 's', prorettype => 'bool', + proargtypes => 'text regtype int4', prosrc => 'pg_input_is_valid_mod' }, +{ oid => '8052', + descr => 'get error message if string is not valid input for data type', + proname => 'pg_input_error_message', provolatile => 's', prorettype => 'text', + proargtypes => 'text regtype', prosrc => 'pg_input_error_message' }, +{ oid => '8053', + descr => 'get error message if string is not valid input for data type', + proname => 'pg_input_error_message', provolatile => 's', prorettype => 'text', + proargtypes => 'text regtype int4', 
prosrc => 'pg_input_error_message_mod' }, + { oid => '1268', descr => 'parse qualified identifier to array of identifiers', proname => 'parse_ident', prorettype => '_text', proargtypes => 'text bool', diff --git a/src/test/regress/expected/create_type.out b/src/test/regress/expected/create_type.out index 0dfc88c1c8..7383fcdbb1 100644 --- a/src/test/regress/expected/create_type.out +++ b/src/test/regress/expected/create_type.out @@ -249,6 +249,31 @@ select format_type('bpchar'::regtype, -1); bpchar (1 row) +-- Test non-error-throwing APIs using widget, which still throws errors +SELECT pg_input_is_valid('(1,2,3)', 'widget'); + pg_input_is_valid +------------------- + t +(1 row) + +SELECT pg_input_is_valid('(1,2)', 'widget'); -- hard error expected +ERROR: invalid input syntax for type widget: "(1,2)" +SELECT pg_input_is_valid('{"(1,2,3)"}', 'widget[]'); + pg_input_is_valid +------------------- + t +(1 row) + +SELECT pg_input_is_valid('{"(1,2)"}', 'widget[]'); -- hard error expected +ERROR: invalid input syntax for type widget: "(1,2)" +SELECT pg_input_is_valid('("(1,2,3)")', 'mytab'); + pg_input_is_valid +------------------- + t +(1 row) + +SELECT pg_input_is_valid('("(1,2)")', 'mytab'); -- hard error expected +ERROR: invalid input syntax for type widget: "(1,2)" -- Test creation of an operator over a user-defined type CREATE FUNCTION pt_in_widget(point, widget) RETURNS bool diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 548afb4438..2977045cc7 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -183,6 +183,11 @@ widget_in(PG_FUNCTION_ARGS) coord[i++] = p + 1; } + /* + * Note: DON'T convert this error to "soft" style (errsave/ereturn). We + * want this data type to stay permanently in the hard-error world so that + * it can be used for testing that such cases still work reasonably. 
+ */ if (i < NARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), diff --git a/src/test/regress/sql/create_type.sql b/src/test/regress/sql/create_type.sql index c6fc4f9029..c25018029c 100644 --- a/src/test/regress/sql/create_type.sql +++ b/src/test/regress/sql/create_type.sql @@ -192,6 +192,14 @@ select format_type('bpchar'::regtype, null); -- this behavior difference is intentional select format_type('bpchar'::regtype, -1); +-- Test non-error-throwing APIs using widget, which still throws errors +SELECT pg_input_is_valid('(1,2,3)', 'widget'); +SELECT pg_input_is_valid('(1,2)', 'widget'); -- hard error expected +SELECT pg_input_is_valid('{"(1,2,3)"}', 'widget[]'); +SELECT pg_input_is_valid('{"(1,2)"}', 'widget[]'); -- hard error expected +SELECT pg_input_is_valid('("(1,2,3)")', 'mytab'); +SELECT pg_input_is_valid('("(1,2)")', 'mytab'); -- hard error expected + -- Test creation of an operator over a user-defined type CREATE FUNCTION pt_in_widget(point, widget) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 495e449a9e..c011ebdfd9 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -90,14 +90,15 @@ typedef struct ArrayIteratorData } ArrayIteratorData; static bool array_isspace(char ch); -static int ArrayCount(const char *str, int *dim, char typdelim); -static void ReadArrayStr(char *arrayStr, const char *origStr, +static int ArrayCount(const char *str, int *dim, char typdelim, + Node *escontext); +static bool ReadArrayStr(char *arrayStr, const char *origStr, int nitems, int ndim, int *dim, FmgrInfo *inputproc, Oid typioparam, int32 typmod, char typdelim, int typlen, bool typbyval, char typalign, Datum *values, bool *nulls, - bool *hasnulls, int32 *nbytes); + bool *hasnulls, int32 *nbytes, Node *escontext); static void ReadArrayBinary(StringInfo buf, int nitems, FmgrInfo *receiveproc, Oid typioparam, int32 typmod, int typlen, bool typbyval, char typalign, @@ -177,6 
+178,7 @@ array_in(PG_FUNCTION_ARGS) Oid element_type = PG_GETARG_OID(1); /* type of an array * element */ int32 typmod = PG_GETARG_INT32(2); /* typmod for array elements */ + Node *escontext = fcinfo->context; int typlen; bool typbyval; char typalign; @@ -258,7 +260,7 @@ array_in(PG_FUNCTION_ARGS) break; /* no more dimension items */ p++; if (ndim >= MAXDIM) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", ndim + 1, MAXDIM))); @@ -266,7 +268,7 @@ array_in(PG_FUNCTION_ARGS) for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++) /* skip */ ; if (q == p) /* no digits? */ - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("\"[\" must introduce explicitly-specified array dimensions."))); @@ -280,7 +282,7 @@ array_in(PG_FUNCTION_ARGS) for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++) /* skip */ ; if (q == p) /* no digits? 
*/ - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Missing array dimension value."))); @@ -291,7 +293,7 @@ array_in(PG_FUNCTION_ARGS) lBound[ndim] = 1; } if (*q != ']') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Missing \"%s\" after array dimensions.", @@ -301,7 +303,7 @@ array_in(PG_FUNCTION_ARGS) ub = atoi(p); p = q + 1; if (ub < lBound[ndim]) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), errmsg("upper bound cannot be less than lower bound"))); @@ -313,11 +315,13 @@ array_in(PG_FUNCTION_ARGS) { /* No array dimensions, so intuit dimensions from brace structure */ if (*p != '{') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Array value must start with \"{\" or dimension information."))); - ndim = ArrayCount(p, dim, typdelim); + ndim = ArrayCount(p, dim, typdelim, escontext); + if (ndim < 0) + PG_RETURN_NULL(); for (i = 0; i < ndim; i++) lBound[i] = 1; } @@ -328,7 +332,7 @@ array_in(PG_FUNCTION_ARGS) /* If array dimensions are given, expect '=' operator */ if (strncmp(p, ASSGN, strlen(ASSGN)) != 0) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Missing \"%s\" after array dimensions.", @@ -342,20 +346,22 @@ array_in(PG_FUNCTION_ARGS) * were given */ if (*p != '{') - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Array contents must start with \"{\"."))); - ndim_braces = ArrayCount(p, dim_braces, typdelim); + ndim_braces = ArrayCount(p, dim_braces, typdelim, escontext); + if 
(ndim_braces < 0) + PG_RETURN_NULL(); if (ndim_braces != ndim) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Specified array dimensions do not match array contents."))); for (i = 0; i < ndim; ++i) { if (dim[i] != dim_braces[i]) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", string), errdetail("Specified array dimensions do not match array contents."))); @@ -372,8 +378,11 @@ array_in(PG_FUNCTION_ARGS) #endif /* This checks for overflow of the array dimensions */ - nitems = ArrayGetNItems(ndim, dim); - ArrayCheckBounds(ndim, dim, lBound); + nitems = ArrayGetNItemsSafe(ndim, dim, escontext); + if (nitems < 0) + PG_RETURN_NULL(); + if (!ArrayCheckBoundsSafe(ndim, dim, lBound, escontext)) + PG_RETURN_NULL(); /* Empty array? */ if (nitems == 0) @@ -381,13 +390,14 @@ array_in(PG_FUNCTION_ARGS) dataPtr = (Datum *) palloc(nitems * sizeof(Datum)); nullsPtr = (bool *) palloc(nitems * sizeof(bool)); - ReadArrayStr(p, string, - nitems, ndim, dim, - &my_extra->proc, typioparam, typmod, - typdelim, - typlen, typbyval, typalign, - dataPtr, nullsPtr, - &hasnulls, &nbytes); + if (!ReadArrayStr(p, string, + nitems, ndim, dim, + &my_extra->proc, typioparam, typmod, + typdelim, + typlen, typbyval, typalign, + dataPtr, nullsPtr, + &hasnulls, &nbytes, escontext)) + PG_RETURN_NULL(); if (hasnulls) { dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems); @@ -451,9 +461,12 @@ array_isspace(char ch) * * Returns number of dimensions as function result. The axis lengths are * returned in dim[], which must be of size MAXDIM. + * + * If we detect an error, fill *escontext with error details and return -1 + * (unless escontext isn't provided, in which case errors will be thrown). 
*/ static int -ArrayCount(const char *str, int *dim, char typdelim) +ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) { int nest_level = 0, i; @@ -488,11 +501,10 @@ ArrayCount(const char *str, int *dim, char typdelim) { case '\0': /* Signal a premature end of the string */ - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected end of input."))); - break; case '\\': /* @@ -504,7 +516,7 @@ ArrayCount(const char *str, int *dim, char typdelim) parse_state != ARRAY_ELEM_STARTED && parse_state != ARRAY_QUOTED_ELEM_STARTED && parse_state != ARRAY_ELEM_DELIMITED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", @@ -515,7 +527,7 @@ ArrayCount(const char *str, int *dim, char typdelim) if (*(ptr + 1)) ptr++; else - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected end of input."))); @@ -530,7 +542,7 @@ ArrayCount(const char *str, int *dim, char typdelim) if (parse_state != ARRAY_LEVEL_STARTED && parse_state != ARRAY_QUOTED_ELEM_STARTED && parse_state != ARRAY_ELEM_DELIMITED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected array element."))); @@ -551,14 +563,14 @@ ArrayCount(const char *str, int *dim, char typdelim) if (parse_state != ARRAY_NO_LEVEL && parse_state != ARRAY_LEVEL_STARTED && parse_state != ARRAY_LEVEL_DELIMITED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", '{'))); parse_state = ARRAY_LEVEL_STARTED; if (nest_level >= MAXDIM) - ereport(ERROR, + ereturn(escontext, -1, 
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", nest_level + 1, MAXDIM))); @@ -581,14 +593,14 @@ ArrayCount(const char *str, int *dim, char typdelim) parse_state != ARRAY_QUOTED_ELEM_COMPLETED && parse_state != ARRAY_LEVEL_COMPLETED && !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED)) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", '}'))); parse_state = ARRAY_LEVEL_COMPLETED; if (nest_level == 0) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unmatched \"%c\" character.", '}'))); @@ -596,7 +608,7 @@ ArrayCount(const char *str, int *dim, char typdelim) if (nelems_last[nest_level] != 0 && nelems[nest_level] != nelems_last[nest_level]) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Multidimensional arrays must have " @@ -630,7 +642,7 @@ ArrayCount(const char *str, int *dim, char typdelim) parse_state != ARRAY_ELEM_COMPLETED && parse_state != ARRAY_QUOTED_ELEM_COMPLETED && parse_state != ARRAY_LEVEL_COMPLETED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", @@ -653,7 +665,7 @@ ArrayCount(const char *str, int *dim, char typdelim) if (parse_state != ARRAY_LEVEL_STARTED && parse_state != ARRAY_ELEM_STARTED && parse_state != ARRAY_ELEM_DELIMITED) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected array element."))); @@ -673,7 +685,7 @@ ArrayCount(const char *str, int *dim, char typdelim) while (*ptr) { if (!array_isspace(*ptr++)) - 
ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Junk after closing right brace."))); @@ -713,11 +725,16 @@ ArrayCount(const char *str, int *dim, char typdelim) * *hasnulls: set true iff there are any null elements. * *nbytes: set to total size of data area needed (including alignment * padding but not including array header overhead). + * *escontext: if this points to an ErrorSaveContext, details of + * any error are reported there. + * + * Result: + * true for success, false for failure (if escontext is provided). * * Note that values[] and nulls[] are allocated by the caller, and must have * nitems elements. */ -static void +static bool ReadArrayStr(char *arrayStr, const char *origStr, int nitems, @@ -733,7 +750,8 @@ ReadArrayStr(char *arrayStr, Datum *values, bool *nulls, bool *hasnulls, - int32 *nbytes) + int32 *nbytes, + Node *escontext) { int i, nest_level = 0; @@ -784,7 +802,7 @@ ReadArrayStr(char *arrayStr, { case '\0': /* Signal a premature end of the string */ - ereport(ERROR, + ereturn(escontext, false, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -793,7 +811,7 @@ ReadArrayStr(char *arrayStr, /* Skip backslash, copy next character as-is. 
*/ srcptr++; if (*srcptr == '\0') - ereport(ERROR, + ereturn(escontext, false, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -823,7 +841,7 @@ ReadArrayStr(char *arrayStr, if (!in_quotes) { if (nest_level >= ndim) - ereport(ERROR, + ereturn(escontext, false, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -838,7 +856,7 @@ ReadArrayStr(char *arrayStr, if (!in_quotes) { if (nest_level == 0) - ereport(ERROR, + ereturn(escontext, false, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -891,7 +909,7 @@ ReadArrayStr(char *arrayStr, *dstendptr = '\0'; if (i < 0 || i >= nitems) - ereport(ERROR, + ereturn(escontext, false, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", origStr))); @@ -900,14 +918,20 @@ ReadArrayStr(char *arrayStr, pg_strcasecmp(itemstart, "NULL") == 0) { /* it's a NULL item */ - values[i] = InputFunctionCall(inputproc, NULL, - typioparam, typmod); + if (!InputFunctionCallSafe(inputproc, NULL, + typioparam, typmod, + escontext, + &values[i])) + return false; nulls[i] = true; } else { - values[i] = InputFunctionCall(inputproc, itemstart, - typioparam, typmod); + if (!InputFunctionCallSafe(inputproc, itemstart, + typioparam, typmod, + escontext, + &values[i])) + return false; nulls[i] = false; } } @@ -930,7 +954,7 @@ ReadArrayStr(char *arrayStr, totbytes = att_align_nominal(totbytes, typalign); /* check for overflow of total request */ if (!AllocSizeIsValid(totbytes)) - ereport(ERROR, + ereturn(escontext, false, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array size exceeds the maximum allowed (%d)", (int) MaxAllocSize))); @@ -938,6 +962,7 @@ ReadArrayStr(char *arrayStr, } *hasnulls = hasnull; *nbytes = totbytes; + return true; } diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c index 051169a149..c52adc6259 100644 
--- a/src/backend/utils/adt/arrayutils.c +++ b/src/backend/utils/adt/arrayutils.c @@ -74,6 +74,16 @@ ArrayGetOffset0(int n, const int *tup, const int *scale) */ int ArrayGetNItems(int ndim, const int *dims) +{ + return ArrayGetNItemsSafe(ndim, dims, NULL); +} + +/* + * This entry point can return the error into an ErrorSaveContext + * instead of throwing an exception. -1 is returned after an error. + */ +int +ArrayGetNItemsSafe(int ndim, const int *dims, NodePtr escontext) { int32 ret; int i; @@ -89,7 +99,7 @@ ArrayGetNItems(int ndim, const int *dims) /* A negative dimension implies that UB-LB overflowed ... */ if (dims[i] < 0) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array size exceeds the maximum allowed (%d)", (int) MaxArraySize))); @@ -98,14 +108,14 @@ ArrayGetNItems(int ndim, const int *dims) ret = (int32) prod; if ((int64) ret != prod) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array size exceeds the maximum allowed (%d)", (int) MaxArraySize))); } Assert(ret >= 0); if ((Size) ret > MaxArraySize) - ereport(ERROR, + ereturn(escontext, -1, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array size exceeds the maximum allowed (%d)", (int) MaxArraySize))); @@ -126,6 +136,17 @@ ArrayGetNItems(int ndim, const int *dims) */ void ArrayCheckBounds(int ndim, const int *dims, const int *lb) +{ + (void) ArrayCheckBoundsSafe(ndim, dims, lb, NULL); +} + +/* + * This entry point can return the error into an ErrorSaveContext + * instead of throwing an exception. 
+ */ +bool +ArrayCheckBoundsSafe(int ndim, const int *dims, const int *lb, + NodePtr escontext) { int i; @@ -135,11 +156,13 @@ ArrayCheckBounds(int ndim, const int *dims, const int *lb) int32 sum PG_USED_FOR_ASSERTS_ONLY; if (pg_add_s32_overflow(dims[i], lb[i], &sum)) - ereport(ERROR, + ereturn(escontext, false, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("array lower bound is too large: %d", lb[i]))); } + + return true; } /* diff --git a/src/backend/utils/adt/bool.c b/src/backend/utils/adt/bool.c index cd7335287f..e291672ae4 100644 --- a/src/backend/utils/adt/bool.c +++ b/src/backend/utils/adt/bool.c @@ -148,13 +148,10 @@ boolin(PG_FUNCTION_ARGS) if (parse_bool_with_len(str, len, &result)) PG_RETURN_BOOL(result); - ereport(ERROR, + ereturn(fcinfo->context, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s: \"%s\"", "boolean", in_str))); - - /* not reached */ - PG_RETURN_BOOL(false); } /* diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index 42ddae99ef..e1837bee71 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -291,7 +291,7 @@ int4in(PG_FUNCTION_ARGS) { char *num = PG_GETARG_CSTRING(0); - PG_RETURN_INT32(pg_strtoint32(num)); + PG_RETURN_INT32(pg_strtoint32_safe(num, fcinfo->context)); } /* diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index a64422c8d0..0de0bed0e8 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -166,8 +166,11 @@ invalid_syntax: /* * Convert input string to a signed 32 bit integer. * - * Allows any number of leading or trailing whitespace characters. Will throw - * ereport() upon bad input format or overflow. + * Allows any number of leading or trailing whitespace characters. + * + * pg_strtoint32() will throw ereport() upon bad input format or overflow; + * while pg_strtoint32_safe() instead returns such complaints in *escontext, + * if it's an ErrorSaveContext. 
* * NB: Accumulate input as an unsigned number, to deal with two's complement * representation of the most negative number, which can't be represented as a @@ -175,6 +178,12 @@ invalid_syntax: */ int32 pg_strtoint32(const char *s) +{ + return pg_strtoint32_safe(s, NULL); +} + +int32 +pg_strtoint32_safe(const char *s, Node *escontext) { const char *ptr = s; uint32 tmp = 0; @@ -227,18 +236,16 @@ pg_strtoint32(const char *s) return (int32) tmp; out_of_range: - ereport(ERROR, + ereturn(escontext, 0, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value \"%s\" is out of range for type %s", s, "integer"))); invalid_syntax: - ereport(ERROR, + ereturn(escontext, 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s: \"%s\"", "integer", s))); - - return 0; /* keep compiler quiet */ } /* diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index db843a0fbf..bdafcff02d 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -77,6 +77,7 @@ record_in(PG_FUNCTION_ARGS) char *string = PG_GETARG_CSTRING(0); Oid tupType = PG_GETARG_OID(1); int32 tupTypmod = PG_GETARG_INT32(2); + Node *escontext = fcinfo->context; HeapTupleHeader result; TupleDesc tupdesc; HeapTuple tuple; @@ -100,7 +101,7 @@ record_in(PG_FUNCTION_ARGS) * supply a valid typmod, and then we can do something useful for RECORD. 
*/ if (tupType == RECORDOID && tupTypmod < 0) - ereport(ERROR, + ereturn(escontext, (Datum) 0, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("input of anonymous composite types is not implemented"))); @@ -152,10 +153,13 @@ record_in(PG_FUNCTION_ARGS) while (*ptr && isspace((unsigned char) *ptr)) ptr++; if (*ptr++ != '(') - ereport(ERROR, + { + errsave(escontext, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), errdetail("Missing left parenthesis."))); + goto fail; + } initStringInfo(&buf); @@ -181,10 +185,13 @@ record_in(PG_FUNCTION_ARGS) ptr++; else /* *ptr must be ')' */ - ereport(ERROR, + { + errsave(escontext, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), errdetail("Too few columns."))); + goto fail; + } } /* Check for null: completely empty input means null */ @@ -204,19 +211,25 @@ record_in(PG_FUNCTION_ARGS) char ch = *ptr++; if (ch == '\0') - ereport(ERROR, + { + errsave(escontext, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), errdetail("Unexpected end of input."))); + goto fail; + } if (ch == '\\') { if (*ptr == '\0') - ereport(ERROR, + { + errsave(escontext, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), errdetail("Unexpected end of input."))); + goto fail; + } appendStringInfoChar(&buf, *ptr++); } else if (ch == '"') @@ -252,10 +265,13 @@ record_in(PG_FUNCTION_ARGS) column_info->column_type = column_type; } - values[i] = InputFunctionCall(&column_info->proc, - column_data, - column_info->typioparam, - att->atttypmod); + if (!InputFunctionCallSafe(&column_info->proc, + column_data, + column_info->typioparam, + att->atttypmod, + escontext, + &values[i])) + goto fail; /* * Prep for next column @@ -264,18 +280,24 @@ record_in(PG_FUNCTION_ARGS) } if (*ptr++ != ')') - ereport(ERROR, + { + errsave(escontext, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), 
errmsg("malformed record literal: \"%s\"", string), errdetail("Too many columns."))); + goto fail; + } /* Allow trailing whitespace */ while (*ptr && isspace((unsigned char) *ptr)) ptr++; if (*ptr) - ereport(ERROR, + { + errsave(escontext, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed record literal: \"%s\"", string), errdetail("Junk after right parenthesis."))); + goto fail; + } tuple = heap_form_tuple(tupdesc, values, nulls); @@ -294,6 +316,11 @@ record_in(PG_FUNCTION_ARGS) ReleaseTupleDesc(tupdesc); PG_RETURN_HEAPTUPLEHEADER(result); + + /* exit here once we've done lookup_rowtype_tupdesc */ +fail: + ReleaseTupleDesc(tupdesc); + PG_RETURN_NULL(); } /* diff --git a/src/include/utils/array.h b/src/include/utils/array.h index 2f794d1168..5ecb436a08 100644 --- a/src/include/utils/array.h +++ b/src/include/utils/array.h @@ -447,7 +447,11 @@ extern void array_free_iterator(ArrayIterator iterator); extern int ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx); extern int ArrayGetOffset0(int n, const int *tup, const int *scale); extern int ArrayGetNItems(int ndim, const int *dims); +extern int ArrayGetNItemsSafe(int ndim, const int *dims, + NodePtr escontext); extern void ArrayCheckBounds(int ndim, const int *dims, const int *lb); +extern bool ArrayCheckBoundsSafe(int ndim, const int *dims, const int *lb, + NodePtr escontext); extern void mda_get_range(int n, int *span, const int *st, const int *endp); extern void mda_get_prod(int n, const int *range, int *prod); extern void mda_get_offset_values(int n, int *dist, const int *prod, const int *span); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 81631f1645..fbfd8375e3 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -45,6 +45,7 @@ extern int namestrcmp(Name name, const char *str); /* numutils.c */ extern int16 pg_strtoint16(const char *s); extern int32 pg_strtoint32(const char *s); +extern int32 pg_strtoint32_safe(const 
char *s, Node *escontext); extern int64 pg_strtoint64(const char *s); extern int pg_itoa(int16 i, char *a); extern int pg_ultoa_n(uint32 value, char *a); diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index 97920f38c2..a2f9d7ed16 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -182,6 +182,31 @@ SELECT a,b,c FROM arrtest; [4:4]={NULL} | {3,4} | {foo,new_word} (3 rows) +-- test non-error-throwing API +SELECT pg_input_is_valid('{1,2,3}', 'integer[]'); + pg_input_is_valid +------------------- + t +(1 row) + +SELECT pg_input_is_valid('{1,2', 'integer[]'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_is_valid('{1,zed}', 'integer[]'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_error_message('{1,zed}', 'integer[]'); + pg_input_error_message +---------------------------------------------- + invalid input syntax for type integer: "zed" +(1 row) + -- test mixed slice/scalar subscripting select '{{1,2,3},{4,5,6},{7,8,9}}'::int[]; int4 diff --git a/src/test/regress/expected/boolean.out b/src/test/regress/expected/boolean.out index 4728fe2dfd..977124b20b 100644 --- a/src/test/regress/expected/boolean.out +++ b/src/test/regress/expected/boolean.out @@ -142,6 +142,25 @@ SELECT bool '' AS error; ERROR: invalid input syntax for type boolean: "" LINE 1: SELECT bool '' AS error; ^ +-- Also try it with non-error-throwing API +SELECT pg_input_is_valid('true', 'bool'); + pg_input_is_valid +------------------- + t +(1 row) + +SELECT pg_input_is_valid('asdf', 'bool'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_error_message('junk', 'bool'); + pg_input_error_message +----------------------------------------------- + invalid input syntax for type boolean: "junk" +(1 row) + -- and, or, not in qualifications SELECT bool 't' or bool 'f' AS true; true diff --git a/src/test/regress/expected/int4.out 
b/src/test/regress/expected/int4.out index fbcc0e8d9e..b98007bd7a 100644 --- a/src/test/regress/expected/int4.out +++ b/src/test/regress/expected/int4.out @@ -45,6 +45,31 @@ SELECT * FROM INT4_TBL; -2147483647 (5 rows) +-- Also try it with non-error-throwing API +SELECT pg_input_is_valid('34', 'int4'); + pg_input_is_valid +------------------- + t +(1 row) + +SELECT pg_input_is_valid('asdf', 'int4'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_is_valid('1000000000000', 'int4'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_error_message('1000000000000', 'int4'); + pg_input_error_message +-------------------------------------------------------- + value "1000000000000" is out of range for type integer +(1 row) + SELECT i.* FROM INT4_TBL i WHERE i.f1 <> int2 '0'; f1 ------------- diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out index a4cc2d8c12..1bcd2b499c 100644 --- a/src/test/regress/expected/rowtypes.out +++ b/src/test/regress/expected/rowtypes.out @@ -69,6 +69,32 @@ ERROR: malformed record literal: "(Joe,Blow) /" LINE 1: select '(Joe,Blow) /'::fullname; ^ DETAIL: Junk after right parenthesis. 
+-- test non-error-throwing API +create type twoints as (r integer, i integer); +SELECT pg_input_is_valid('(1,2)', 'twoints'); + pg_input_is_valid +------------------- + t +(1 row) + +SELECT pg_input_is_valid('(1,2', 'twoints'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_is_valid('(1,zed)', 'twoints'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_error_message('(1,zed)', 'twoints'); + pg_input_error_message +---------------------------------------------- + invalid input syntax for type integer: "zed" +(1 row) + create temp table quadtable(f1 int, q quad); insert into quadtable values (1, ((3.3,4.4),(5.5,6.6))); insert into quadtable values (2, ((null,4.4),(5.5,6.6))); diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index 791af5c0ce..38e8dd440b 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -113,6 +113,12 @@ SELECT a FROM arrtest WHERE a[2] IS NULL; DELETE FROM arrtest WHERE a[2] IS NULL AND b IS NULL; SELECT a,b,c FROM arrtest; +-- test non-error-throwing API +SELECT pg_input_is_valid('{1,2,3}', 'integer[]'); +SELECT pg_input_is_valid('{1,2', 'integer[]'); +SELECT pg_input_is_valid('{1,zed}', 'integer[]'); +SELECT pg_input_error_message('{1,zed}', 'integer[]'); + -- test mixed slice/scalar subscripting select '{{1,2,3},{4,5,6},{7,8,9}}'::int[]; select ('{{1,2,3},{4,5,6},{7,8,9}}'::int[])[1:2][2]; diff --git a/src/test/regress/sql/boolean.sql b/src/test/regress/sql/boolean.sql index 4dd47aaf9d..dfaa55dd0f 100644 --- a/src/test/regress/sql/boolean.sql +++ b/src/test/regress/sql/boolean.sql @@ -62,6 +62,11 @@ SELECT bool '000' AS error; SELECT bool '' AS error; +-- Also try it with non-error-throwing API +SELECT pg_input_is_valid('true', 'bool'); +SELECT pg_input_is_valid('asdf', 'bool'); +SELECT pg_input_error_message('junk', 'bool'); + -- and, or, not in qualifications SELECT bool 't' or bool 'f' AS true; diff --git 
a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql index f19077f3da..54420818de 100644 --- a/src/test/regress/sql/int4.sql +++ b/src/test/regress/sql/int4.sql @@ -17,6 +17,12 @@ INSERT INTO INT4_TBL(f1) VALUES (''); SELECT * FROM INT4_TBL; +-- Also try it with non-error-throwing API +SELECT pg_input_is_valid('34', 'int4'); +SELECT pg_input_is_valid('asdf', 'int4'); +SELECT pg_input_is_valid('1000000000000', 'int4'); +SELECT pg_input_error_message('1000000000000', 'int4'); + SELECT i.* FROM INT4_TBL i WHERE i.f1 <> int2 '0'; SELECT i.* FROM INT4_TBL i WHERE i.f1 <> int4 '0'; diff --git a/src/test/regress/sql/rowtypes.sql b/src/test/regress/sql/rowtypes.sql index ad5b7e128f..4cd6a49215 100644 --- a/src/test/regress/sql/rowtypes.sql +++ b/src/test/regress/sql/rowtypes.sql @@ -31,6 +31,13 @@ select '[]'::fullname; -- bad select ' (Joe,Blow) '::fullname; -- ok, extra whitespace select '(Joe,Blow) /'::fullname; -- bad +-- test non-error-throwing API +create type twoints as (r integer, i integer); +SELECT pg_input_is_valid('(1,2)', 'twoints'); +SELECT pg_input_is_valid('(1,2', 'twoints'); +SELECT pg_input_is_valid('(1,zed)', 'twoints'); +SELECT pg_input_error_message('(1,zed)', 'twoints'); + create temp table quadtable(f1 int, q quad); insert into quadtable values (1, ((3.3,4.4),(5.5,6.6)));
pgsql-hackers by date: