Re: Error-safe user functions - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Error-safe user functions |
Date | |
Msg-id | 3564577.1671142683@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Error-safe user functions (Robert Haas <robertmhaas@gmail.com>) |
Responses |
Re: Error-safe user functions
|
List | pgsql-hackers |
Robert Haas <robertmhaas@gmail.com> writes: > Tom, I just want to extend huge thanks to you for working on this > infrastructure. Thanks. I agree it's an important bit of work. I'm going to step back from this for now and get on with other work, but before that I thought there was one more input function I should look at: xml_in, because xml.c is such a hairy can of worms. It turns out to be not too bad, given our design principle that only "bad input" errors should be reported softly. xml_parse() now has two different ways of reporting errors depending on whether they're hard or soft, but it didn't take an undue amount of refactoring to make that work. While fixing that, my attention was drawn to wellformed_xml(), whose error handling is unbelievably horrid: it traps any longjmp whatsoever (query cancel, for instance) and reports it as ill-formed XML. 0002 attached makes use of this new code to get rid of the need for any PG_TRY there at all; instead, soft errors result in a "false" return but hard errors are allowed to propagate. xml_is_document was much more careful, but we can change it the same way to save code and cycles. regards, tom lane diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 3884babc1b..4b9877ee0b 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -119,9 +119,10 @@ struct PgXmlErrorContext static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt); +static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt, + int sqlcode, const char *msg); static void xml_errorHandler(void *data, xmlErrorPtr error); -static void xml_ereport_by_code(int level, int sqlcode, - const char *msg, int code); +static int errdetail_for_xml_code(int code); static void chopStringInfoNewlines(StringInfo str); static void appendStringInfoLineSeparator(StringInfo str); @@ -143,7 +144,8 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone); static bool xml_doctype_in_content(const xmlChar *str); static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, - bool preserve_whitespace, int encoding); + bool preserve_whitespace, int encoding, + Node *escontext); static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt); static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ArrayBuildState *astate, @@ -261,14 +263,18 @@ xml_in(PG_FUNCTION_ARGS) xmltype *vardata; xmlDocPtr doc; + /* Build the result object. */ vardata = (xmltype *) cstring_to_text(s); /* - * Parse the data to check if it is well-formed XML data. Assume that - * ERROR occurred if parsing failed. + * Parse the data to check if it is well-formed XML data. + * + * Note: we don't need to worry about whether a soft error is detected. */ - doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding()); - xmlFreeDoc(doc); + doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(), + fcinfo->context); + if (doc != NULL) + xmlFreeDoc(doc); PG_RETURN_XML_P(vardata); #else @@ -323,9 +329,10 @@ xml_out_internal(xmltype *x, pg_enc target_encoding) return buf.data; } - xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR, - "could not parse XML declaration in stored value", - res_code); + ereport(WARNING, + errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("could not parse XML declaration in stored value"), + errdetail_for_xml_code(res_code)); #endif return str; } @@ -392,7 +399,7 @@ xml_recv(PG_FUNCTION_ARGS) * Parse the data to check if it is well-formed XML data. Assume that * xml_parse will throw ERROR if not. */ - doc = xml_parse(result, xmloption, true, encoding); + doc = xml_parse(result, xmloption, true, encoding, NULL); xmlFreeDoc(doc); /* Now that we know what we're dealing with, convert to server encoding */ @@ -754,7 +761,7 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace) xmlDocPtr doc; doc = xml_parse(data, xmloption_arg, preserve_whitespace, - GetDatabaseEncoding()); + GetDatabaseEncoding(), NULL); xmlFreeDoc(doc); return (xmltype *) data; @@ -895,7 +902,7 @@ xml_is_document(xmltype *arg) PG_TRY(); { doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, - GetDatabaseEncoding()); + GetDatabaseEncoding(), NULL); result = true; } PG_CATCH(); @@ -1500,17 +1507,26 @@ xml_doctype_in_content(const xmlChar *str) /* - * Convert a C string to XML internal representation + * Convert a text object to XML internal representation + * + * data is the source data (must not be toasted!), encoding is its encoding, + * and xmloption_arg and preserve_whitespace are options for the + * transformation. + * + * Errors normally result in ereport(ERROR), but if escontext is an + * ErrorSaveContext, then "safe" errors are reported there instead, and the + * caller must check SOFT_ERROR_OCCURRED() to see whether that happened. * * Note: it is caller's responsibility to xmlFreeDoc() the result, - * else a permanent memory leak will ensue! + * else a permanent memory leak will ensue! But note the result could + * be NULL after a soft error. * * TODO maybe libxml2's xmlreader is better? (do not construct DOM, * yet do not use SAX - see xmlreader.c) */ static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, - int encoding) + int encoding, Node *escontext) { int32 len; xmlChar *string; @@ -1519,9 +1535,20 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, volatile xmlParserCtxtPtr ctxt = NULL; volatile xmlDocPtr doc = NULL; + /* + * This step looks annoyingly redundant, but we must do it to have a + * null-terminated string in case encoding conversion isn't required. + */ len = VARSIZE_ANY_EXHDR(data); /* will be useful later */ string = xml_text2xmlChar(data); + /* + * If the data isn't UTF8, we must translate before giving it to libxml. + * + * XXX ideally, we'd catch any encoding conversion failure and return a + * soft error. However, failure to convert to UTF8 should be pretty darn + * rare, so for now this is left undone. + */ utf8string = pg_do_encoding_conversion(string, len, encoding, @@ -1539,6 +1566,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, xmlChar *version = NULL; int standalone = 0; + /* Any errors here are reported as hard ereport's */ xmlInitParser(); ctxt = xmlNewParserCtxt(); @@ -1555,9 +1583,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, res_code = parse_xml_decl(utf8string, &count, &version, NULL, &standalone); if (res_code != 0) - xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT, - "invalid XML content: invalid XML declaration", - res_code); + { + errsave(escontext, + errcode(ERRCODE_INVALID_XML_CONTENT), + errmsg_internal("invalid XML content: invalid XML declaration"), + errdetail_for_xml_code(res_code)); + goto fail; + } /* Is there a DOCTYPE element? */ if (xml_doctype_in_content(utf8string + count)) @@ -1580,20 +1612,30 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); if (doc == NULL || xmlerrcxt->err_occurred) { - /* Use original option to decide which error code to throw */ + /* Use original option to decide which error code to report */ if (xmloption_arg == XMLOPTION_DOCUMENT) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, + xml_errsave(escontext, xmlerrcxt, + ERRCODE_INVALID_XML_DOCUMENT, "invalid XML document"); else - xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, + xml_errsave(escontext, xmlerrcxt, + ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); + goto fail; } } else { doc = xmlNewDoc(version); + if (doc == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XML document"); + Assert(doc->encoding == NULL); doc->encoding = xmlStrdup((const xmlChar *) "UTF-8"); + if (doc->encoding == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XML document"); doc->standalone = standalone; /* allow empty content */ @@ -1602,10 +1644,17 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL); if (res_code != 0 || xmlerrcxt->err_occurred) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, + { + xml_errsave(escontext, xmlerrcxt, + ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); + goto fail; + } } } + +fail: + ; } PG_CATCH(); { @@ -1745,6 +1794,44 @@ xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg) } +/* + * xml_errsave --- save an XML-related error + * + * If escontext is an ErrorSaveContext, error details are saved into it, + * and control returns normally. + * + * Otherwise, the error is thrown, so that this is equivalent to + * xml_ereport() with level == ERROR. + * + * This should be used only for errors that we're sure we do not need + * a transaction abort to clean up after. + */ +static void +xml_errsave(Node *escontext, PgXmlErrorContext *errcxt, + int sqlcode, const char *msg) +{ + char *detail; + + /* Defend against someone passing us a bogus context struct */ + if (errcxt->magic != ERRCXT_MAGIC) + elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext"); + + /* Flag that the current libxml error has been reported */ + errcxt->err_occurred = false; + + /* Include detail only if we have some text from libxml */ + if (errcxt->err_buf.len > 0) + detail = errcxt->err_buf.data; + else + detail = NULL; + + errsave(escontext, + (errcode(sqlcode), + errmsg_internal("%s", msg), + detail ? errdetail_internal("%s", detail) : 0)); +} + + /* * Error handler for libxml errors and warnings */ @@ -1917,15 +2004,16 @@ xml_errorHandler(void *data, xmlErrorPtr error) /* - * Wrapper for "ereport" function for XML-related errors. The "msg" - * is the SQL-level message; some can be adopted from the SQL/XML - * standard. This function uses "code" to create a textual detail - * message. At the moment, we only need to cover those codes that we + * Convert libxml error codes into textual errdetail messages. + * + * This should be called within an ereport or errsave invocation, + * just as errdetail would be. + * + * At the moment, we only need to cover those codes that we * may raise in this file. */ -static void -xml_ereport_by_code(int level, int sqlcode, - const char *msg, int code) +static int +errdetail_for_xml_code(int code) { const char *det; @@ -1954,10 +2042,7 @@ xml_ereport_by_code(int level, int sqlcode, break; } - ereport(level, - (errcode(sqlcode), - errmsg_internal("%s", msg), - errdetail(det, code))); + return errdetail(det, code); } @@ -4241,7 +4326,7 @@ wellformed_xml(text *data, XmlOptionType xmloption_arg) /* We want to catch any exceptions and return false */ PG_TRY(); { - doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding()); + doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(), NULL); result = true; } PG_CATCH(); diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 948b4e702c..a672e24dae 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -18,6 +18,37 @@ SELECT * FROM xmltest; 2 | <value>two</value> (2 rows) +-- test non-throwing API, too +SELECT pg_input_is_valid('<value>one</value>', 'xml'); + pg_input_is_valid +------------------- + t +(1 row) + +SELECT pg_input_is_valid('<value>one</', 'xml'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_error_message('<value>one</', 'xml'); + pg_input_error_message +------------------------ + invalid XML content +(1 row) + +SELECT pg_input_is_valid('<?xml version="1.0" standalone="y"?><foo/>', 'xml'); + pg_input_is_valid +------------------- + f +(1 row) + +SELECT pg_input_error_message('<?xml version="1.0" standalone="y"?><foo/>', 'xml'); + pg_input_error_message +---------------------------------------------- + invalid XML content: invalid XML declaration +(1 row) + SELECT xmlcomment('test'); xmlcomment ------------- diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 484e3637e4..ddff459297 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -9,6 +9,13 @@ INSERT INTO xmltest VALUES (3, '<wrong'); SELECT * FROM xmltest; +-- test non-throwing API, too +SELECT pg_input_is_valid('<value>one</value>', 'xml'); +SELECT pg_input_is_valid('<value>one</', 'xml'); +SELECT pg_input_error_message('<value>one</', 'xml'); +SELECT pg_input_is_valid('<?xml version="1.0" standalone="y"?><foo/>', 'xml'); +SELECT pg_input_error_message('<?xml version="1.0" standalone="y"?><foo/>', 'xml'); + SELECT xmlcomment('test'); SELECT xmlcomment('-test'); diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 4b9877ee0b..1928fcc2f0 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -81,6 +81,7 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/execnodes.h" +#include "nodes/miscnodes.h" #include "nodes/nodeFuncs.h" #include "utils/array.h" #include "utils/builtins.h" @@ -894,41 +895,18 @@ bool xml_is_document(xmltype *arg) { #ifdef USE_LIBXML - bool result; - volatile xmlDocPtr doc = NULL; - MemoryContext ccxt = CurrentMemoryContext; - - /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */ - PG_TRY(); - { - doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, - GetDatabaseEncoding(), NULL); - result = true; - } - PG_CATCH(); - { - ErrorData *errdata; - MemoryContext ecxt; - - ecxt = MemoryContextSwitchTo(ccxt); - errdata = CopyErrorData(); - if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT) - { - FlushErrorState(); - result = false; - } - else - { - MemoryContextSwitchTo(ecxt); - PG_RE_THROW(); - } - } - PG_END_TRY(); + xmlDocPtr doc; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + /* + * We'll report "true" if no soft error is reported by xml_parse(). + */ + doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, + GetDatabaseEncoding(), (Node *) &escontext); if (doc) xmlFreeDoc(doc); - return result; + return !escontext.error_occurred; #else /* not USE_LIBXML */ NO_XML_SUPPORT(); return false; @@ -4320,26 +4298,18 @@ xpath_exists(PG_FUNCTION_ARGS) static bool wellformed_xml(text *data, XmlOptionType xmloption_arg) { - bool result; - volatile xmlDocPtr doc = NULL; - - /* We want to catch any exceptions and return false */ - PG_TRY(); - { - doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(), NULL); - result = true; - } - PG_CATCH(); - { - FlushErrorState(); - result = false; - } - PG_END_TRY(); + xmlDocPtr doc; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + /* + * We'll report "true" if no soft error is reported by xml_parse(). + */ + doc = xml_parse(data, xmloption_arg, true, + GetDatabaseEncoding(), (Node *) &escontext); if (doc) xmlFreeDoc(doc); - return result; + return !escontext.error_occurred; } #endif
pgsql-hackers by date: