Re: Adding xpath_exists function - Mailing list pgsql-hackers
From | Mike Fowler |
---|---|
Subject | Re: Adding xpath_exists function |
Date | |
Msg-id | 4BE921A7.7060401@mlfowler.com Whole thread Raw |
In response to | Re: Adding xpath_exists function (Robert Haas <robertmhaas@gmail.com>) |
Responses |
Re: Adding xpath_exists function
|
List | pgsql-hackers |
Robert Haas wrote: > Please email your patch to the list (replying to this email is fine) > and add it here: > https://commitfest.postgresql.org/action/commitfest_view/open > Here's my patch, developed against HEAD, that adds the function 'xpath_exists'. The function is a lot simpler than originally thought, so none of the string manipulation previously discussed was required. I've also included some regression tests that test the function with and without xml namespaces. I should note that before I added my tests all existing tests passed. One observation that can be made is that I've largely copied the existing xpath function and altered it to use a different method from the libxml API. I've done it to save me redoing all the namespace handling, however it's apparent to me that if we wanted to expose more of the libxml api we will quickly start having a lot of duplicate code. I notice that refactoring existing code whilst adding new code is generally frowned upon, so once this patch is accepted I will look to refactor the xpath and xpath_exists function. I could even add an xpath_count method at the same time ;) . Thanks in advance for any and all feedback, -- Mike Fowler Registered Linux user: 379787 "I could be a genius if I just put my mind to it, and I, I could do anything, if only I could get 'round to it" -PULP 'Glory Days' Index: src/backend/utils/adt/xml.c =================================================================== RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/backend/utils/adt/xml.c,v retrieving revision 1.97 diff -c -r1.97 xml.c *** src/backend/utils/adt/xml.c 3 Mar 2010 17:29:45 -0000 1.97 --- src/backend/utils/adt/xml.c 11 May 2010 07:54:53 -0000 *************** *** 3495,3497 **** --- 3495,3670 ---- return 0; #endif } + + /* + * Determines if the node specified by the supplied XPath exists + * in a given XML document, returning a boolean. + * + * It is up to the user to ensure that the XML passed is in fact + * an XML document - XPath doesn't work easily on fragments without + * a context node being known. + */ + Datum + xpath_exists(PG_FUNCTION_ARGS) + { + #ifdef USE_LIBXML + text *xpath_expr_text = PG_GETARG_TEXT_P(0); + xmltype *data = PG_GETARG_XML_P(1); + ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); + ArrayBuildState *astate = NULL; + xmlParserCtxtPtr ctxt = NULL; + xmlDocPtr doc = NULL; + xmlXPathContextPtr xpathctx = NULL; + xmlXPathCompExprPtr xpathcomp = NULL; + char *datastr; + int32 len; + int32 xpath_len; + xmlChar *string; + xmlChar *xpath_expr; + int i; + int res_nitems; + int ndim; + Datum *ns_names_uris; + bool *ns_names_uris_nulls; + int ns_count; + int result; + + /* + * Namespace mappings are passed as text[]. If an empty array is passed + * (ndim = 0, "0-dimensional"), then there are no namespace mappings. + * Else, a 2-dimensional array with length of the second axis being equal + * to 2 should be passed, i.e., every subarray contains 2 elements, the + * first element defining the name, the second one the URI. Example: + * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2', + * 'http://example2.com']]. + */ + ndim = ARR_NDIM(namespaces); + if (ndim != 0) + { + int *dims; + + dims = ARR_DIMS(namespaces); + + if (ndim != 2 || dims[1] != 2) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid array for XML namespace mapping"), + errdetail("The array must be two-dimensional with length of the second axis equal to 2."))); + + Assert(ARR_ELEMTYPE(namespaces) == TEXTOID); + + deconstruct_array(namespaces, TEXTOID, -1, false, 'i', + &ns_names_uris, &ns_names_uris_nulls, + &ns_count); + + Assert((ns_count % 2) == 0); /* checked above */ + ns_count /= 2; /* count pairs only */ + } + else + { + ns_names_uris = NULL; + ns_names_uris_nulls = NULL; + ns_count = 0; + } + + datastr = VARDATA(data); + len = VARSIZE(data) - VARHDRSZ; + xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ; + if (xpath_len == 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("empty XPath expression"))); + + string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); + memcpy(string, datastr, len); + string[len] = '\0'; + + xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar)); + memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len); + xpath_expr[xpath_len] = '\0'; + + pg_xml_init(); + xmlInitParser(); + + PG_TRY(); + { + /* + * redundant XML parsing (two parsings for the same value during one + * command execution are possible) + */ + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) + xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0); + if (doc == NULL) + xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, + "could not parse XML document"); + xpathctx = xmlXPathNewContext(doc); + if (xpathctx == NULL) + xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XPath context"); + xpathctx->node = xmlDocGetRootElement(doc); + if (xpathctx->node == NULL) + xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + "could not find root XML element"); + + /* register namespaces, if any */ + if (ns_count > 0) + { + for (i = 0; i < ns_count; i++) + { + char *ns_name; + char *ns_uri; + + if (ns_names_uris_nulls[i * 2] || + ns_names_uris_nulls[i * 2 + 1]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("neither namespace name nor URI may be null"))); + ns_name = TextDatumGetCString(ns_names_uris[i * 2]); + ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]); + if (xmlXPathRegisterNs(xpathctx, + (xmlChar *) ns_name, + (xmlChar *) ns_uri) != 0) + ereport(ERROR, /* is this an internal error??? */ + (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"", + ns_name, ns_uri))); + } + } + + xpathcomp = xmlXPathCompile(xpath_expr); + if (xpathcomp == NULL) /* TODO: show proper XPath error details */ + xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + "invalid XPath expression"); + + result = xmlXPathCompiledEvalToBoolean(xpathcomp, xpathctx); + if (result == -1) /* TODO: reason? */ + xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + "could not create XPath object"); + } + PG_CATCH(); + { + if (xpathcomp) + xmlXPathFreeCompExpr(xpathcomp); + if (xpathctx) + xmlXPathFreeContext(xpathctx); + if (doc) + xmlFreeDoc(doc); + if (ctxt) + xmlFreeParserCtxt(ctxt); + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlXPathFreeCompExpr(xpathcomp); + xmlXPathFreeContext(xpathctx); + xmlFreeDoc(doc); + xmlFreeParserCtxt(ctxt); + + PG_RETURN_BOOL(result); + #else + NO_XML_SUPPORT(); + return 0; + #endif + } Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/include/catalog/pg_proc.h,v retrieving revision 1.570 diff -c -r1.570 pg_proc.h *** src/include/catalog/pg_proc.h 26 Feb 2010 02:01:21 -0000 1.570 --- src/include/catalog/pg_proc.h 11 May 2010 07:46:09 -0000 *************** *** 4385,4390 **** --- 4385,4395 ---- DATA(insert OID = 2932 ( xpath PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_ "selectpg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); DESCR("evaluate XPath expression"); + DATA(insert OID = 3037 ( xpath_exists PGNSP PGUID 12 1 0 0 f f f t f i 3 0 16 "25 142 1009" _null_ _null_ _null_ _null_xpath_exists _null_ _null_ _null_ )); + DESCR("evaluate XPath expression in a boolean context, with namespaces support"); + DATA(insert OID = 3038 ( xpath_exists PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_"select pg_catalog.xpath_exists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); + DESCR("evaluate XPath expression in a boolean context"); + /* uuid */ DATA(insert OID = 2952 ( uuid_in PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_uuid_in _null_ _null_ _null_ )); DESCR("I/O"); Index: src/include/utils/xml.h =================================================================== RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/include/utils/xml.h,v retrieving revision 1.31 diff -c -r1.31 xml.h *** src/include/utils/xml.h 3 Mar 2010 17:29:45 -0000 1.31 --- src/include/utils/xml.h 10 May 2010 21:28:48 -0000 *************** *** 37,42 **** --- 37,43 ---- extern Datum xmltotext(PG_FUNCTION_ARGS); extern Datum xmlvalidate(PG_FUNCTION_ARGS); extern Datum xpath(PG_FUNCTION_ARGS); + extern Datum xpath_exists(PG_FUNCTION_ARGS); extern Datum table_to_xml(PG_FUNCTION_ARGS); extern Datum query_to_xml(PG_FUNCTION_ARGS); Index: src/test/regress/expected/xml.out =================================================================== RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/test/regress/expected/xml.out,v retrieving revision 1.25 diff -c -r1.25 xml.out *** src/test/regress/expected/xml.out 9 Jun 2009 22:00:57 -0000 1.25 --- src/test/regress/expected/xml.out 11 May 2010 09:04:20 -0000 *************** *** 502,504 **** --- 502,545 ---- {<b>two</b>,<b>etc</b>} (1 row) + -- Text xpath_exists evalutation + INSERT INTO xmltest VALUES (4, '<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml); + INSERT INTO xmltest VALUES (5, '<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml); + INSERT INTO xmltest VALUES (6, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml); + INSERT INTO xmltest VALUES (7, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beer',data); + count + ------- + 0 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers',data); + count + ------- + 2 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers/name[text() = ''Molson'']',data); + count + ------- + 1 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]); + count + ------- + 0 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]); + count + ------- + 2 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:name[text() = ''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]); + count + ------- + 1 + (1 row) + Index: src/test/regress/sql/xml.sql =================================================================== RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/test/regress/sql/xml.sql,v retrieving revision 1.19 diff -c -r1.19 xml.sql *** src/test/regress/sql/xml.sql 9 Jun 2009 22:00:57 -0000 1.19 --- src/test/regress/sql/xml.sql 11 May 2010 08:18:52 -0000 *************** *** 163,165 **** --- 163,178 ---- SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:pieceid="2" /></local:data>'); SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:pieceid="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'); + + -- Text xpath_exists evalutation + INSERT INTO xmltest VALUES (4, '<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml); + INSERT INTO xmltest VALUES (5, '<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml); + INSERT INTO xmltest VALUES (6, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml); + INSERT INTO xmltest VALUES (7, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml); + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beer',data); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers',data); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers/name[text() = ''Molson'']',data); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:name[text() = ''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]); \ No newline at end of file
pgsql-hackers by date: