[PATCH] Add XMLEXISTS function from the SQL/XML standard (was: Add xpath_exists Function) - Mailing list pgsql-hackers

From Mike Fowler
Subject [PATCH] Add XMLEXISTS function from the SQL/XML standard (was: Add xpath_exists Function)
Date
Msg-id 4BFBDF2C.801@mlfowler.com
Whole thread Raw
Responses Re: [PATCH] Add XMLEXISTS function from the SQL/XML standard (was: Add xpath_exists Function)
Re: [PATCH] Add XMLEXISTS function from the SQL/XML standard (was: Add xpath_exists Function)
List pgsql-hackers
I've been reading the SQL/XML standard and discovered that it defines a
function named XMLEXISTS that does exactly what the todo item
xpath_exists defines. My original patch named the function as per the
todo but I think using the function name from the standard is a better
idea. So this patch is the same as before, but the function is now named
XMLEXISTS instead of xpath_exists.

Regards,

--
Mike Fowler
Registered Linux user: 379787

Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.97
diff -c -r1.97 xml.c
*** src/backend/utils/adt/xml.c    3 Mar 2010 17:29:45 -0000    1.97
--- src/backend/utils/adt/xml.c    25 May 2010 14:02:33 -0000
***************
*** 3495,3497 ****
--- 3495,3668 ----
      return 0;
  #endif
  }
+
+ /*
+  * Determines if the node specified by the supplied XPath exists
+  * in a given XML document, returning a boolean.
+  *
+  * It is up to the user to ensure that the XML passed is in fact
+  * an XML document - XPath doesn't work easily on fragments without
+  * a context node being known.
+  */
+ Datum
+ xmlexists(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+     text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
+     xmltype    *data = PG_GETARG_XML_P(1);
+     ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
+     xmlParserCtxtPtr ctxt = NULL;
+     xmlDocPtr    doc = NULL;
+     xmlXPathContextPtr xpathctx = NULL;
+     xmlXPathCompExprPtr xpathcomp = NULL;
+     char       *datastr;
+     int32        len;
+     int32        xpath_len;
+     xmlChar    *string;
+     xmlChar    *xpath_expr;
+     int            i;
+     int            ndim;
+     Datum       *ns_names_uris;
+     bool       *ns_names_uris_nulls;
+     int            ns_count;
+     int            result;
+
+     /*
+      * Namespace mappings are passed as text[].  If an empty array is passed
+      * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
+      * Else, a 2-dimensional array with length of the second axis being equal
+      * to 2 should be passed, i.e., every subarray contains 2 elements, the
+      * first element defining the name, the second one the URI.  Example:
+      * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
+      * 'http://example2.com']].
+      */
+     ndim = ARR_NDIM(namespaces);
+     if (ndim != 0)
+     {
+         int           *dims;
+
+         dims = ARR_DIMS(namespaces);
+
+         if (ndim != 2 || dims[1] != 2)
+             ereport(ERROR,
+                     (errcode(ERRCODE_DATA_EXCEPTION),
+                      errmsg("invalid array for XML namespace mapping"),
+                      errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
+
+         Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
+
+         deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
+                           &ns_names_uris, &ns_names_uris_nulls,
+                           &ns_count);
+
+         Assert((ns_count % 2) == 0);    /* checked above */
+         ns_count /= 2;            /* count pairs only */
+     }
+     else
+     {
+         ns_names_uris = NULL;
+         ns_names_uris_nulls = NULL;
+         ns_count = 0;
+     }
+
+     datastr = VARDATA(data);
+     len = VARSIZE(data) - VARHDRSZ;
+     xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
+     if (xpath_len == 0)
+         ereport(ERROR,
+                 (errcode(ERRCODE_DATA_EXCEPTION),
+                  errmsg("empty XPath expression")));
+
+     string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
+     memcpy(string, datastr, len);
+     string[len] = '\0';
+
+     xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar));
+     memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
+     xpath_expr[xpath_len] = '\0';
+
+     pg_xml_init();
+     xmlInitParser();
+
+     PG_TRY();
+     {
+         /*
+          * redundant XML parsing (two parsings for the same value during one
+          * command execution are possible)
+          */
+         ctxt = xmlNewParserCtxt();
+         if (ctxt == NULL)
+             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+                         "could not allocate parser context");
+         doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+         if (doc == NULL)
+             xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+                         "could not parse XML document");
+         xpathctx = xmlXPathNewContext(doc);
+         if (xpathctx == NULL)
+             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+                         "could not allocate XPath context");
+         xpathctx->node = xmlDocGetRootElement(doc);
+         if (xpathctx->node == NULL)
+             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                         "could not find root XML element");
+
+         /* register namespaces, if any */
+         if (ns_count > 0)
+         {
+             for (i = 0; i < ns_count; i++)
+             {
+                 char       *ns_name;
+                 char       *ns_uri;
+
+                 if (ns_names_uris_nulls[i * 2] ||
+                     ns_names_uris_nulls[i * 2 + 1])
+                     ereport(ERROR,
+                             (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+                       errmsg("neither namespace name nor URI may be null")));
+                 ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
+                 ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
+                 if (xmlXPathRegisterNs(xpathctx,
+                                        (xmlChar *) ns_name,
+                                        (xmlChar *) ns_uri) != 0)
+                     ereport(ERROR,        /* is this an internal error??? */
+                             (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
+                                     ns_name, ns_uri)));
+             }
+         }
+
+         xpathcomp = xmlXPathCompile(xpath_expr);
+         if (xpathcomp == NULL)    /* TODO: show proper XPath error details */
+             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                         "invalid XPath expression");
+
+         result = xmlXPathCompiledEvalToBoolean(xpathcomp, xpathctx);
+         if (result == -1)    /* TODO: reason? */
+             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                         "could not create XPath object");
+     }
+     PG_CATCH();
+     {
+         if (xpathcomp)
+             xmlXPathFreeCompExpr(xpathcomp);
+         if (xpathctx)
+             xmlXPathFreeContext(xpathctx);
+         if (doc)
+             xmlFreeDoc(doc);
+         if (ctxt)
+             xmlFreeParserCtxt(ctxt);
+         PG_RE_THROW();
+     }
+     PG_END_TRY();
+
+     xmlXPathFreeCompExpr(xpathcomp);
+     xmlXPathFreeContext(xpathctx);
+     xmlFreeDoc(doc);
+     xmlFreeParserCtxt(ctxt);
+
+     PG_RETURN_BOOL(result);
+ #else
+     NO_XML_SUPPORT();
+     return 0;
+ #endif
+ }
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.570
diff -c -r1.570 pg_proc.h
*** src/include/catalog/pg_proc.h    26 Feb 2010 02:01:21 -0000    1.570
--- src/include/catalog/pg_proc.h    25 May 2010 14:04:27 -0000
***************
*** 4385,4390 ****
--- 4385,4396 ----
  DATA(insert OID = 2932 (  xpath         PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_
"selectpg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); 
  DESCR("evaluate XPath expression");

+ DATA(insert OID = 3037 (  xmlexists     PGNSP PGUID 12 1 0 0 f f f t f i 3 0 16 "25 142 1009" _null_ _null_ _null_
_null_xmlexists _null_ _null_ _null_ )); 
+ DESCR("evaluate XPath expression in a boolean context, with namespaces support");
+ DATA(insert OID = 3038 (  xmlexists     PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_
"selectpg_catalog.xmlexists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); 
+ DESCR("evaluate XPath expression in a boolean context");
+
+
  /* uuid */
  DATA(insert OID = 2952 (  uuid_in           PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_
_null_uuid_in _null_ _null_ _null_ )); 
  DESCR("I/O");
Index: src/include/utils/xml.h
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/include/utils/xml.h,v
retrieving revision 1.31
diff -c -r1.31 xml.h
*** src/include/utils/xml.h    3 Mar 2010 17:29:45 -0000    1.31
--- src/include/utils/xml.h    25 May 2010 13:45:32 -0000
***************
*** 37,42 ****
--- 37,43 ----
  extern Datum xmltotext(PG_FUNCTION_ARGS);
  extern Datum xmlvalidate(PG_FUNCTION_ARGS);
  extern Datum xpath(PG_FUNCTION_ARGS);
+ extern Datum xmlexists(PG_FUNCTION_ARGS);

  extern Datum table_to_xml(PG_FUNCTION_ARGS);
  extern Datum query_to_xml(PG_FUNCTION_ARGS);
Index: src/test/regress/expected/xml.out
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/test/regress/expected/xml.out,v
retrieving revision 1.25
diff -c -r1.25 xml.out
*** src/test/regress/expected/xml.out    9 Jun 2009 22:00:57 -0000    1.25
--- src/test/regress/expected/xml.out    25 May 2010 14:12:04 -0000
***************
*** 502,504 ****
--- 502,545 ----
   {<b>two</b>,<b>etc</b>}
  (1 row)

+ -- Text xmlexists evalutation
+ INSERT INTO xmltest VALUES (4,
'<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (5,
'<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (6, '<myns:menu
xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ INSERT INTO xmltest VALUES (7, '<myns:menu
xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer',data);
+  count
+ -------
+      0
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers',data);
+  count
+ -------
+      2
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers/name[text() = ''Molson'']',data);
+  count
+ -------
+      1
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]);
+  count
+ -------
+      0
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]);
+  count
+ -------
+      2
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/myns:menu/myns:beers/myns:name[text() =
''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]);
+  count
+ -------
+      1
+ (1 row)
+
Index: src/test/regress/sql/xml.sql
===================================================================
RCS file: /home/mfowler/cvsrepo/pgrepo/pgsql/src/test/regress/sql/xml.sql,v
retrieving revision 1.19
diff -c -r1.19 xml.sql
*** src/test/regress/sql/xml.sql    9 Jun 2009 22:00:57 -0000    1.19
--- src/test/regress/sql/xml.sql    25 May 2010 14:09:02 -0000
***************
*** 163,165 ****
--- 163,178 ----
  SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:pieceid="2" /></local:data>'); 
  SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:pieceid="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); 
  SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
+
+ -- Text xmlexists evalutation
+ INSERT INTO xmltest VALUES (4,
'<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (5,
'<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (6, '<myns:menu
xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ INSERT INTO xmltest VALUES (7, '<myns:menu
xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer',data);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers',data);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers/name[text() = ''Molson'']',data);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/myns:menu/myns:beers/myns:name[text() =
''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]);

pgsql-hackers by date:

Previous
From: Stephen Frost
Date:
Subject: Re: ExecutorCheckPerms() hook
Next
From: Andrew Dunstan
Date:
Subject: Re: libpq, PQexecPrepared, data size sent to FE vs. FETCH_COUNT