Re: xpath improvement V2 with xml.c - Mailing list pgsql-hackers

From Arie Bikker
Subject Re: xpath improvement V2 with xml.c
Date
Msg-id 4B75D0F8.5040705@abikker.nl
Whole thread Raw
In response to xpath improvement V2  (Arie Bikker <arie@abikker.nl>)
List pgsql-hackers
And a patch for the code implementing this

*** doc/src/sgml/func.sgml.org    Tue Feb  2 12:53:59 2010
--- doc/src/sgml/func.sgml    Fri Feb 12 21:49:01 2010
***************
*** 1,4 ****
! <!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.482.2.2 2009/11/24 19:21:04 petere Exp $ -->

   <chapter id="functions">
    <title>Functions and Operators</title>
--- 1,4 ----
! <!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.482 2009/06/17 21:58:49 tgl Exp $ -->

   <chapter id="functions">
    <title>Functions and Operators</title>
***************
*** 821,827 ****
        <row>
         <entry><literal><function>random</function>()</literal></entry>
         <entry><type>dp</type></entry>
!        <entry>random value in the range 0.0 <= x < 1.0</entry>
         <entry><literal>random()</literal></entry>
         <entry></entry>
        </row>
--- 821,827 ----
        <row>
         <entry><literal><function>random</function>()</literal></entry>
         <entry><type>dp</type></entry>
!        <entry>random value between 0.0 and 1.0, inclusive</entry>
         <entry><literal>random()</literal></entry>
         <entry></entry>
        </row>
***************
*** 5251,5259 ****
       <listitem>
        <para>
          <function>to_char(..., 'ID')</function>'s day of the week numbering
!         matches the <function>extract(isodow from ...)</function> function, but
          <function>to_char(..., 'D')</function>'s does not match
!         <function>extract(dow from ...)</function>'s day numbering.
        </para>
       </listitem>

--- 5251,5259 ----
       <listitem>
        <para>
          <function>to_char(..., 'ID')</function>'s day of the week numbering
!         matches the <function>extract('isodow', ...)</function> function, but
          <function>to_char(..., 'D')</function>'s does not match
!         <function>extract('dow', ...)</function>'s day numbering.
        </para>
       </listitem>

***************
*** 8464,8474 ****
     </indexterm>

     <para>
!     To process values of data type <type>xml</type>, PostgreSQL offers
!     the function <function>xpath</function>, which evaluates XPath 1.0
!     expressions.
     </para>

  <synopsis>
  <function>xpath</function>(<replaceable>xpath</replaceable>, <replaceable>xml</replaceable><optional>,
<replaceable>nsarray</replaceable></optional>)
  </synopsis>
--- 8464,8486 ----
     </indexterm>

     <para>
!     To retrieve information from an <type>xml</type> document an Xpath 1.0
!     expression evaluation can be caried out. The result of this evaluation
!     can have several data types depending on the expression and the content
!     of the document. The functions implementing this all use an expression
!     and a document as input and an optional namespace array.
!     They differ in the type of expression they interpret and, consequently, the result they provide.
     </para>

+    <sect3 id="functions-xml-processing-nodeset">
+     <title>Nodeset processing</title>
+
+    <para>
+     To process expressions returning a nodeset, PostgreSQL offers
+     the function <function>xpath</function>, which returns an  array of
+     <type>xml</type> values.
+    </para>
+
  <synopsis>
  <function>xpath</function>(<replaceable>xpath</replaceable>, <replaceable>xml</replaceable><optional>,
<replaceable>nsarray</replaceable></optional>)
  </synopsis>
***************
*** 8506,8511 ****
--- 8518,8699 ----
  (1 row)
  ]]></screen>
     </para>
+    </sect3>
+
+    <sect3 id="functions-xml-processing-values">
+     <title>Value returning functions</title>
+    <para>
+     To retrieve single values from data type <type>xml</type> PostgreSQL offers two
+     functions akin to <function>xpath</function>.
+     The function <function>xpath_value_text</function> returns a single
+ as  <type>text</type> result; the function <function>xpath_value_strict</function> returns a specific type governed
byan input parameter used as type example. 
+    </para>
+
+ <synopsis>
+ <function>xpath_value_text</function>(<replaceable>xpath</replaceable>, <replaceable>xml</replaceable><optional>,
<replaceable>nsarray</replaceable></optional>)
+ </synopsis>
+ <synopsis>
+ <function>xpath_value_strict</function>(<replaceable>typexample</replaceable>, <replaceable>xpath</replaceable>,
<replaceable>xml</replaceable><optional>,<replaceable>nsarray</replaceable></optional>) 
+ </synopsis>
+
+    <para>
+     The function <function>xpath_value_text</function> evaluates the XPath
+     expression <replaceable>xpath</replaceable> against the XML value
+     <replaceable>xml</replaceable>.  It returns a text value
+     corresponding to the evaluation produced by the XPath expression.
+     <replaceable>xpath</replaceable> must be an expression that returns
+     a single value, not a nodeset.
+     <replaceable>xpath</replaceable> expressions resulting in boolean, string or number are supported.
+    </para>
+
+   <para>
+     The second argument must be a well formed XML document. In particular,
+     it must have a single root node element.
+   </para>
+
+    <para>
+     The third argument of the function is an array of namespace
+     mappings with the same restrictions as for the <replaceable>xpath</replaceable> function.
+    </para>
+
+    <para>
+     This example will return the tagname of the root element:
+ <screen><![CDATA[
+ SELECT xpath_value_text('name()', '<my:a xmlns:my="http://example.com">test</my:a>',
+              ARRAY[ARRAY['my', 'http://example.com']]);
+
+  xpath_value_text
+ ------------------
+  my:a
+ (1 row)
+ ]]></screen>
+    </para>
+
+    <para>
+     The function <function>xpath_value_strict</function> evaluates the XPath
+     expression <replaceable>xpath</replaceable> against the XML value
+     <replaceable>xml</replaceable>.  It returns a value
+     of the same type as <replaceable>typexample</replaceable>.
+     <replaceable>xpath</replaceable> must be an expression that returns
+     a single value of the same type as <replaceable>typexample</replaceable>.
+     When <replaceable>typexample</replaceable> is of type <type>regtype</type>
+     the function does not return the result value of the expression evaluation,
+     but the type of the result value.
+    </para>
+
+   <para>
+     The first argument serves as an example of the output required by this function.
+     It can be any constant or variable of type <type>boolean</type>,
+     <type>text</type> or <type>float8</type> corresponding to the basic value
+     type used in libxml2. Or it can be a <type>regtype</type> to retrieve type
+     information of the expression evaluation result.
+   </para>
+
+   <para>
+     The second argument must be a XPath expression with a result value
+     corresponding to the example offered.
+     An error is genereated if <replaceable>typexample</replaceable> and
+     the result of <replaceable>xpath</replaceable> do not match in type.
+   </para>
+
+   <para>
+     The third argument must be a well formed XML document with a single
+     root node element.
+   </para>
+
+    <para>
+     The fourth argument of the function is an (optional) array of namespace
+     mappings.
+    </para>
+
+    <para>
+     The following table shows some valid input/output combinations:
+
+     <informaltable>
+      <tgroup cols="5">
+       <thead>
+        <row>
+         <entry><replaceable>typexample</replaceable></entry>
+         <entry><replaceable>xpath</replaceable></entry>
+         <entry><replaceable>xml</replaceable></entry>
+         <entry>result value</entry>
+         <entry>result type</entry>
+        </row>
+       </thead>
+
+       <tbody>
+        <row>
+         <entry>TRUE::boolean</entry>
+         <entry>'1=1'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>TRUE</entry>
+         <entry><type>boolean</type></entry>
+        </row>
+        <row>
+         <entry>1::float8</entry>
+         <entry>'1 div 3'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>0.333333333333333</entry>
+         <entry><type>float8</type></entry>
+        </row>
+        <row>
+         <entry>'a'::text</entry>
+         <entry>'name()'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>foo</entry>
+         <entry><type>text</type></entry>
+        </row>
+        <row>
+         <entry>'text'::regtype</entry>
+         <entry>'1=1'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>boolean</entry>
+         <entry><type>regtype</type></entry>
+        </row>
+       </tbody>
+      </tgroup>
+     </informaltable>
+    </para>
+
+    <para>
+    An error is generated if if the types of <replaceable>typexample</replaceable> and the expression result do not
match.Some examples are shown in the following table: 
+
+     <informaltable>
+      <tgroup cols="4">
+       <thead>
+        <row>
+         <entry><replaceable>typexample</replaceable></entry>
+         <entry><replaceable>xpath</replaceable></entry>
+         <entry><replaceable>xml</replaceable></entry>
+         <entry>fail reason</entry>
+        </row>
+       </thead>
+       <tbody>
+        <row>
+         <entry>'a'::text</entry>
+         <entry>'/'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>expression evaluates to nodeset, not string</entry>
+        </row>
+        <row>
+         <entry>'a'::text</entry>
+         <entry>'/@width'</entry>
+         <entry><![CDATA['<foo width="20"/>']]></entry>
+         <entry>expression evaluates to nodeset, albeit with a single node</entry>
+        </row>
+        <row>
+         <entry>1::float8</entry>
+         <entry>'number(@height)'</entry>
+         <entry><![CDATA['<foo width="20"/>']]></entry>
+         <entry>returns NaN because argument for number() is empty nodeset</entry>
+        </row>
+       </tbody>
+      </tgroup>
+     </informaltable>
+
+    </para>
+
+    </sect3>
    </sect2>

    <sect2 id="functions-xml-mapping">
*** src/include/catalog/pg_proc.h.org    Thu Jan 21 23:12:07 2010
--- src/include/catalog/pg_proc.h    Fri Jan 29 22:50:38 2010
***************
*** 4305,4314 ****
  DESCR("map database contents and structure to XML and XML Schema");

  DATA(insert OID = 2931 (  xpath         PGNSP PGUID 12 1 0 0 f f f t f i 3 0 143 "25 142 1009" _null_ _null_ _null_
_null_xpath _null_ _null_ _null_ )); 
! DESCR("evaluate XPath expression, with namespaces support");
  DATA(insert OID = 2932 (  xpath         PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_
"selectpg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); 
! DESCR("evaluate XPath expression");

  /* uuid */
  DATA(insert OID = 2952 (  uuid_in           PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_
_null_uuid_in _null_ _null_ _null_ )); 
  DESCR("I/O");
--- 4305,4323 ----
  DESCR("map database contents and structure to XML and XML Schema");

  DATA(insert OID = 2931 (  xpath         PGNSP PGUID 12 1 0 0 f f f t f i 3 0 143 "25 142 1009" _null_ _null_ _null_
_null_xpath _null_ _null_ _null_ )); 
! DESCR("evaluate XPath nodeset expression, with namespaces support");
  DATA(insert OID = 2932 (  xpath         PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_
"selectpg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); 
! DESCR("evaluate XPath nodeset expression");

+ DATA(insert OID = 2995 (  xpath_value_text         PGNSP PGUID 12 1 0 0 f f f t f i 3 0 25 "25 142 1009" _null_
_null__null_ _null_ xpath_value_text _null_ _null_ _null_ )); 
+ DESCR("evaluate XPath value expression, with namespaces support");
+ DATA(insert OID = 2996 (  xpath_value_text         PGNSP PGUID 14 1 0 0 f f f t f i 2 0 25 "25 142" _null_ _null_
_null__null_ "select pg_catalog.xpath_value_text($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); 
+ DESCR("evaluate XPath value expression");
+ DATA(insert OID = 2997 (  xpath_value_strict     PGNSP PGUID 12 1 0 0 f f f t f i 4 0 2283 "2283 25 142 1009" _null_
_null__null_ _null_ xpath_value_strict _null_ _null_ _null_ )); 
+ DESCR("evaluate XPath value expression by type example, with namespaces support");
+ DATA(insert OID = 2998 (  xpath_value_strict         PGNSP PGUID 14 1 0 0 f f f t f i 3 0 2283 "2283 25 142" _null_
_null__null_ _null_ "select pg_catalog.xpath_value_strict($1, $2, $3, ''{}''::pg_catalog.text[])" _null_ _null_ _null_
));
+ DESCR("evaluate XPath value expression by type example");
+
  /* uuid */
  DATA(insert OID = 2952 (  uuid_in           PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_
_null_uuid_in _null_ _null_ _null_ )); 
  DESCR("I/O");
*** src/include/utils/xml.h.org    Sat Jan 16 23:57:35 2010
--- src/include/utils/xml.h    Fri Jan 29 00:11:52 2010
***************
*** 37,42 ****
--- 37,44 ----
  extern Datum xmltotext(PG_FUNCTION_ARGS);
  extern Datum xmlvalidate(PG_FUNCTION_ARGS);
  extern Datum xpath(PG_FUNCTION_ARGS);
+ extern Datum xpath_value_text(PG_FUNCTION_ARGS);
+ extern Datum xpath_value_strict(PG_FUNCTION_ARGS);

  extern Datum table_to_xml(PG_FUNCTION_ARGS);
  extern Datum query_to_xml(PG_FUNCTION_ARGS);
*** src/backend/utils/fmgrtab.c.org    Tue Jan  5 21:52:42 2010
--- src/backend/utils/fmgrtab.c    Fri Jan 29 22:51:07 2010
***************
*** 1686,1691 ****
--- 1686,1693 ----
  extern Datum record_le (PG_FUNCTION_ARGS);
  extern Datum record_ge (PG_FUNCTION_ARGS);
  extern Datum btrecordcmp (PG_FUNCTION_ARGS);
+ extern Datum xpath_value_text (PG_FUNCTION_ARGS);
+ extern Datum xpath_value_strict (PG_FUNCTION_ARGS);
  extern Datum has_foreign_data_wrapper_privilege_name_name (PG_FUNCTION_ARGS);
  extern Datum has_foreign_data_wrapper_privilege_name_id (PG_FUNCTION_ARGS);
  extern Datum has_foreign_data_wrapper_privilege_id_name (PG_FUNCTION_ARGS);
***************
*** 3752,3757 ****
--- 3754,3761 ----
    { 2985, "record_le", 2, true, false, record_le },
    { 2986, "record_ge", 2, true, false, record_ge },
    { 2987, "btrecordcmp", 2, true, false, btrecordcmp },
+   { 2995, "xpath_value_text", 3, true, false, xpath_value_text },
+   { 2997, "xpath_value_strict", 4, true, false, xpath_value_strict },
    { 3000, "has_foreign_data_wrapper_privilege_name_name", 3, true, false,
has_foreign_data_wrapper_privilege_name_name}, 
    { 3001, "has_foreign_data_wrapper_privilege_name_id", 3, true, false, has_foreign_data_wrapper_privilege_name_id },
    { 3002, "has_foreign_data_wrapper_privilege_id_name", 3, true, false, has_foreign_data_wrapper_privilege_id_name },
*** src/backend/utils/adt/xml.c.org    Fri Sep  4 12:49:43 2009
--- src/backend/utils/adt/xml.c    Wed Feb 10 13:33:32 2010
***************
*** 111,116 ****
--- 111,118 ----
  static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
            bool preserve_whitespace, int encoding);
  static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
+ xmlXPathObjectPtr xml_xpathinternal(xmlChar *xpath, xmlChar *datastr, int datasize, ArrayType *nsarray);
+ static xmlXPathContextPtr xml_xmlxpathcontextaddns(xmlXPathContextPtr xpctxt, ArrayType *nsarray);
  #endif   /* USE_LIBXML */

  static StringInfo query_to_xml_internal(const char *query, char *tablename,
***************
*** 3265,3308 ****

      return result;
  }
- #endif

-
  /*
!  * Evaluate XPath expression and return array of XML values.
!  *
!  * As we have no support of XQuery sequences yet, this function seems
!  * to be the most useful one (array of XML functions plays a role of
!  * some kind of substitution for XQuery sequences).
!  *
!  * It is up to the user to ensure that the XML passed is in fact
!  * an XML document - XPath doesn't work easily on fragments without
!  * a context node being known.
   */
! Datum
! xpath(PG_FUNCTION_ARGS)
! {
! #ifdef USE_LIBXML
!     text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
!     xmltype    *data = PG_GETARG_XML_P(1);
!     ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
!     ArrayBuildState *astate = NULL;
      xmlParserCtxtPtr ctxt = NULL;
      xmlDocPtr    doc = NULL;
      xmlXPathContextPtr xpathctx = NULL;
      xmlXPathCompExprPtr xpathcomp = NULL;
      xmlXPathObjectPtr xpathobj = NULL;
!     char       *datastr;
!     int32        len;
!     int32        xpath_len;
!     xmlChar    *string;
!     xmlChar    *xpath_expr;
!     int            i;
!     int            res_nitems;
      int            ndim;
      Datum       *ns_names_uris;
      bool       *ns_names_uris_nulls;
!     int            ns_count;

      /*
       * Namespace mappings are passed as text[].  If an empty array is passed
--- 3267,3359 ----

      return result;
  }

  /*
!  * Retrieve an the result of a query (xpath) on xmldata (datastr) within a given
!  * context.
!  * See function xml_xpathexprtoxmlchar for the creation of the xpath argument
!  * See function xml_xmlctxtmanipulate for the creation of the xpathctxt argument
   */
! xmlXPathObjectPtr xml_xpathinternal(xmlChar *xpath, xmlChar *datastr, int datasize,  ArrayType *nsarray) {
      xmlParserCtxtPtr ctxt = NULL;
      xmlDocPtr    doc = NULL;
      xmlXPathContextPtr xpathctx = NULL;
      xmlXPathCompExprPtr xpathcomp = NULL;
      xmlXPathObjectPtr xpathobj = NULL;
!
!     xml_init();
!     xmlInitParser();
!
!     PG_TRY();
!     {
!         /*
!          * redundant XML parsing (two parsings for the same value during one
!          * command execution are possible)
!          */
!         ctxt = xmlNewParserCtxt();
!         if (ctxt == NULL)
!             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
!                         "could not allocate parser context");
!         doc = xmlCtxtReadMemory(ctxt, (char *) datastr, datasize, NULL, NULL, 0);
!         if (doc == NULL)
!             xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
!                         "could not parse XML document");
!         xpathctx = xmlXPathNewContext(doc);
!         if (xpathctx == NULL)
!             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
!                         "could not allocate XPath context");
!         xpathctx->node = xmlDocGetRootElement(doc);
!         if (xpathctx->node == NULL)
!             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
!                         "could not find root XML element");
!
!         /* register namespaces, if any */
!         xpathctx = xml_xmlxpathcontextaddns(xpathctx, nsarray);
!
!         xpathcomp = xmlXPathCompile(xpath);
!         if (xpathcomp == NULL)    /* TODO: show proper XPath error details */
!             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
!                         "invalid XPath expression");
!
!         xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
!         if (xpathobj == NULL)    /* TODO: reason? */
!             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
!                         "could not create XPath object");
!     }
!     PG_CATCH();
!     {
!         if (xpathobj)
!             xmlXPathFreeObject(xpathobj);
!         if (xpathcomp)
!             xmlXPathFreeCompExpr(xpathcomp);
!         if (xpathctx)
!             xmlXPathFreeContext(xpathctx);
!         if (doc)
!             xmlFreeDoc(doc);
!         if (ctxt)
!             xmlFreeParserCtxt(ctxt);
!         PG_RE_THROW();
!     }
!     PG_END_TRY();
!
!     xmlXPathFreeCompExpr(xpathcomp);
!     xmlXPathFreeContext(xpathctx);
!     xmlFreeDoc(doc);
!     xmlFreeParserCtxt(ctxt);
!
!     return(xpathobj);
! }
!
! /*
!  * Registers the namespaces in nsarray into the context xpctxt
!  * returns xpctxt
!  */
! static xmlXPathContextPtr xml_xmlxpathcontextaddns(xmlXPathContextPtr xpctxt, ArrayType *nsarray) {
      int            ndim;
      Datum       *ns_names_uris;
      bool       *ns_names_uris_nulls;
!     int    i;
!     int    ns_count;

      /*
       * Namespace mappings are passed as text[].  If an empty array is passed
***************
*** 3313,3324 ****
       * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
       * 'http://example2.com']].
       */
!     ndim = ARR_NDIM(namespaces);
      if (ndim != 0)
      {
          int           *dims;

!         dims = ARR_DIMS(namespaces);

          if (ndim != 2 || dims[1] != 2)
              ereport(ERROR,
--- 3364,3375 ----
       * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
       * 'http://example2.com']].
       */
!     ndim = ARR_NDIM(nsarray);
      if (ndim != 0)
      {
          int           *dims;

!         dims = ARR_DIMS(nsarray);

          if (ndim != 2 || dims[1] != 2)
              ereport(ERROR,
***************
*** 3326,3390 ****
                       errmsg("invalid array for XML namespace mapping"),
                       errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));

!         Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);

!         deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
                            &ns_names_uris, &ns_names_uris_nulls,
                            &ns_count);

          Assert((ns_count % 2) == 0);    /* checked above */
          ns_count /= 2;            /* count pairs only */
-     }
-     else
-     {
-         ns_names_uris = NULL;
-         ns_names_uris_nulls = NULL;
-         ns_count = 0;
-     }
-
-     datastr = VARDATA(data);
-     len = VARSIZE(data) - VARHDRSZ;
-     xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
-     if (xpath_len == 0)
-         ereport(ERROR,
-                 (errcode(ERRCODE_DATA_EXCEPTION),
-                  errmsg("empty XPath expression")));
-
-     string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
-     memcpy(string, datastr, len);
-     string[len] = '\0';
-
-     xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar));
-     memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
-     xpath_expr[xpath_len] = '\0';
-
-     xml_init();
-     xmlInitParser();
-
-     PG_TRY();
-     {
-         /*
-          * redundant XML parsing (two parsings for the same value during one
-          * command execution are possible)
-          */
-         ctxt = xmlNewParserCtxt();
-         if (ctxt == NULL)
-             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
-                         "could not allocate parser context");
-         doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
-         if (doc == NULL)
-             xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
-                         "could not parse XML document");
-         xpathctx = xmlXPathNewContext(doc);
-         if (xpathctx == NULL)
-             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
-                         "could not allocate XPath context");
-         xpathctx->node = xmlDocGetRootElement(doc);
-         if (xpathctx->node == NULL)
-             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
-                         "could not find root XML element");
-
-         /* register namespaces, if any */
          if (ns_count > 0)
          {
              for (i = 0; i < ns_count; i++)
--- 3377,3390 ----
                       errmsg("invalid array for XML namespace mapping"),
                       errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));

!         Assert(ARR_ELEMTYPE(nsarray) == TEXTOID);

!         deconstruct_array(nsarray, TEXTOID, -1, false, 'i',
                            &ns_names_uris, &ns_names_uris_nulls,
                            &ns_count);

          Assert((ns_count % 2) == 0);    /* checked above */
          ns_count /= 2;            /* count pairs only */
          if (ns_count > 0)
          {
              for (i = 0; i < ns_count; i++)
***************
*** 3399,3405 ****
                        errmsg("neither namespace name nor URI may be null")));
                  ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
                  ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
!                 if (xmlXPathRegisterNs(xpathctx,
                                         (xmlChar *) ns_name,
                                         (xmlChar *) ns_uri) != 0)
                      ereport(ERROR,        /* is this an internal error??? */
--- 3399,3405 ----
                        errmsg("neither namespace name nor URI may be null")));
                  ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
                  ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
!                 if (xmlXPathRegisterNs(xpctxt,
                                         (xmlChar *) ns_name,
                                         (xmlChar *) ns_uri) != 0)
                      ereport(ERROR,        /* is this an internal error??? */
***************
*** 3407,3423 ****
                                      ns_name, ns_uri)));
              }
          }

!         xpathcomp = xmlXPathCompile(xpath_expr);
!         if (xpathcomp == NULL)    /* TODO: show proper XPath error details */
!             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
!                         "invalid XPath expression");

!         xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
!         if (xpathobj == NULL)    /* TODO: reason? */
!             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
!                         "could not create XPath object");
!
          /* return empty array in cases when nothing is found */
          if (xpathobj->nodesetval == NULL)
              res_nitems = 0;
--- 3407,3450 ----
                                      ns_name, ns_uri)));
              }
          }
+     }
+     return (xpctxt);
+ }
+ #endif

! /*
!  * Evaluate XPath expression and return array of XML values.
!  *
!  * As we have no support of XQuery sequences yet, this function seems
!  * to be the most useful one (array of XML functions plays a role of
!  * some kind of substitution for XQuery sequences).
!  *
!  * It is up to the user to ensure that the XML passed is in fact
!  * an XML document - XPath doesn't work easily on fragments without
!  * a context node being known.
!  */
! Datum
! xpath(PG_FUNCTION_ARGS)
! {
! #ifdef USE_LIBXML
!     text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
!     xmltype    *data = PG_GETARG_XML_P(1);
!     ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
!     ArrayBuildState *astate = NULL;
!     xmlXPathObjectPtr xpathobj = NULL;
!     xmlChar    *string;
!     xmlChar    *xpath_expr;
!     int    res_nitems, i;

!     string = xml_text2xmlChar(data);
!     if ((VARSIZE(xpath_expr_text) - VARHDRSZ) == 0)
!         ereport(ERROR,
!                 (errcode(ERRCODE_DATA_EXCEPTION),
!                  errmsg("empty XPath expression")));
!     xpath_expr = xml_text2xmlChar(xpath_expr_text);
!     xpathobj = xml_xpathinternal(xpath_expr, string, (VARSIZE(data)-VARHDRSZ), namespaces);
!     if (xpathobj->type == XPATH_NODESET)
!     {
          /* return empty array in cases when nothing is found */
          if (xpathobj->nodesetval == NULL)
              res_nitems = 0;
***************
*** 3432,3471 ****
                  bool        elemisnull = false;

                  elem = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
!                 astate = accumArrayResult(astate, elem,
!                                           elemisnull, XMLOID,
!                                           CurrentMemoryContext);
              }
          }
      }
!     PG_CATCH();
      {
!         if (xpathobj)
!             xmlXPathFreeObject(xpathobj);
!         if (xpathcomp)
!             xmlXPathFreeCompExpr(xpathcomp);
!         if (xpathctx)
!             xmlXPathFreeContext(xpathctx);
!         if (doc)
!             xmlFreeDoc(doc);
!         if (ctxt)
!             xmlFreeParserCtxt(ctxt);
!         PG_RE_THROW();
      }
-     PG_END_TRY();
-
      xmlXPathFreeObject(xpathobj);
-     xmlXPathFreeCompExpr(xpathcomp);
-     xmlXPathFreeContext(xpathctx);
-     xmlFreeDoc(doc);
-     xmlFreeParserCtxt(ctxt);

!     if (res_nitems == 0)
          PG_RETURN_ARRAYTYPE_P(construct_empty_array(XMLOID));
      else
          PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
  #else
      NO_XML_SUPPORT();
      return 0;
  #endif
  }
--- 3459,3641 ----
                  bool        elemisnull = false;

                  elem = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
!                 astate = accumArrayResult(astate, elem, elemisnull, XMLOID, CurrentMemoryContext);
              }
          }
      }
!     else
      {
!         xmlXPathFreeObject(xpathobj);
!         ereport(ERROR,
!             (errcode(ERRCODE_DATA_EXCEPTION),
!             errmsg("evaluation returned non-nodeset")));
      }
      xmlXPathFreeObject(xpathobj);

!     if (astate == NULL)
          PG_RETURN_ARRAYTYPE_P(construct_empty_array(XMLOID));
      else
          PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
  #else
      NO_XML_SUPPORT();
+     return 0;
+ #endif
+ }
+
+ /*
+  * Evaluate XPath expression and return the text representation
+  * of a single XML value. A warning is logged and NULL returned
+  * if XPath expression does not result in one of the supported types
+  * (boolean, numeric or string)
+  *
+  * It is up to the user to ensure that the XML passed is in fact
+  * an XML document - XPath doesn't work easily on fragments without
+  * a context node being known.
+  */
+ Datum
+ xpath_value_text(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+     text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
+     xmltype    *data = PG_GETARG_XML_P(1);
+     ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
+     xmlXPathObjectPtr xpathobj = NULL;
+     xmlChar    *string;
+     xmlChar    *xpath_expr;
+
+     string = xml_text2xmlChar(data);
+     if ((VARSIZE(xpath_expr_text) - VARHDRSZ) == 0)
+         ereport(ERROR,
+                 (errcode(ERRCODE_DATA_EXCEPTION),
+                  errmsg("empty XPath expression")));
+     xpath_expr = xml_text2xmlChar(xpath_expr_text);
+     xpathobj = xml_xpathinternal(xpath_expr, string, (VARSIZE(data)-VARHDRSZ), namespaces);
+     switch (xpathobj->type)
+     {
+         case XPATH_BOOLEAN:
+         {
+             bool retval = (xpathobj->boolval != 0);
+             xmlXPathFreeObject(xpathobj);
+             PG_RETURN_TEXT_P(cstring_to_text(retval?"t":"f"));
+             /* not reached */
+             break;
+         }
+         case XPATH_NUMBER:
+         {
+             float8 fval = xpathobj->floatval;
+             xmlXPathFreeObject(xpathobj);
+             PG_RETURN_DATUM(CStringGetTextDatum(DatumGetCString(DirectFunctionCall1(float8out,
Float8GetDatumFast(fval)))));
+             /* not reached */
+             break;
+         }
+         case XPATH_STRING:
+         {
+             text *retval = cstring_to_text(xpathobj->stringval);
+             xmlXPathFreeObject(xpathobj);
+             PG_RETURN_TEXT_P(retval);
+             /* not reached */
+             break;
+         }
+         case XPATH_NODESET:
+         {
+             xmlXPathFreeObject(xpathobj);
+             ereport(ERROR,
+                 (errcode(ERRCODE_DATA_EXCEPTION),
+                 errmsg("evaluation returned nodeset")));
+             }
+         default:
+         {
+             ereport(WARNING, (errmsg("Unknown PathObjectType (%d)", xpathobj->type)));
+         }
+     }
+
+     xmlXPathFreeObject(xpathobj);
+     PG_RETURN_NULL();
+ #else
+     NO_XML_SUPPORT();
+     return 0;
+ #endif
+ }
+
+ Datum
+ xpath_value_strict(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+     Oid       reqtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
+     text       *xpath_expr_text = PG_GETARG_TEXT_P(1);
+     xmltype    *data = PG_GETARG_XML_P(2);
+     ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(3);
+     xmlXPathObjectPtr xpathobj = NULL;
+     xmlChar    *string;
+     xmlChar    *xpath_expr;
+     Datum       retdatum;
+     Oid       retdirect = InvalidOid;
+
+     string = xml_text2xmlChar(data);
+     if ((VARSIZE(xpath_expr_text) - VARHDRSZ) == 0)
+         ereport(ERROR,
+                 (errcode(ERRCODE_DATA_EXCEPTION),
+                  errmsg("empty XPath expression")));
+     xpath_expr = xml_text2xmlChar(xpath_expr_text);
+     xpathobj = xml_xpathinternal(xpath_expr, string, (VARSIZE(data)-VARHDRSZ), namespaces);
+     switch (xpathobj->type)
+     {
+         case XPATH_BOOLEAN:
+         {
+             retdatum = BoolGetDatum(xpathobj->boolval != 0);
+             retdirect = BOOLOID;
+             break;
+         }
+         case XPATH_NUMBER:
+         {
+             double xval = xpathobj->floatval;
+             float8 fval = xval;
+             retdatum = Float8GetDatumFast(fval);
+             retdirect = FLOAT8OID;
+             break;
+         }
+         case XPATH_STRING:
+         {
+             retdatum = CStringGetTextDatum(xpathobj->stringval);
+             retdirect = TEXTOID;
+             break;
+         }
+         case XPATH_NODESET:
+         {
+             xmlXPathFreeObject(xpathobj);
+             ereport(ERROR,
+                 (errcode(ERRCODE_DATA_EXCEPTION),
+                 errmsg("evaluation returned nodeset")));
+         }
+         default:
+         {
+             ereport(ERROR,
+                 (errcode(ERRCODE_DATA_EXCEPTION),
+                 errmsg("Unknown PathObjectType (%d)", xpathobj->type)));
+         }
+     }
+
+     xmlXPathFreeObject(xpathobj);
+     if (OidIsValid(retdirect))
+     {
+         if (reqtype == REGTYPEOID)
+         {
+             /* for regtype example return the result type */
+             retdatum = Int32GetDatum(retdirect);
+             retdirect = REGTYPEOID;
+         }
+         if (reqtype == retdirect) PG_RETURN_DATUM(retdatum);
+         ereport(ERROR,
+             (errcode(ERRCODE_DATA_EXCEPTION),
+             errmsg("evaluation result type mismatch (%d <> %d)", retdirect, reqtype)));
+         PG_RETURN_DATUM(0);
+     }
+     ereport(ERROR,
+         (errcode(ERRCODE_DATA_EXCEPTION),
+         errmsg("evaluation result type failed (%d)", reqtype)));
+     PG_RETURN_DATUM(0);
+ #else
+     NO_XML_SUPPORT();
      return 0;
  #endif
  }
*** src/test/regress/sql/xml.sql.org    Tue Jan 26 21:46:48 2010
--- src/test/regress/sql/xml.sql    Tue Feb  2 12:50:55 2010
***************
*** 163,165 ****
--- 163,185 ----
  SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:pieceid="2" /></local:data>'); 
  SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:pieceid="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); 
  SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
+
+ -- Test XPath expressions for non-nodeset results
+ -- These test only the interface, not the abundant possibilities in xpath
+
+ -- Test expression output for basic types as text
+ SELECT xpath_value_text('count(/a)','<a/>');
+ SELECT xpath_value_text('1=0','<a/>');
+ SELECT xpath_value_text('name()','<a/>');
+
+ -- Test resulting type for basic expressions
+ SELECT pg_typeof(xpath_value_strict(TRUE::boolean,'1=1','<a/>'));
+ SELECT pg_typeof(xpath_value_strict(1::float8,'count(/)','<a/>'));
+ SELECT pg_typeof(xpath_value_strict('a'::text,'name()','<a/>'));
+
+ -- Test obtaining the result type of an expression
+ SELECT xpath_value_strict('anyelement'::regtype,'1=1','<a/>');
+ SELECT xpath_value_strict('anyelement'::regtype,'count(/)','<a/>');
+ SELECT xpath_value_strict('anyelement'::regtype,'name()','<a/>');
+ SELECT xpath_value_strict('a'::text,'text()',data) FROM xmltest;
+ SELECT xpath_value_strict('a'::text,'string()',data) FROM xmltest;

pgsql-hackers by date:

Previous
From: Tom Lane
Date:
Subject: WITH ... VALUES
Next
From: Jeff Davis
Date:
Subject: Re: WITH ... VALUES