Re: xpath improvement V2 - Mailing list pgsql-hackers

From Arie Bikker
Subject Re: xpath improvement V2
Date
Msg-id 4B75CCF9.8070204@abikker.nl
Whole thread Raw
In response to xpath improvement V2  (Arie Bikker <arie@abikker.nl>)
List pgsql-hackers
And here is the patch.
*** doc/src/sgml/func.sgml.org    Tue Feb  2 12:53:59 2010
--- doc/src/sgml/func.sgml    Fri Feb 12 21:49:01 2010
***************
*** 1,4 ****
! <!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.482.2.2 2009/11/24 19:21:04 petere Exp $ -->

   <chapter id="functions">
    <title>Functions and Operators</title>
--- 1,4 ----
! <!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.482 2009/06/17 21:58:49 tgl Exp $ -->

   <chapter id="functions">
    <title>Functions and Operators</title>
***************
*** 821,827 ****
        <row>
         <entry><literal><function>random</function>()</literal></entry>
         <entry><type>dp</type></entry>
!        <entry>random value in the range 0.0 <= x < 1.0</entry>
         <entry><literal>random()</literal></entry>
         <entry></entry>
        </row>
--- 821,827 ----
        <row>
         <entry><literal><function>random</function>()</literal></entry>
         <entry><type>dp</type></entry>
!        <entry>random value between 0.0 and 1.0, inclusive</entry>
         <entry><literal>random()</literal></entry>
         <entry></entry>
        </row>
***************
*** 5251,5259 ****
       <listitem>
        <para>
          <function>to_char(..., 'ID')</function>'s day of the week numbering
!         matches the <function>extract(isodow from ...)</function> function, but
          <function>to_char(..., 'D')</function>'s does not match
!         <function>extract(dow from ...)</function>'s day numbering.
        </para>
       </listitem>

--- 5251,5259 ----
       <listitem>
        <para>
          <function>to_char(..., 'ID')</function>'s day of the week numbering
!         matches the <function>extract('isodow', ...)</function> function, but
          <function>to_char(..., 'D')</function>'s does not match
!         <function>extract('dow', ...)</function>'s day numbering.
        </para>
       </listitem>

***************
*** 8464,8474 ****
     </indexterm>

     <para>
!     To process values of data type <type>xml</type>, PostgreSQL offers
!     the function <function>xpath</function>, which evaluates XPath 1.0
!     expressions.
     </para>

  <synopsis>
  <function>xpath</function>(<replaceable>xpath</replaceable>, <replaceable>xml</replaceable><optional>,
<replaceable>nsarray</replaceable></optional>)
  </synopsis>
--- 8464,8486 ----
     </indexterm>

     <para>
!     To retrieve information from an <type>xml</type> document an Xpath 1.0
!     expression evaluation can be caried out. The result of this evaluation
!     can have several data types depending on the expression and the content
!     of the document. The functions implementing this all use an expression
!     and a document as input and an optional namespace array.
!     They differ in the type of expression they interpret and, consequently, the result they provide.
     </para>

+    <sect3 id="functions-xml-processing-nodeset">
+     <title>Nodeset processing</title>
+
+    <para>
+     To process expressions returning a nodeset, PostgreSQL offers
+     the function <function>xpath</function>, which returns an  array of
+     <type>xml</type> values.
+    </para>
+
  <synopsis>
  <function>xpath</function>(<replaceable>xpath</replaceable>, <replaceable>xml</replaceable><optional>,
<replaceable>nsarray</replaceable></optional>)
  </synopsis>
***************
*** 8506,8511 ****
--- 8518,8699 ----
  (1 row)
  ]]></screen>
     </para>
+    </sect3>
+
+    <sect3 id="functions-xml-processing-values">
+     <title>Value returning functions</title>
+    <para>
+     To retrieve single values from data type <type>xml</type> PostgreSQL offers two
+     functions akin to <function>xpath</function>.
+     The function <function>xpath_value_text</function> returns a single
+ as  <type>text</type> result; the function <function>xpath_value_strict</function> returns a specific type governed
byan input parameter used as type example. 
+    </para>
+
+ <synopsis>
+ <function>xpath_value_text</function>(<replaceable>xpath</replaceable>, <replaceable>xml</replaceable><optional>,
<replaceable>nsarray</replaceable></optional>)
+ </synopsis>
+ <synopsis>
+ <function>xpath_value_strict</function>(<replaceable>typexample</replaceable>, <replaceable>xpath</replaceable>,
<replaceable>xml</replaceable><optional>,<replaceable>nsarray</replaceable></optional>) 
+ </synopsis>
+
+    <para>
+     The function <function>xpath_value_text</function> evaluates the XPath
+     expression <replaceable>xpath</replaceable> against the XML value
+     <replaceable>xml</replaceable>.  It returns a text value
+     corresponding to the evaluation produced by the XPath expression.
+     <replaceable>xpath</replaceable> must be an expression that returns
+     a single value, not a nodeset.
+     <replaceable>xpath</replaceable> expressions resulting in boolean, string or number are supported.
+    </para>
+
+   <para>
+     The second argument must be a well formed XML document. In particular,
+     it must have a single root node element.
+   </para>
+
+    <para>
+     The third argument of the function is an array of namespace
+     mappings with the same restrictions as for the <replaceable>xpath</replaceable> function.
+    </para>
+
+    <para>
+     This example will return the tagname of the root element:
+ <screen><![CDATA[
+ SELECT xpath_value_text('name()', '<my:a xmlns:my="http://example.com">test</my:a>',
+              ARRAY[ARRAY['my', 'http://example.com']]);
+
+  xpath_value_text
+ ------------------
+  my:a
+ (1 row)
+ ]]></screen>
+    </para>
+
+    <para>
+     The function <function>xpath_value_strict</function> evaluates the XPath
+     expression <replaceable>xpath</replaceable> against the XML value
+     <replaceable>xml</replaceable>.  It returns a value
+     of the same type as <replaceable>typexample</replaceable>.
+     <replaceable>xpath</replaceable> must be an expression that returns
+     a single value of the same type as <replaceable>typexample</replaceable>.
+     When <replaceable>typexample</replaceable> is of type <type>regtype</type>
+     the function does not return the result value of the expression evaluation,
+     but the type of the result value.
+    </para>
+
+   <para>
+     The first argument serves as an example of the output required by this function.
+     It can be any constant or variable of type <type>boolean</type>,
+     <type>text</type> or <type>float8</type> corresponding to the basic value
+     type used in libxml2. Or it can be a <type>regtype</type> to retrieve type
+     information of the expression evaluation result.
+   </para>
+
+   <para>
+     The second argument must be a XPath expression with a result value
+     corresponding to the example offered.
+     An error is genereated if <replaceable>typexample</replaceable> and
+     the result of <replaceable>xpath</replaceable> do not match in type.
+   </para>
+
+   <para>
+     The third argument must be a well formed XML document with a single
+     root node element.
+   </para>
+
+    <para>
+     The fourth argument of the function is an (optional) array of namespace
+     mappings.
+    </para>
+
+    <para>
+     The following table shows some valid input/output combinations:
+
+     <informaltable>
+      <tgroup cols="5">
+       <thead>
+        <row>
+         <entry><replaceable>typexample</replaceable></entry>
+         <entry><replaceable>xpath</replaceable></entry>
+         <entry><replaceable>xml</replaceable></entry>
+         <entry>result value</entry>
+         <entry>result type</entry>
+        </row>
+       </thead>
+
+       <tbody>
+        <row>
+         <entry>TRUE::boolean</entry>
+         <entry>'1=1'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>TRUE</entry>
+         <entry><type>boolean</type></entry>
+        </row>
+        <row>
+         <entry>1::float8</entry>
+         <entry>'1 div 3'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>0.333333333333333</entry>
+         <entry><type>float8</type></entry>
+        </row>
+        <row>
+         <entry>'a'::text</entry>
+         <entry>'name()'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>foo</entry>
+         <entry><type>text</type></entry>
+        </row>
+        <row>
+         <entry>'text'::regtype</entry>
+         <entry>'1=1'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>boolean</entry>
+         <entry><type>regtype</type></entry>
+        </row>
+       </tbody>
+      </tgroup>
+     </informaltable>
+    </para>
+
+    <para>
+    An error is generated if if the types of <replaceable>typexample</replaceable> and the expression result do not
match.Some examples are shown in the following table: 
+
+     <informaltable>
+      <tgroup cols="4">
+       <thead>
+        <row>
+         <entry><replaceable>typexample</replaceable></entry>
+         <entry><replaceable>xpath</replaceable></entry>
+         <entry><replaceable>xml</replaceable></entry>
+         <entry>fail reason</entry>
+        </row>
+       </thead>
+       <tbody>
+        <row>
+         <entry>'a'::text</entry>
+         <entry>'/'</entry>
+         <entry><![CDATA['<foo/>']]></entry>
+         <entry>expression evaluates to nodeset, not string</entry>
+        </row>
+        <row>
+         <entry>'a'::text</entry>
+         <entry>'/@width'</entry>
+         <entry><![CDATA['<foo width="20"/>']]></entry>
+         <entry>expression evaluates to nodeset, albeit with a single node</entry>
+        </row>
+        <row>
+         <entry>1::float8</entry>
+         <entry>'number(@height)'</entry>
+         <entry><![CDATA['<foo width="20"/>']]></entry>
+         <entry>returns NaN because argument for number() is empty nodeset</entry>
+        </row>
+       </tbody>
+      </tgroup>
+     </informaltable>
+
+    </para>
+
+    </sect3>
    </sect2>

    <sect2 id="functions-xml-mapping">
*** src/include/catalog/pg_proc.h.org    Thu Jan 21 23:12:07 2010
--- src/include/catalog/pg_proc.h    Fri Jan 29 22:50:38 2010
***************
*** 4305,4314 ****
  DESCR("map database contents and structure to XML and XML Schema");

  DATA(insert OID = 2931 (  xpath         PGNSP PGUID 12 1 0 0 f f f t f i 3 0 143 "25 142 1009" _null_ _null_ _null_
_null_xpath _null_ _null_ _null_ )); 
! DESCR("evaluate XPath expression, with namespaces support");
  DATA(insert OID = 2932 (  xpath         PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_
"selectpg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); 
! DESCR("evaluate XPath expression");

  /* uuid */
  DATA(insert OID = 2952 (  uuid_in           PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_
_null_uuid_in _null_ _null_ _null_ )); 
  DESCR("I/O");
--- 4305,4323 ----
  DESCR("map database contents and structure to XML and XML Schema");

  DATA(insert OID = 2931 (  xpath         PGNSP PGUID 12 1 0 0 f f f t f i 3 0 143 "25 142 1009" _null_ _null_ _null_
_null_xpath _null_ _null_ _null_ )); 
! DESCR("evaluate XPath nodeset expression, with namespaces support");
  DATA(insert OID = 2932 (  xpath         PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_
"selectpg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); 
! DESCR("evaluate XPath nodeset expression");

+ DATA(insert OID = 2995 (  xpath_value_text         PGNSP PGUID 12 1 0 0 f f f t f i 3 0 25 "25 142 1009" _null_
_null__null_ _null_ xpath_value_text _null_ _null_ _null_ )); 
+ DESCR("evaluate XPath value expression, with namespaces support");
+ DATA(insert OID = 2996 (  xpath_value_text         PGNSP PGUID 14 1 0 0 f f f t f i 2 0 25 "25 142" _null_ _null_
_null__null_ "select pg_catalog.xpath_value_text($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); 
+ DESCR("evaluate XPath value expression");
+ DATA(insert OID = 2997 (  xpath_value_strict     PGNSP PGUID 12 1 0 0 f f f t f i 4 0 2283 "2283 25 142 1009" _null_
_null__null_ _null_ xpath_value_strict _null_ _null_ _null_ )); 
+ DESCR("evaluate XPath value expression by type example, with namespaces support");
+ DATA(insert OID = 2998 (  xpath_value_strict         PGNSP PGUID 14 1 0 0 f f f t f i 3 0 2283 "2283 25 142" _null_
_null__null_ _null_ "select pg_catalog.xpath_value_strict($1, $2, $3, ''{}''::pg_catalog.text[])" _null_ _null_ _null_
));
+ DESCR("evaluate XPath value expression by type example");
+
  /* uuid */
  DATA(insert OID = 2952 (  uuid_in           PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_
_null_uuid_in _null_ _null_ _null_ )); 
  DESCR("I/O");
*** src/include/utils/xml.h.org    Sat Jan 16 23:57:35 2010
--- src/include/utils/xml.h    Fri Jan 29 00:11:52 2010
***************
*** 37,42 ****
--- 37,44 ----
  extern Datum xmltotext(PG_FUNCTION_ARGS);
  extern Datum xmlvalidate(PG_FUNCTION_ARGS);
  extern Datum xpath(PG_FUNCTION_ARGS);
+ extern Datum xpath_value_text(PG_FUNCTION_ARGS);
+ extern Datum xpath_value_strict(PG_FUNCTION_ARGS);

  extern Datum table_to_xml(PG_FUNCTION_ARGS);
  extern Datum query_to_xml(PG_FUNCTION_ARGS);
*** src/backend/utils/fmgrtab.c.org    Tue Jan  5 21:52:42 2010
--- src/backend/utils/fmgrtab.c    Fri Jan 29 22:51:07 2010
***************
*** 1686,1691 ****
--- 1686,1693 ----
  extern Datum record_le (PG_FUNCTION_ARGS);
  extern Datum record_ge (PG_FUNCTION_ARGS);
  extern Datum btrecordcmp (PG_FUNCTION_ARGS);
+ extern Datum xpath_value_text (PG_FUNCTION_ARGS);
+ extern Datum xpath_value_strict (PG_FUNCTION_ARGS);
  extern Datum has_foreign_data_wrapper_privilege_name_name (PG_FUNCTION_ARGS);
  extern Datum has_foreign_data_wrapper_privilege_name_id (PG_FUNCTION_ARGS);
  extern Datum has_foreign_data_wrapper_privilege_id_name (PG_FUNCTION_ARGS);
***************
*** 3752,3757 ****
--- 3754,3761 ----
    { 2985, "record_le", 2, true, false, record_le },
    { 2986, "record_ge", 2, true, false, record_ge },
    { 2987, "btrecordcmp", 2, true, false, btrecordcmp },
+   { 2995, "xpath_value_text", 3, true, false, xpath_value_text },
+   { 2997, "xpath_value_strict", 4, true, false, xpath_value_strict },
    { 3000, "has_foreign_data_wrapper_privilege_name_name", 3, true, false,
has_foreign_data_wrapper_privilege_name_name}, 
    { 3001, "has_foreign_data_wrapper_privilege_name_id", 3, true, false, has_foreign_data_wrapper_privilege_name_id },
    { 3002, "has_foreign_data_wrapper_privilege_id_name", 3, true, false, has_foreign_data_wrapper_privilege_id_name },
*** src/test/regress/sql/xml.sql.org    Tue Jan 26 21:46:48 2010
--- src/test/regress/sql/xml.sql    Tue Feb  2 12:50:55 2010
***************
*** 163,165 ****
--- 163,185 ----
  SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:pieceid="2" /></local:data>'); 
  SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:pieceid="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); 
  SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
+
+ -- Test XPath expressions for non-nodeset results
+ -- These test only the interface, not the abundant possibilities in xpath
+
+ -- Test expression output for basic types as text
+ SELECT xpath_value_text('count(/a)','<a/>');
+ SELECT xpath_value_text('1=0','<a/>');
+ SELECT xpath_value_text('name()','<a/>');
+
+ -- Test resulting type for basic expressions
+ SELECT pg_typeof(xpath_value_strict(TRUE::boolean,'1=1','<a/>'));
+ SELECT pg_typeof(xpath_value_strict(1::float8,'count(/)','<a/>'));
+ SELECT pg_typeof(xpath_value_strict('a'::text,'name()','<a/>'));
+
+ -- Test obtaining the result type of an expression
+ SELECT xpath_value_strict('anyelement'::regtype,'1=1','<a/>');
+ SELECT xpath_value_strict('anyelement'::regtype,'count(/)','<a/>');
+ SELECT xpath_value_strict('anyelement'::regtype,'name()','<a/>');
+ SELECT xpath_value_strict('a'::text,'text()',data) FROM xmltest;
+ SELECT xpath_value_strict('a'::text,'string()',data) FROM xmltest;

pgsql-hackers by date:

Previous
From: Arie Bikker
Date:
Subject: xpath improvement V2
Next
From: Tom Lane
Date:
Subject: WITH ... VALUES