[PATCH] Re: Adding XMLEXISTS to the grammar - Mailing list pgsql-hackers

From Mike Fowler
Subject [PATCH] Re: Adding XMLEXISTS to the grammar
Date
Msg-id 4C27769F.80408@mlfowler.com
Whole thread Raw
In response to Re: Adding XMLEXISTS to the grammar  (Robert Haas <robertmhaas@gmail.com>)
Responses Re: [PATCH] Re: Adding XMLEXISTS to the grammar
Re: [PATCH] Re: Adding XMLEXISTS to the grammar
List pgsql-hackers
>> and finally in pg_proc.h I have:
>>
>> DATA(insert OID = 3037 (  xmlexists     PGNSP PGUID 12 1 0 0 f f f t f i 3 0
>> 16 "25 142" _null_ _null_ _null_ _null_ xml_exists _null_ _null_ _null_ ));
>> DESCR("evaluate XPath expression in a boolean context");
>>
> It looks like the pg_proc entry is creating an SQL function called
> xmlexists referencing a C function called xml_exists, and the gram.y
> changes want there to be an SQL function called xml_exists.  I think
> you should rip out all the catalog and parser changes for starters,
> and just try to get it working as a regular old function.  Once you
> have that working, you can add the syntax support back in.  I'd
> suggest making the C and SQL function names the same as each other,
> but different from the keyword you're planning to use (xmlexists).
>
Thanks again for your help Robert, turns out the fault was in the
pg_proc entry (the 3 up there should've been a two!). Once I took the
grammar out it was quickly obvious where I'd gone wrong.

Attached is a patch with the revised XMLEXISTS function, complete with
grammar support and regression tests. The implemented grammar is:

XMLEXISTS ( xpath_expression PASSING BY REF xml_value [BY REF] )

Though the full grammar makes everything after the xpath_expression
optional, I've left it has mandatory simply to avoid lots of rework of
the function (would need new null checks, memory handling would need
reworking).

--
Mike Fowler
Registered Linux user: 379787

*** a/src/backend/parser/gram.y
--- b/src/backend/parser/gram.y
***************
*** 423,431 **** static TypeName *TableFuncTypeName(List *columns);
  %type <list>    opt_check_option

  %type <target>    xml_attribute_el
! %type <list>    xml_attribute_list xml_attributes
  %type <node>    xml_root_version opt_xml_root_standalone
! %type <ival>    document_or_content
  %type <boolean> xml_whitespace_option

  %type <node>     common_table_expr
--- 423,432 ----
  %type <list>    opt_check_option

  %type <target>    xml_attribute_el
! %type <list>    xml_attribute_list xml_attributes xmlexists_list
  %type <node>    xml_root_version opt_xml_root_standalone
! %type <node>    xmlexists_query_argument_list xml_default_passing_mechanism xml_passing_mechanism
! %type <ival>    document_or_content
  %type <boolean> xml_whitespace_option

  %type <node>     common_table_expr
***************
*** 511,523 **** static TypeName *TableFuncTypeName(List *columns);
      OBJECT_P OF OFF OFFSET OIDS ON ONLY OPERATOR OPTION OPTIONS OR
      ORDER OUT_P OUTER_P OVER OVERLAPS OVERLAY OWNED OWNER

!     PARSER PARTIAL PARTITION PASSWORD PLACING PLANS POSITION
      PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY
      PRIOR PRIVILEGES PROCEDURAL PROCEDURE

      QUOTE

!     RANGE READ REAL REASSIGN RECHECK RECURSIVE REFERENCES REINDEX
      RELATIVE_P RELEASE RENAME REPEATABLE REPLACE REPLICA RESET RESTART
      RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROW ROWS RULE

--- 512,524 ----
      OBJECT_P OF OFF OFFSET OIDS ON ONLY OPERATOR OPTION OPTIONS OR
      ORDER OUT_P OUTER_P OVER OVERLAPS OVERLAY OWNED OWNER

!     PARSER PARTIAL PARTITION PASSING PASSWORD PLACING PLANS POSITION
      PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY
      PRIOR PRIVILEGES PROCEDURAL PROCEDURE

      QUOTE

!     RANGE READ REAL REASSIGN RECHECK RECURSIVE REF REFERENCES REINDEX
      RELATIVE_P RELEASE RENAME REPEATABLE REPLACE REPLICA RESET RESTART
      RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROW ROWS RULE

***************
*** 539,545 **** static TypeName *TableFuncTypeName(List *columns);

      WHEN WHERE WHITESPACE_P WINDOW WITH WITHOUT WORK WRAPPER WRITE

!     XML_P XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLFOREST XMLPARSE
      XMLPI XMLROOT XMLSERIALIZE

      YEAR_P YES_P
--- 540,546 ----

      WHEN WHERE WHITESPACE_P WINDOW WITH WITHOUT WORK WRAPPER WRITE

!     XML_P XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLEXISTS XMLFOREST XMLPARSE
      XMLPI XMLROOT XMLSERIALIZE

      YEAR_P YES_P
***************
*** 9806,9811 **** func_expr:    func_name '(' ')' over_clause
--- 9807,9828 ----
                  {
                      $$ = makeXmlExpr(IS_XMLELEMENT, $4, $6, $8, @1);
                  }
+             | XMLEXISTS '(' xmlexists_list ')'
+                 {
+                     /* xmlexists(A [PASSING BY REF B [BY REF]]) is converted to
+                      * xmlexists(A, B)*/
+
+                     FuncCall *n = makeNode(FuncCall);
+                     n->funcname = SystemFuncName("xmlexists");
+                     n->args = $3;
+                     n->agg_order = NIL;
+                     n->agg_star = FALSE;
+                     n->agg_distinct = FALSE;
+                     n->func_variadic = FALSE;
+                     n->over = NULL;
+                     n->location = @1;
+                     $$ = (Node *)n;
+                 }
              | XMLFOREST '(' xml_attribute_list ')'
                  {
                      $$ = makeXmlExpr(IS_XMLFOREST, NULL, $3, NIL, @1);
***************
*** 9896,9901 **** xml_whitespace_option: PRESERVE WHITESPACE_P        { $$ = TRUE; }
--- 9913,9946 ----
              | /*EMPTY*/                                { $$ = FALSE; }
          ;

+ xmlexists_list:
+             AexprConst xmlexists_query_argument_list
+                 {
+                     $$ = list_make2(makeTypeCast($1,SystemTypeName("text"), -1), $2);
+                 }
+         ;
+
+ xmlexists_query_argument_list:
+             xml_default_passing_mechanism c_expr
+                 {
+                     $$ = $2;
+                 }
+             | xml_default_passing_mechanism c_expr xml_passing_mechanism
+                 {
+                     $$ = $2;
+                 }
+         ;
+
+ xml_default_passing_mechanism:
+             PASSING BY REF
+                 { $$ = NULL; }
+         ;
+
+ xml_passing_mechanism:
+             BY REF
+                 { $$ = NULL; }
+         ;
+
  /*
   * Window Definitions
   */
***************
*** 10966,10971 **** unreserved_keyword:
--- 11011,11017 ----
              | PARSER
              | PARTIAL
              | PARTITION
+             | PASSING
              | PASSWORD
              | PLANS
              | PRECEDING
***************
*** 10982,10987 **** unreserved_keyword:
--- 11028,11034 ----
              | REASSIGN
              | RECHECK
              | RECURSIVE
+             | REF
              | REINDEX
              | RELATIVE_P
              | RELEASE
***************
*** 11115,11120 **** col_name_keyword:
--- 11162,11168 ----
              | XMLATTRIBUTES
              | XMLCONCAT
              | XMLELEMENT
+             | XMLEXISTS
              | XMLFOREST
              | XMLPARSE
              | XMLPI
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 3495,3497 **** xpath(PG_FUNCTION_ARGS)
--- 3495,3611 ----
      return 0;
  #endif
  }
+
+ /*
+  * Determines if the node specified by the supplied XPath exists
+  * in a given XML document, returning a boolean.
+  *
+  * It is up to the user to ensure that the XML passed is in fact
+  * an XML document - XPath doesn't work easily on fragments without
+  * a context node being known.
+  */
+ Datum xml_exists(PG_FUNCTION_ARGS)
+ {
+ #ifdef USE_LIBXML
+     text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
+     xmltype    *data = PG_GETARG_XML_P(1);
+     xmlParserCtxtPtr ctxt = NULL;
+     xmlDocPtr    doc = NULL;
+     xmlXPathContextPtr xpathctx = NULL;
+     xmlXPathCompExprPtr xpathcomp = NULL;
+     xmlXPathObjectPtr xpathobj = NULL;
+     char       *datastr;
+     int32        len;
+     int32        xpath_len;
+     xmlChar    *string;
+     xmlChar    *xpath_expr;
+     int            result;
+
+     datastr = VARDATA(data);
+     len = VARSIZE(data) - VARHDRSZ;
+     xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
+     if (xpath_len == 0)
+         ereport(ERROR,
+                 (errcode(ERRCODE_DATA_EXCEPTION),
+                  errmsg("empty XPath expression")));
+
+     string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
+     memcpy(string, datastr, len);
+     string[len] = '\0';
+
+     xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar));
+     memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
+     xpath_expr[xpath_len] = '\0';
+
+     pg_xml_init();
+     xmlInitParser();
+
+     PG_TRY();
+     {
+         /*
+          * redundant XML parsing (two parsings for the same value during one
+          * command execution are possible)
+          */
+         ctxt = xmlNewParserCtxt();
+         if (ctxt == NULL)
+             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+                         "could not allocate parser context");
+         doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+         if (doc == NULL)
+             xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+                         "could not parse XML document");
+         xpathctx = xmlXPathNewContext(doc);
+         if (xpathctx == NULL)
+             xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+                         "could not allocate XPath context");
+         xpathctx->node = xmlDocGetRootElement(doc);
+         if (xpathctx->node == NULL)
+             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                         "could not find root XML element");
+
+         xpathcomp = xmlXPathCompile(xpath_expr);
+         if (xpathcomp == NULL)    /* TODO: show proper XPath error details */
+             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                         "invalid XPath expression");
+
+         /* Version 2.6.27 introduces a function named xmlXPathCompiledEvalToBoolean
+          * however we can derive the existence by whether any nodes are returned
+          * thereby preventing a library version upgrade */
+         xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
+         if (xpathobj == NULL)    /* TODO: reason? */
+             xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                         "could not create XPath object");
+
+         if (xpathobj->nodesetval == NULL)
+             result = 0;
+         else
+             result = xpathobj->nodesetval->nodeNr;
+     }
+     PG_CATCH();
+     {
+         if (xpathobj)
+             xmlXPathFreeObject(xpathobj);
+         if (xpathcomp)
+             xmlXPathFreeCompExpr(xpathcomp);
+         if (xpathctx)
+             xmlXPathFreeContext(xpathctx);
+         if (doc)
+             xmlFreeDoc(doc);
+         if (ctxt)
+             xmlFreeParserCtxt(ctxt);
+         PG_RE_THROW();
+     }
+     PG_END_TRY();
+
+     xmlXPathFreeObject(xpathobj);
+     xmlXPathFreeCompExpr(xpathcomp);
+     xmlXPathFreeContext(xpathctx);
+     xmlFreeDoc(doc);
+     xmlFreeParserCtxt(ctxt);
+
+     return result;
+ #else
+     NO_XML_SUPPORT();
+     return 0;
+ #endif
+ }
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 4385,4390 **** DESCR("evaluate XPath expression, with namespaces support");
--- 4385,4393 ----
  DATA(insert OID = 2932 (  xpath         PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_
"selectpg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); 
  DESCR("evaluate XPath expression");

+ DATA(insert OID = 3037 (  xmlexists     PGNSP PGUID 12 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_
xml_exists_null_ _null_ _null_ )); 
+ DESCR("evaluate XPath expression in a boolean context");
+
  /* uuid */
  DATA(insert OID = 2952 (  uuid_in           PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_
_null_uuid_in _null_ _null_ _null_ )); 
  DESCR("I/O");
*** a/src/include/parser/kwlist.h
--- b/src/include/parser/kwlist.h
***************
*** 280,285 **** PG_KEYWORD("owner", OWNER, UNRESERVED_KEYWORD)
--- 280,286 ----
  PG_KEYWORD("parser", PARSER, UNRESERVED_KEYWORD)
  PG_KEYWORD("partial", PARTIAL, UNRESERVED_KEYWORD)
  PG_KEYWORD("partition", PARTITION, UNRESERVED_KEYWORD)
+ PG_KEYWORD("passing", PASSING, UNRESERVED_KEYWORD)
  PG_KEYWORD("password", PASSWORD, UNRESERVED_KEYWORD)
  PG_KEYWORD("placing", PLACING, RESERVED_KEYWORD)
  PG_KEYWORD("plans", PLANS, UNRESERVED_KEYWORD)
***************
*** 301,306 **** PG_KEYWORD("real", REAL, COL_NAME_KEYWORD)
--- 302,308 ----
  PG_KEYWORD("reassign", REASSIGN, UNRESERVED_KEYWORD)
  PG_KEYWORD("recheck", RECHECK, UNRESERVED_KEYWORD)
  PG_KEYWORD("recursive", RECURSIVE, UNRESERVED_KEYWORD)
+ PG_KEYWORD("ref", REF, UNRESERVED_KEYWORD)
  PG_KEYWORD("references", REFERENCES, RESERVED_KEYWORD)
  PG_KEYWORD("reindex", REINDEX, UNRESERVED_KEYWORD)
  PG_KEYWORD("relative", RELATIVE_P, UNRESERVED_KEYWORD)
***************
*** 413,418 **** PG_KEYWORD("xml", XML_P, UNRESERVED_KEYWORD)
--- 415,421 ----
  PG_KEYWORD("xmlattributes", XMLATTRIBUTES, COL_NAME_KEYWORD)
  PG_KEYWORD("xmlconcat", XMLCONCAT, COL_NAME_KEYWORD)
  PG_KEYWORD("xmlelement", XMLELEMENT, COL_NAME_KEYWORD)
+ PG_KEYWORD("xmlexists", XMLEXISTS, COL_NAME_KEYWORD)
  PG_KEYWORD("xmlforest", XMLFOREST, COL_NAME_KEYWORD)
  PG_KEYWORD("xmlparse", XMLPARSE, COL_NAME_KEYWORD)
  PG_KEYWORD("xmlpi", XMLPI, COL_NAME_KEYWORD)
*** a/src/include/utils/xml.h
--- b/src/include/utils/xml.h
***************
*** 37,42 **** extern Datum texttoxml(PG_FUNCTION_ARGS);
--- 37,43 ----
  extern Datum xmltotext(PG_FUNCTION_ARGS);
  extern Datum xmlvalidate(PG_FUNCTION_ARGS);
  extern Datum xpath(PG_FUNCTION_ARGS);
+ extern Datum xml_exists(PG_FUNCTION_ARGS);

  extern Datum table_to_xml(PG_FUNCTION_ARGS);
  extern Datum query_to_xml(PG_FUNCTION_ARGS);
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 502,504 **** SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
--- 502,543 ----
   {<b>two</b>,<b>etc</b>}
  (1 row)

+ -- Test xmlexists evaluation
+ INSERT INTO xmltest VALUES (4,
'<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (5,
'<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (6, '<myns:menu
xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ INSERT INTO xmltest VALUES (7, '<myns:menu
xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer' PASSING BY REF data);
+  count
+ -------
+      0
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer' PASSING BY REF data BY REF);
+  count
+ -------
+      0
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer');
+ ERROR:  syntax error at or near ")"
+ LINE 1: SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer');
+                                                                   ^
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers' PASSING BY REF data);
+  count
+ -------
+      2
+ (1 row)
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers/name[text() = ''Molson'']' PASSING BY REF data);
+  count
+ -------
+      1
+ (1 row)
+
+ CREATE TABLE query ( expr TEXT );
+ INSERT INTO query VALUES ('/menu/beers/cost[text() = ''lots'']');
+ SELECT COUNT(id) FROM xmltest,query WHERE xmlexists(expr PASSING BY REF data);
+ ERROR:  syntax error at or near "PASSING"
+ LINE 1: ...COUNT(id) FROM xmltest,query WHERE xmlexists(expr PASSING BY...
+                                                              ^
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 163,165 **** SELECT xpath('', '<!-- error -->');
--- 163,182 ----
  SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:pieceid="2" /></local:data>'); 
  SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number
one</local:piece><local:pieceid="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); 
  SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
+
+ -- Test xmlexists evaluation
+ INSERT INTO xmltest VALUES (4,
'<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (5,
'<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml);
+ INSERT INTO xmltest VALUES (6, '<myns:menu
xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+ INSERT INTO xmltest VALUES (7, '<myns:menu
xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml);
+
+
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer' PASSING BY REF data);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer' PASSING BY REF data BY REF);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beer');
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers' PASSING BY REF data);
+ SELECT COUNT(id) FROM xmltest WHERE xmlexists('/menu/beers/name[text() = ''Molson'']' PASSING BY REF data);
+
+ CREATE TABLE query ( expr TEXT );
+ INSERT INTO query VALUES ('/menu/beers/cost[text() = ''lots'']');
+ SELECT COUNT(id) FROM xmltest,query WHERE xmlexists(expr PASSING BY REF data);

pgsql-hackers by date:

Previous
From: Tom Lane
Date:
Subject: Re: Why are these modules built without respecting my LDFLAGS?
Next
From: Mike Fowler
Date:
Subject: [PATCH] Re: Adding xpath_exists function