Thread: plperl better array support

plperl better array support

From
Andrew Dunstan
Date:
The attached patch (submitted for comment) is somewhat adapted from one
submitted last October. This allows returning a perl array where a
postgres array is expected.

example:

andrew=# create function blurfl() returns text[] language plperl as $$
andrew$# return ['a','b','c','a"b\c'];
andrew$# $$;
CREATE FUNCTION
andrew=# select blurfl();
      blurfl
-------------------
 {a,b,c,"a\"b\\c"}


Unlike the patch from October, this patch does not implement ANYARRAY or
ANYELEMENT pseudotypes. However it does escape/quote array elements
where necessary. It also preserves the old behaviour (if the plperl
function returns a string it is just passed through).

I'm not happy about constructing a string which we then parse out again
into an array - that strikes me as quite inefficient. (And there are
other inelegancies that I'd like to get rid of.) Much better would be to
use some array contruction calls directly - any pointers on how to do
that would be apprciated :-)

cheers

andrew


Index: plperl.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/pl/plperl/plperl.c,v
retrieving revision 1.76
diff -c -r1.76 plperl.c
*** plperl.c    5 Jun 2005 03:16:35 -0000    1.76
--- plperl.c    20 Jun 2005 08:54:15 -0000
***************
*** 80,85 ****
--- 80,86 ----
      bool        lanpltrusted;
      bool        fn_retistuple;    /* true, if function returns tuple */
      bool        fn_retisset;    /* true, if function returns set */
+     bool        fn_retisarray;  /* true if function returns array */
      Oid            result_oid;        /* Oid of result type */
      FmgrInfo    result_in_func;    /* I/O function and arg for result type */
      Oid            result_typioparam;
***************
*** 323,328 ****
--- 324,408 ----
      return tup;
  }

+ /* substitute(string, pattern)
+  *
+  * Used for =~ operations that modify their left-hand side (s/// and tr///)
+  *
+  * Returns the number of successful matches, and
+  * modifies the input string if there were any.
+  *
+  * (almost) straight from perlembed man page.
+  */
+
+ static I32
+ plperl_substitute(SV **string, char *pattern)
+ {
+     SV *command = NEWSV(1099, 0);
+     I32 retval;
+     STRLEN n_a;
+
+     sv_setpvf(command, "$_plp_string = '%s'; ($_plp_string =~ %s)",
+               SvPV(*string,n_a), pattern);
+
+     retval = eval_sv(command, TRUE);
+
+     *string = get_sv("_plp_string", FALSE);
+     return retval;
+ }
+
+ /*
+  * convert perl array to postgres string representation
+  */
+ static SV*
+ plperl_convert_to_pg_array(SV *src)
+ {
+     SV* rv;
+     SV**    val;
+     AV* internal;
+     int len,
+         i;
+
+     internal=(AV*)SvRV(src);
+     len = av_len(internal)+1;
+
+     rv = newSVpv("{ ",0);
+     for(i=0; i<len; i++)
+     {
+         val = av_fetch(internal, i, FALSE);
+         if (SvTYPE(*val)==SVt_RV)
+         {
+             /*
+              * If there's a reference type val, call this func
+              * recursively to handle a nested array, and error out on any
+              * other reference type.
+              */
+
+             if (SvTYPE(SvRV(*val))==SVt_PVAV)
+                 sv_catpvf(rv, "%s",
+                           SvPV(plperl_convert_to_pg_array(*val),PL_na) );
+             else
+                 ereport(ERROR,
+                         (errcode(ERRCODE_DATATYPE_MISMATCH),
+                          errmsg("returned array contains non-array ref")));
+         }
+         else
+         {
+             /* non-reference case - append the stringified value */
+             SV * copyval;
+
+             copyval= newSVpv(SvPV(*val,PL_na),0);
+             plperl_substitute(©val,"s/([\"\\\\])/\\\\$1/g");
+             sv_catpvf(rv, "\"%s\"", SvPV(copyval,PL_na));
+         }
+
+         if (i != len-1) sv_catpvf(rv, ",");
+     }
+
+     sv_catpvf(rv, "}");
+
+     return rv;
+ }
+

  /* Set up the arguments for a trigger call. */

***************
*** 817,823 ****

      rsi = (ReturnSetInfo *)fcinfo->resultinfo;

!     if (prodesc->fn_retisset) {
          if (!rsi || !IsA(rsi, ReturnSetInfo) ||
              (rsi->allowedModes & SFRM_Materialize) == 0 ||
              rsi->expectedDesc == NULL)
--- 897,904 ----

      rsi = (ReturnSetInfo *)fcinfo->resultinfo;

!     if (prodesc->fn_retisset)
!     {
          if (!rsi || !IsA(rsi, ReturnSetInfo) ||
              (rsi->allowedModes & SFRM_Materialize) == 0 ||
              rsi->expectedDesc == NULL)
***************
*** 838,844 ****
              int i = 0;
              SV **svp = 0;
              AV *rav = (AV *)SvRV(perlret);
!             while ((svp = av_fetch(rav, i, FALSE)) != NULL) {
                  plperl_return_next(*svp);
                  i++;
              }
--- 919,926 ----
              int i = 0;
              SV **svp = 0;
              AV *rav = (AV *)SvRV(perlret);
!             while ((svp = av_fetch(rav, i, FALSE)) != NULL)
!             {
                  plperl_return_next(*svp);
                  i++;
              }
***************
*** 852,858 ****
          }

          rsi->returnMode = SFRM_Materialize;
!         if (prodesc->tuple_store) {
              rsi->setResult = prodesc->tuple_store;
              rsi->setDesc = prodesc->tuple_desc;
          }
--- 934,941 ----
          }

          rsi->returnMode = SFRM_Materialize;
!         if (prodesc->tuple_store)
!         {
              rsi->setResult = prodesc->tuple_store;
              rsi->setDesc = prodesc->tuple_desc;
          }
***************
*** 897,904 ****
      }
      else
      {
!         /* Return a perl string converted to a Datum */
!         char *val = SvPV(perlret, PL_na);
          retval = FunctionCall3(&prodesc->result_in_func,
                                 CStringGetDatum(val),
                                 ObjectIdGetDatum(prodesc->result_typioparam),
--- 980,999 ----
      }
      else
      {
!         /* Return a perl string converted to a Datum */
!         char *val;
!         SV* array_ret;
!
!
!         if (prodesc->fn_retisarray && SvTYPE(SvRV(perlret)) == SVt_PVAV)
!         {
!             array_ret = plperl_convert_to_pg_array(perlret);
!             SvREFCNT_dec(perlret);
!             perlret = array_ret;
!         }
!
!         val = SvPV(perlret, PL_na);
!
          retval = FunctionCall3(&prodesc->result_in_func,
                                 CStringGetDatum(val),
                                 ObjectIdGetDatum(prodesc->result_typioparam),
***************
*** 1156,1161 ****
--- 1251,1259 ----
              prodesc->fn_retistuple = (typeStruct->typtype == 'c' ||
                                        procStruct->prorettype == RECORDOID);

+             prodesc->fn_retisarray =
+                 (typeStruct->typlen == -1 && typeStruct->typelem) ;
+
              perm_fmgr_info(typeStruct->typinput, &(prodesc->result_in_func));
              prodesc->result_typioparam = getTypeIOParam(typeTup);


Re: plperl better array support

From
David Fetter
Date:
On Mon, Jun 20, 2005 at 05:06:32AM -0400, Andrew Dunstan wrote:
>
> The attached patch (submitted for comment) is somewhat adapted from one
> submitted last October. This allows returning a perl array where a
> postgres array is expected.
>
> example:
>
> andrew=# create function blurfl() returns text[] language plperl as $$
> andrew$# return ['a','b','c','a"b\c'];
> andrew$# $$;
> CREATE FUNCTION
> andrew=# select blurfl();
>      blurfl
> -------------------
> {a,b,c,"a\"b\\c"}
>
>
> Unlike the patch from October, this patch does not implement ANYARRAY or
> ANYELEMENT pseudotypes. However it does escape/quote array elements
> where necessary. It also preserves the old behaviour (if the plperl
> function returns a string it is just passed through).
>
> I'm not happy about constructing a string which we then parse out again
> into an array - that strikes me as quite inefficient. (And there are
> other inelegancies that I'd like to get rid of.) Much better would be to
> use some array contruction calls directly - any pointers on how to do
> that would be apprciated :-)

Here's some pointers I'm qualified to point out, for what that's worth. ;)

In src/backend/utils/adt/arrayfuncs.c there are direct
array-manipulation functions.  In the perlapi docs for perl, there are
ways to manipulate arrays directly.  It should be possible to go
between perl arrays and postgresql arrays this way, except...perl
arrays can contain undef, which usually translates as NULL.  Would it
be easier to add NULL support to postgresql arrays than to handle
undefs in perl arrays automagically?

Cheers,
D
--
David Fetter david@fetter.org http://fetter.org/
phone: +1 510 893 6100   mobile: +1 415 235 3778

Remember to vote!

Re: plperl better array support

From
Andrew Dunstan
Date:

David Fetter wrote:

>In src/backend/utils/adt/arrayfuncs.c there are direct
>array-manipulation functions.
>

And in other places - I had already found that stuff :-) . But a worked
example would help. If it's not available I'll muddle through some time.


> In the perlapi docs for perl, there are
>ways to manipulate arrays directly.
>

The patch is already using them. In fact, you can't delve even mildly
into plperl.c without having the perlapi docs close at hand.

> It should be possible to go
>between perl arrays and postgresql arrays this way, except...perl
>arrays can contain undef, which usually translates as NULL.  Would it
>be easier to add NULL support to postgresql arrays than to handle
>undefs in perl arrays automagically?
>
>
>
>

We should have NULL support in arrays. In the absence of that I'm not
particularly inclined to make special provsion for undef in an array.

cheers

andrew

Re: plperl better array support

From
Andrew Dunstan
Date:

I wrote:

>
> The attached patch (submitted for comment) is somewhat adapted from
> one submitted last October. This allows returning a perl array where a
> postgres array is expected.
>
> example:
>
> andrew=# create function blurfl() returns text[] language plperl as $$
> andrew$# return ['a','b','c','a"b\c'];
> andrew$# $$;
> CREATE FUNCTION
> andrew=# select blurfl();
>      blurfl      -------------------
> {a,b,c,"a\"b\\c"}
>
>
> Unlike the patch from October, this patch does not implement ANYARRAY
> or ANYELEMENT pseudotypes. However it does escape/quote array elements
> where necessary. It also preserves the old behaviour (if the plperl
> function returns a string it is just passed through).
>
> I'm not happy about constructing a string which we then parse out
> again into an array - that strikes me as quite inefficient. (And there
> are other inelegancies that I'd like to get rid of.) Much better would
> be to use some array contruction calls directly - any pointers on how
> to do that would be apprciated :-)
>
[snip]

I have been wondering whether a better approach might be to provide a
builtin plperl function that would construct the array string for us.
The downside is that the plperl programmer would have to be aware of it.
The upside would be that it would probably be faster (no massive calling
from C->perl) and it would take better account of things like binary
nulls, which we might otherwise barf on. They aren't mutually exclusive
- we could implement what I originally intended later on if we go with
this approach.

thoughts?

cheers

andrew