Thread: Re: [BUGS] casting strings to multidimensional arrays yields strange

Re: [BUGS] casting strings to multidimensional arrays yields strange

From
Joe Conway
Date:
Tom Lane wrote:
> [ cc'ing pghackers in case anyone wants to object ]
> Joe Conway <mail@joeconway.com> writes:
>>I think that even once we support NULL array elements, they should be
>>explicitly requested -- i.e. throwing an error on non-rectangular input
>>is still the right thing to do. I haven't suggested that in the past
>>because of the backward-compatibility issue, but maybe now is the time
>>to bite the bullet.
>
> Okay with me.  Anyone on pghackers not happy?
>
>>If you think this qualifies as a bug fix for 7.5, I can take a look at
>>it next week.
>
> Yeah, we can call it a bug fix.

The attached addresses the above issue as well as the ones mentioned in
my RFC post from yesterday found here:
http://archives.postgresql.org/pgsql-hackers/2004-08/msg00126.php

So whereas you used to get this:
test=# select '{{{1,2},{4,5,6}},{7,8}}'::int[];
  int4
------
  {}
(1 row)

you now get this:
test=# select '{{{1,2},{4,5,6}},{7,8}}'::int[];
ERROR:  multidimensional arrays must have array expressions with
matching dimensions

Negative lower bound indicies now work also, and array_out will always
output explicit dimensions for an array with any dimension having a
lower bound of other than one. This allows the dimensions to be
preserved across pg_dump/reload cycles:

CREATE TABLE foo (
     f1 integer[]
);
COPY foo (f1) FROM stdin;
[1:1][3:4][-2:0]={{{1,2,3},{4,5,6}}}
[-42:-41][12:14]={{1,2,3},{4,5,6}}
\.

test=# select f1, array_lower(f1, 1) as lb1, array_lower(f1, 2) as lb2,
array_lower(f1, 3) as lb3 from foo;
                   f1                  | lb1 | lb2 | lb3
--------------------------------------+-----+-----+-----
  [1:1][3:4][-2:0]={{{1,2,3},{4,5,6}}} |   1 |   3 |  -2
  [-42:-41][12:14]={{1,2,3},{4,5,6}}   | -42 |  12 |
(2 rows)

If there are no objections, I'll adjust the appropriate regression
script/expected files and commit tonight. And of course I'll follow up
with documentation updates too.

Any thoughts on whether or not to apply this to 7.4?

Thanks,

Joe
Index: src/backend/utils/adt/arrayfuncs.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v
retrieving revision 1.105
diff -c -r1.105 arrayfuncs.c
*** src/backend/utils/adt/arrayfuncs.c    16 Jun 2004 01:26:47 -0000    1.105
--- src/backend/utils/adt/arrayfuncs.c    4 Aug 2004 15:37:34 -0000
***************
*** 217,223 ****
                       errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
                              ndim, MAXDIM)));

!         for (q = p; isdigit((unsigned char) *q); q++);
          if (q == p)                /* no digits? */
              ereport(ERROR,
                      (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
--- 217,223 ----
                       errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
                              ndim, MAXDIM)));

!         for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++);
          if (q == p)                /* no digits? */
              ereport(ERROR,
                      (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 229,235 ****
              *q = '\0';
              lBound[ndim] = atoi(p);
              p = q + 1;
!             for (q = p; isdigit((unsigned char) *q); q++);
              if (q == p)            /* no digits? */
                  ereport(ERROR,
                          (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
--- 229,235 ----
              *q = '\0';
              lBound[ndim] = atoi(p);
              p = q + 1;
!             for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++);
              if (q == p)            /* no digits? */
                  ereport(ERROR,
                          (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 270,275 ****
--- 270,278 ----
      }
      else
      {
+         int            ndim_braces,
+                     dim_braces[MAXDIM];
+
          /* If array dimensions are given, expect '=' operator */
          if (strncmp(p, ASSGN, strlen(ASSGN)) != 0)
              ereport(ERROR,
***************
*** 278,283 ****
--- 281,307 ----
          p += strlen(ASSGN);
          while (isspace((unsigned char) *p))
              p++;
+
+         /*
+          * intuit dimensions from brace structure -- it better match what
+          * we were given
+          */
+         if (*p != '{')
+             ereport(ERROR,
+                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                      errmsg("array value must start with \"{\" or dimension information")));
+         ndim_braces = ArrayCount(p, dim_braces, typdelim);
+         if (ndim_braces != ndim)
+             ereport(ERROR,
+                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                      errmsg("array dimensions incompatible with array literal")));
+         for (i = 0; i < ndim; ++i)
+         {
+             if (dim[i] != dim_braces[i])
+                 ereport(ERROR,
+                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("array dimensions incompatible with array literal")));
+         }
      }

  #ifdef ARRAYDEBUG
***************
*** 303,309 ****
          ereport(ERROR,
                  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                   errmsg("missing left brace")));
-
      dataPtr = ReadArrayStr(p, nitems, ndim, dim, &my_extra->proc, typioparam,
                             typmod, typdelim, typlen, typbyval, typalign,
                             &nbytes);
--- 327,332 ----
***************
*** 334,346 ****
      int            nest_level = 0,
                  i;
      int            ndim = 1,
!                 temp[MAXDIM];
      bool        scanning_string = false;
      bool        eoArray = false;
      char       *ptr;

      for (i = 0; i < MAXDIM; ++i)
          temp[i] = dim[i] = 0;

      if (strncmp(str, "{}", 2) == 0)
          return 0;
--- 357,374 ----
      int            nest_level = 0,
                  i;
      int            ndim = 1,
!                 temp[MAXDIM],
!                 nelems[MAXDIM],
!                 nelems_last[MAXDIM];
      bool        scanning_string = false;
      bool        eoArray = false;
      char       *ptr;

      for (i = 0; i < MAXDIM; ++i)
+     {
          temp[i] = dim[i] = 0;
+         nelems_last[i] = nelems[i] = 1;
+     }

      if (strncmp(str, "{}", 2) == 0)
          return 0;
***************
*** 394,399 ****
--- 422,437 ----
                              (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                               errmsg("malformed array literal: \"%s\"", str)));
                          nest_level--;
+
+                         if ((nelems_last[nest_level] != 1) &&
+                             (nelems[nest_level] != nelems_last[nest_level]))
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                  errmsg("multidimensional arrays must have "
+                                         "array expressions with matching "
+                                         "dimensions")));
+                         nelems_last[nest_level] = nelems[nest_level];
+                         nelems[nest_level] = 1;
                          if (nest_level == 0)
                              eoArray = itemdone = true;
                          else
***************
*** 408,414 ****
--- 446,455 ----
                      break;
                  default:
                      if (*ptr == typdelim && !scanning_string)
+                     {
                          itemdone = true;
+                         nelems[nest_level - 1]++;
+                     }
                      break;
              }
              if (!itemdone)
***************
*** 684,699 ****
      char       *p,
                 *tmp,
                 *retval,
!               **values;
!     bool       *needquotes;
      int            nitems,
                  overall_length,
                  i,
                  j,
                  k,
                  indx[MAXDIM];
      int            ndim,
!                *dim;
      ArrayMetaState *my_extra;

      element_type = ARR_ELEMTYPE(v);
--- 725,748 ----
      char       *p,
                 *tmp,
                 *retval,
!               **values,
!                 /*
!                  * each dim: (sign + 10 digits) * 2, semicolon, 2 brackets
!                  * assignment operator + terminator
!                  */
!                 dims_str[(MAXDIM * 25) + 2];
!     bool       *needquotes,
!                 needdims = false;
      int            nitems,
                  overall_length,
+                 dims_str_length = 0,
                  i,
                  j,
                  k,
                  indx[MAXDIM];
      int            ndim,
!                *dim,
!                *lb;
      ArrayMetaState *my_extra;

      element_type = ARR_ELEMTYPE(v);
***************
*** 734,739 ****
--- 783,789 ----

      ndim = ARR_NDIM(v);
      dim = ARR_DIMS(v);
+     lb = ARR_LBOUND(v);
      nitems = ArrayGetNItems(ndim, dim);

      if (nitems == 0)
***************
*** 742,747 ****
--- 792,806 ----
          PG_RETURN_CSTRING(retval);
      }

+     for (i = 0; i < ndim; i++)
+     {
+         if (lb[i] != 1)
+         {
+             needdims = true;
+             break;
+         }
+     }
+
      /*
       * Convert all values to string form, count total space needed
       * (including any overhead such as escaping backslashes), and detect
***************
*** 798,809 ****
       */
      for (i = j = 0, k = 1; i < ndim; k *= dim[i++], j += k);

!     retval = (char *) palloc(overall_length + 2 * j);
      p = retval;

  #define APPENDSTR(str)    (strcpy(p, (str)), p += strlen(p))
  #define APPENDCHAR(ch)    (*p++ = (ch), *p = '\0')

      APPENDCHAR('{');
      for (i = 0; i < ndim; indx[i++] = 0);
      j = 0;
--- 857,896 ----
       */
      for (i = j = 0, k = 1; i < ndim; k *= dim[i++], j += k);

!     if (needdims)
!     {
!         char   *ptr = dims_str;
!
!         for (i = 0; i < ndim; i++)
!         {
!             char    buf_lb[12];    /* sign, 10 digits, '\0' */
!             char    buf_ub[12];    /* sign, 10 digits, '\0' */
!
!             *ptr++ = '[';
!
!             sprintf(buf_lb, "%d", lb[i]);
!             strcpy(ptr, buf_lb), ptr += strlen(buf_lb);
!
!             *ptr++ = ':';
!
!             sprintf(buf_ub, "%d", lb[i] + dim[i] - 1);
!             strcpy(ptr, buf_ub), ptr += strlen(buf_ub);
!
!             *ptr++ = ']';
!         }
!         *ptr++ = *ASSGN;
!         *ptr = '\0';
!         dims_str_length = strlen(dims_str);
!     }
!
!     retval = (char *) palloc(dims_str_length + overall_length + 2 * j);
      p = retval;

  #define APPENDSTR(str)    (strcpy(p, (str)), p += strlen(p))
  #define APPENDCHAR(ch)    (*p++ = (ch), *p = '\0')

+     if (needdims)
+         APPENDSTR(dims_str);
      APPENDCHAR('{');
      for (i = 0; i < ndim; indx[i++] = 0);
      j = 0;

Joe Conway <mail@joeconway.com> writes:
> Negative lower bound indicies now work also, and array_out will always
> output explicit dimensions for an array with any dimension having a
> lower bound of other than one.

Seems reasonable to me.

> Any thoughts on whether or not to apply this to 7.4?

I would say not; it's a sufficiently large change that it will doubtless
break a few people's applications.  It's better to do such things at
major version revs.  People don't expect to have to do application
compatibility testing on bug-fix updates.  Also, the base bug (lack of
dimension info from array_out) has existed long enough that I don't
think there is great urgency about getting the fix into the field...

            regards, tom lane

Joe Conway <mail@joeconway.com> writes:
> If there are no objections, I'll adjust the appropriate regression
> script/expected files and commit tonight. And of course I'll follow up
> with documentation updates too.

BTW, I believe the plan is to wrap 8.0beta1 tomorrow, so please do get
this in tonight if you can get the docs updates done too.  (Otherwise
it might be better to wait till the docs are done.)

            regards, tom lane

Re: [BUGS] casting strings to multidimensional arrays yields strange

From
Joe Conway
Date:
Tom Lane wrote:
> Joe Conway <mail@joeconway.com> writes:
>>If there are no objections, I'll adjust the appropriate regression
>>script/expected files and commit tonight. And of course I'll follow up
>>with documentation updates too.
>
> BTW, I believe the plan is to wrap 8.0beta1 tomorrow, so please do get
> this in tonight if you can get the docs updates done too.  (Otherwise
> it might be better to wait till the docs are done.)

OK, will do. I ought to be able to finish it up today and commit tonight.

Thanks,

Joe

Joe Conway <mail@joeconway.com> writes:
> OK, this is done, but there is still an ugly bug in there (i.e. was
> there prior to my just committed changes and still exists) somewhere
> that I noticed while modifying domain.sql. Namely, if you forget to put
> a delimiter inbetween subarrays in a multidimensional array literal, you
> get odd results:

I had pretty much come to the conclusion that the array_in code should
be thrown away and rewritten from the ground up --- whoever wrote it
originally had no calling as a programmer :-(.  I didn't look at the
details of your patch, but I have little faith in half measures in this
area.  I've already wasted a lot of time trying to patch that code in
past releases.

            regards, tom lane

Re: [BUGS] casting strings to multidimensional arrays yields strange

From
Joe Conway
Date:
Tom Lane wrote:
> I had pretty much come to the conclusion that the array_in code should
> be thrown away and rewritten from the ground up --- whoever wrote it
> originally had no calling as a programmer :-(.  I didn't look at the
> details of your patch, but I have little faith in half measures in this
> area.  I've already wasted a lot of time trying to patch that code in
> past releases.

Agreed, but I don't think I'll be able to get that done tonight.

While looking at it the last day or so, I started to think it might be
better to use bison to parse array literals -- or is that a bad idea?

In any case, I'm planning to find the time to work on NULL array
elements as soon as we branch 8.0 from head. At that time I'll also look
at cleaning up array_in (and friends undoubtedly).

Joe

Joe Conway <mail@joeconway.com> writes:
> While looking at it the last day or so, I started to think it might be
> better to use bison to parse array literals -- or is that a bad idea?

Offhand it doesn't seem like a super-appropriate tool.  Once you get
past the lexical details like quoting, the syntax of array literals
is not complicated enough to need a bison parser.  Also, the issues
you're facing now like enforcing consistent dimensions are not amenable
to solution by a context-free grammar --- so you'd still need most of
the dimension-checking mechanisms.

There might be something out there that is more useful, but I dunno
what.

            regards, tom lane

Re: [BUGS] casting strings to multidimensional arrays yields strange

From
Joe Conway
Date:
Tom Lane wrote:
> Joe Conway <mail@joeconway.com> writes:
>>While looking at it the last day or so, I started to think it might be
>>better to use bison to parse array literals -- or is that a bad idea?
>
> Offhand it doesn't seem like a super-appropriate tool.  Once you get
> past the lexical details like quoting, the syntax of array literals
> is not complicated enough to need a bison parser.  Also, the issues
> you're facing now like enforcing consistent dimensions are not amenable
> to solution by a context-free grammar --- so you'd still need most of
> the dimension-checking mechanisms.

I'm hesitant to apply the attached this late before the beta without
review, but it seems to take care of the pathological cases I came up
with, doesn't break anything AFAICS, and passes all regression tests. I
guess it can go into beta 2.

Joe
Index: src/backend/utils/adt/arrayfuncs.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v
retrieving revision 1.106
diff -c -r1.106 arrayfuncs.c
*** src/backend/utils/adt/arrayfuncs.c    5 Aug 2004 03:29:37 -0000    1.106
--- src/backend/utils/adt/arrayfuncs.c    5 Aug 2004 05:50:07 -0000
***************
*** 351,368 ****
   *         The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
!     int            nest_level = 0,
!                 i;
!     int            ndim = 1,
!                 temp[MAXDIM],
!                 nelems[MAXDIM],
!                 nelems_last[MAXDIM];
!     bool        scanning_string = false;
!     bool        eoArray = false;
!     char       *ptr;

      for (i = 0; i < MAXDIM; ++i)
      {
--- 351,378 ----
   *         The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
+ typedef enum
+ {
+     ARRAY_NO_LEVEL,
+     ARRAY_LEVEL_STARTED,
+     ARRAY_ELEM_STARTED,
+     ARRAY_LEVEL_COMPLETED,
+     ARRAY_LEVEL_DELIMITED
+ } ArrayParseState;
+
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
!     int                nest_level = 0,
!                     i;
!     int                ndim = 1,
!                     temp[MAXDIM],
!                     nelems[MAXDIM],
!                     nelems_last[MAXDIM];
!     bool            scanning_string = false;
!     bool            eoArray = false;
!     char           *ptr;
!     ArrayParseState    parse_state = ARRAY_NO_LEVEL;

      for (i = 0; i < MAXDIM; ++i)
      {
***************
*** 389,394 ****
--- 399,416 ----
                          errmsg("malformed array literal: \"%s\"", str)));
                      break;
                  case '\\':
+                     /*
+                      * An escape must be after a level start, within an
+                      * element, or after a delimiter. In any case
+                      * we now must be past an element start.
+                      */
+                     if (parse_state != ARRAY_LEVEL_STARTED &&
+                         parse_state != ARRAY_ELEM_STARTED &&
+                         parse_state != ARRAY_LEVEL_DELIMITED)
+                         ereport(ERROR,
+                             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", str)));
+                     parse_state = ARRAY_ELEM_STARTED;
                      /* skip the escaped character */
                      if (*(ptr + 1))
                          ptr++;
***************
*** 398,408 ****
--- 420,454 ----
                          errmsg("malformed array literal: \"%s\"", str)));
                      break;
                  case '\"':
+                     /*
+                      * A quote must be after a level start, within an
+                      * element, or after a delimiter. In any case
+                      * we now must be past an element start.
+                      */
+                     if (parse_state != ARRAY_LEVEL_STARTED &&
+                         parse_state != ARRAY_ELEM_STARTED &&
+                         parse_state != ARRAY_LEVEL_DELIMITED)
+                         ereport(ERROR,
+                             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", str)));
+                     parse_state = ARRAY_ELEM_STARTED;
                      scanning_string = !scanning_string;
                      break;
                  case '{':
                      if (!scanning_string)
                      {
+                         /*
+                          * A left brace can occur if no nesting has
+                          * occurred yet, after a level start, or
+                          * after a delimiter.
+                          */
+                         if (parse_state != ARRAY_NO_LEVEL &&
+                             parse_state != ARRAY_LEVEL_STARTED &&
+                             parse_state != ARRAY_LEVEL_DELIMITED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_LEVEL_STARTED;
                          if (nest_level >= MAXDIM)
                              ereport(ERROR,
                                  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
***************
*** 417,422 ****
--- 463,480 ----
                  case '}':
                      if (!scanning_string)
                      {
+                         /*
+                          * A right brace can occur after a level start,
+                          * after an element start, or after a level
+                          * completion.
+                          */
+                         if (parse_state != ARRAY_LEVEL_STARTED &&
+                             parse_state != ARRAY_ELEM_STARTED &&
+                             parse_state != ARRAY_LEVEL_COMPLETED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_LEVEL_COMPLETED;
                          if (nest_level == 0)
                              ereport(ERROR,
                              (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 447,455 ****
--- 505,540 ----
                  default:
                      if (*ptr == typdelim && !scanning_string)
                      {
+                         /*
+                         * Delimiters can occur after an element start
+                         * or after a level completion
+                         */
+                         if (parse_state != ARRAY_ELEM_STARTED &&
+                             parse_state != ARRAY_LEVEL_COMPLETED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_LEVEL_DELIMITED;
+
                          itemdone = true;
                          nelems[nest_level - 1]++;
                      }
+                     else if (!isspace(*ptr) && !scanning_string)
+                     {
+                         /*
+                         * Other non-space characters
+                         * must be after a level start, within an
+                         * element, or after a delimiter. In any case
+                         * we now must be past an element start.
+                         */
+                         if (parse_state != ARRAY_LEVEL_STARTED &&
+                             parse_state != ARRAY_ELEM_STARTED &&
+                             parse_state != ARRAY_LEVEL_DELIMITED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_ELEM_STARTED;
+                     }
                      break;
              }
              if (!itemdone)