Re: [PATCHES] [BUGS] casting strings to multidimensional arrays yields - Mailing list pgsql-hackers

From Joe Conway
Subject Re: [PATCHES] [BUGS] casting strings to multidimensional arrays yields
Date
Msg-id 41143549.4030405@joeconway.com
Whole thread Raw
In response to Re: [PATCHES] [BUGS] casting strings to multidimensional arrays yields strange  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: [PATCHES] [BUGS] casting strings to multidimensional arrays yields strange
List pgsql-hackers
Tom Lane wrote:
> I think that suppressing unquoted trailing whitespace is probably
> reasonable.  I'm much less enthusiastic about rejecting unquoted
> embedded whitespace, though.  I think that's significantly likely
> to break apps, and it doesn't seem like it's really needed to have
> sane behavior.  The leading/trailing whitespace asymmetry is just
> weird, but it doesn't follow that embedded whitespace is weird.
>
> If we are going to make such changes, 8.0 is the right time to do it.

The attached patch suppresses trailing whitespace, but allows embedded
whitespace in unquoted elements as discussed above. It also rejects some
previously accepted cases that were just too strange to be correct:

-- Postgres 8.0, with the patch
-- none of these should be accepted
select '{{1,{2}},{2,3}}'::text[];
ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
select '{{},{}}'::text[];
ERROR:  malformed array literal: "{{},{}}"
select '{{1,2},\\{2,3}}'::text[];
ERROR:  malformed array literal: "{{1,2},\{2,3}}"
select '{{"1 2" x},{3}}'::text[];
ERROR:  malformed array literal: "{{"1 2" x},{3}}"

The third case above actually does get an ERROR in 7.4 but it looks
suspicious itself:


-- in Postgres 7.4
select '{{1,2},\\{2,3}}'::text[];
ERROR:  malformed array literal: "{{1"


More examples:

-- Postgres 8.0, with the patch
-- all of these should be accepted
select '{}'::text[];
  text
------
  {}
(1 row)

select '{0 second  ,0 second}'::interval[];
       interval
---------------------
  {00:00:00,00:00:00}
(1 row)

select '{ { "," } , { 3 } }'::text[];
     text
-------------
  {{","},{3}}
(1 row)

select '  {   {  "  0 second  "   ,  0 second  }   }'::text[];
              text
-------------------------------
  {{"  0 second  ","0 second"}}
(1 row)


If there are no objections, I'll update the docs and commit tomorrow
sometime.

Thanks,

Joe
Index: src/backend/utils/adt/arrayfuncs.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v
retrieving revision 1.106
diff -c -r1.106 arrayfuncs.c
*** src/backend/utils/adt/arrayfuncs.c    5 Aug 2004 03:29:37 -0000    1.106
--- src/backend/utils/adt/arrayfuncs.c    7 Aug 2004 01:34:02 -0000
***************
*** 351,368 ****
   *         The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
!     int            nest_level = 0,
!                 i;
!     int            ndim = 1,
!                 temp[MAXDIM],
!                 nelems[MAXDIM],
!                 nelems_last[MAXDIM];
!     bool        scanning_string = false;
!     bool        eoArray = false;
!     char       *ptr;

      for (i = 0; i < MAXDIM; ++i)
      {
--- 351,382 ----
   *         The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
+ typedef enum
+ {
+     ARRAY_NO_LEVEL,
+     ARRAY_LEVEL_STARTED,
+     ARRAY_ELEM_STARTED,
+     ARRAY_ELEM_COMPLETED,
+     ARRAY_QUOTED_ELEM_STARTED,
+     ARRAY_QUOTED_ELEM_COMPLETED,
+     ARRAY_ELEM_DELIMITED,
+     ARRAY_LEVEL_COMPLETED,
+     ARRAY_LEVEL_DELIMITED
+ } ArrayParseState;
+
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
!     int                nest_level = 0,
!                     i;
!     int                ndim = 1,
!                     temp[MAXDIM],
!                     nelems[MAXDIM],
!                     nelems_last[MAXDIM];
!     bool            scanning_string = false;
!     bool            eoArray = false;
!     char           *ptr;
!     ArrayParseState    parse_state = ARRAY_NO_LEVEL;

      for (i = 0; i < MAXDIM; ++i)
      {
***************
*** 370,375 ****
--- 384,390 ----
          nelems_last[i] = nelems[i] = 1;
      }

+     /* special case for an empty array */
      if (strncmp(str, "{}", 2) == 0)
          return 0;

***************
*** 389,394 ****
--- 404,423 ----
                          errmsg("malformed array literal: \"%s\"", str)));
                      break;
                  case '\\':
+                     /*
+                      * An escape must be after a level start, after an
+                      * element start, or after an element delimiter. In any
+                      * case we now must be past an element start.
+                      */
+                     if (parse_state != ARRAY_LEVEL_STARTED &&
+                         parse_state != ARRAY_ELEM_STARTED &&
+                         parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+                         parse_state != ARRAY_ELEM_DELIMITED)
+                         ereport(ERROR,
+                             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", str)));
+                     if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
+                         parse_state = ARRAY_ELEM_STARTED;
                      /* skip the escaped character */
                      if (*(ptr + 1))
                          ptr++;
***************
*** 398,408 ****
--- 427,464 ----
                          errmsg("malformed array literal: \"%s\"", str)));
                      break;
                  case '\"':
+                     /*
+                      * A quote must be after a level start, after a quoted
+                      * element start, or after an element delimiter. In any
+                      * case we now must be past an element start.
+                      */
+                     if (parse_state != ARRAY_LEVEL_STARTED &&
+                         parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+                         parse_state != ARRAY_ELEM_DELIMITED)
+                         ereport(ERROR,
+                             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", str)));
                      scanning_string = !scanning_string;
+                     if (scanning_string)
+                         parse_state = ARRAY_QUOTED_ELEM_STARTED;
+                     else
+                         parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
                      break;
                  case '{':
                      if (!scanning_string)
                      {
+                         /*
+                          * A left brace can occur if no nesting has
+                          * occurred yet, after a level start, or
+                          * after a level delimiter.
+                          */
+                         if (parse_state != ARRAY_NO_LEVEL &&
+                             parse_state != ARRAY_LEVEL_STARTED &&
+                             parse_state != ARRAY_LEVEL_DELIMITED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_LEVEL_STARTED;
                          if (nest_level >= MAXDIM)
                              ereport(ERROR,
                                  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
***************
*** 417,422 ****
--- 473,491 ----
                  case '}':
                      if (!scanning_string)
                      {
+                         /*
+                          * A right brace can occur after an element start,
+                          * an element completion, a quoted element completion,
+                          * or a level completion.
+                          */
+                         if (parse_state != ARRAY_ELEM_STARTED &&
+                             parse_state != ARRAY_ELEM_COMPLETED &&
+                             parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+                             parse_state != ARRAY_LEVEL_COMPLETED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_LEVEL_COMPLETED;
                          if (nest_level == 0)
                              ereport(ERROR,
                              (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 445,454 ****
                      }
                      break;
                  default:
!                     if (*ptr == typdelim && !scanning_string)
                      {
!                         itemdone = true;
!                         nelems[nest_level - 1]++;
                      }
                      break;
              }
--- 514,558 ----
                      }
                      break;
                  default:
!                     if (!scanning_string)
                      {
!                         if (*ptr == typdelim)
!                         {
!                             /*
!                             * Delimiters can occur after an element start,
!                             * an element completion, a quoted element
!                             * completion, or a level completion.
!                             */
!                             if (parse_state != ARRAY_ELEM_STARTED &&
!                                 parse_state != ARRAY_ELEM_COMPLETED &&
!                                 parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
!                                 parse_state != ARRAY_LEVEL_COMPLETED)
!                                 ereport(ERROR,
!                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
!                                     errmsg("malformed array literal: \"%s\"", str)));
!                             if (parse_state == ARRAY_LEVEL_COMPLETED)
!                                 parse_state = ARRAY_LEVEL_DELIMITED;
!                             else
!                                 parse_state = ARRAY_ELEM_DELIMITED;
!                             itemdone = true;
!                             nelems[nest_level - 1]++;
!                         }
!                         else if (!isspace(*ptr))
!                         {
!                             /*
!                             * Other non-space characters must be after a level
!                             * start, after an element start, or after an element
!                             * delimiter. In any case we now must be past an
!                             * element start.
!                             */
!                             if (parse_state != ARRAY_LEVEL_STARTED &&
!                                 parse_state != ARRAY_ELEM_STARTED &&
!                                 parse_state != ARRAY_ELEM_DELIMITED)
!                                 ereport(ERROR,
!                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
!                                     errmsg("malformed array literal: \"%s\"", str)));
!                             parse_state = ARRAY_ELEM_STARTED;
!                         }
                      }
                      break;
              }
***************
*** 511,522 ****
--- 615,629 ----
      while (!eoArray)
      {
          bool        itemdone = false;
+         bool        itemquoted = false;
          int            i = -1;
          char       *itemstart;
+         char       *eptr;

          /* skip leading whitespace */
          while (isspace((unsigned char) *ptr))
              ptr++;
+
          itemstart = ptr;

          while (!itemdone)
***************
*** 547,557 ****
                          char       *cptr;

                          scanning_string = !scanning_string;
!                         /* Crunch the string on top of the quote. */
!                         for (cptr = ptr; *cptr != '\0'; cptr++)
!                             *cptr = *(cptr + 1);
!                         /* Back up to not miss following character. */
!                         ptr--;
                          break;
                      }
                  case '{':
--- 654,668 ----
                          char       *cptr;

                          scanning_string = !scanning_string;
!                         if (scanning_string)
!                         {
!                             itemquoted = true;
!                             /* Crunch the string on top of the first quote. */
!                             for (cptr = ptr; *cptr != '\0'; cptr++)
!                                 *cptr = *(cptr + 1);
!                             /* Back up to not miss following character. */
!                             ptr--;
!                         }
                          break;
                      }
                  case '{':
***************
*** 615,620 ****
--- 726,750 ----
                      (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                     errmsg("malformed array literal: \"%s\"", arrayStr)));

+         /*
+          * skip trailing whitespace
+          */
+         eptr = ptr - 1;
+         if (!itemquoted)
+         {
+             /* skip to last non-NULL, non-space, character */
+             while ((*eptr == '\0') || (isspace((unsigned char) *eptr)))
+                 eptr--;
+             *(++eptr) = '\0';
+         }
+         else
+         {
+             /* skip to last quote character */
+             while (*eptr != '"')
+                 eptr--;
+             *eptr = '\0';
+         }
+
          values[i] = FunctionCall3(inputproc,
                                    CStringGetDatum(itemstart),
                                    ObjectIdGetDatum(typioparam),

pgsql-hackers by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: parameter hints to the optimizer
Next
From: "Marc G. Fournier"
Date:
Subject: cvsweb down temporarily