Re: [BUGS] casting strings to multidimensional arrays yields strange results - Mailing list pgsql-patches

From Joe Conway
Subject Re: [BUGS] casting strings to multidimensional arrays yields strange results
Date
Msg-id 4111CD22.1010804@joeconway.com
Whole thread Raw
In response to Re: [BUGS] casting strings to multidimensional arrays yields strange results  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-patches
Tom Lane wrote:
> Joe Conway <mail@joeconway.com> writes:
>>While looking at it the last day or so, I started to think it might be
>>better to use bison to parse array literals -- or is that a bad idea?
>
> Offhand it doesn't seem like a super-appropriate tool.  Once you get
> past the lexical details like quoting, the syntax of array literals
> is not complicated enough to need a bison parser.  Also, the issues
> you're facing now like enforcing consistent dimensions are not amenable
> to solution by a context-free grammar --- so you'd still need most of
> the dimension-checking mechanisms.

I'm hesitant to apply the attached patch this late before the beta without
review, but it seems to take care of the pathological cases I came up
with, doesn't break anything AFAICS, and passes all regression tests. I
guess it can go into beta 2.

Joe
Index: src/backend/utils/adt/arrayfuncs.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v
retrieving revision 1.106
diff -c -r1.106 arrayfuncs.c
*** src/backend/utils/adt/arrayfuncs.c    5 Aug 2004 03:29:37 -0000    1.106
--- src/backend/utils/adt/arrayfuncs.c    5 Aug 2004 05:50:07 -0000
***************
*** 351,368 ****
   *         The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
!     int            nest_level = 0,
!                 i;
!     int            ndim = 1,
!                 temp[MAXDIM],
!                 nelems[MAXDIM],
!                 nelems_last[MAXDIM];
!     bool        scanning_string = false;
!     bool        eoArray = false;
!     char       *ptr;

      for (i = 0; i < MAXDIM; ++i)
      {
--- 351,378 ----
   *         The syntax for array input is C-like nested curly braces
   *-----------------------------------------------------------------------------
   */
+ typedef enum
+ {
+     ARRAY_NO_LEVEL,
+     ARRAY_LEVEL_STARTED,
+     ARRAY_ELEM_STARTED,
+     ARRAY_LEVEL_COMPLETED,
+     ARRAY_LEVEL_DELIMITED
+ } ArrayParseState;
+
  static int
  ArrayCount(char *str, int *dim, char typdelim)
  {
!     int                nest_level = 0,
!                     i;
!     int                ndim = 1,
!                     temp[MAXDIM],
!                     nelems[MAXDIM],
!                     nelems_last[MAXDIM];
!     bool            scanning_string = false;
!     bool            eoArray = false;
!     char           *ptr;
!     ArrayParseState    parse_state = ARRAY_NO_LEVEL;

      for (i = 0; i < MAXDIM; ++i)
      {
***************
*** 389,394 ****
--- 399,416 ----
                          errmsg("malformed array literal: \"%s\"", str)));
                      break;
                  case '\\':
+                     /*
+                      * An escape must be after a level start, within an
+                      * element, or after a delimiter. In any case
+                      * we now must be past an element start.
+                      */
+                     if (parse_state != ARRAY_LEVEL_STARTED &&
+                         parse_state != ARRAY_ELEM_STARTED &&
+                         parse_state != ARRAY_LEVEL_DELIMITED)
+                         ereport(ERROR,
+                             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", str)));
+                     parse_state = ARRAY_ELEM_STARTED;
                      /* skip the escaped character */
                      if (*(ptr + 1))
                          ptr++;
***************
*** 398,408 ****
--- 420,454 ----
                          errmsg("malformed array literal: \"%s\"", str)));
                      break;
                  case '\"':
+                     /*
+                      * A quote must be after a level start, within an
+                      * element, or after a delimiter. In any case
+                      * we now must be past an element start.
+                      */
+                     if (parse_state != ARRAY_LEVEL_STARTED &&
+                         parse_state != ARRAY_ELEM_STARTED &&
+                         parse_state != ARRAY_LEVEL_DELIMITED)
+                         ereport(ERROR,
+                             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", str)));
+                     parse_state = ARRAY_ELEM_STARTED;
                      scanning_string = !scanning_string;
                      break;
                  case '{':
                      if (!scanning_string)
                      {
+                         /*
+                          * A left brace can occur if no nesting has
+                          * occurred yet, after a level start, or
+                          * after a delimiter.
+                          */
+                         if (parse_state != ARRAY_NO_LEVEL &&
+                             parse_state != ARRAY_LEVEL_STARTED &&
+                             parse_state != ARRAY_LEVEL_DELIMITED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_LEVEL_STARTED;
                          if (nest_level >= MAXDIM)
                              ereport(ERROR,
                                  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
***************
*** 417,422 ****
--- 463,480 ----
                  case '}':
                      if (!scanning_string)
                      {
+                         /*
+                          * A right brace can occur after a level start,
+                          * after an element start, or after a level
+                          * completion.
+                          */
+                         if (parse_state != ARRAY_LEVEL_STARTED &&
+                             parse_state != ARRAY_ELEM_STARTED &&
+                             parse_state != ARRAY_LEVEL_COMPLETED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_LEVEL_COMPLETED;
                          if (nest_level == 0)
                              ereport(ERROR,
                              (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 447,455 ****
--- 505,540 ----
                  default:
                      if (*ptr == typdelim && !scanning_string)
                      {
+                         /*
+                         * Delimiters can occur after an element start
+                         * or after a level completion
+                         */
+                         if (parse_state != ARRAY_ELEM_STARTED &&
+                             parse_state != ARRAY_LEVEL_COMPLETED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_LEVEL_DELIMITED;
+
                          itemdone = true;
                          nelems[nest_level - 1]++;
                      }
+                     else if (!isspace(*ptr) && !scanning_string)
+                     {
+                         /*
+                         * Other non-space characters
+                         * must be after a level start, within an
+                         * element, or after a delimiter. In any case
+                         * we now must be past an element start.
+                         */
+                         if (parse_state != ARRAY_LEVEL_STARTED &&
+                             parse_state != ARRAY_ELEM_STARTED &&
+                             parse_state != ARRAY_LEVEL_DELIMITED)
+                             ereport(ERROR,
+                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str)));
+                         parse_state = ARRAY_ELEM_STARTED;
+                     }
                      break;
              }
              if (!itemdone)

pgsql-patches by date:

Previous
From: Michael Glaesemann
Date:
Subject: Re: Epoch to timestamp conversion function patch
Next
From: Zhenbang Wei
Date:
Subject: pg_dump-zh_TW.po for current