Re: [BUGS] casting strings to multidimensional arrays yields strange results - Mailing list pgsql-patches
From | Joe Conway |
---|---|
Subject | Re: [BUGS] casting strings to multidimensional arrays yields strange results |
Date | |
Msg-id | 4111CD22.1010804@joeconway.com Whole thread Raw |
In response to | Re: [BUGS] casting strings to multidimensional arrays yields strange results (Tom Lane <tgl@sss.pgh.pa.us>) |
List | pgsql-patches |
Tom Lane wrote: > Joe Conway <mail@joeconway.com> writes: >>While looking at it the last day or so, I started to think it might be >>better to use bison to parse array literals -- or is that a bad idea? > > Offhand it doesn't seem like a super-appropriate tool. Once you get > past the lexical details like quoting, the syntax of array literals > is not complicated enough to need a bison parser. Also, the issues > you're facing now like enforcing consistent dimensions are not amenable > to solution by a context-free grammar --- so you'd still need most of > the dimension-checking mechanisms. I'm hesitant to apply the attached this late before the beta without review, but it seems to take care of the pathological cases I came up with, doesn't break anything AFAICS, and passes all regression tests. I guess it can go into beta 2. Joe Index: src/backend/utils/adt/arrayfuncs.c =================================================================== RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v retrieving revision 1.106 diff -c -r1.106 arrayfuncs.c *** src/backend/utils/adt/arrayfuncs.c 5 Aug 2004 03:29:37 -0000 1.106 --- src/backend/utils/adt/arrayfuncs.c 5 Aug 2004 05:50:07 -0000 *************** *** 351,368 **** * The syntax for array input is C-like nested curly braces *----------------------------------------------------------------------------- */ static int ArrayCount(char *str, int *dim, char typdelim) { ! int nest_level = 0, ! i; ! int ndim = 1, ! temp[MAXDIM], ! nelems[MAXDIM], ! nelems_last[MAXDIM]; ! bool scanning_string = false; ! bool eoArray = false; ! 
char *ptr; for (i = 0; i < MAXDIM; ++i) { --- 351,378 ---- * The syntax for array input is C-like nested curly braces *----------------------------------------------------------------------------- */ + typedef enum + { + ARRAY_NO_LEVEL, + ARRAY_LEVEL_STARTED, + ARRAY_ELEM_STARTED, + ARRAY_LEVEL_COMPLETED, + ARRAY_LEVEL_DELIMITED + } ArrayParseState; + static int ArrayCount(char *str, int *dim, char typdelim) { ! int nest_level = 0, ! i; ! int ndim = 1, ! temp[MAXDIM], ! nelems[MAXDIM], ! nelems_last[MAXDIM]; ! bool scanning_string = false; ! bool eoArray = false; ! char *ptr; ! ArrayParseState parse_state = ARRAY_NO_LEVEL; for (i = 0; i < MAXDIM; ++i) { *************** *** 389,394 **** --- 399,416 ---- errmsg("malformed array literal: \"%s\"", str))); break; case '\\': + /* + * An escape must be after a level start, within an + * element, or after a delimiter. In any case + * we now must be past an element start. + */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_LEVEL_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_ELEM_STARTED; /* skip the escaped character */ if (*(ptr + 1)) ptr++; *************** *** 398,408 **** --- 420,454 ---- errmsg("malformed array literal: \"%s\"", str))); break; case '\"': + /* + * A quote must be after a level start, within an + * element, or after a delimiter. In any case + * we now must be past an element start. 
+ */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_LEVEL_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_ELEM_STARTED; scanning_string = !scanning_string; break; case '{': if (!scanning_string) { + /* + * A left brace can occur if no nesting has + * occurred yet, after a level start, or + * after a delimiter. + */ + if (parse_state != ARRAY_NO_LEVEL && + parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_LEVEL_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_LEVEL_STARTED; if (nest_level >= MAXDIM) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), *************** *** 417,422 **** --- 463,480 ---- case '}': if (!scanning_string) { + /* + * A right brace can occur after a level start, + * after an element start, or after a level + * completion. 
+ */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_LEVEL_COMPLETED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_LEVEL_COMPLETED; if (nest_level == 0) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), *************** *** 447,455 **** --- 505,540 ---- default: if (*ptr == typdelim && !scanning_string) { + /* + * Delimiters can occur after an element start + * or after a level completion + */ + if (parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_LEVEL_COMPLETED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_LEVEL_DELIMITED; + itemdone = true; nelems[nest_level - 1]++; } + else if (!isspace(*ptr) && !scanning_string) + { + /* + * Other non-space characters + * must be after a level start, within an + * element, or after a delimiter. In any case + * we now must be past an element start. + */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_LEVEL_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_ELEM_STARTED; + } break; } if (!itemdone)
pgsql-patches by date: