Re: [PATCHES] [BUGS] casting strings to multidimensional arrays yields - Mailing list pgsql-hackers
From | Joe Conway |
---|---|
Subject | Re: [PATCHES] [BUGS] casting strings to multidimensional arrays yields |
Date | |
Msg-id | 41143549.4030405@joeconway.com Whole thread Raw |
In response to | Re: [PATCHES] [BUGS] casting strings to multidimensional arrays yields strange (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: [PATCHES] [BUGS] casting strings to multidimensional arrays yields strange
|
List | pgsql-hackers |
Tom Lane wrote: > I think that suppressing unquoted trailing whitespace is probably > reasonable. I'm much less enthusiastic about rejecting unquoted > embedded whitespace, though. I think that's significantly likely > to break apps, and it doesn't seem like it's really needed to have > sane behavior. The leading/trailing whitespace asymmetry is just > weird, but it doesn't follow that embedded whitespace is weird. > > If we are going to make such changes, 8.0 is the right time to do it. The attached patch suppresses trailing whitespace, but allows embedded whitespace in unquoted elements as discussed above. It also rejects some previously accepted cases that were just too strange to be correct: -- Postgres 8.0, with the patch -- none of these should be accepted select '{{1,{2}},{2,3}}'::text[]; ERROR: malformed array literal: "{{1,{2}},{2,3}}" select '{{},{}}'::text[]; ERROR: malformed array literal: "{{},{}}" select '{{1,2},\\{2,3}}'::text[]; ERROR: malformed array literal: "{{1,2},\{2,3}}" select '{{"1 2" x},{3}}'::text[]; ERROR: malformed array literal: "{{"1 2" x},{3}}" The third case above actually does get an ERROR in 7.4 but it looks suspicious itself: -- in Postgres 7.4 select '{{1,2},\\{2,3}}'::text[]; ERROR: malformed array literal: "{{1" More examples: -- Postgres 8.0, with the patch -- all of these should be accepted select '{}'::text[]; text ------ {} (1 row) select '{0 second ,0 second}'::interval[]; interval --------------------- {00:00:00,00:00:00} (1 row) select '{ { "," } , { 3 } }'::text[]; text ------------- {{","},{3}} (1 row) select ' { { " 0 second " , 0 second } }'::text[]; text ------------------------------- {{" 0 second ","0 second"}} (1 row) If there are no objections, I'll update the docs and commit tomorrow sometime. Thanks, Joe Index: src/backend/utils/adt/arrayfuncs.c =================================================================== RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v retrieving revision 1.106 diff -c -r1.106 arrayfuncs.c *** src/backend/utils/adt/arrayfuncs.c 5 Aug 2004 03:29:37 -0000 1.106 --- src/backend/utils/adt/arrayfuncs.c 7 Aug 2004 01:34:02 -0000 *************** *** 351,368 **** * The syntax for array input is C-like nested curly braces *----------------------------------------------------------------------------- */ static int ArrayCount(char *str, int *dim, char typdelim) { ! int nest_level = 0, ! i; ! int ndim = 1, ! temp[MAXDIM], ! nelems[MAXDIM], ! nelems_last[MAXDIM]; ! bool scanning_string = false; ! bool eoArray = false; ! char *ptr; for (i = 0; i < MAXDIM; ++i) { --- 351,382 ---- * The syntax for array input is C-like nested curly braces *----------------------------------------------------------------------------- */ + typedef enum + { + ARRAY_NO_LEVEL, + ARRAY_LEVEL_STARTED, + ARRAY_ELEM_STARTED, + ARRAY_ELEM_COMPLETED, + ARRAY_QUOTED_ELEM_STARTED, + ARRAY_QUOTED_ELEM_COMPLETED, + ARRAY_ELEM_DELIMITED, + ARRAY_LEVEL_COMPLETED, + ARRAY_LEVEL_DELIMITED + } ArrayParseState; + static int ArrayCount(char *str, int *dim, char typdelim) { ! int nest_level = 0, ! i; ! int ndim = 1, ! temp[MAXDIM], ! nelems[MAXDIM], ! nelems_last[MAXDIM]; ! bool scanning_string = false; ! bool eoArray = false; ! char *ptr; ! ArrayParseState parse_state = ARRAY_NO_LEVEL; for (i = 0; i < MAXDIM; ++i) { *************** *** 370,375 **** --- 384,390 ---- nelems_last[i] = nelems[i] = 1; } + /* special case for an empty array */ if (strncmp(str, "{}", 2) == 0) return 0; *************** *** 389,394 **** --- 404,423 ---- errmsg("malformed array literal: \"%s\"", str))); break; case '\\': + /* + * An escape must be after a level start, after an + * element start, or after an element delimiter. In any + * case we now must be past an element start. + */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_QUOTED_ELEM_STARTED && + parse_state != ARRAY_ELEM_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + if (parse_state != ARRAY_QUOTED_ELEM_STARTED) + parse_state = ARRAY_ELEM_STARTED; /* skip the escaped character */ if (*(ptr + 1)) ptr++; *************** *** 398,408 **** --- 427,464 ---- errmsg("malformed array literal: \"%s\"", str))); break; case '\"': + /* + * A quote must be after a level start, after a quoted + * element start, or after an element delimiter. In any + * case we now must be past an element start. + */ + if (parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_QUOTED_ELEM_STARTED && + parse_state != ARRAY_ELEM_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); scanning_string = !scanning_string; + if (scanning_string) + parse_state = ARRAY_QUOTED_ELEM_STARTED; + else + parse_state = ARRAY_QUOTED_ELEM_COMPLETED; break; case '{': if (!scanning_string) { + /* + * A left brace can occur if no nesting has + * occurred yet, after a level start, or + * after a level delimiter. + */ + if (parse_state != ARRAY_NO_LEVEL && + parse_state != ARRAY_LEVEL_STARTED && + parse_state != ARRAY_LEVEL_DELIMITED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_LEVEL_STARTED; if (nest_level >= MAXDIM) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), *************** *** 417,422 **** --- 473,491 ---- case '}': if (!scanning_string) { + /* + * A right brace can occur after an element start, + * an element completion, a quoted element completion, + * or a level completion. + */ + if (parse_state != ARRAY_ELEM_STARTED && + parse_state != ARRAY_ELEM_COMPLETED && + parse_state != ARRAY_QUOTED_ELEM_COMPLETED && + parse_state != ARRAY_LEVEL_COMPLETED) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str))); + parse_state = ARRAY_LEVEL_COMPLETED; if (nest_level == 0) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), *************** *** 445,454 **** } break; default: ! if (*ptr == typdelim && !scanning_string) { ! itemdone = true; ! nelems[nest_level - 1]++; } break; } --- 514,558 ---- } break; default: ! if (!scanning_string) { ! if (*ptr == typdelim) ! { ! /* ! * Delimiters can occur after an element start, ! * an element completion, a quoted element ! * completion, or a level completion. ! */ ! if (parse_state != ARRAY_ELEM_STARTED && ! parse_state != ARRAY_ELEM_COMPLETED && ! parse_state != ARRAY_QUOTED_ELEM_COMPLETED && ! parse_state != ARRAY_LEVEL_COMPLETED) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), ! errmsg("malformed array literal: \"%s\"", str))); ! if (parse_state == ARRAY_LEVEL_COMPLETED) ! parse_state = ARRAY_LEVEL_DELIMITED; ! else ! parse_state = ARRAY_ELEM_DELIMITED; ! itemdone = true; ! nelems[nest_level - 1]++; ! } ! else if (!isspace(*ptr)) ! { ! /* ! * Other non-space characters must be after a level ! * start, after an element start, or after an element ! * delimiter. In any case we now must be past an ! * element start. ! */ ! if (parse_state != ARRAY_LEVEL_STARTED && ! parse_state != ARRAY_ELEM_STARTED && ! parse_state != ARRAY_ELEM_DELIMITED) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), ! errmsg("malformed array literal: \"%s\"", str))); ! parse_state = ARRAY_ELEM_STARTED; ! } } break; } *************** *** 511,522 **** --- 615,629 ---- while (!eoArray) { bool itemdone = false; + bool itemquoted = false; int i = -1; char *itemstart; + char *eptr; /* skip leading whitespace */ while (isspace((unsigned char) *ptr)) ptr++; + itemstart = ptr; while (!itemdone) *************** *** 547,557 **** char *cptr; scanning_string = !scanning_string; ! /* Crunch the string on top of the quote. */ ! for (cptr = ptr; *cptr != '\0'; cptr++) ! *cptr = *(cptr + 1); ! /* Back up to not miss following character. */ ! ptr--; break; } case '{': --- 654,668 ---- char *cptr; scanning_string = !scanning_string; ! if (scanning_string) ! { ! itemquoted = true; ! /* Crunch the string on top of the first quote. */ ! for (cptr = ptr; *cptr != '\0'; cptr++) ! *cptr = *(cptr + 1); ! /* Back up to not miss following character. */ ! ptr--; ! } break; } case '{': *************** *** 615,620 **** --- 726,750 ---- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", arrayStr))); + /* + * skip trailing whitespace + */ + eptr = ptr - 1; + if (!itemquoted) + { + /* skip to last non-NULL, non-space, character */ + while ((*eptr == '\0') || (isspace((unsigned char) *eptr))) + eptr--; + *(++eptr) = '\0'; + } + else + { + /* skip to last quote character */ + while (*eptr != '"') + eptr--; + *eptr = '\0'; + } + values[i] = FunctionCall3(inputproc, CStringGetDatum(itemstart), ObjectIdGetDatum(typioparam),
pgsql-hackers by date: