From f411c054a838ff294cea85f654f41591986d0177 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 4 Jul 2023 14:03:40 -0400 Subject: [PATCH v4 2/7] Rewrite ArrayCount() to make dimensionality checks more complete. ArrayCount has had a few layers of revision over the years, reaching a point where it's really quite baroque and unintelligible. There are actually four different arrays holding dimensions, which can be reduced to two with little effort. It was also close to impossible to figure out why the dimension-counting actually worked correctly. Rewrite to make it perhaps a little less opaque. In particular, a new element or subarray is now counted when we see the start of the element or subarray, not at some later point where we might not even be at the same nest_level anymore. Also, install guards to catch non-rectangularity cases that were previously missed, along the same lines as recent plperl and plpython fixes: we were not checking that all scalar elements appear at the same nesting depth. Thanks to the very different logic, this seems not to have caused any of the sort of internal errors that the PLs suffered from, but it's still wrong. On the other hand, the behavior of plperl and plpython suggests that we should allow empty sub-arrays, as in '{{},{}}'; those languages interpret equivalent constructs such as '[[],[]]' as an empty (hence zero-dimensional) array, and it seems unclear why array_in should not. Remove the hack that was installed to reject that case. Also, fix things so that it's possible to write an empty array with explicit bounds spec, along the lines of '[1:0]={}'. Before, that was rejected with a complaint about upper bound less than lower, but I don't see a good reason not to allow it. In passing, remove ArrayParseState.ARRAY_ELEM_COMPLETED, a state that the state machine never entered yet wasted cycles checking for. (This patch looks a bit odd because I tried to avoid reindenting existing code, despite having removed one level of loop. That seemed to me to make it easier to read the code changes. A follow-up pgindent run is needed.) --- src/backend/utils/adt/arrayfuncs.c | 241 +++++++++++++++++---------- src/test/regress/expected/arrays.out | 68 +++++++- src/test/regress/sql/arrays.sql | 13 +- 3 files changed, 230 insertions(+), 92 deletions(-) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index df1ebb47..59bb0614 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -58,7 +58,6 @@ typedef enum ARRAY_NO_LEVEL, ARRAY_LEVEL_STARTED, ARRAY_ELEM_STARTED, - ARRAY_ELEM_COMPLETED, ARRAY_QUOTED_ELEM_STARTED, ARRAY_QUOTED_ELEM_COMPLETED, ARRAY_ELEM_DELIMITED, @@ -302,10 +301,18 @@ array_in(PG_FUNCTION_ARGS) *q = '\0'; ub = atoi(p); p = q + 1; - if (ub < lBound[ndim]) + + /* Upper bound of INT_MAX is disallowed, cf ArrayCheckBounds() */ + if (ub == INT_MAX) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("array upper bound is too large: %d", + ub))); + /* Now it's safe to compute ub + 1 */ + if (ub + 1 < lBound[ndim]) ereturn(escontext, (Datum) 0, (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("upper bound cannot be less than lower bound"))); + errmsg("upper bound cannot be less than lower bound minus one"))); dim[ndim] = ub - lBound[ndim] + 1; ndim++; @@ -440,7 +447,9 @@ array_in(PG_FUNCTION_ARGS) * syntax-checking the array structure decoration (braces and delimiters). * * Returns number of dimensions as function result. The axis lengths are - * returned in dim[], which must be of size MAXDIM. + * returned in dim[], which must be of size MAXDIM. This function does not + * special-case empty arrays: it will return a positive result with some + * zero axis length(s). * * If we detect an error, fill *escontext with error details and return -1 * (unless escontext isn't provided, in which case errors will be thrown). @@ -451,31 +460,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) int nest_level = 0, i; int ndim = 1, - temp[MAXDIM], - nelems[MAXDIM], - nelems_last[MAXDIM]; + nelems[MAXDIM]; + bool ndim_frozen = false; bool in_quotes = false; bool eoArray = false; - bool empty_array = true; const char *ptr; ArrayParseState parse_state = ARRAY_NO_LEVEL; + /* Initialize dim[] entries to -1 meaning "unknown" */ for (i = 0; i < MAXDIM; ++i) - { - temp[i] = dim[i] = nelems_last[i] = 0; - nelems[i] = 1; - } + dim[i] = -1; + /* Scan string until we reach closing brace */ ptr = str; while (!eoArray) { - bool itemdone = false; - - while (!itemdone) - { - if (parse_state == ARRAY_ELEM_STARTED || - parse_state == ARRAY_QUOTED_ELEM_STARTED) - empty_array = false; + bool new_element = false; switch (*ptr) { @@ -492,17 +492,25 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) * start, or after an element delimiter. In any case we * now must be past an element start. */ - if (parse_state != ARRAY_LEVEL_STARTED && - parse_state != ARRAY_ELEM_STARTED && - parse_state != ARRAY_QUOTED_ELEM_STARTED && - parse_state != ARRAY_ELEM_DELIMITED) + switch (parse_state) + { + case ARRAY_LEVEL_STARTED: + case ARRAY_ELEM_DELIMITED: + /* start new unquoted element */ + parse_state = ARRAY_ELEM_STARTED; + new_element = true; + break; + case ARRAY_ELEM_STARTED: + case ARRAY_QUOTED_ELEM_STARTED: + /* already in element */ + break; + default: ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", '\\'))); - if (parse_state != ARRAY_QUOTED_ELEM_STARTED) - parse_state = ARRAY_ELEM_STARTED; + } /* skip the escaped character */ if (*(ptr + 1)) ptr++; @@ -519,18 +527,28 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) * element start, or after an element delimiter. In any * case we now must be past an element start. */ - if (parse_state != ARRAY_LEVEL_STARTED && - parse_state != ARRAY_QUOTED_ELEM_STARTED && - parse_state != ARRAY_ELEM_DELIMITED) + switch (parse_state) + { + case ARRAY_LEVEL_STARTED: + case ARRAY_ELEM_DELIMITED: + /* start new quoted element */ + Assert(!in_quotes); + in_quotes = true; + parse_state = ARRAY_QUOTED_ELEM_STARTED; + new_element = true; + break; + case ARRAY_QUOTED_ELEM_STARTED: + /* already in element, end it */ + Assert(in_quotes); + in_quotes = false; + parse_state = ARRAY_QUOTED_ELEM_COMPLETED; + break; + default: ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected array element."))); - in_quotes = !in_quotes; - if (in_quotes) - parse_state = ARRAY_QUOTED_ELEM_STARTED; - else - parse_state = ARRAY_QUOTED_ELEM_COMPLETED; + } break; case '{': if (!in_quotes) @@ -538,27 +556,52 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) /* * A left brace can occur if no nesting has occurred * yet, after a level start, or after a level - * delimiter. + * delimiter. If we see one after an element + * delimiter, we have something like "{1,{2}}", so + * complain about non-rectangularity. */ - if (parse_state != ARRAY_NO_LEVEL && - parse_state != ARRAY_LEVEL_STARTED && - parse_state != ARRAY_LEVEL_DELIMITED) + switch (parse_state) + { + case ARRAY_NO_LEVEL: + case ARRAY_LEVEL_STARTED: + case ARRAY_LEVEL_DELIMITED: + /* okay */ + break; + case ARRAY_ELEM_DELIMITED: + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Multidimensional arrays must have sub-arrays with matching dimensions."))); + default: ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected \"%c\" character.", '{'))); + } parse_state = ARRAY_LEVEL_STARTED; + /* Nested sub-arrays count as elements of outer level */ + if (nest_level > 0) + nelems[nest_level - 1]++; + /* Initialize element counting in the new level */ if (nest_level >= MAXDIM) ereturn(escontext, -1, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)", nest_level + 1, MAXDIM))); - temp[nest_level] = 0; + nelems[nest_level] = 0; nest_level++; if (ndim < nest_level) + { + /* Can't increase ndim once it's frozen */ + if (ndim_frozen) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Multidimensional arrays must have sub-arrays with matching dimensions."))); ndim = nest_level; } + } break; case '}': if (!in_quotes) @@ -566,46 +609,56 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) /* * A right brace can occur after an element start, an * element completion, a quoted element completion, or - * a level completion. + * a level completion. We also allow it after a level + * start, that is an empty sub-array "{}" --- but that + * freezes the number of dimensions and all such + * sub-arrays must be at the same level, just like + * sub-arrays containing elements. */ - if (parse_state != ARRAY_ELEM_STARTED && - parse_state != ARRAY_ELEM_COMPLETED && - parse_state != ARRAY_QUOTED_ELEM_COMPLETED && - parse_state != ARRAY_LEVEL_COMPLETED && - !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED)) + switch (parse_state) + { + case ARRAY_ELEM_STARTED: + case ARRAY_QUOTED_ELEM_COMPLETED: + case ARRAY_LEVEL_COMPLETED: + /* okay */ + break; + case ARRAY_LEVEL_STARTED: + /* empty sub-array: OK if at correct nest_level */ + ndim_frozen = true; + if (nest_level != ndim) ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), - errdetail("Unexpected \"%c\" character.", - '}'))); - parse_state = ARRAY_LEVEL_COMPLETED; - if (nest_level == 0) + errdetail("Multidimensional arrays must have sub-arrays with matching dimensions."))); + break; + default: ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), - errdetail("Unmatched \"%c\" character.", '}'))); + errdetail("Unexpected \"%c\" character.", + '}'))); + } + parse_state = ARRAY_LEVEL_COMPLETED; + /* The parse state check assured we're in a level. */ + Assert(nest_level > 0); nest_level--; - if (nelems_last[nest_level] != 0 && - nelems[nest_level] != nelems_last[nest_level]) + if (dim[nest_level] < 0) + { + /* Save length of first sub-array of this level */ + dim[nest_level] = nelems[nest_level]; + } + else if (nelems[nest_level] != dim[nest_level]) + { + /* Subsequent sub-arrays must have same length */ ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), - errdetail("Multidimensional arrays must have " - "sub-arrays with matching " - "dimensions."))); - nelems_last[nest_level] = nelems[nest_level]; - nelems[nest_level] = 1; - if (nest_level == 0) - eoArray = itemdone = true; - else - { - /* - * We don't set itemdone here; see comments in - * ReadArrayStr - */ - temp[nest_level - 1]++; + errdetail("Multidimensional arrays must have sub-arrays with matching dimensions."))); } + /* Done if this is the outermost level's '}' */ + if (nest_level == 0) + eoArray = true; } break; default: @@ -614,12 +667,11 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) if (*ptr == typdelim) { /* - * Delimiters can occur after an element start, an - * element completion, a quoted element - * completion, or a level completion. + * Delimiters can occur after an element start, a + * quoted element completion, or a level + * completion. */ if (parse_state != ARRAY_ELEM_STARTED && - parse_state != ARRAY_ELEM_COMPLETED && parse_state != ARRAY_QUOTED_ELEM_COMPLETED && parse_state != ARRAY_LEVEL_COMPLETED) ereturn(escontext, -1, @@ -631,8 +683,6 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) parse_state = ARRAY_LEVEL_DELIMITED; else parse_state = ARRAY_ELEM_DELIMITED; - itemdone = true; - nelems[nest_level - 1]++; } else if (!scanner_isspace(*ptr)) { @@ -641,23 +691,51 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) * level start, after an element start, or after * an element delimiter. In any case we now must * be past an element start. + * + * If it's a space character, we can ignore it; it + * might be data or not, but it doesn't change the + * parsing state. */ - if (parse_state != ARRAY_LEVEL_STARTED && - parse_state != ARRAY_ELEM_STARTED && - parse_state != ARRAY_ELEM_DELIMITED) + switch (parse_state) + { + case ARRAY_LEVEL_STARTED: + case ARRAY_ELEM_DELIMITED: + /* start new unquoted element */ + parse_state = ARRAY_ELEM_STARTED; + new_element = true; + break; + case ARRAY_ELEM_STARTED: + /* already in element */ + break; + default: ereturn(escontext, -1, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed array literal: \"%s\"", str), errdetail("Unexpected array element."))); - parse_state = ARRAY_ELEM_STARTED; + } } } break; } - if (!itemdone) - ptr++; + + /* To reduce duplication, all new-element cases go through here. */ + if (new_element) + { + /* + * Once we have found an element, the number of dimensions can no + * longer increase, and subsequent elements must all be at the + * same nesting depth. + */ + ndim_frozen = true; + if (nest_level != ndim) + ereturn(escontext, -1, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed array literal: \"%s\"", str), + errdetail("Multidimensional arrays must have sub-arrays with matching dimensions."))); + /* Count the new element */ + nelems[nest_level - 1]++; } - temp[ndim - 1]++; + ptr++; } @@ -671,13 +749,6 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext) errdetail("Junk after closing right brace."))); } - /* special case for an empty array */ - if (empty_array) - return 0; - - for (i = 0; i < ndim; ++i) - dim[i] = temp[i]; - return ndim; } diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index 70643914..26d4281d 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -1475,12 +1475,7 @@ select '{{1,{2}},{2,3}}'::text[]; ERROR: malformed array literal: "{{1,{2}},{2,3}}" LINE 1: select '{{1,{2}},{2,3}}'::text[]; ^ -DETAIL: Unexpected "{" character. -select '{{},{}}'::text[]; -ERROR: malformed array literal: "{{},{}}" -LINE 1: select '{{},{}}'::text[]; - ^ -DETAIL: Unexpected "}" character. +DETAIL: Multidimensional arrays must have sub-arrays with matching dimensions. select E'{{1,2},\\{2,3}}'::text[]; ERROR: malformed array literal: "{{1,2},\{2,3}}" LINE 1: select E'{{1,2},\\{2,3}}'::text[]; @@ -1501,6 +1496,49 @@ ERROR: malformed array literal: "{ }}" LINE 1: select '{ }}'::text[]; ^ DETAIL: Junk after closing right brace. +select '{{1},{{2}}}'::text[]; +ERROR: malformed array literal: "{{1},{{2}}}" +LINE 1: select '{{1},{{2}}}'::text[]; + ^ +DETAIL: Multidimensional arrays must have sub-arrays with matching dimensions. +select '{{{1}},{2}}'::text[]; +ERROR: malformed array literal: "{{{1}},{2}}" +LINE 1: select '{{{1}},{2}}'::text[]; + ^ +DETAIL: Multidimensional arrays must have sub-arrays with matching dimensions. +select '{{},{{}}}'::text[]; +ERROR: malformed array literal: "{{},{{}}}" +LINE 1: select '{{},{{}}}'::text[]; + ^ +DETAIL: Multidimensional arrays must have sub-arrays with matching dimensions. +select '{{{}},{}}'::text[]; +ERROR: malformed array literal: "{{{}},{}}" +LINE 1: select '{{{}},{}}'::text[]; + ^ +DETAIL: Multidimensional arrays must have sub-arrays with matching dimensions. +select '{{1},{}}'::text[]; +ERROR: malformed array literal: "{{1},{}}" +LINE 1: select '{{1},{}}'::text[]; + ^ +DETAIL: Multidimensional arrays must have sub-arrays with matching dimensions. +select '{{},{1}}'::text[]; +ERROR: malformed array literal: "{{},{1}}" +LINE 1: select '{{},{1}}'::text[]; + ^ +DETAIL: Multidimensional arrays must have sub-arrays with matching dimensions. +select '[2147483646:2147483647]={1,2}'::int[]; +ERROR: array upper bound is too large: 2147483647 +LINE 1: select '[2147483646:2147483647]={1,2}'::int[]; + ^ +select '[1:-1]={}'::int[]; +ERROR: upper bound cannot be less than lower bound minus one +LINE 1: select '[1:-1]={}'::int[]; + ^ +select '[1:0]={1}'::int[]; +ERROR: malformed array literal: "[1:0]={1}" +LINE 1: select '[1:0]={1}'::int[]; + ^ +DETAIL: Specified array dimensions do not match array contents. select array[]; ERROR: cannot determine type of empty array LINE 1: select array[]; @@ -1514,6 +1552,12 @@ select '{}'::text[]; {} (1 row) +select '{{},{}}'::text[]; + text +------ + {} +(1 row) + select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[]; text ----------------------------------------------- @@ -1559,6 +1603,18 @@ select '[0:1]={1.1,2.2}'::float8[]; [0:1]={1.1,2.2} (1 row) +select '[2147483646:2147483646]={1}'::int[]; + int4 +----------------------------- + [2147483646:2147483646]={1} +(1 row) + +select '[1:0]={}'::int[]; + int4 +------ + {} +(1 row) + -- all of the above should be accepted -- tests for array aggregates CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]); diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index f1375621..b7e2b180 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -454,16 +454,25 @@ select 'foo' ilike all (array['F%', '%O']); -- t -- none of the following should be accepted select '{{1,{2}},{2,3}}'::text[]; -select '{{},{}}'::text[]; select E'{{1,2},\\{2,3}}'::text[]; select '{{"1 2" x},{3}}'::text[]; select '{}}'::text[]; select '{ }}'::text[]; +select '{{1},{{2}}}'::text[]; +select '{{{1}},{2}}'::text[]; +select '{{},{{}}}'::text[]; +select '{{{}},{}}'::text[]; +select '{{1},{}}'::text[]; +select '{{},{1}}'::text[]; +select '[2147483646:2147483647]={1,2}'::int[]; +select '[1:-1]={}'::int[]; +select '[1:0]={1}'::int[]; select array[]; -- none of the above should be accepted -- all of the following should be accepted select '{}'::text[]; +select '{{},{}}'::text[]; select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[]; select '{0 second ,0 second}'::interval[]; select '{ { "," } , { 3 } }'::text[]; @@ -474,6 +483,8 @@ select '{ }'::interval[]; select array[]::text[]; select '[0:1]={1.1,2.2}'::float8[]; +select '[2147483646:2147483646]={1}'::int[]; +select '[1:0]={}'::int[]; -- all of the above should be accepted -- tests for array aggregates -- 2.34.1