Re: array_in sub function ReadArrayDimensions error message - Mailing list pgsql-hackers

From David G. Johnston
Subject Re: array_in sub function ReadArrayDimensions error message
Date
Msg-id CAKFQuwZYvHnqF-Wzf7tX3oPgPEb+bC2Ev=to8j9R0M11dx-Saw@mail.gmail.com
Whole thread Raw
In response to Re: array_in sub function ReadArrayDimensions error message  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
On Tue, Jul 9, 2024 at 8:59 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
"David G. Johnston" <david.g.johnston@gmail.com> writes:

> I'd add a hint if the first symbol is [ and we fail to get to the point of
> actually seeing the equal sign or the first subsequent unquoted symbol is a
> comma instead of a colon.

That seems closely related to my suggestion of applying strchr() to
see if the character we are expecting to see actually appears
anywhere.  Or did you have something else in mind?  Please be
specific, both about the test you are thinking of and how the
message should be worded.


Pseudo-code, the syntax for adding a conditional errhint I didn't try to learn along with figuring out the logic.  Also not totally sure on the ReadDimensionInt behavior interplay here.

In short, the ambiguous first dimension [n] case is cleared by seeing either a second dimension or the equal sign separator. (ToDo: see how end-of-string get resolved)

The [n:m] first dimension case clears as soon as the colon is seen.

The normal, undimensioned case clears once the first non-blank character is not a [

If we error before clearing we assume the query author provided an SQL array using json syntax and point out that the delimiters for SQL arrays are {}.
We also assume, in the single bounds case, that a too-large upper-bound value means they did intend to supply a single number in a json array format.  We would need to modify these tests so they occur after checking whether the next part of the string is [ or = but, in the [ case, before processing the next dimension.

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d6641b570d..0ac1eabba1 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -404,7 +404,7 @@ ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
 {
        char       *p = *srcptr;
        int                     ndim;
-
+    bool couldBeJson = true;
        /*
         * Dimension info takes the form of one or more [n] or [m:n] items.  This
         * loop iterates once per dimension item.
@@ -422,8 +422,19 @@ ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
                 */
                while (scanner_isspace(*p))
                        p++;
+               if (*p == '=') {
+                       //if we have an equals sign we are not dealing with a json array
+                       couldBeJson = false;
+                       break; // and we are at the end of the bounds specification for the SQL array literal we do have
+               }
                if (*p != '[')
-                       break;                          /* no more dimension items */
+               {
+                       couldBeJson = false; // json arrays will start with [
+                       break;  
+                       // on subsequent passes we better have either this or an equal sign and the later is covered above
+               }                       /* no (more?) dimension items */
+               if (ndim > 0)
+                       couldBeJson = false; //multi-dimensional arrays specs are invalid json arrays
                p++;
                if (ndim >= MAXDIM)
                        ereturn(escontext, false,
@@ -438,11 +449,18 @@ ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
                        ereturn(escontext, false,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                         errmsg("malformed array literal: \"%s\"", origStr),
-                                        errdetail("\"[\" must introduce explicitly-specified array dimensions.")));
+                                        errdetail("\"[\" must introduce explicitly-specified array dimensions.")),
+                                        //if it isn't a digit and we might still have a json array its a good bet it is one
+                                        //with non-numeric content
+                                        couldBeJson ? errhint("SQL array vaLues are delimited by {}" : null));
 
                if (*p == ':')
                {
                        /* [m:n] format */
+                       //if we have numbers as the first entry, the presence of a colon,
+                       //which is not a valid json separator, in a number literal or an array,
+                       //means we have a bounds specification in an SQL array
+                       couldBeJson = false;
                        lBound[ndim] = i;
                        p++;
                        q = p;
@@ -454,18 +472,22 @@ ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
                                                 errmsg("malformed array literal: \"%s\"", origStr),
                                                 errdetail("Missing array dimension value.")));
                }
-               else
+               else if (*p == ']')
                {
                        /* [n] format */
+                       //single digit doesn't disprove json with single number element
                        lBound[ndim] = 1;
                        ub = i;
                }
-               if (*p != ']')
+               else
+               {
                        ereturn(escontext, false,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                         errmsg("malformed array literal: \"%s\"", origStr),
                                         errdetail("Missing \"%s\" after array dimensions.",
-                                                          "]")));
+                                                          "]")),
+                                       couldBeJson ? errhint("SQL array values are delimited by {}" : null));
+               }
                p++;
 
                /*
@@ -475,29 +497,37 @@ ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
                 * would be equivalent.  Given the lack of field demand, there seems
                 * little point in allowing such cases.
                 */
+               //not possible in the single bound case so cannot be json
                if (ub < lBound[ndim])
                        ereturn(escontext, false,
                                        (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                                         errmsg("upper bound cannot be less than lower bound")));
 
                /* Upper bound of INT_MAX must be disallowed, cf ArrayCheckBounds() */
+               // the singular json number may very well be larger than an integer...
                if (ub == INT_MAX)
                        ereturn(escontext, false,
                                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                        errmsg("array upper bound is too large: %d", ub)));
+                                        errmsg("array upper bound is too large: %d", ub),
+                                        couldBeJson ? errhint("SQL array values are delimited by {}" : null)));
 
                /* Compute "ub - lBound[ndim] + 1", detecting overflow */
+               //a whatever this threshold is a random number passed in a json array likely will exceed it
                if (pg_sub_s32_overflow(ub, lBound[ndim], &ub) ||
                        pg_add_s32_overflow(ub, 1, &ub))
                        ereturn(escontext, false,
                                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                         errmsg("array size exceeds the maximum allowed (%d)",
-                                                       (int) MaxArraySize)));
+                                                       (int) MaxArraySize,
+                                       couldBeJson ? errhint("SQL array values are delimited by {}" : null))));
 
                dim[ndim] = ub;
                ndim++;
        }
 
+    if (couldBeJson == true)
+           assert('not reachable, need to disprove json array literal prior to returning');
+
        *srcptr = p;
        *ndim_p = ndim;
        return true;


David J.

pgsql-hackers by date:

Previous
From: Alvaro Herrera
Date:
Subject: Re: why there is not VACUUM FULL CONCURRENTLY?
Next
From: Robert Haas
Date:
Subject: Re: Add a GUC check hook to ensure summarize_wal cannot be enabled when wal_level is minimal