From f299c3318244e1342970062f3b063d7406e32707 Mon Sep 17 00:00:00 2001 From: jian he Date: Thu, 13 Nov 2025 17:35:45 +0800 Subject: [PATCH v13 1/1] refactor v12-0003 refactor pg_ndistinct input function. based on: https://postgr.es/m/CADkLM=c8-U4GLMw5VdeDdfp1ae6BW=PCfEQqAky04iZMbckCFw@mail.gmail.com --- src/backend/utils/adt/pg_ndistinct.c | 133 ++++++++++----------- src/test/regress/expected/pg_ndistinct.out | 63 ++++++++++ src/test/regress/sql/pg_ndistinct.sql | 21 ++++ 3 files changed, 150 insertions(+), 67 deletions(-) diff --git a/src/backend/utils/adt/pg_ndistinct.c b/src/backend/utils/adt/pg_ndistinct.c index 96eaa09b4ed..72315ce34fb 100644 --- a/src/backend/utils/adt/pg_ndistinct.c +++ b/src/backend/utils/adt/pg_ndistinct.c @@ -32,7 +32,7 @@ typedef enum NDIST_EXPECT_ATTNUM_LIST, NDIST_EXPECT_ATTNUM, NDIST_EXPECT_NDISTINCT, - NDIST_EXPECT_COMPLETE + NDIST_EXPECT_COMPLETE, } NDistinctSemanticState; typedef struct @@ -40,12 +40,12 @@ typedef struct const char *str; NDistinctSemanticState state; - List *distinct_items; /* Accumulated complete MVNDistinctItems */ + List *distinct_items; /* Accumulated complete MVNDistinctItems */ Node *escontext; bool found_attributes; /* Item has an attributes key */ bool found_ndistinct; /* Item has ndistinct key */ - List *attnum_list; /* Accumulated attributes attnums */ + List *attnum_list; /* Accumulated attributes attnums */ int64 ndistinct; } NDistinctParseState; @@ -70,10 +70,10 @@ ndistinct_object_start(void *state) break; default: - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Expected Item object."))); + errdetail("Expected Item object.")); } return JSON_SEM_ACTION_FAILED; @@ -110,19 +110,19 @@ ndistinct_object_end(void *state) if (!parse->found_attributes) { - ereturn(parse->escontext, (Datum) 0, - 
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Item must contain \"" PG_NDISTINCT_KEY_ATTRIBUTES "\" key."))); + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item must contain \"" PG_NDISTINCT_KEY_ATTRIBUTES "\" key.")); return JSON_SEM_ACTION_FAILED; } if (!parse->found_ndistinct) { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Item must contain \"" PG_NDISTINCT_KEY_NDISTINCT "\" key."))); + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item must contain \"" PG_NDISTINCT_KEY_NDISTINCT "\" key.")); return JSON_SEM_ACTION_FAILED; } @@ -133,11 +133,10 @@ ndistinct_object_end(void *state) natts = parse->attnum_list->length; if (natts < 2) { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("The \"" PG_NDISTINCT_KEY_ATTRIBUTES - "\" key must contain an array of at least two attnums."))); + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("The \"" PG_NDISTINCT_KEY_ATTRIBUTES "\" key must contain an array of at least two attnums.")); return JSON_SEM_ACTION_FAILED; } @@ -162,10 +161,10 @@ ndistinct_object_end(void *state) { if (attrsort[i] == attrsort[i - 1]) { - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("attnum list duplicate value found: %d.", attrsort[i]))); + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + 
errdetail("attnum list duplicate value found: %d.", attrsort[i])); return JSON_SEM_ACTION_FAILED; } @@ -207,10 +206,10 @@ ndistinct_array_start(void *state) break; default: - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Array found in unexpected place."))); + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Array found in unexpected place.")); return JSON_SEM_ACTION_FAILED; } @@ -233,11 +232,11 @@ ndistinct_array_end(void *state) return JSON_SUCCESS; } - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed pg_ndistinct: \"%s\"", parse->str), errdetail("The \"" PG_NDISTINCT_KEY_ATTRIBUTES - "\" key must be an non-empty array."))); + "\" key must be an non-empty array.")); return JSON_SEM_ACTION_FAILED; break; @@ -249,18 +248,18 @@ ndistinct_array_end(void *state) return JSON_SUCCESS; } - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Item array cannot be empty."))); + errdetail("Item array cannot be empty.")); return JSON_SEM_ACTION_FAILED; break; default: - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Array found in unexpected place."))); + errdetail("Array found in unexpected place.")); } return JSON_SEM_ACTION_FAILED; } @@ -290,12 +289,12 @@ ndistinct_object_field_start(void *state, char *fname, bool isnull) return JSON_SUCCESS; } - ereturn(parse->escontext, (Datum) 0, - 
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Invalid key \"%s\". Only allowed keys are \"" + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Invalid key \"%s\". Only allowed keys are \"" PG_NDISTINCT_KEY_ATTRIBUTES "\" and \"" - PG_NDISTINCT_KEY_NDISTINCT "\".", fname))); + PG_NDISTINCT_KEY_NDISTINCT "\".", fname)); return JSON_SEM_ACTION_FAILED; } @@ -313,10 +312,10 @@ ndistinct_array_element_start(void *state, bool isnull) if (!isnull) return JSON_SUCCESS; - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Attnum list elements cannot be null."))); + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Attnum list elements cannot be null.")); break; @@ -324,18 +323,18 @@ ndistinct_array_element_start(void *state, bool isnull) if (!isnull) return JSON_SUCCESS; - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Item list elements cannot be null."))); + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item list elements cannot be null.")); break; default: - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Unexpected array element."))); + errdetail("Unexpected array element.")); } return JSON_SEM_ACTION_FAILED; @@ -365,11 +364,11 @@ ndistinct_scalar(void *state, char *token, JsonTokenType tokentype) case NDIST_EXPECT_NDISTINCT: /* - * While 
the structure dictates that ndistinct in a double precision - * floating point, in practice it has always been an integer, and it - * is output as such. Therefore, we follow usage precendent over the - * actual storage structure, and read it in as an integer. - */ + * While the structure dictates that ndistinct is a double precision + * floating point, in practice it has always been an integer, and it + * is output as such. Therefore, we follow usage precedent over the + * actual storage structure, and read it in as an integer. + */ parse->ndistinct = pg_strtoint64_safe(token, parse->escontext); if (SOFT_ERROR_OCCURRED(parse->escontext)) @@ -380,10 +379,10 @@ ndistinct_scalar(void *state, char *token, JsonTokenType tokentype) break; default: - ereturn(parse->escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errsave(parse->escontext, + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed pg_ndistinct: \"%s\"", parse->str), - errdetail("Unexpected scalar."))); + errdetail("Unexpected scalar.")); } return JSON_SEM_ACTION_FAILED; @@ -540,10 +539,10 @@ pg_ndistinct_in(PG_FUNCTION_ARGS) if (has_duplicate_attributes(item, &ndistinct->items[j])) { ereturn(parse_state.escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed pg_ndistinct: \"%s\"", str), errdetail("Duplicate \"" PG_NDISTINCT_KEY_ATTRIBUTES "\" array : [%s]", - item_attnum_list(item)))); + item_attnum_list(item))); PG_RETURN_NULL(); } } @@ -587,11 +586,11 @@ pg_ndistinct_in(PG_FUNCTION_ARGS) const char *refitem_list = item_attnum_list(refitem); ereturn(parse_state.escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed pg_ndistinct: \"%s\"", str), errdetail("\"" PG_NDISTINCT_KEY_ATTRIBUTES "\" array: [%s]" "must be a subset of array: [%s]", - item_list, refitem_list)); PG_RETURN_NULL(); } 
} @@ -610,9 +609,9 @@ pg_ndistinct_in(PG_FUNCTION_ARGS) /* Anything else is a generic JSON parse error */ ereturn(parse_state.escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed pg_ndistinct: \"%s\"", str), - errdetail("Must be valid JSON."))); + errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", str), + errdetail("Must be valid JSON.")); PG_RETURN_NULL(); } diff --git a/src/test/regress/expected/pg_ndistinct.out b/src/test/regress/expected/pg_ndistinct.out index d99e84a2bce..8267cd42bb6 100644 --- a/src/test/regress/expected/pg_ndistinct.out +++ b/src/test/regress/expected/pg_ndistinct.out @@ -5,11 +5,37 @@ ERROR: malformed pg_ndistinct: "[]" LINE 1: SELECT '[]'::pg_ndistinct; ^ DETAIL: Item array cannot be empty. +SELECT '{}'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "{}" +LINE 1: SELECT '{}'::pg_ndistinct; + ^ +DETAIL: Expected Item object. +SELECT '{[]}'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "{[]}" +LINE 1: SELECT '{[]}'::pg_ndistinct; + ^ +DETAIL: Expected Item object. +SELECT '[{}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{}]" +LINE 1: SELECT '[{}]'::pg_ndistinct; + ^ +DETAIL: Item must contain "attributes" key. +SELECT '[{null}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{null}]" +LINE 1: SELECT '[{null}]'::pg_ndistinct; + ^ +DETAIL: Must be valid JSON. SELECT '[null]'::pg_ndistinct; ERROR: malformed pg_ndistinct: "[null]" LINE 1: SELECT '[null]'::pg_ndistinct; ^ DETAIL: Item list elements cannot be null. +SELECT NULL::pg_ndistinct; + pg_ndistinct +-------------- + +(1 row) + -- Invalid keys SELECT '[{"attributes_invalid" : [2,3], "ndistinct" : 4}]'::pg_ndistinct; ERROR: malformed pg_ndistinct: "[{"attributes_invalid" : [2,3], "ndistinct" : 4}]" @@ -38,6 +64,25 @@ ERROR: malformed pg_ndistinct: "[{"attributes" : null, "ndistinct" : 4}]" LINE 1: SELECT '[{"attributes" : null, "ndistinct" : 4}]'::pg_ndisti... ^ DETAIL: Unexpected scalar. 
+SELECT '[{"attributes" : [1, 2], "ndistinct" : }]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [1, 2], "ndistinct" : }]" +LINE 1: SELECT '[{"attributes" : [1, 2], "ndistinct" : }]'::pg_ndist... + ^ +DETAIL: Must be valid JSON. +SELECT '[{"attributes" : [1, 65538], "ndistinct" : 11}]'::pg_ndistinct; +ERROR: value "65538" is out of range for type smallint +LINE 1: SELECT '[{"attributes" : [1, 65538], "ndistinct" : 11}]'::pg... + ^ +SELECT '[{"attributes" : [1, 2], "ndistinct" : 2147483648}]'::pg_ndistinct; --error + pg_ndistinct +---------------------------------------------------- + [{"attributes": [1, 2], "ndistinct": -2147483648}] +(1 row) + +SELECT '[{"attributes" : [1, 2], "ndistinct" : 1.1}]'::pg_ndistinct; --error +ERROR: invalid input syntax for type bigint: "1.1" +LINE 1: SELECT '[{"attributes" : [1, 2], "ndistinct" : 1.1}]'::pg_nd... + ^ SELECT '[{"attributes" : [2,null], "ndistinct" : 4}]'::pg_ndistinct; ERROR: malformed pg_ndistinct: "[{"attributes" : [2,null], "ndistinct" : 4}]" LINE 1: SELECT '[{"attributes" : [2,null], "ndistinct" : 4}]'::pg_nd... @@ -86,6 +131,24 @@ ERROR: malformed pg_ndistinct: "[{"attributes" : [2,2], "ndistinct" : 4}]" LINE 1: SELECT '[{"attributes" : [2,2], "ndistinct" : 4}]'::pg_ndist... ^ DETAIL: attnum list duplicate value found: 2. 
+SELECT str as source, + pg_input_is_valid(str,'pg_ndistinct') as ok, + errinfo.sql_error_code, + errinfo.message, + errinfo.detail, + errinfo.hint +FROM unnest(ARRAY[$$[{"attributes" : [2,2], "ndistinct" : 4}]$$::text, + '[{"attributes_invalid" : [2,3], "ndistinct" : 4}]', + '[{"attributes" : [2,3]}]' + ]) str, +LATERAL pg_input_error_info(str, 'pg_ndistinct') as errinfo; + source | ok | sql_error_code | message | detail | hint +---------------------------------------------------+----+----------------+-----------------------------------------------------------------------------+---------------------------------------------------------------------------------------+------ + [{"attributes" : [2,2], "ndistinct" : 4}] | f | 22P02 | malformed pg_ndistinct: "[{"attributes" : [2,2], "ndistinct" : 4}]" | attnum list duplicate value found: 2. | + [{"attributes_invalid" : [2,3], "ndistinct" : 4}] | f | 22P02 | malformed pg_ndistinct: "[{"attributes_invalid" : [2,3], "ndistinct" : 4}]" | Invalid key "attributes_invalid". Only allowed keys are "attributes" and "ndistinct". | + [{"attributes" : [2,3]}] | f | 22P02 | malformed pg_ndistinct: "[{"attributes" : [2,3]}]" | Item must contain "ndistinct" key. | +(3 rows) + -- Valid inputs -- Duplicated attribute lists. 
SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, diff --git a/src/test/regress/sql/pg_ndistinct.sql b/src/test/regress/sql/pg_ndistinct.sql index ca89fed6fe2..60036cc5d08 100644 --- a/src/test/regress/sql/pg_ndistinct.sql +++ b/src/test/regress/sql/pg_ndistinct.sql @@ -2,7 +2,12 @@ -- Invalid inputs SELECT '[]'::pg_ndistinct; +SELECT '{}'::pg_ndistinct; +SELECT '{[]}'::pg_ndistinct; +SELECT '[{}]'::pg_ndistinct; +SELECT '[{null}]'::pg_ndistinct; SELECT '[null]'::pg_ndistinct; +SELECT NULL::pg_ndistinct; -- Invalid keys SELECT '[{"attributes_invalid" : [2,3], "ndistinct" : 4}]'::pg_ndistinct; SELECT '[{"attributes" : [2,3], "invalid" : 3, "ndistinct" : 4}]'::pg_ndistinct; @@ -11,6 +16,10 @@ SELECT '[{"attributes" : [2,3]}]'::pg_ndistinct; SELECT '[{"ndistinct" : 4}]'::pg_ndistinct; -- Valid keys, invalid values SELECT '[{"attributes" : null, "ndistinct" : 4}]'::pg_ndistinct; +SELECT '[{"attributes" : [1, 2], "ndistinct" : }]'::pg_ndistinct; +SELECT '[{"attributes" : [1, 65538], "ndistinct" : 11}]'::pg_ndistinct; +SELECT '[{"attributes" : [1, 2], "ndistinct" : 2147483648}]'::pg_ndistinct; --error +SELECT '[{"attributes" : [1, 2], "ndistinct" : 1.1}]'::pg_ndistinct; --error SELECT '[{"attributes" : [2,null], "ndistinct" : 4}]'::pg_ndistinct; SELECT '[{"attributes" : [2,3], "ndistinct" : null}]'::pg_ndistinct; SELECT '[{"attributes" : [2,"a"], "ndistinct" : 4}]'::pg_ndistinct; @@ -23,6 +32,18 @@ SELECT '[{"attributes" : "a", "ndistinct" : 4}]'::pg_ndistinct; -- Duplicated attributes SELECT '[{"attributes" : [2,2], "ndistinct" : 4}]'::pg_ndistinct; +SELECT str as source, + pg_input_is_valid(str,'pg_ndistinct') as ok, + errinfo.sql_error_code, + errinfo.message, + errinfo.detail, + errinfo.hint +FROM unnest(ARRAY[$$[{"attributes" : [2,2], "ndistinct" : 4}]$$::text, + '[{"attributes_invalid" : [2,3], "ndistinct" : 4}]', + '[{"attributes" : [2,3]}]' + ]) str, +LATERAL pg_input_error_info(str, 'pg_ndistinct') as errinfo; + -- Valid inputs -- Duplicated attribute lists. 
SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, -- 2.34.1