commit 1fd790fa375383e70a641ac4b2d62c2932f1715c Author: Joel Jakobsson Date: Thu Jun 15 08:23:25 2023 +0200 Enhance parsing and reorder headers in hashset module Allow whitespaces in hashset input and reorder the inclusion of header files, placing PostgreSQL headers first. Additionally, update deprecated pq_sendint calls to pq_sendint32. Add tests for improved parsing functionality. diff --git a/Makefile b/Makefile index 68c29a2..b09a50f 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ SERVER_INCLUDES=-I$(shell pg_config --includedir-server) CLIENT_INCLUDES=-I$(shell pg_config --includedir) LIBRARY_PATH = -L$(shell pg_config --libdir) -REGRESS = prelude basic io_varying_lengths random table invalid order +REGRESS = prelude basic io_varying_lengths random table invalid order parsing REGRESS_OPTS = --inputdir=test PG_CONFIG = pg_config diff --git a/hashset.c b/hashset.c index 3bc7fdc..b9025b4 100644 --- a/hashset.c +++ b/hashset.c @@ -4,13 +4,6 @@ * Copyright (C) Tomas Vondra, 2019 */ -#include -#include -#include -#include -#include -#include - #include "postgres.h" #include "libpq/pqformat.h" #include "nodes/memnodes.h" @@ -21,6 +14,13 @@ #include "catalog/pg_type.h" #include "common/hashfn.h" +#include +#include +#include +#include +#include +#include + PG_MODULE_MAGIC; /* @@ -94,6 +94,28 @@ Datum int4hashset_gt(PG_FUNCTION_ARGS); Datum int4hashset_ge(PG_FUNCTION_ARGS); Datum int4hashset_cmp(PG_FUNCTION_ARGS); +/* + * hashset_isspace() --- a non-locale-dependent isspace() + * + * Identical to array_isspace() in src/backend/utils/adt/arrayfuncs.c. + * We used to use isspace() for parsing hashset values, but that has + * undesirable results: a hashset value might be silently interpreted + * differently depending on the locale setting. So here, we hard-wire + * the traditional ASCII definition of isspace(). + */ +static bool +hashset_isspace(char ch) +{ + if (ch == ' ' || + ch == '\t' || + ch == '\n' || + ch == '\r' || + ch == '\v' || + ch == '\f') + return true; + return false; +} + static int4hashset_t * int4hashset_allocate(int maxelements) { @@ -135,14 +157,18 @@ int4hashset_in(PG_FUNCTION_ARGS) char *endptr; int32 len = strlen(str); int4hashset_t *set; + int64 value; - /* Check the opening and closing braces */ - if (str[0] != '{' || str[len - 1] != '}') + /* Skip initial spaces */ + while (hashset_isspace(*str)) str++; + + /* Check the opening brace */ + if (*str != '{') { ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for hashset: \"%s\"", str), - errdetail("Hashset representation must start with \"{\" and end with \"}\"."))); + errdetail("Hashset representation must start with \"{\"."))); } /* Start parsing from the first number (after the opening brace) */ @@ -151,22 +177,27 @@ int4hashset_in(PG_FUNCTION_ARGS) /* Initial size based on input length (arbitrary, could be optimized) */ set = int4hashset_allocate(len/2); - /* Check for empty set */ - if (*str == '}') + while (true) { - PG_RETURN_POINTER(set); - } + /* Skip spaces before number */ + while (hashset_isspace(*str)) str++; - while (*str != '}') - { - int64 value = strtol(str, &endptr, 10); + /* Check for closing brace, handling the case for an empty set */ + if (*str == '}') + { + str++; /* Move past the closing brace */ + break; + } + + /* Parse the number */ + value = strtol(str, &endptr, 10); if (errno == ERANGE || value < PG_INT32_MIN || value > PG_INT32_MAX) { ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("value \"%s\" is out of range for type %s", str, - "integer"))); + "integer"))); } /* Add the value to the hashset, resize if needed */ @@ -183,21 +214,36 @@ int4hashset_in(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for integer: \"%s\"", str))); } - else if (*endptr == ',') + + str = endptr; /* Move to next potential number or closing brace */ + + /* Skip spaces before the next number or closing brace */ + while (hashset_isspace(*str)) str++; + + if (*str == ',') { - str = endptr + 1; /* Move to the next number */ + str++; /* Skip comma before next loop iteration */ } - else if (*endptr != '}') + else if (*str != '}') { /* Unexpected character */ ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("unexpected character \"%c\" in hashset input", *endptr))); + errmsg("unexpected character \"%c\" in hashset input", *str))); } - else /* *endptr is '}', move to next iteration */ + } + + /* Only whitespace is allowed after the closing brace */ + while (*str) + { + if (!hashset_isspace(*str)) { - str = endptr; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed hashset literal: \"%s\"", str), + errdetail("Junk after closing right brace."))); } + str++; } PG_RETURN_POINTER(set); @@ -257,10 +303,10 @@ int4hashset_send(PG_FUNCTION_ARGS) pq_begintypsend(&buf); /* Send the non-data fields */ - pq_sendint(&buf, set->flags, 4); - pq_sendint(&buf, set->maxelements, 4); - pq_sendint(&buf, set->nelements, 4); - pq_sendint(&buf, set->hashfn_id, 4); + pq_sendint32(&buf, set->flags); + pq_sendint32(&buf, set->maxelements); + pq_sendint32(&buf, set->nelements); + pq_sendint32(&buf, set->hashfn_id); /* Compute and send the size of the data field */ data_size = VARSIZE(set) - offsetof(int4hashset_t, data); diff --git a/test/expected/parsing.out b/test/expected/parsing.out new file mode 100644 index 0000000..263797e --- /dev/null +++ b/test/expected/parsing.out @@ -0,0 +1,71 @@ +/* Valid */ +SELECT '{1,23,-456}'::int4hashset; + int4hashset +------------- + {1,-456,23} +(1 row) + +SELECT ' { 1 , 23 , -456 } '::int4hashset; + int4hashset +------------- + {1,-456,23} +(1 row) + +/* Only whitespace is allowed after the closing brace */ +SELECT ' { 1 , 23 , -456 } 1'::int4hashset; -- error +ERROR: malformed hashset literal: "1" +LINE 2: SELECT ' { 1 , 23 , -456 } 1'::int4hashset; + ^ +DETAIL: Junk after closing right brace. +SELECT ' { 1 , 23 , -456 } ,'::int4hashset; -- error +ERROR: malformed hashset literal: "," +LINE 1: SELECT ' { 1 , 23 , -456 } ,'::int4hashset; + ^ +DETAIL: Junk after closing right brace. +SELECT ' { 1 , 23 , -456 } {'::int4hashset; -- error +ERROR: malformed hashset literal: "{" +LINE 1: SELECT ' { 1 , 23 , -456 } {'::int4hashset; + ^ +DETAIL: Junk after closing right brace. +SELECT ' { 1 , 23 , -456 } }'::int4hashset; -- error +ERROR: malformed hashset literal: "}" +LINE 1: SELECT ' { 1 , 23 , -456 } }'::int4hashset; + ^ +DETAIL: Junk after closing right brace. +SELECT ' { 1 , 23 , -456 } x'::int4hashset; -- error +ERROR: malformed hashset literal: "x" +LINE 1: SELECT ' { 1 , 23 , -456 } x'::int4hashset; + ^ +DETAIL: Junk after closing right brace. +/* Unexpected character when expecting closing brace */ +SELECT ' { 1 , 23 , -456 1'::int4hashset; -- error +ERROR: unexpected character "1" in hashset input +LINE 2: SELECT ' { 1 , 23 , -456 1'::int4hashset; + ^ +SELECT ' { 1 , 23 , -456 {'::int4hashset; -- error +ERROR: unexpected character "{" in hashset input +LINE 1: SELECT ' { 1 , 23 , -456 {'::int4hashset; + ^ +SELECT ' { 1 , 23 , -456 x'::int4hashset; -- error +ERROR: unexpected character "x" in hashset input +LINE 1: SELECT ' { 1 , 23 , -456 x'::int4hashset; + ^ +/* Error handling for strtol */ +SELECT ' { , 23 , -456 } '::int4hashset; -- error +ERROR: invalid input syntax for integer: ", 23 , -456 } " +LINE 2: SELECT ' { , 23 , -456 } '::int4hashset; + ^ +SELECT ' { 1 , 23 , '::int4hashset; -- error +ERROR: invalid input syntax for integer: "" +LINE 1: SELECT ' { 1 , 23 , '::int4hashset; + ^ +SELECT ' { s , 23 , -456 } '::int4hashset; -- error +ERROR: invalid input syntax for integer: "s , 23 , -456 } " +LINE 1: SELECT ' { s , 23 , -456 } '::int4hashset; + ^ +/* Missing opening brace */ +SELECT ' 1 , 23 , -456 } '::int4hashset; -- error +ERROR: invalid input syntax for hashset: "1 , 23 , -456 } " +LINE 2: SELECT ' 1 , 23 , -456 } '::int4hashset; + ^ +DETAIL: Hashset representation must start with "{". diff --git a/test/sql/parsing.sql b/test/sql/parsing.sql new file mode 100644 index 0000000..1e56bbe --- /dev/null +++ b/test/sql/parsing.sql @@ -0,0 +1,23 @@ +/* Valid */ +SELECT '{1,23,-456}'::int4hashset; +SELECT ' { 1 , 23 , -456 } '::int4hashset; + +/* Only whitespace is allowed after the closing brace */ +SELECT ' { 1 , 23 , -456 } 1'::int4hashset; -- error +SELECT ' { 1 , 23 , -456 } ,'::int4hashset; -- error +SELECT ' { 1 , 23 , -456 } {'::int4hashset; -- error +SELECT ' { 1 , 23 , -456 } }'::int4hashset; -- error +SELECT ' { 1 , 23 , -456 } x'::int4hashset; -- error + +/* Unexpected character when expecting closing brace */ +SELECT ' { 1 , 23 , -456 1'::int4hashset; -- error +SELECT ' { 1 , 23 , -456 {'::int4hashset; -- error +SELECT ' { 1 , 23 , -456 x'::int4hashset; -- error + +/* Error handling for strtol */ +SELECT ' { , 23 , -456 } '::int4hashset; -- error +SELECT ' { 1 , 23 , '::int4hashset; -- error +SELECT ' { s , 23 , -456 } '::int4hashset; -- error + +/* Missing opening brace */ +SELECT ' 1 , 23 , -456 } '::int4hashset; -- error