From 0fcef15b15879c73ff3e93b492fe02fe6ea7628f Mon Sep 17 00:00:00 2001 From: "Garen J. Torikian" Date: Tue, 4 Oct 2022 12:45:12 -0400 Subject: [PATCH] Expand character set for ltree labels This patch expands the character set for ltree labels to include three additional characters: '#', '-', and ';' (underscore was already allowed). These characters can be used to represent non-alphanumeric characters, via punycode or HTML encoding. Furthermore, the maximum label length is raised from 255 to 511 characters to account for potentially longer strings when storing encoded text. --- contrib/ltree/expected/ltree.out | 72 ++++++++++++++++++++++++-------- contrib/ltree/ltree.h | 13 ++++-- contrib/ltree/ltree_io.c | 10 ++--- contrib/ltree/ltxtquery_io.c | 4 +- contrib/ltree/sql/ltree.sql | 19 ++++++--- doc/src/sgml/ltree.sgml | 6 +-- 6 files changed, 87 insertions(+), 37 deletions(-) diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out index c6d8f3ef75..6581c32b28 100644 --- a/contrib/ltree/expected/ltree.out +++ b/contrib/ltree/expected/ltree.out @@ -25,6 +25,24 @@ SELECT '1.2'::ltree; 1.2 (1 row) +SELECT '1.2.#3'::ltree; + ltree +-------- + 1.2.#3 +(1 row) + +SELECT '1.2.-3'::ltree; + ltree +-------- + 1.2.-3 +(1 row) + +SELECT '1.2.;3'::ltree; + ltree +-------- + 1.2.;3 +(1 row) + SELECT '1.2._3'::ltree; ltree -------- @@ -45,15 +63,15 @@ ERROR: ltree syntax error LINE 1: SELECT '1.2.'::ltree; ^ DETAIL: Unexpected end of input. 
-SELECT repeat('x', 255)::ltree; - repeat ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +SELECT repeat('x', 511)::ltree; + repeat +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx (1 row) -SELECT repeat('x', 256)::ltree; +SELECT repeat('x', 512)::ltree; ERROR: label string is too long -DETAIL: Label length is 256, must be at most 255, at character 257. +DETAIL: Label length is 512, must be at most 511, at character 513. 
SELECT ltree2text('1.2.3.34.sdf'); ltree2text -------------- @@ -531,24 +549,24 @@ SELECT '1.2.3|@.4'::lquery; ERROR: lquery syntax error at character 7 LINE 1: SELECT '1.2.3|@.4'::lquery; ^ -SELECT (repeat('x', 255) || '*@@*')::lquery; - lquery -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@* +SELECT (repeat('x', 511) || '*@@*')::lquery; + lquery +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@* (1 row) -SELECT (repeat('x', 256) || '*@@*')::lquery; +SELECT (repeat('x', 512) || '*@@*')::lquery; ERROR: label string is too long -DETAIL: 
Label length is 256, must be at most 255, at character 257. -SELECT ('!' || repeat('x', 255))::lquery; - lquery ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - !xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +DETAIL: Label length is 512, must be at most 511, at character 513. +SELECT ('!' || repeat('x', 511))::lquery; + lquery +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + !xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx (1 row) -SELECT ('!' || repeat('x', 256))::lquery; +SELECT ('!' || repeat('x', 512))::lquery; ERROR: label string is too long -DETAIL: Label length is 256, must be at most 255, at character 258. 
+DETAIL: Label length is 512, must be at most 511, at character 514. SELECT nlevel('1.2.3.4'); nlevel -------- @@ -1195,6 +1213,24 @@ SELECT 'tree & aw_qw%*'::ltxtquery; tree & aw_qw%* (1 row) +SELECT 'tree & aw#qw%*'::ltxtquery; + ltxtquery +---------------- + tree & aw#qw%* +(1 row) + +SELECT 'tree & aw-qw%*'::ltxtquery; + ltxtquery +---------------- + tree & aw-qw%* +(1 row) + +SELECT 'tree & aw;qw%*'::ltxtquery; + ltxtquery +---------------- + tree & aw;qw%* +(1 row) + SELECT 'ltree.awdfg'::ltree @ '!tree & aWdf@*'::ltxtquery; ?column? ---------- diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h index 40aed0ca0c..cd585fa042 100644 --- a/contrib/ltree/ltree.h +++ b/contrib/ltree/ltree.h @@ -12,10 +12,10 @@ /* * We want the maximum length of a label to be encoding-independent, so - * set it somewhat arbitrarily at 255 characters (not bytes), while using + * set it somewhat arbitrarily at 511 characters (not bytes), while using * uint16 fields to hold the byte length. */ -#define LTREE_LABEL_MAX_CHARS 255 +#define LTREE_LABEL_MAX_CHARS 511 /* * LOWER_NODE used to be defined in the Makefile via the compile flags. 
@@ -126,7 +126,14 @@ typedef struct #define LQUERY_HASNOT 0x01 -#define ISALNUM(x) ( t_isalpha(x) || t_isdigit(x) || ( pg_mblen(x) == 1 && t_iseq((x), '_') ) ) +#define ISPRINT(x) (( pg_mblen(x) == 1 && \ + (t_iseq((x), '#') || \ + t_iseq((x), '-') || \ + t_iseq((x), ';') || \ + t_iseq((x), '_')))) +#define ISALNUM(x) ( t_isalpha(x) || t_isdigit(x)) + +#define ISVALID(x) ( ISALNUM(x) || ISPRINT(x) ) /* full text query */ diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c index 15115cb29f..b1dd7104e0 100644 --- a/contrib/ltree/ltree_io.c +++ b/contrib/ltree/ltree_io.c @@ -74,7 +74,7 @@ parse_ltree(const char *buf) switch (state) { case LTPRS_WAITNAME: - if (ISALNUM(ptr)) + if (ISVALID(ptr)) { lptr->start = ptr; lptr->wlen = 0; @@ -91,7 +91,7 @@ parse_ltree(const char *buf) lptr++; state = LTPRS_WAITNAME; } - else if (!ISALNUM(ptr)) + else if (!ISVALID(ptr)) UNCHAR; break; default: @@ -310,7 +310,7 @@ parse_lquery(const char *buf) switch (state) { case LQPRS_WAITLEVEL: - if (ISALNUM(ptr)) + if (ISVALID(ptr)) { GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1)); lptr->start = ptr; @@ -333,7 +333,7 @@ parse_lquery(const char *buf) UNCHAR; break; case LQPRS_WAITVAR: - if (ISALNUM(ptr)) + if (ISVALID(ptr)) { lptr++; lptr->start = ptr; @@ -376,7 +376,7 @@ parse_lquery(const char *buf) state = LQPRS_WAITLEVEL; curqlevel = NEXTLEV(curqlevel); } - else if (ISALNUM(ptr)) + else if (ISVALID(ptr)) { /* disallow more chars after a flag */ if (lptr->flag) diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c index 3eca5cb8ff..dfe641399e 100644 --- a/contrib/ltree/ltxtquery_io.c +++ b/contrib/ltree/ltxtquery_io.c @@ -76,7 +76,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint (state->buf)++; return OPEN; } - else if (ISALNUM(state->buf)) + else if (ISVALID(state->buf)) { state->state = INOPERAND; *strval = state->buf; @@ -89,7 +89,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, 
int32 *lenval, char **strval, uint errmsg("operand syntax error"))); break; case INOPERAND: - if (ISALNUM(state->buf)) + if (ISVALID(state->buf)) { if (*flag) ereport(ERROR, diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql index bf733ed17b..612c2072dc 100644 --- a/contrib/ltree/sql/ltree.sql +++ b/contrib/ltree/sql/ltree.sql @@ -8,6 +8,9 @@ WHERE opc.oid >= 16384 AND NOT amvalidate(opc.oid); SELECT ''::ltree; SELECT '1'::ltree; SELECT '1.2'::ltree; +SELECT '1.2.#3'::ltree; +SELECT '1.2.-3'::ltree; +SELECT '1.2.;3'::ltree; SELECT '1.2._3'::ltree; -- empty labels not allowed @@ -15,8 +18,8 @@ SELECT '.2.3'::ltree; SELECT '1..3'::ltree; SELECT '1.2.'::ltree; -SELECT repeat('x', 255)::ltree; -SELECT repeat('x', 256)::ltree; +SELECT repeat('x', 511)::ltree; +SELECT repeat('x', 512)::ltree; SELECT ltree2text('1.2.3.34.sdf'); SELECT text2ltree('1.2.3.34.sdf'); @@ -111,10 +114,10 @@ SELECT '1.!.3'::lquery; SELECT '1.2.!'::lquery; SELECT '1.2.3|@.4'::lquery; -SELECT (repeat('x', 255) || '*@@*')::lquery; -SELECT (repeat('x', 256) || '*@@*')::lquery; -SELECT ('!' || repeat('x', 255))::lquery; -SELECT ('!' || repeat('x', 256))::lquery; +SELECT (repeat('x', 511) || '*@@*')::lquery; +SELECT (repeat('x', 512) || '*@@*')::lquery; +SELECT ('!' || repeat('x', 511))::lquery; +SELECT ('!' 
|| repeat('x', 512))::lquery; SELECT nlevel('1.2.3.4'); SELECT nlevel(('1' || repeat('.1', 65534))::ltree); @@ -233,6 +236,10 @@ SELECT 'QWER_GY'::ltree ~ 'q_t%@*'; --ltxtquery SELECT '!tree & aWdf@*'::ltxtquery; SELECT 'tree & aw_qw%*'::ltxtquery; +SELECT 'tree & aw#qw%*'::ltxtquery; +SELECT 'tree & aw-qw%*'::ltxtquery; +SELECT 'tree & aw;qw%*'::ltxtquery; + SELECT 'ltree.awdfg'::ltree @ '!tree & aWdf@*'::ltxtquery; SELECT 'tree.awdfg'::ltree @ '!tree & aWdf@*'::ltxtquery; SELECT 'tree.awdfg'::ltree @ '!tree | aWdf@*'::ltxtquery; diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml index 508f404ae8..144bccd2c5 100644 --- a/doc/src/sgml/ltree.sgml +++ b/doc/src/sgml/ltree.sgml @@ -24,9 +24,9 @@ A label is a sequence of alphanumeric characters - and underscores (for example, in C locale the characters - A-Za-z0-9_ are allowed). - Labels must be less than 256 characters long. + and a small set of punctuation characters. In C locale, the characters + A-Za-z0-9 and the four symbols # - ; _ are allowed. + Labels must be less than 512 characters long. -- 2.37.3