Re: fix for BUG #3720: wrong results at using ltree - Mailing list pgsql-hackers

From Tom Lane
Subject Re: fix for BUG #3720: wrong results at using ltree
Date
Msg-id 23304.1585591242@sss.pgh.pa.us
Whole thread Raw
In response to Re: fix for BUG #3720: wrong results at using ltree  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: fix for BUG #3720: wrong results at using ltree  (Nikita Glukhov <n.gluhov@postgrespro.ru>)
List pgsql-hackers
I wrote:
> Hence, attached are two revised patches that attack the problem
> this way.  The first one is somewhat unrelated to the original
> point --- it's trying to clean up the error messages in ltree_in
> and lquery_in so that they are more consistent and agree with
> the terminology used in the documentation.  (Notably, the term
> "level" is used nowhere in the ltree docs, except in the legacy
> function name nlevel().)

One thing I changed in that patch was to change the syntax error
reports to say "at character %d" not "in position %d", because
I thought the latter was pretty confusing --- it's not obvious
whether it's counting characters or labels or what.  However,
I now notice that what the code is providing is a zero-based
character index, which is out of line with our practice
elsewhere: core parser errors are reported using 1-based indexes.
If we reword these messages then we should adopt that practice too.
Hence, new patch versions that do it like that.  (0002 is unchanged.)

            regards, tom lane

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index c78d372..610cb6f 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -464,7 +464,7 @@ SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
 (1 row)

 SELECT nlevel(('1' || repeat('.1', 65535))::ltree);
-ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
+ERROR:  number of ltree labels (65536) exceeds the maximum allowed (65535)
 SELECT nlevel(('1' || repeat('.1', 65534))::ltree || '1');
 ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
 SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
@@ -474,7 +474,7 @@ SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
 (1 row)

 SELECT ('1' || repeat('.1', 65535))::lquery IS NULL;
-ERROR:  number of lquery levels (65536) exceeds the maximum allowed (65535)
+ERROR:  number of lquery items (65536) exceeds the maximum allowed (65535)
 SELECT '*{65535}'::lquery;
   lquery
 ----------
@@ -485,7 +485,7 @@ SELECT '*{65536}'::lquery;
 ERROR:  lquery syntax error
 LINE 1: SELECT '*{65536}'::lquery;
                ^
-DETAIL:  Low limit (65536) exceeds the maximum allowed (65535).
+DETAIL:  Low limit (65536) exceeds the maximum allowed (65535), at character 3.
 SELECT '*{,65534}'::lquery;
   lquery
 -----------
@@ -502,7 +502,12 @@ SELECT '*{,65536}'::lquery;
 ERROR:  lquery syntax error
 LINE 1: SELECT '*{,65536}'::lquery;
                ^
-DETAIL:  High limit (65536) exceeds the maximum allowed (65535).
+DETAIL:  High limit (65536) exceeds the maximum allowed (65535), at character 4.
+SELECT '*{4,3}'::lquery;
+ERROR:  lquery syntax error
+LINE 1: SELECT '*{4,3}'::lquery;
+               ^
+DETAIL:  Low limit (4) is greater than high limit (3), at character 5.
 SELECT '1.2'::ltree  < '2.2'::ltree;
  ?column?
 ----------
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 2503d47..e806a14 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -17,12 +17,6 @@ PG_FUNCTION_INFO_V1(lquery_in);
 PG_FUNCTION_INFO_V1(lquery_out);


-#define UNCHAR ereport(ERROR, \
-                       (errcode(ERRCODE_SYNTAX_ERROR), \
-                        errmsg("syntax error at position %d", \
-                        pos)));
-
-
 typedef struct
 {
     char       *start;
@@ -47,7 +41,12 @@ ltree_in(PG_FUNCTION_ARGS)
     ltree       *result;
     ltree_level *curlevel;
     int            charlen;
-    int            pos = 0;
+    int            pos = 1;        /* character position for error messages */
+
+#define UNCHAR ereport(ERROR, \
+                       errcode(ERRCODE_SYNTAX_ERROR), \
+                       errmsg("ltree syntax error at character %d", \
+                              pos))

     ptr = buf;
     while (*ptr)
@@ -61,7 +60,7 @@ ltree_in(PG_FUNCTION_ARGS)
     if (num + 1 > LTREE_MAX_LEVELS)
         ereport(ERROR,
                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                 errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
+                 errmsg("number of ltree labels (%d) exceeds the maximum allowed (%d)",
                         num + 1, LTREE_MAX_LEVELS)));
     list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
     ptr = buf;
@@ -88,10 +87,10 @@ ltree_in(PG_FUNCTION_ARGS)
                 if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
                     ereport(ERROR,
                             (errcode(ERRCODE_NAME_TOO_LONG),
-                             errmsg("name of level is too long"),
-                             errdetail("Name length is %d, must "
-                                       "be < 256, in position %d.",
-                                       lptr->wlen, pos)));
+                             errmsg("label string is too long"),
+                             errdetail("Label length is %d, must be at most %d, at character %d.",
+                                       lptr->wlen, LTREE_LABEL_MAX_CHARS,
+                                       pos)));

                 totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
                 lptr++;
@@ -115,10 +114,9 @@ ltree_in(PG_FUNCTION_ARGS)
         if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
             ereport(ERROR,
                     (errcode(ERRCODE_NAME_TOO_LONG),
-                     errmsg("name of level is too long"),
-                     errdetail("Name length is %d, must "
-                               "be < 256, in position %d.",
-                               lptr->wlen, pos)));
+                     errmsg("label string is too long"),
+                     errdetail("Label length is %d, must be at most %d, at character %d.",
+                               lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));

         totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
         lptr++;
@@ -126,8 +124,8 @@ ltree_in(PG_FUNCTION_ARGS)
     else if (!(state == LTPRS_WAITNAME && lptr == list))
         ereport(ERROR,
                 (errcode(ERRCODE_SYNTAX_ERROR),
-                 errmsg("syntax error"),
-                 errdetail("Unexpected end of line.")));
+                 errmsg("ltree syntax error"),
+                 errdetail("Unexpected end of input.")));

     result = (ltree *) palloc0(LTREE_HDRSIZE + totallen);
     SET_VARSIZE(result, LTREE_HDRSIZE + totallen);
@@ -144,6 +142,8 @@ ltree_in(PG_FUNCTION_ARGS)

     pfree(list);
     PG_RETURN_POINTER(result);
+
+#undef UNCHAR
 }

 Datum
@@ -208,7 +208,12 @@ lquery_in(PG_FUNCTION_ARGS)
     bool        hasnot = false;
     bool        wasbad = false;
     int            charlen;
-    int            pos = 0;
+    int            pos = 1;        /* character position for error messages */
+
+#define UNCHAR ereport(ERROR, \
+                       errcode(ERRCODE_SYNTAX_ERROR), \
+                       errmsg("lquery syntax error at character %d", \
+                              pos))

     ptr = buf;
     while (*ptr)
@@ -230,7 +235,7 @@ lquery_in(PG_FUNCTION_ARGS)
     if (num > LQUERY_MAX_LEVELS)
         ereport(ERROR,
                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                 errmsg("number of lquery levels (%d) exceeds the maximum allowed (%d)",
+                 errmsg("number of lquery items (%d) exceeds the maximum allowed (%d)",
                         num, LQUERY_MAX_LEVELS)));
     curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
     ptr = buf;
@@ -305,10 +310,10 @@ lquery_in(PG_FUNCTION_ARGS)
                 if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
                     ereport(ERROR,
                             (errcode(ERRCODE_NAME_TOO_LONG),
-                             errmsg("name of level is too long"),
-                             errdetail("Name length is %d, must "
-                                       "be < 256, in position %d.",
-                                       lptr->wlen, pos)));
+                             errmsg("label string is too long"),
+                             errdetail("Label length is %d, must be at most %d, at character %d.",
+                                       lptr->wlen, LTREE_LABEL_MAX_CHARS,
+                                       pos)));

                 state = LQPRS_WAITVAR;
             }
@@ -321,10 +326,10 @@ lquery_in(PG_FUNCTION_ARGS)
                 if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
                     ereport(ERROR,
                             (errcode(ERRCODE_NAME_TOO_LONG),
-                             errmsg("name of level is too long"),
-                             errdetail("Name length is %d, must "
-                                       "be < 256, in position %d.",
-                                       lptr->wlen, pos)));
+                             errmsg("label string is too long"),
+                             errdetail("Label length is %d, must be at most %d, at character %d.",
+                                       lptr->wlen, LTREE_LABEL_MAX_CHARS,
+                                       pos)));

                 state = LQPRS_WAITLEVEL;
                 curqlevel = NEXTLEV(curqlevel);
@@ -361,10 +366,10 @@ lquery_in(PG_FUNCTION_ARGS)

                 if (low < 0 || low > LTREE_MAX_LEVELS)
                     ereport(ERROR,
-                            (errcode(ERRCODE_SYNTAX_ERROR),
+                            (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                              errmsg("lquery syntax error"),
-                             errdetail("Low limit (%d) exceeds the maximum allowed (%d).",
-                                       low, LTREE_MAX_LEVELS)));
+                             errdetail("Low limit (%d) exceeds the maximum allowed (%d), at character %d.",
+                                       low, LTREE_MAX_LEVELS, pos)));

                 curqlevel->low = (uint16) low;
                 state = LQPRS_WAITND;
@@ -380,10 +385,16 @@ lquery_in(PG_FUNCTION_ARGS)

                 if (high < 0 || high > LTREE_MAX_LEVELS)
                     ereport(ERROR,
+                            (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                             errmsg("lquery syntax error"),
+                             errdetail("High limit (%d) exceeds the maximum allowed (%d), at character %d.",
+                                       high, LTREE_MAX_LEVELS, pos)));
+                else if (curqlevel->low > high)
+                    ereport(ERROR,
                             (errcode(ERRCODE_SYNTAX_ERROR),
                              errmsg("lquery syntax error"),
-                             errdetail("High limit (%d) exceeds the maximum allowed (%d).",
-                                       high, LTREE_MAX_LEVELS)));
+                             errdetail("Low limit (%d) is greater than high limit (%d), at character %d.",
+                                       curqlevel->low, high, pos)));

                 curqlevel->high = (uint16) high;
                 state = LQPRS_WAITCLOSE;
@@ -441,7 +452,7 @@ lquery_in(PG_FUNCTION_ARGS)
             ereport(ERROR,
                     (errcode(ERRCODE_SYNTAX_ERROR),
                      errmsg("lquery syntax error"),
-                     errdetail("Unexpected end of line.")));
+                     errdetail("Unexpected end of input.")));

         lptr->len = ptr - lptr->start -
             ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
@@ -451,15 +462,14 @@ lquery_in(PG_FUNCTION_ARGS)
             ereport(ERROR,
                     (errcode(ERRCODE_SYNTAX_ERROR),
                      errmsg("lquery syntax error"),
-                     errdetail("Unexpected end of line.")));
+                     errdetail("Unexpected end of input.")));

         if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
             ereport(ERROR,
                     (errcode(ERRCODE_NAME_TOO_LONG),
-                     errmsg("name of level is too long"),
-                     errdetail("Name length is %d, must "
-                               "be < 256, in position %d.",
-                               lptr->wlen, pos)));
+                     errmsg("label string is too long"),
+                     errdetail("Label length is %d, must be at most %d, at character %d.",
+                               lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
     }
     else if (state == LQPRS_WAITOPEN)
         curqlevel->high = LTREE_MAX_LEVELS;
@@ -467,7 +477,7 @@ lquery_in(PG_FUNCTION_ARGS)
         ereport(ERROR,
                 (errcode(ERRCODE_SYNTAX_ERROR),
                  errmsg("lquery syntax error"),
-                 errdetail("Unexpected end of line.")));
+                 errdetail("Unexpected end of input.")));

     curqlevel = tmpql;
     totallen = LQUERY_HDRSIZE;
@@ -483,13 +493,6 @@ lquery_in(PG_FUNCTION_ARGS)
                 lptr++;
             }
         }
-        else if (curqlevel->low > curqlevel->high)
-            ereport(ERROR,
-                    (errcode(ERRCODE_SYNTAX_ERROR),
-                     errmsg("lquery syntax error"),
-                     errdetail("Low limit (%d) is greater than upper (%d).",
-                               curqlevel->low, curqlevel->high)));
-
         curqlevel = NEXTLEV(curqlevel);
     }

@@ -543,6 +546,8 @@ lquery_in(PG_FUNCTION_ARGS)

     pfree(tmpql);
     PG_RETURN_POINTER(result);
+
+#undef UNCHAR
 }

 Datum
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index d8489cb..f6d73b8 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -100,6 +100,7 @@ SELECT '*{65536}'::lquery;
 SELECT '*{,65534}'::lquery;
 SELECT '*{,65535}'::lquery;
 SELECT '*{,65536}'::lquery;
+SELECT '*{4,3}'::lquery;

 SELECT '1.2'::ltree  < '2.2'::ltree;
 SELECT '1.2'::ltree  <= '2.2'::ltree;
diff --git a/contrib/ltree_plpython/expected/ltree_plpython.out b/contrib/ltree_plpython/expected/ltree_plpython.out
index 4779755..f28897f 100644
--- a/contrib/ltree_plpython/expected/ltree_plpython.out
+++ b/contrib/ltree_plpython/expected/ltree_plpython.out
@@ -38,6 +38,6 @@ $$;
 -- because it will try to parse the Python list as an ltree input
 -- string.
 SELECT test2();
-ERROR:  syntax error at position 0
+ERROR:  ltree syntax error at character 1
 CONTEXT:  while creating return value
 PL/Python function "test2"
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 610cb6f..5d9102c 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -445,6 +445,12 @@ SELECT '1.*.4|3|2.*{1}'::lquery;
  1.*.4|3|2.*{1}
 (1 row)

+SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
+               lquery
+------------------------------------
+ foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}
+(1 row)
+
 SELECT 'qwerty%@*.tu'::lquery;
     lquery
 --------------
@@ -727,7 +733,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d.*';
  ?column?
 ----------
- f
+ t
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '*.!d';
@@ -757,7 +763,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!e';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!e.*';
  ?column?
 ----------
- f
+ t
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!e';
@@ -775,7 +781,7 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d.*';
  ?column?
 ----------
- f
+ t
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!f.*';
@@ -793,7 +799,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!f.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column?
 ----------
- f
+ t
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '*.a.!d.*';
@@ -817,13 +823,13 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.!d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column?
 ----------
- f
+ t
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*';
  ?column?
 ----------
- f
+ t
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
@@ -835,7 +841,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.c.*';
  ?column?
 ----------
- f
+ t
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '!b.*.c.*';
@@ -883,31 +889,31 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*.e';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*.e';
  ?column?
 ----------
- t
+ f
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*.e';
  ?column?
 ----------
- t
+ f
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*.e';
  ?column?
 ----------
- t
+ f
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*.e';
  ?column?
 ----------
- t
+ f
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*.e';
  ?column?
 ----------
- f
+ t
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '!b.!c.*';
@@ -937,19 +943,19 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*';
  ?column?
 ----------
- t
+ f
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';
  ?column?
 ----------
- t
+ f
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*';
  ?column?
 ----------
- t
+ f
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
@@ -961,7 +967,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
  ?column?
 ----------
- f
+ t
 (1 row)

 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
@@ -988,6 +994,78 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
  f
 (1 row)

+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+ ?column?
+----------
+ f
+(1 row)
+
+SELECT 'a.b'::ltree ~ '!a.!a';
+ ?column?
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}';
+ ?column?
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{1,}.*';
+ ?column?
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}.!a{,}';
+ ?column?
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,}.!a{,}';
+ ?column?
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,2}.!a{1,}';
+ ?column?
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,2}.!a{1,}';
+ ?column?
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!x{,}';
+ ?column?
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{,}';
+ ?column?
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!a{2,}';
+ ?column?
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!d{2,}.*';
+ ?column?
+----------
+ t
+(1 row)
+
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
  ?column?
 ----------
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 5c7afe5..e7a7f7f 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -9,6 +9,7 @@

 #include "catalog/pg_collation.h"
 #include "ltree.h"
+#include "miscadmin.h"
 #include "utils/formatting.h"

 PG_FUNCTION_INFO_V1(ltq_regex);
@@ -19,16 +20,6 @@ PG_FUNCTION_INFO_V1(lt_q_rregex);

 #define NEXTVAL(x) ( (lquery*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )

-typedef struct
-{
-    lquery_level *q;
-    int            nq;
-    ltree_level *t;
-    int            nt;
-    int            posq;
-    int            post;
-} FieldNot;
-
 static char *
 getlexeme(char *start, char *end, int *len)
 {
@@ -99,238 +90,145 @@ ltree_strncasecmp(const char *a, const char *b, size_t s)
 }

 /*
- * See if a (non-star) lquery_level matches an ltree_level
+ * See if an lquery_level matches an ltree_level
  *
- * Does not consider level's possible LQL_NOT flag.
+ * This accounts for all flags including LQL_NOT, but does not
+ * consider repetition counts.
  */
 static bool
 checkLevel(lquery_level *curq, ltree_level *curt)
 {
-    int            (*cmpptr) (const char *, const char *, size_t);
     lquery_variant *curvar = LQL_FIRST(curq);
-    int            i;
+    bool        success;
+
+    success = (curq->flag & LQL_NOT) ? false : true;
+
+    /* numvar == 0 means '*' which matches anything */
+    if (curq->numvar == 0)
+        return success;

-    for (i = 0; i < curq->numvar; i++)
+    for (int i = 0; i < curq->numvar; i++)
     {
+        int            (*cmpptr) (const char *, const char *, size_t);
+
         cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;

         if (curvar->flag & LVAR_SUBLEXEME)
         {
-            if (compare_subnode(curt, curvar->name, curvar->len, cmpptr, (curvar->flag & LVAR_ANYEND)))
-                return true;
+            if (compare_subnode(curt, curvar->name, curvar->len, cmpptr,
+                                (curvar->flag & LVAR_ANYEND)))
+                return success;
         }
         else if ((curvar->len == curt->len ||
                   (curt->len > curvar->len && (curvar->flag & LVAR_ANYEND))) &&
                  (*cmpptr) (curvar->name, curt->name, curvar->len) == 0)
-        {
+            return success;

-            return true;
-        }
         curvar = LVAR_NEXT(curvar);
     }
-    return false;
+    return !success;
 }

 /*
-void
-printFieldNot(FieldNot *fn ) {
-    while(fn->q) {
-        elog(NOTICE,"posQ:%d lenQ:%d posT:%d lenT:%d", fn->posq,fn->nq,fn->post,fn->nt);
-        fn++;
-    }
-}
-*/
-
-/*
- * Try to match an lquery (of query_numlevel items) to an ltree (of
- * tree_numlevel items)
- *
- * If the query contains any NOT flags, "ptr" must point to a FieldNot
- * workspace initialized with ptr->q == NULL.  Otherwise it can be NULL.
- * (LQL_NOT flags will be ignored if ptr == NULL.)
- *
- * high_pos is the last ltree position the first lquery item is allowed
- * to match at; it should be zero for external calls.
- *
- * force_advance must be false except in internal recursive calls.
+ * Try to match an lquery (of qlen items) to an ltree (of tlen items)
  */
 static bool
-checkCond(lquery_level *curq, int query_numlevel,
-          ltree_level *curt, int tree_numlevel,
-          FieldNot *ptr,
-          uint32 high_pos,
-          bool force_advance)
+checkCond(lquery_level *curq, int qlen,
+          ltree_level *curt, int tlen)
 {
-    uint32        low_pos = 0,    /* first allowed ltree position for match */
-                cur_tpos = 0;    /* ltree position of curt */
-    int            tlen = tree_numlevel,    /* counts of remaining items */
-                qlen = query_numlevel;
-    lquery_level *prevq = NULL;
-
-    /* advance curq (setting up prevq) if requested */
-    if (force_advance)
-    {
-        Assert(qlen > 0);
-        prevq = curq;
-        curq = LQL_NEXT(curq);
-        qlen--;
-    }
+    /* Since this function recurses, it could be driven to stack overflow */
+    check_stack_depth();

-    while (tlen > 0 && qlen > 0)
+    /* Normal case where we have some query and text items to match */
+    if (tlen > 0 && qlen > 0)
     {
-        if (curq->numvar)
+        int            low,
+                    high,
+                    matchcnt;
+        lquery_level *nextq;
+        ltree_level *nextt;
+
+        /*
+         * Get min and max repetition counts for this query item, dealing with
+         * the backwards-compatibility hack that the low/high fields aren't
+         * meaningful for non-'*' items unless LQL_COUNT is set.
+         */
+        if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
+            low = curq->low, high = curq->high;
+        else
+            low = high = 1;
+
+        /* Fail if a match of required number of items is impossible */
+        if (tlen < low || high < low)
+            return false;
+
+        /* Check minimum number of matches */
+        nextt = curt;
+        for (matchcnt = 0; matchcnt < low; matchcnt++)
+        {
+            if (!checkLevel(curq, nextt))
+                return false;
+            nextt = LEVEL_NEXT(nextt);
+        }
+
+        /*
+         * Recursively check the rest of the pattern against each possible
+         * start point following this item's match(es).
+         */
+        nextq = LQL_NEXT(curq);
+        for (;;)
         {
-            /* Current query item is not '*' */
-            ltree_level *prevt = curt;
+            /*
+             * If the rest of the pattern matches beginning here, we're good.
+             */
+            if (checkCond(nextq, qlen - 1,
+                          nextt, tlen - matchcnt))
+                return true;

-            /* skip tree items that must be ignored due to prior * items */
-            while (cur_tpos < low_pos)
+            /*
+             * Otherwise, try to match one more text item to this query item.
+             */
+            if (matchcnt < high && matchcnt < tlen)
             {
-                curt = LEVEL_NEXT(curt);
-                tlen--;
-                cur_tpos++;
-                if (tlen == 0)
+                if (!checkLevel(curq, nextt))
                     return false;
-                if (ptr && ptr->q)
-                    ptr->nt++;
-            }
-
-            if (ptr && (curq->flag & LQL_NOT))
-            {
-                /* Deal with a NOT item */
-                if (!(prevq && prevq->numvar == 0))
-                    prevq = curq;
-                if (ptr->q == NULL)
-                {
-                    ptr->t = prevt;
-                    ptr->q = prevq;
-                    ptr->nt = 1;
-                    ptr->nq = 1 + ((prevq == curq) ? 0 : 1);
-                    ptr->posq = query_numlevel - qlen - ((prevq == curq) ? 0 : 1);
-                    ptr->post = cur_tpos;
-                }
-                else
-                {
-                    ptr->nt++;
-                    ptr->nq++;
-                }
-
-                if (qlen == 1 && ptr->q->numvar == 0)
-                    ptr->nt = tree_numlevel - ptr->post;
-                curt = LEVEL_NEXT(curt);
-                tlen--;
-                cur_tpos++;
-                if (high_pos < cur_tpos)
-                    high_pos++;
+                nextt = LEVEL_NEXT(nextt);
+                matchcnt++;
             }
             else
             {
-                /* Not a NOT item, check for normal match */
-                bool        isok = false;
-
-                while (cur_tpos <= high_pos && tlen > 0 && !isok)
-                {
-                    isok = checkLevel(curq, curt);
-                    curt = LEVEL_NEXT(curt);
-                    tlen--;
-                    cur_tpos++;
-                    if (isok && prevq && prevq->numvar == 0 &&
-                        tlen > 0 && cur_tpos <= high_pos)
-                    {
-                        FieldNot    tmpptr;
-
-                        if (ptr)
-                            memcpy(&tmpptr, ptr, sizeof(FieldNot));
-                        if (checkCond(prevq, qlen + 1,
-                                      curt, tlen,
-                                      (ptr) ? &tmpptr : NULL,
-                                      high_pos - cur_tpos,
-                                      true))
-                            return true;
-                    }
-                    if (!isok && ptr && ptr->q)
-                        ptr->nt++;
-                }
-                if (!isok)
-                    return false;
-
-                if (ptr && ptr->q)
-                {
-                    if (checkCond(ptr->q, ptr->nq,
-                                  ptr->t, ptr->nt,
-                                  NULL,
-                                  0,
-                                  false))
-                        return false;
-                    ptr->q = NULL;
-                }
-                low_pos = cur_tpos;
-                high_pos = cur_tpos;
-            }
-        }
-        else
-        {
-            /* Current query item is '*' */
-            low_pos += curq->low;
-
-            if (low_pos > tree_numlevel)
+                /* No more text, or max match count exceeded, so fail */
                 return false;
-
-            high_pos = Min(high_pos + curq->high, tree_numlevel);
-
-            if (ptr && ptr->q)
-            {
-                ptr->nq++;
-                if (qlen == 1)
-                    ptr->nt = tree_numlevel - ptr->post;
             }
         }
-
-        prevq = curq;
-        curq = LQL_NEXT(curq);
-        qlen--;
     }

-    /* Fail if we've already run out of ltree items */
-    if (low_pos > tree_numlevel || tree_numlevel > high_pos)
-        return false;
-
-    /* Remaining lquery items must be NOT or '*' items */
+    /*
+     * If we're out of text, but query items remain, we can match only if all
+     * remaining query items permit zero matches.
+     */
     while (qlen > 0)
     {
-        if (curq->numvar)
-        {
-            if (!(curq->flag & LQL_NOT))
-                return false;
-        }
-        else
-        {
-            low_pos += curq->low;
+        int            low;

-            if (low_pos > tree_numlevel)
-                return false;
+        /* As above, extract the correct minimum match count. */
+        if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
+            low = curq->low;
+        else
+            low = 1;

-            high_pos = Min(high_pos + curq->high, tree_numlevel);
-        }
+        if (low > 0)
+            return false;

         curq = LQL_NEXT(curq);
         qlen--;
     }

-    /* Fail if trailing '*'s require more ltree items than we have */
-    if (low_pos > tree_numlevel || tree_numlevel > high_pos)
-        return false;
-
-    /* Finish pending NOT check, if any */
-    if (ptr && ptr->q &&
-        checkCond(ptr->q, ptr->nq,
-                  ptr->t, ptr->nt,
-                  NULL,
-                  0,
-                  false))
-        return false;
-
-    return true;
+    /*
+     * Once we're out of query items, we match only if there's no remaining
+     * text either.
+     */
+    return (tlen == 0);
 }

 Datum
@@ -338,28 +236,10 @@ ltq_regex(PG_FUNCTION_ARGS)
 {
     ltree       *tree = PG_GETARG_LTREE_P(0);
     lquery       *query = PG_GETARG_LQUERY_P(1);
-    bool        res = false;
-
-    if (query->flag & LQUERY_HASNOT)
-    {
-        FieldNot    fn;
+    bool        res;

-        fn.q = NULL;
-
-        res = checkCond(LQUERY_FIRST(query), query->numlevel,
-                        LTREE_FIRST(tree), tree->numlevel,
-                        &fn,
-                        0,
-                        false);
-    }
-    else
-    {
-        res = checkCond(LQUERY_FIRST(query), query->numlevel,
-                        LTREE_FIRST(tree), tree->numlevel,
-                        NULL,
-                        0,
-                        false);
-    }
+    res = checkCond(LQUERY_FIRST(query), query->numlevel,
+                    LTREE_FIRST(tree), tree->numlevel);

     PG_FREE_IF_COPY(tree, 0);
     PG_FREE_IF_COPY(query, 1);
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 429cdc8..7eac7c9 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -65,14 +65,20 @@ typedef struct
 /*
  * In an lquery_level, "flag" contains the union of the variants' flags
  * along with possible LQL_xxx flags; so those bit sets can't overlap.
+ *
+ * "low" and "high" are nominally the minimum and maximum number of matches.
+ * However, for backwards compatibility with pre-v13 on-disk lqueries,
+ * non-'*' levels (those with numvar > 0) only have valid low/high if the
+ * LQL_COUNT flag is set; otherwise those fields are zero, but the behavior
+ * is as if they were both 1.
  */
 typedef struct
 {
     uint16        totallen;        /* total length of this level, in bytes */
     uint16        flag;            /* see LQL_xxx and LVAR_xxx flags */
     uint16        numvar;            /* number of variants; 0 means '*' */
-    uint16        low;            /* minimum repeat count for '*' */
-    uint16        high;            /* maximum repeat count for '*' */
+    uint16        low;            /* minimum repeat count */
+    uint16        high;            /* maximum repeat count */
     /* Array of maxalign'd lquery_variant structs follows: */
     char        variants[FLEXIBLE_ARRAY_MEMBER];
 } lquery_level;
@@ -82,6 +88,7 @@ typedef struct
 #define LQL_FIRST(x)    ( (lquery_variant*)( ((char*)(x))+LQL_HDRSIZE ) )

 #define LQL_NOT        0x10        /* level has '!' (NOT) prefix */
+#define LQL_COUNT    0x20        /* level is non-'*' and has repeat counts */

 #ifdef LOWER_NODE
 #define FLG_CANLOOKSIGN(x) ( ( (x) & ( LQL_NOT | LVAR_ANYEND | LVAR_SUBLEXEME ) ) == 0 )
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index e806a14..c6ea5de 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -317,6 +317,23 @@ lquery_in(PG_FUNCTION_ARGS)

                 state = LQPRS_WAITVAR;
             }
+            else if (charlen == 1 && t_iseq(ptr, '{'))
+            {
+                lptr->len = ptr - lptr->start -
+                    ((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
+                    ((lptr->flag & LVAR_INCASE) ? 1 : 0) -
+                    ((lptr->flag & LVAR_ANYEND) ? 1 : 0);
+                if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
+                    ereport(ERROR,
+                            (errcode(ERRCODE_NAME_TOO_LONG),
+                             errmsg("label string is too long"),
+                             errdetail("Label length is %d, must be at most %d, at character %d.",
+                                       lptr->wlen, LTREE_LABEL_MAX_CHARS,
+                                       pos)));
+
+                curqlevel->flag |= LQL_COUNT;
+                state = LQPRS_WAITFNUM;
+            }
             else if (charlen == 1 && t_iseq(ptr, '.'))
             {
                 lptr->len = ptr - lptr->start -
@@ -348,6 +365,7 @@ lquery_in(PG_FUNCTION_ARGS)
                 state = LQPRS_WAITFNUM;
             else if (charlen == 1 && t_iseq(ptr, '.'))
             {
+                /* We only get here for '*', so these are correct defaults */
                 curqlevel->low = 0;
                 curqlevel->high = LTREE_MAX_LEVELS;
                 curqlevel = NEXTLEV(curqlevel);
@@ -567,7 +585,11 @@ lquery_out(PG_FUNCTION_ARGS)
     {
         totallen++;
         if (curqlevel->numvar)
+        {
             totallen += 1 + (curqlevel->numvar * 4) + curqlevel->totallen;
+            if (curqlevel->flag & LQL_COUNT)
+                totallen += 2 * 11 + 3;
+        }
         else
             totallen += 2 * 11 + 4;
         curqlevel = LQL_NEXT(curqlevel);
@@ -619,26 +641,37 @@ lquery_out(PG_FUNCTION_ARGS)
         }
         else
         {
+            *ptr = '*';
+            ptr++;
+        }
+
+        if ((curqlevel->flag & LQL_COUNT) || curqlevel->numvar == 0)
+        {
             if (curqlevel->low == curqlevel->high)
             {
-                sprintf(ptr, "*{%d}", curqlevel->low);
+                sprintf(ptr, "{%d}", curqlevel->low);
             }
             else if (curqlevel->low == 0)
             {
                 if (curqlevel->high == LTREE_MAX_LEVELS)
                 {
-                    *ptr = '*';
-                    *(ptr + 1) = '\0';
+                    if (curqlevel->numvar == 0)
+                    {
+                        /* This is default for '*', so print nothing */
+                        *ptr = '\0';
+                    }
+                    else
+                        sprintf(ptr, "{,}");
                 }
                 else
-                    sprintf(ptr, "*{,%d}", curqlevel->high);
+                    sprintf(ptr, "{,%d}", curqlevel->high);
             }
             else if (curqlevel->high == LTREE_MAX_LEVELS)
             {
-                sprintf(ptr, "*{%d,}", curqlevel->low);
+                sprintf(ptr, "{%d,}", curqlevel->low);
             }
             else
-                sprintf(ptr, "*{%d,%d}", curqlevel->low, curqlevel->high);
+                sprintf(ptr, "{%d,%d}", curqlevel->low, curqlevel->high);
             ptr = strchr(ptr, '\0');
         }

diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index f6d73b8..0cf3dd6 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -87,6 +87,7 @@ SELECT '1.*.4|3|2.*{1,4}'::lquery;
 SELECT '1.*.4|3|2.*{,4}'::lquery;
 SELECT '1.*.4|3|2.*{1,}'::lquery;
 SELECT '1.*.4|3|2.*{1}'::lquery;
+SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
 SELECT 'qwerty%@*.tu'::lquery;

 SELECT nlevel('1.2.3.4');
@@ -184,6 +185,19 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{2}.e';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{4}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+SELECT 'a.b'::ltree ~ '!a.!a';
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{1,}.*';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}.!a{,}';
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,}.!a{,}';
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,2}.!a{1,}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,2}.!a{1,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!x{,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!a{2,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!d{2,}.*';

 SELECT 'QWER_TY'::ltree ~ 'q%@*';
 SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index ae4b33e..d7dd555 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -60,7 +60,8 @@
      <type>lquery</type> represents a regular-expression-like pattern
      for matching <type>ltree</type> values.  A simple word matches that
      label within a path.  A star symbol (<literal>*</literal>) matches zero
-     or more labels.  For example:
+     or more labels.  These can be joined with dots to form a pattern that
+     must match the whole label path.  For example:
 <synopsis>
 foo         <lineannotation>Match the exact label path <literal>foo</literal></lineannotation>
 *.foo.*     <lineannotation>Match any label path containing the label <literal>foo</literal></lineannotation>
@@ -69,19 +70,25 @@ foo         <lineannotation>Match the exact label path <literal>foo</literal></l
     </para>

     <para>
-     Star symbols can also be quantified to restrict how many labels
-     they can match:
+     Both star symbols and simple words can be quantified to restrict how many
+     labels they can match:
 <synopsis>
 *{<replaceable>n</replaceable>}        <lineannotation>Match exactly <replaceable>n</replaceable>
labels</lineannotation>
 *{<replaceable>n</replaceable>,}       <lineannotation>Match at least <replaceable>n</replaceable>
labels</lineannotation>
 *{<replaceable>n</replaceable>,<replaceable>m</replaceable>}      <lineannotation>Match at least
<replaceable>n</replaceable>but not more than <replaceable>m</replaceable> labels</lineannotation> 
-*{,<replaceable>m</replaceable>}       <lineannotation>Match at most <replaceable>m</replaceable> labels — same
as</lineannotation> *{0,<replaceable>m</replaceable>} 
+*{,<replaceable>m</replaceable>}       <lineannotation>Match at most <replaceable>m</replaceable> labels — same
as</lineannotation>*{0,<replaceable>m</replaceable>} 
+foo{<replaceable>n</replaceable>,<replaceable>m</replaceable>}    <lineannotation>Match at least
<replaceable>n</replaceable>but not more than <replaceable>m</replaceable> occurrences of
<literal>foo</literal></lineannotation>
+foo{,}      <lineannotation>Match any number of occurrences of <literal>foo</literal>, including zero</lineannotation>
 </synopsis>
+     In the absence of any explicit quantifier, the default for a star symbol
+     is to match any number of labels (that is, <literal>{,}</literal>) while
+     the default for a non-star item is to match exactly once (that
+     is, <literal>{1}</literal>).
     </para>

     <para>
      There are several modifiers that can be put at the end of a non-star
-     label in <type>lquery</type> to make it match more than just the exact match:
+     <type>lquery</type> item to make it match more than just the exact match:
 <synopsis>
 @           <lineannotation>Match case-insensitively, for example <literal>a@</literal> matches
<literal>A</literal></lineannotation>
 *           <lineannotation>Match any label with this prefix, for example <literal>foo*</literal> matches
<literal>foobar</literal></lineannotation>
@@ -97,17 +104,20 @@ foo         <lineannotation>Match the exact label path <literal>foo</literal></l
     </para>

     <para>
-     Also, you can write several possibly-modified labels separated with
-     <literal>|</literal> (OR) to match any of those labels, and you can put
-     <literal>!</literal> (NOT) at the start to match any label that doesn't
-     match any of the alternatives.
+     Also, you can write several possibly-modified non-star items separated with
+     <literal>|</literal> (OR) to match any of those items, and you can put
+     <literal>!</literal> (NOT) at the start of a non-star group to match any
+     label that doesn't match any of the alternatives.  A quantifier, if any,
+     goes at the end of the group; it means some number of matches for the
+     group as a whole (that is, some number of labels matching or not matching
+     any of the alternatives).
     </para>

     <para>
      Here's an annotated example of <type>lquery</type>:
 <programlisting>
-Top.*{0,2}.sport*@.!football|tennis.Russ*|Spain
-a.  b.     c.      d.               e.
+Top.*{0,2}.sport*@.!football|tennis{1,}.Russ*|Spain
+a.  b.     c.      d.                   e.
 </programlisting>
      This query will match any label path that:
     </para>
@@ -129,8 +139,8 @@ a.  b.     c.      d.               e.
      </listitem>
      <listitem>
       <para>
-       then a label not matching <literal>football</literal> nor
-       <literal>tennis</literal>
+       then has one or more labels, none of which
+       match <literal>football</literal> nor <literal>tennis</literal>
       </para>
      </listitem>
      <listitem>
@@ -632,7 +642,7 @@ ltreetest=> SELECT path FROM test WHERE path ~ '*.Astronomy.*';
  Top.Collections.Pictures.Astronomy.Astronauts
 (7 rows)

-ltreetest=> SELECT path FROM test WHERE path ~ '*.!pictures@.*.Astronomy.*';
+ltreetest=> SELECT path FROM test WHERE path ~ '*.!pictures@.Astronomy.*';
                 path
 ------------------------------------
  Top.Science.Astronomy

pgsql-hackers by date:

Previous
From: Tom Lane
Date:
Subject: Re: snapper vs. HEAD
Next
From: Alexey Kondratov
Date:
Subject: Re: Allow CLUSTER, VACUUM FULL and REINDEX to change tablespace onthe fly