Thread: contrib/ltree patches

contrib/ltree patches

From
Dan Langille
Date:
I have been looking at contrib/ltree in the PostgreSQL repository.  I've
modified the code to allow / as a node delimiter instead of . which is the
default.

Below are the patches to make this change.  I have also moved the
delimiter to a DEFINE so that other customizations are easily done.  This
is a work in progress.

My thanks to DarbyD for assistance.

cheers


--- ltree.h.orig    Tue Nov 26 18:57:58 2002
+++ ltree.h    Tue Nov 26 20:16:40 2002
@@ -6,6 +6,8 @@#include "utils/palloc.h"#include "utils/builtins.h"

+#define    NODE_DELIMITER    '/'
+typedef struct{    uint8        len;
@@ -88,7 +90,7 @@#ifndef abs#define abs(a)                    ((a) <    (0) ? -(a) : (a))#endif
-#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' )
+#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER )
/* full text query */

--- ltree_io.c    Tue Nov 26 20:23:45 2002
+++ ltree_io.c.orig    Tue Nov 26 18:57:26 2002
@@ -48,7 +48,7 @@    ptr = buf;    while (*ptr)    {
-        if (*ptr == NODE_DELIMITER)
+        if (*ptr == '.')            num++;        ptr++;    }
@@ -69,7 +69,7 @@        }        else if (state == LTPRS_WAITDELIM)        {
-            if (*ptr == NODE_DELIMITER)
+            if (*ptr == '.')            {                lptr->len = ptr - lptr->start;                if (lptr->len >
255)
@@ -131,7 +131,7 @@    {        if (i != 0)        {
-            *ptr = NODE_DELIMITER;
+            *ptr = '.';            ptr++;        }        memcpy(ptr, curlevel->name, curlevel->len);
@@ -181,7 +181,7 @@    ptr = buf;    while (*ptr)    {
-        if (*ptr == NODE_DELIMITER)
+        if (*ptr == '.')            num++;        else if (*ptr == '|')            numOR++;
@@ -265,7 +265,7 @@                         lptr->len, (int) (lptr->start - buf));                state =
LQPRS_WAITVAR;           }
 
-            else if (*ptr == NODE_DELIMITER)
+            else if (*ptr == '.')            {                lptr->len = ptr - lptr->start -
((lptr->flag& LVAR_SUBLEXEM) ? 1 : 0) -
 
@@ -289,7 +289,7 @@        {            if (*ptr == '{')                state = LQPRS_WAITFNUM;
-            else if (*ptr == NODE_DELIMITER)
+            else if (*ptr == '.')            {                curqlevel->low = 0;                curqlevel->high =
0xffff;
@@ -347,7 +347,7 @@        }        else if (state == LQPRS_WAITEND)        {
-            if (*ptr == NODE_DELIMITER)
+            if (*ptr == '.')            {                state = LQPRS_WAITLEVEL;                curqlevel =
NEXTLEV(curqlevel);
@@ -471,7 +471,7 @@    {        if (i != 0)        {
-            *ptr = NODE_DELIMITER;
+            *ptr = '.';            ptr++;        }        if (curqlevel->numvar)



Re: contrib/ltree patches

From
Teodor Sigaev
Date:

Dan Langille wrote:
> I have been looking at contrib/ltree in the PostgreSQL repository.  I've
> modified the code to allow / as a node delimiter instead of . which is the
> default.
What is the reason for changing delimiter?

> 
> Below are the patches to make this change.  I have also moved the
> delimiter to a DEFINE so that other customizations are easily done.  This
> is a work in progress.
It's good.

> 
> My thanks to DarbyD for assistance.
> 
> cheers
> 
> 
> --- ltree.h.orig    Tue Nov 26 18:57:58 2002
> +++ ltree.h    Tue Nov 26 20:16:40 2002
> @@ -6,6 +6,8 @@
>  #include "utils/palloc.h"
>  #include "utils/builtins.h"
> 
> +#define    NODE_DELIMITER    '/'
> +
>  typedef struct
>  {
>      uint8        len;
> @@ -88,7 +90,7 @@
>  #ifndef abs
>  #define abs(a)                    ((a) <    (0) ? -(a) : (a))
>  #endif
> -#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' )
> +#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER )
It seems to me  that it's mistake. ISALNUM shoud define correct character in 
name of node (level).  Try to test
with incorrect ltree value 'a..b'.

-- 
Teodor Sigaev
teodor@stack.net




Re: contrib/ltree patches

From
"Dan Langille"
Date:
On 27 Nov 2002 at 12:16, Teodor Sigaev wrote:

> Dan Langille wrote:
> > I have been looking at contrib/ltree in the PostgreSQL repository. 
> > I've modified the code to allow / as a node delimiter instead of .
> > which is the default.

> What is the reason for changing delimiter?

My tree represents a file system.  Here are some entries:

# select id, pathname from element_pathnames order by pathname; 77024 | doc/de_DE.ISO8859-1 77028 |
doc/de_DE.ISO8859-1/books84590 | doc/de_DE.ISO8859-1/books/Makefile.inc 77029 | doc/de_DE.ISO8859-1/books/faq 84591 |
doc/de_DE.ISO8859-1/books/faq/Makefile77030 | doc/de_DE.ISO8859-1/books/faq/book.sgml 77691 |
doc/de_DE.ISO8859-1/books/handbook77704 | doc/de_DE.ISO8859-1/books/handbook/Makefile
 
110592 | doc/de_DE.ISO8859-1/books/handbook/advanced-networking

> > Below are the patches to make this change.  I have also moved the
> > delimiter to a DEFINE so that other customizations are easily done. 
> > This is a work in progress.

> It's good.

Thank you.  More patches will follow as I get closer to my objective.

> > -#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' )
> > +#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' ||
> > +#(x) == NODE_DELIMITER )

> It seems to me  that it's mistake. ISALNUM shoud define correct
> character in name of node (level).  Try to test with incorrect ltree
> value 'a..b'.

I just did some simple tests and I see what you mean:

ltree_test=# select * from tree;id |     pathname
----+------------------ 1 | /ports 2 | ports/security 2 | ports//security 2 | /ports//security 2 | a..b
(5 rows)

Then I removed NODE_DELIMITER from ISALNUM and tried again:

ltree_test=# insert into tree values (2, '/ports//security');
ERROR:  Syntax error in position 0 near '/'
ltree_test=# insert into tree values (2, 'ports//security');
ERROR:  Syntax error in position 6 near '/'
ltree_test=# insert into tree values (2, 'ports/security');
INSERT 29955201 1
ltree_test=# insert into tree values (2, 'ports/security/');
ERROR:  Unexpected end of line
ltree_test=# insert into tree values (2, 'ports/security/things');
INSERT 29955202 1

ltree_test=# select * from tree;id |       pathname
----+----------------------- 1 | /ports 2 | ports/security 2 | ports//security 2 | /ports//security 2 | a..b 2 |
ports/security2 | ports/security/things
 
(7 rows)

Removing NODE_DELIMITER from ISALNUM makes sense.  Thank you.  Here 
is the reason why NODE_DELIMITER was added. My initial data sample 
was of the form "/usr/local/" (i.e. it started with a 
NODE_DELIMITER).  I have since changed my data so it does not start 
with a leading / because queries were not working.

Based upon the sample data I was using (approximately 120,000 nodes 
as taken from a real file system), I had to change ISALNUM as I went 
along.  Here is the current definition for ISALNUM:

#define ISALNUM(x)   ( isalnum((unsigned int)(x)) || (x) == '_' || 
(x) == '-' || (x) == '.' || (x) == '+' || (x) == ':' || (x) == '~' || 
(x) == '%' || (x) == ',' || (x) == '#')

Given that I am trying to allow any valid filename, I think ISALNUM 
needs to allow any ASCII character.

I also think I will need to modify the parsing within lquery_in to 
allow escaping of characters it recognizes but which may be part of a 
file name (e.g. :%~ may be part of a file name, but these are special 
characters to lquery_in).  That I think will be the biggest change.

Thank you for your interest and help.

-- 
Dan Langille : http://www.langille.org/



Re: contrib/ltree patches

From
Teodor Sigaev
Date:
>>What is the reason for changing delimiter?
> 
> 
> My tree represents a file system.  Here are some entries:
>>>Below are the patches to make this change.  I have also moved the
>>>delimiter to a DEFINE so that other customizations are easily done. 
>>>This is a work in progress.
>>
> 
>>It's good.

> 
> #define ISALNUM(x)   ( isalnum((unsigned int)(x)) || (x) == '_' || 
> (x) == '-' || (x) == '.' || (x) == '+' || (x) == ':' || (x) == '~' || 
> (x) == '%' || (x) == ',' || (x) == '#')
> 
> Given that I am trying to allow any valid filename, I think ISALNUM 
> needs to allow any ASCII character.
> 
> I also think I will need to modify the parsing within lquery_in to 
> allow escaping of characters it recognizes but which may be part of a 
> file name (e.g. :%~ may be part of a file name, but these are special 
> characters to lquery_in).  That I think will be the biggest change.

Ok, I think it's a good extension. Let you prepare cumulative patch. 
Nevertheless, we have no chance to insert this to 7.3 release :(.  Only for 
7.3.1 or even 7.4.

-- 
Teodor Sigaev
teodor@stack.net




Re: contrib/ltree patches

From
"Dan Langille"
Date:
On 27 Nov 2002 at 19:55, Teodor Sigaev wrote:

> Ok, I think it's a good extension. Let you prepare cumulative patch.
> Nevertheless, we have no chance to insert this to 7.3 release :(. 
> Only for 7.3.1 or even 7.4.

Thanks.  As for the 7.3 release, yes, it would be nice, but that was 
not my goal.  :)
-- 
Dan Langille : http://www.langille.org/



Re: contrib/ltree patches

From
Bruce Momjian
Date:
Dan, is this ready to be applied to CVS?

---------------------------------------------------------------------------

Dan Langille wrote:
> I have been looking at contrib/ltree in the PostgreSQL repository.  I've
> modified the code to allow / as a node delimiter instead of . which is the
> default.
> 
> Below are the patches to make this change.  I have also moved the
> delimiter to a DEFINE so that other customizations are easily done.  This
> is a work in progress.
> 
> My thanks to DarbyD for assistance.
> 
> cheers
> 
> 
> --- ltree.h.orig    Tue Nov 26 18:57:58 2002
> +++ ltree.h    Tue Nov 26 20:16:40 2002
> @@ -6,6 +6,8 @@
>  #include "utils/palloc.h"
>  #include "utils/builtins.h"
> 
> +#define    NODE_DELIMITER    '/'
> +
>  typedef struct
>  {
>      uint8        len;
> @@ -88,7 +90,7 @@
>  #ifndef abs
>  #define abs(a)                    ((a) <    (0) ? -(a) : (a))
>  #endif
> -#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' )
> +#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER )
> 
>  /* full text query */
> 
> --- ltree_io.c    Tue Nov 26 20:23:45 2002
> +++ ltree_io.c.orig    Tue Nov 26 18:57:26 2002
> @@ -48,7 +48,7 @@
>      ptr = buf;
>      while (*ptr)
>      {
> -        if (*ptr == NODE_DELIMITER)
> +        if (*ptr == '.')
>              num++;
>          ptr++;
>      }
> @@ -69,7 +69,7 @@
>          }
>          else if (state == LTPRS_WAITDELIM)
>          {
> -            if (*ptr == NODE_DELIMITER)
> +            if (*ptr == '.')
>              {
>                  lptr->len = ptr - lptr->start;
>                  if (lptr->len > 255)
> @@ -131,7 +131,7 @@
>      {
>          if (i != 0)
>          {
> -            *ptr = NODE_DELIMITER;
> +            *ptr = '.';
>              ptr++;
>          }
>          memcpy(ptr, curlevel->name, curlevel->len);
> @@ -181,7 +181,7 @@
>      ptr = buf;
>      while (*ptr)
>      {
> -        if (*ptr == NODE_DELIMITER)
> +        if (*ptr == '.')
>              num++;
>          else if (*ptr == '|')
>              numOR++;
> @@ -265,7 +265,7 @@
>                           lptr->len, (int) (lptr->start - buf));
>                  state = LQPRS_WAITVAR;
>              }
> -            else if (*ptr == NODE_DELIMITER)
> +            else if (*ptr == '.')
>              {
>                  lptr->len = ptr - lptr->start -
>                      ((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) -
> @@ -289,7 +289,7 @@
>          {
>              if (*ptr == '{')
>                  state = LQPRS_WAITFNUM;
> -            else if (*ptr == NODE_DELIMITER)
> +            else if (*ptr == '.')
>              {
>                  curqlevel->low = 0;
>                  curqlevel->high = 0xffff;
> @@ -347,7 +347,7 @@
>          }
>          else if (state == LQPRS_WAITEND)
>          {
> -            if (*ptr == NODE_DELIMITER)
> +            if (*ptr == '.')
>              {
>                  state = LQPRS_WAITLEVEL;
>                  curqlevel = NEXTLEV(curqlevel);
> @@ -471,7 +471,7 @@
>      {
>          if (i != 0)
>          {
> -            *ptr = NODE_DELIMITER;
> +            *ptr = '.';
>              ptr++;
>          }
>          if (curqlevel->numvar)
> 
> 
> ---------------------------(end of broadcast)---------------------------
> TIP 4: Don't 'kill -9' the postmaster
> 

--  Bruce Momjian                        |  http://candle.pha.pa.us pgman@candle.pha.pa.us               |  (610)
359-1001+  If your life is a hard drive,     |  13 Roberts Road +  Christ can be your backup.        |  Newtown Square,
Pennsylvania19073
 


Re: contrib/ltree patches

From
Teodor Sigaev
Date:
Don't do it! It's a wrong patch. Dan will prepare correct patch (with other 
changes).


Bruce Momjian wrote:
> Dan, is this ready to be applied to CVS?
> 
> ---------------------------------------------------------------------------
> 
> Dan Langille wrote:
> 
>>I have been looking at contrib/ltree in the PostgreSQL repository.  I've
>>modified the code to allow / as a node delimiter instead of . which is the
>>default.
>>
>>Below are the patches to make this change.  I have also moved the
>>delimiter to a DEFINE so that other customizations are easily done.  This
>>is a work in progress.
>>
>>My thanks to DarbyD for assistance.
>>
>>cheers
>>
>>
>>--- ltree.h.orig    Tue Nov 26 18:57:58 2002
>>+++ ltree.h    Tue Nov 26 20:16:40 2002
>>@@ -6,6 +6,8 @@
>> #include "utils/palloc.h"
>> #include "utils/builtins.h"
>>
>>+#define    NODE_DELIMITER    '/'
>>+
>> typedef struct
>> {
>>     uint8        len;
>>@@ -88,7 +90,7 @@
>> #ifndef abs
>> #define abs(a)                    ((a) <    (0) ? -(a) : (a))
>> #endif
>>-#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' )
>>+#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER )
>>
>> /* full text query */
>>
>>--- ltree_io.c    Tue Nov 26 20:23:45 2002
>>+++ ltree_io.c.orig    Tue Nov 26 18:57:26 2002
>>@@ -48,7 +48,7 @@
>>     ptr = buf;
>>     while (*ptr)
>>     {
>>-        if (*ptr == NODE_DELIMITER)
>>+        if (*ptr == '.')
>>             num++;
>>         ptr++;
>>     }
>>@@ -69,7 +69,7 @@
>>         }
>>         else if (state == LTPRS_WAITDELIM)
>>         {
>>-            if (*ptr == NODE_DELIMITER)
>>+            if (*ptr == '.')
>>             {
>>                 lptr->len = ptr - lptr->start;
>>                 if (lptr->len > 255)
>>@@ -131,7 +131,7 @@
>>     {
>>         if (i != 0)
>>         {
>>-            *ptr = NODE_DELIMITER;
>>+            *ptr = '.';
>>             ptr++;
>>         }
>>         memcpy(ptr, curlevel->name, curlevel->len);
>>@@ -181,7 +181,7 @@
>>     ptr = buf;
>>     while (*ptr)
>>     {
>>-        if (*ptr == NODE_DELIMITER)
>>+        if (*ptr == '.')
>>             num++;
>>         else if (*ptr == '|')
>>             numOR++;
>>@@ -265,7 +265,7 @@
>>                          lptr->len, (int) (lptr->start - buf));
>>                 state = LQPRS_WAITVAR;
>>             }
>>-            else if (*ptr == NODE_DELIMITER)
>>+            else if (*ptr == '.')
>>             {
>>                 lptr->len = ptr - lptr->start -
>>                     ((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) -
>>@@ -289,7 +289,7 @@
>>         {
>>             if (*ptr == '{')
>>                 state = LQPRS_WAITFNUM;
>>-            else if (*ptr == NODE_DELIMITER)
>>+            else if (*ptr == '.')
>>             {
>>                 curqlevel->low = 0;
>>                 curqlevel->high = 0xffff;
>>@@ -347,7 +347,7 @@
>>         }
>>         else if (state == LQPRS_WAITEND)
>>         {
>>-            if (*ptr == NODE_DELIMITER)
>>+            if (*ptr == '.')
>>             {
>>                 state = LQPRS_WAITLEVEL;
>>                 curqlevel = NEXTLEV(curqlevel);
>>@@ -471,7 +471,7 @@
>>     {
>>         if (i != 0)
>>         {
>>-            *ptr = NODE_DELIMITER;
>>+            *ptr = '.';
>>             ptr++;
>>         }
>>         if (curqlevel->numvar)
>>
>>
>>---------------------------(end of broadcast)---------------------------
>>TIP 4: Don't 'kill -9' the postmaster
>>
> 
> 

-- 
Teodor Sigaev
teodor@stack.net




Re: contrib/ltree patches

From
"Dan Langille"
Date:
Thanks for asking.  I have been diverted to other tasks and won't be 
able to get back to this for a short while.  The basics work (i.e. 
population and simple compares) but I know for sure that certain 
functions will not work now that we allow what were previously 
operators to be part of the node name.  In short, the code needs to 
allow for operators to be escaped if they are part of the node name.

On 5 Dec 2002 at 0:54, Bruce Momjian wrote:

> 
> Dan, is this ready to be applied to CVS?
> 
> ----------------------------------------------------------------------
> -----
> 
> Dan Langille wrote:
> > I have been looking at contrib/ltree in the PostgreSQL repository. 
> > I've modified the code to allow / as a node delimiter instead of .
> > which is the default.
> > 
> > Below are the patches to make this change.  I have also moved the
> > delimiter to a DEFINE so that other customizations are easily done. 
> > This is a work in progress.
> > 
> > My thanks to DarbyD for assistance.
> > 
> > cheers
> > 
> > 
> > --- ltree.h.orig    Tue Nov 26 18:57:58 2002
> > +++ ltree.h    Tue Nov 26 20:16:40 2002
> > @@ -6,6 +6,8 @@
> >  #include "utils/palloc.h"
> >  #include "utils/builtins.h"
> > 
> > +#define    NODE_DELIMITER    '/'
> > +
> >  typedef struct
> >  {
> >      uint8        len;
> > @@ -88,7 +90,7 @@
> >  #ifndef abs
> >  #define abs(a)                    ((a) <    (0) ? -(a) : (a))
> >  #endif
> > -#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' )
> > +#define ISALNUM(x)    ( isalnum((unsigned int)(x)) || (x) == '_' ||
> > +#(x) == NODE_DELIMITER )
> > 
> >  /* full text query */
> > 
> > --- ltree_io.c    Tue Nov 26 20:23:45 2002
> > +++ ltree_io.c.orig    Tue Nov 26 18:57:26 2002
> > @@ -48,7 +48,7 @@
> >      ptr = buf;
> >      while (*ptr)
> >      {
> > -        if (*ptr == NODE_DELIMITER)
> > +        if (*ptr == '.')
> >              num++;
> >          ptr++;
> >      }
> > @@ -69,7 +69,7 @@
> >          }
> >          else if (state == LTPRS_WAITDELIM)
> >          {
> > -            if (*ptr == NODE_DELIMITER)
> > +            if (*ptr == '.')
> >              {
> >                  lptr->len = ptr - lptr->start;
> >                  if (lptr->len > 255)
> > @@ -131,7 +131,7 @@
> >      {
> >          if (i != 0)
> >          {
> > -            *ptr = NODE_DELIMITER;
> > +            *ptr = '.';
> >              ptr++;
> >          }
> >          memcpy(ptr, curlevel->name, curlevel->len);
> > @@ -181,7 +181,7 @@
> >      ptr = buf;
> >      while (*ptr)
> >      {
> > -        if (*ptr == NODE_DELIMITER)
> > +        if (*ptr == '.')
> >              num++;
> >          else if (*ptr == '|')
> >              numOR++;
> > @@ -265,7 +265,7 @@
> >                           lptr->len, (int) (lptr->start - buf));
> >                  state = LQPRS_WAITVAR;
> >              }
> > -            else if (*ptr == NODE_DELIMITER)
> > +            else if (*ptr == '.')
> >              {
> >                  lptr->len = ptr - lptr->start -
> >                      ((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) -
> > @@ -289,7 +289,7 @@
> >          {
> >              if (*ptr == '{')
> >                  state = LQPRS_WAITFNUM;
> > -            else if (*ptr == NODE_DELIMITER)
> > +            else if (*ptr == '.')
> >              {
> >                  curqlevel->low = 0;
> >                  curqlevel->high = 0xffff;
> > @@ -347,7 +347,7 @@
> >          }
> >          else if (state == LQPRS_WAITEND)
> >          {
> > -            if (*ptr == NODE_DELIMITER)
> > +            if (*ptr == '.')
> >              {
> >                  state = LQPRS_WAITLEVEL;
> >                  curqlevel = NEXTLEV(curqlevel);
> > @@ -471,7 +471,7 @@
> >      {
> >          if (i != 0)
> >          {
> > -            *ptr = NODE_DELIMITER;
> > +            *ptr = '.';
> >              ptr++;
> >          }
> >          if (curqlevel->numvar)
> > 
> > 
> > ---------------------------(end of
> > broadcast)--------------------------- TIP 4: Don't 'kill -9' the
> > postmaster
> > 
> 
> -- 
>   Bruce Momjian                        |  http://candle.pha.pa.us
>   pgman@candle.pha.pa.us               |  (610) 359-1001 +  If your
>   life is a hard drive,     |  13 Roberts Road +  Christ can be your
>   backup.        |  Newtown Square, Pennsylvania 19073
> 


-- 
Dan Langille : http://www.langille.org/