Thread: contrib/ltree patches
I have been looking at contrib/ltree in the PostgreSQL repository. I've modified the code to allow / as a node delimiter instead of . which is the default. Below are the patches to make this change. I have also moved the delimiter to a DEFINE so that other customizations are easily done. This is a work in progress. My thanks to DarbyD for assistance. cheers --- ltree.h.orig Tue Nov 26 18:57:58 2002 +++ ltree.h Tue Nov 26 20:16:40 2002 @@ -6,6 +6,8 @@#include "utils/palloc.h"#include "utils/builtins.h" +#define NODE_DELIMITER '/' +typedef struct{ uint8 len; @@ -88,7 +90,7 @@#ifndef abs#define abs(a) ((a) < (0) ? -(a) : (a))#endif -#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' ) +#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER ) /* full text query */ --- ltree_io.c Tue Nov 26 20:23:45 2002 +++ ltree_io.c.orig Tue Nov 26 18:57:26 2002 @@ -48,7 +48,7 @@ ptr = buf; while (*ptr) { - if (*ptr == NODE_DELIMITER) + if (*ptr == '.') num++; ptr++; } @@ -69,7 +69,7 @@ } else if (state == LTPRS_WAITDELIM) { - if (*ptr == NODE_DELIMITER) + if (*ptr == '.') { lptr->len = ptr - lptr->start; if (lptr->len > 255) @@ -131,7 +131,7 @@ { if (i != 0) { - *ptr = NODE_DELIMITER; + *ptr = '.'; ptr++; } memcpy(ptr, curlevel->name, curlevel->len); @@ -181,7 +181,7 @@ ptr = buf; while (*ptr) { - if (*ptr == NODE_DELIMITER) + if (*ptr == '.') num++; else if (*ptr == '|') numOR++; @@ -265,7 +265,7 @@ lptr->len, (int) (lptr->start - buf)); state = LQPRS_WAITVAR; } - else if (*ptr == NODE_DELIMITER) + else if (*ptr == '.') { lptr->len = ptr - lptr->start - ((lptr->flag& LVAR_SUBLEXEM) ? 1 : 0) - @@ -289,7 +289,7 @@ { if (*ptr == '{') state = LQPRS_WAITFNUM; - else if (*ptr == NODE_DELIMITER) + else if (*ptr == '.') { curqlevel->low = 0; curqlevel->high = 0xffff; @@ -347,7 +347,7 @@ } else if (state == LQPRS_WAITEND) { - if (*ptr == NODE_DELIMITER) + if (*ptr == '.') { state = LQPRS_WAITLEVEL; curqlevel = NEXTLEV(curqlevel); @@ -471,7 +471,7 @@ { if (i != 0) { - *ptr = NODE_DELIMITER; + *ptr = '.'; ptr++; } if (curqlevel->numvar)
Dan Langille wrote: > I have been looking at contrib/ltree in the PostgreSQL repository. I've > modified the code to allow / as a node delimiter instead of . which is the > default. What is the reason for changing delimiter? > > Below are the patches to make this change. I have also moved the > delimiter to a DEFINE so that other customizations are easily done. This > is a work in progress. It's good. > > My thanks to DarbyD for assistance. > > cheers > > > --- ltree.h.orig Tue Nov 26 18:57:58 2002 > +++ ltree.h Tue Nov 26 20:16:40 2002 > @@ -6,6 +6,8 @@ > #include "utils/palloc.h" > #include "utils/builtins.h" > > +#define NODE_DELIMITER '/' > + > typedef struct > { > uint8 len; > @@ -88,7 +90,7 @@ > #ifndef abs > #define abs(a) ((a) < (0) ? -(a) : (a)) > #endif > -#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' ) > +#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER ) It seems to me that it's mistake. ISALNUM shoud define correct character in name of node (level). Try to test with incorrect ltree value 'a..b'. -- Teodor Sigaev teodor@stack.net
On 27 Nov 2002 at 12:16, Teodor Sigaev wrote: > Dan Langille wrote: > > I have been looking at contrib/ltree in the PostgreSQL repository. > > I've modified the code to allow / as a node delimiter instead of . > > which is the default. > What is the reason for changing delimiter? My tree represents a file system. Here are some entries: # select id, pathname from element_pathnames order by pathname; 77024 | doc/de_DE.ISO8859-1 77028 | doc/de_DE.ISO8859-1/books84590 | doc/de_DE.ISO8859-1/books/Makefile.inc 77029 | doc/de_DE.ISO8859-1/books/faq 84591 | doc/de_DE.ISO8859-1/books/faq/Makefile77030 | doc/de_DE.ISO8859-1/books/faq/book.sgml 77691 | doc/de_DE.ISO8859-1/books/handbook77704 | doc/de_DE.ISO8859-1/books/handbook/Makefile 110592 | doc/de_DE.ISO8859-1/books/handbook/advanced-networking > > Below are the patches to make this change. I have also moved the > > delimiter to a DEFINE so that other customizations are easily done. > > This is a work in progress. > It's good. Thank you. More patches will follow as I get closer to my objective. > > -#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' ) > > +#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' || > > +#(x) == NODE_DELIMITER ) > It seems to me that it's mistake. ISALNUM shoud define correct > character in name of node (level). Try to test with incorrect ltree > value 'a..b'. I just did some simple tests and I see what you mean: ltree_test=# select * from tree;id | pathname ----+------------------ 1 | /ports 2 | ports/security 2 | ports//security 2 | /ports//security 2 | a..b (5 rows) Then I removed NODE_DELIMITER from ISALNUM and tried again: ltree_test=# insert into tree values (2, '/ports//security'); ERROR: Syntax error in position 0 near '/' ltree_test=# insert into tree values (2, 'ports//security'); ERROR: Syntax error in position 6 near '/' ltree_test=# insert into tree values (2, 'ports/security'); INSERT 29955201 1 ltree_test=# insert into tree values (2, 'ports/security/'); ERROR: Unexpected end of line ltree_test=# insert into tree values (2, 'ports/security/things'); INSERT 29955202 1 ltree_test=# select * from tree;id | pathname ----+----------------------- 1 | /ports 2 | ports/security 2 | ports//security 2 | /ports//security 2 | a..b 2 | ports/security2 | ports/security/things (7 rows) Removing NODE_DELIMITER from ISALNUM makes sense. Thank you. Here is the reason why NODE_DELIMITER was added. My initial data sample was of the form "/usr/local/" (i.e. it started with a NODE_DELIMITER). I have since changed my data so it does not start with a leading / because queries were not working. Based upon the sample data I was using (approximately 120,000 nodes as taken from a real file system), I had to change ISALNUM as I went along. Here is the current definition for ISALNUM: #define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' || (x) == '-' || (x) == '.' || (x) == '+' || (x) == ':' || (x) == '~' || (x) == '%' || (x) == ',' || (x) == '#') Given that I am trying to allow any valid filename, I think ISALNUM needs to allow any ASCII character. I also think I will need to modify the parsing within lquery_in to allow escaping of characters it recognizes but which may be part of a file name (e.g. :%~ may be part of a file name, but these are special characters to lquery_in). That I think will be the biggest change. Thank you for your interest and help. -- Dan Langille : http://www.langille.org/
>>What is the reason for changing delimiter? > > > My tree represents a file system. Here are some entries: >>>Below are the patches to make this change. I have also moved the >>>delimiter to a DEFINE so that other customizations are easily done. >>>This is a work in progress. >> > >>It's good. > > #define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' || > (x) == '-' || (x) == '.' || (x) == '+' || (x) == ':' || (x) == '~' || > (x) == '%' || (x) == ',' || (x) == '#') > > Given that I am trying to allow any valid filename, I think ISALNUM > needs to allow any ASCII character. > > I also think I will need to modify the parsing within lquery_in to > allow escaping of characters it recognizes but which may be part of a > file name (e.g. :%~ may be part of a file name, but these are special > characters to lquery_in). That I think will be the biggest change. Ok, I think it's a good extension. Let you prepare cumulative patch. Nevertheless, we have no chance to insert this to 7.3 release :(. Only for 7.3.1 or even 7.4. -- Teodor Sigaev teodor@stack.net
On 27 Nov 2002 at 19:55, Teodor Sigaev wrote: > Ok, I think it's a good extension. Let you prepare cumulative patch. > Nevertheless, we have no chance to insert this to 7.3 release :(. > Only for 7.3.1 or even 7.4. Thanks. As for the 7.3 release, yes, it would be nice, but that was not my goal. :) -- Dan Langille : http://www.langille.org/
Dan, is this ready to be applied to CVS? --------------------------------------------------------------------------- Dan Langille wrote: > I have been looking at contrib/ltree in the PostgreSQL repository. I've > modified the code to allow / as a node delimiter instead of . which is the > default. > > Below are the patches to make this change. I have also moved the > delimiter to a DEFINE so that other customizations are easily done. This > is a work in progress. > > My thanks to DarbyD for assistance. > > cheers > > > --- ltree.h.orig Tue Nov 26 18:57:58 2002 > +++ ltree.h Tue Nov 26 20:16:40 2002 > @@ -6,6 +6,8 @@ > #include "utils/palloc.h" > #include "utils/builtins.h" > > +#define NODE_DELIMITER '/' > + > typedef struct > { > uint8 len; > @@ -88,7 +90,7 @@ > #ifndef abs > #define abs(a) ((a) < (0) ? -(a) : (a)) > #endif > -#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' ) > +#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER ) > > /* full text query */ > > --- ltree_io.c Tue Nov 26 20:23:45 2002 > +++ ltree_io.c.orig Tue Nov 26 18:57:26 2002 > @@ -48,7 +48,7 @@ > ptr = buf; > while (*ptr) > { > - if (*ptr == NODE_DELIMITER) > + if (*ptr == '.') > num++; > ptr++; > } > @@ -69,7 +69,7 @@ > } > else if (state == LTPRS_WAITDELIM) > { > - if (*ptr == NODE_DELIMITER) > + if (*ptr == '.') > { > lptr->len = ptr - lptr->start; > if (lptr->len > 255) > @@ -131,7 +131,7 @@ > { > if (i != 0) > { > - *ptr = NODE_DELIMITER; > + *ptr = '.'; > ptr++; > } > memcpy(ptr, curlevel->name, curlevel->len); > @@ -181,7 +181,7 @@ > ptr = buf; > while (*ptr) > { > - if (*ptr == NODE_DELIMITER) > + if (*ptr == '.') > num++; > else if (*ptr == '|') > numOR++; > @@ -265,7 +265,7 @@ > lptr->len, (int) (lptr->start - buf)); > state = LQPRS_WAITVAR; > } > - else if (*ptr == NODE_DELIMITER) > + else if (*ptr == '.') > { > lptr->len = ptr - lptr->start - > ((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) - > @@ -289,7 +289,7 @@ > { > if (*ptr == '{') > state = LQPRS_WAITFNUM; > - else if (*ptr == NODE_DELIMITER) > + else if (*ptr == '.') > { > curqlevel->low = 0; > curqlevel->high = 0xffff; > @@ -347,7 +347,7 @@ > } > else if (state == LQPRS_WAITEND) > { > - if (*ptr == NODE_DELIMITER) > + if (*ptr == '.') > { > state = LQPRS_WAITLEVEL; > curqlevel = NEXTLEV(curqlevel); > @@ -471,7 +471,7 @@ > { > if (i != 0) > { > - *ptr = NODE_DELIMITER; > + *ptr = '.'; > ptr++; > } > if (curqlevel->numvar) > > > ---------------------------(end of broadcast)--------------------------- > TIP 4: Don't 'kill -9' the postmaster > -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 359-1001+ If your life is a hard drive, | 13 Roberts Road + Christ can be your backup. | Newtown Square, Pennsylvania19073
Don't do it! It's a wrong patch. Dan will prepare correct patch (with other changes). Bruce Momjian wrote: > Dan, is this ready to be applied to CVS? > > --------------------------------------------------------------------------- > > Dan Langille wrote: > >>I have been looking at contrib/ltree in the PostgreSQL repository. I've >>modified the code to allow / as a node delimiter instead of . which is the >>default. >> >>Below are the patches to make this change. I have also moved the >>delimiter to a DEFINE so that other customizations are easily done. This >>is a work in progress. >> >>My thanks to DarbyD for assistance. >> >>cheers >> >> >>--- ltree.h.orig Tue Nov 26 18:57:58 2002 >>+++ ltree.h Tue Nov 26 20:16:40 2002 >>@@ -6,6 +6,8 @@ >> #include "utils/palloc.h" >> #include "utils/builtins.h" >> >>+#define NODE_DELIMITER '/' >>+ >> typedef struct >> { >> uint8 len; >>@@ -88,7 +90,7 @@ >> #ifndef abs >> #define abs(a) ((a) < (0) ? -(a) : (a)) >> #endif >>-#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' ) >>+#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER ) >> >> /* full text query */ >> >>--- ltree_io.c Tue Nov 26 20:23:45 2002 >>+++ ltree_io.c.orig Tue Nov 26 18:57:26 2002 >>@@ -48,7 +48,7 @@ >> ptr = buf; >> while (*ptr) >> { >>- if (*ptr == NODE_DELIMITER) >>+ if (*ptr == '.') >> num++; >> ptr++; >> } >>@@ -69,7 +69,7 @@ >> } >> else if (state == LTPRS_WAITDELIM) >> { >>- if (*ptr == NODE_DELIMITER) >>+ if (*ptr == '.') >> { >> lptr->len = ptr - lptr->start; >> if (lptr->len > 255) >>@@ -131,7 +131,7 @@ >> { >> if (i != 0) >> { >>- *ptr = NODE_DELIMITER; >>+ *ptr = '.'; >> ptr++; >> } >> memcpy(ptr, curlevel->name, curlevel->len); >>@@ -181,7 +181,7 @@ >> ptr = buf; >> while (*ptr) >> { >>- if (*ptr == NODE_DELIMITER) >>+ if (*ptr == '.') >> num++; >> else if (*ptr == '|') >> numOR++; >>@@ -265,7 +265,7 @@ >> lptr->len, (int) (lptr->start - buf)); >> state = LQPRS_WAITVAR; >> } >>- else if (*ptr == NODE_DELIMITER) >>+ else if (*ptr == '.') >> { >> lptr->len = ptr - lptr->start - >> ((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) - >>@@ -289,7 +289,7 @@ >> { >> if (*ptr == '{') >> state = LQPRS_WAITFNUM; >>- else if (*ptr == NODE_DELIMITER) >>+ else if (*ptr == '.') >> { >> curqlevel->low = 0; >> curqlevel->high = 0xffff; >>@@ -347,7 +347,7 @@ >> } >> else if (state == LQPRS_WAITEND) >> { >>- if (*ptr == NODE_DELIMITER) >>+ if (*ptr == '.') >> { >> state = LQPRS_WAITLEVEL; >> curqlevel = NEXTLEV(curqlevel); >>@@ -471,7 +471,7 @@ >> { >> if (i != 0) >> { >>- *ptr = NODE_DELIMITER; >>+ *ptr = '.'; >> ptr++; >> } >> if (curqlevel->numvar) >> >> >>---------------------------(end of broadcast)--------------------------- >>TIP 4: Don't 'kill -9' the postmaster >> > > -- Teodor Sigaev teodor@stack.net
Thanks for asking. I have been diverted to other tasks and won't be able to get back to this for a short while. The basics work (i.e. population and simple compares) but I know for sure that certain functions will not work now that we allow what were previously operators to be part of the node name. In short, the code needs to allow for operators to be escaped if they are part of the node name. On 5 Dec 2002 at 0:54, Bruce Momjian wrote: > > Dan, is this ready to be applied to CVS? > > ---------------------------------------------------------------------- > ----- > > Dan Langille wrote: > > I have been looking at contrib/ltree in the PostgreSQL repository. > > I've modified the code to allow / as a node delimiter instead of . > > which is the default. > > > > Below are the patches to make this change. I have also moved the > > delimiter to a DEFINE so that other customizations are easily done. > > This is a work in progress. > > > > My thanks to DarbyD for assistance. > > > > cheers > > > > > > --- ltree.h.orig Tue Nov 26 18:57:58 2002 > > +++ ltree.h Tue Nov 26 20:16:40 2002 > > @@ -6,6 +6,8 @@ > > #include "utils/palloc.h" > > #include "utils/builtins.h" > > > > +#define NODE_DELIMITER '/' > > + > > typedef struct > > { > > uint8 len; > > @@ -88,7 +90,7 @@ > > #ifndef abs > > #define abs(a) ((a) < (0) ? -(a) : (a)) > > #endif > > -#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' ) > > +#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' || > > +#(x) == NODE_DELIMITER ) > > > > /* full text query */ > > > > --- ltree_io.c Tue Nov 26 20:23:45 2002 > > +++ ltree_io.c.orig Tue Nov 26 18:57:26 2002 > > @@ -48,7 +48,7 @@ > > ptr = buf; > > while (*ptr) > > { > > - if (*ptr == NODE_DELIMITER) > > + if (*ptr == '.') > > num++; > > ptr++; > > } > > @@ -69,7 +69,7 @@ > > } > > else if (state == LTPRS_WAITDELIM) > > { > > - if (*ptr == NODE_DELIMITER) > > + if (*ptr == '.') > > { > > lptr->len = ptr - lptr->start; > > if (lptr->len > 255) > > @@ -131,7 +131,7 @@ > > { > > if (i != 0) > > { > > - *ptr = NODE_DELIMITER; > > + *ptr = '.'; > > ptr++; > > } > > memcpy(ptr, curlevel->name, curlevel->len); > > @@ -181,7 +181,7 @@ > > ptr = buf; > > while (*ptr) > > { > > - if (*ptr == NODE_DELIMITER) > > + if (*ptr == '.') > > num++; > > else if (*ptr == '|') > > numOR++; > > @@ -265,7 +265,7 @@ > > lptr->len, (int) (lptr->start - buf)); > > state = LQPRS_WAITVAR; > > } > > - else if (*ptr == NODE_DELIMITER) > > + else if (*ptr == '.') > > { > > lptr->len = ptr - lptr->start - > > ((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) - > > @@ -289,7 +289,7 @@ > > { > > if (*ptr == '{') > > state = LQPRS_WAITFNUM; > > - else if (*ptr == NODE_DELIMITER) > > + else if (*ptr == '.') > > { > > curqlevel->low = 0; > > curqlevel->high = 0xffff; > > @@ -347,7 +347,7 @@ > > } > > else if (state == LQPRS_WAITEND) > > { > > - if (*ptr == NODE_DELIMITER) > > + if (*ptr == '.') > > { > > state = LQPRS_WAITLEVEL; > > curqlevel = NEXTLEV(curqlevel); > > @@ -471,7 +471,7 @@ > > { > > if (i != 0) > > { > > - *ptr = NODE_DELIMITER; > > + *ptr = '.'; > > ptr++; > > } > > if (curqlevel->numvar) > > > > > > ---------------------------(end of > > broadcast)--------------------------- TIP 4: Don't 'kill -9' the > > postmaster > > > > -- > Bruce Momjian | http://candle.pha.pa.us > pgman@candle.pha.pa.us | (610) 359-1001 + If your > life is a hard drive, | 13 Roberts Road + Christ can be your > backup. | Newtown Square, Pennsylvania 19073 > -- Dan Langille : http://www.langille.org/