Re: regexp character class locale awareness patch - Mailing list pgsql-hackers

From Bruce Momjian
Subject Re: regexp character class locale awareness patch
Date
Msg-id 200204172155.g3HLt0n07537@candle.pha.pa.us
Whole thread Raw
In response to Re: regexp character class locale awareness patch  (Bruce Momjian <pgman@candle.pha.pa.us>)
List pgsql-hackers
Your patch has been added to the PostgreSQL unapplied patches list at:
http://candle.pha.pa.us/cgi-bin/pgpatches

I will try to apply it within the next 48 hours.

---------------------------------------------------------------------------


Bruce Momjian wrote:
> Manuel Sugawara wrote:
> > Peter Eisentraut <peter_e@gmx.net> writes:
> > >
> > > Basically, you manually preprocess the patch to include the
> > > USE_LOCALE branch and remove the not USE_LOCALE branch.
> > 
> > Yeah, that should work. You may also remove include/regex/cclass.h
> > since it will not be used any more.
> > 
> > > However, if the no-locale branches have significant performance
> > > benefits then it might be worth pondering setting up some
> > > optimizations.
> > 
> > This is not the case.
> 
> Here is a patch based on this discussion.
> 
> -- 
>   Bruce Momjian                        |  http://candle.pha.pa.us
>   pgman@candle.pha.pa.us               |  (610) 853-3000
>   +  If your life is a hard drive,     |  830 Blythe Avenue
>   +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

> Index: src/backend/regex/regcomp.c
> ===================================================================
> RCS file: /cvsroot/pgsql/src/backend/regex/regcomp.c,v
> retrieving revision 1.28
> diff -c -r1.28 regcomp.c
> *** src/backend/regex/regcomp.c    28 Oct 2001 06:25:49 -0000    1.28
> --- src/backend/regex/regcomp.c    16 Apr 2002 23:12:38 -0000
> ***************
> *** 47,53 ****
>   #include "regex/regex.h"
>   #include "regex/utils.h"
>   #include "regex/regex2.h"
> ! #include "regex/cclass.h"
>   #include "regex/cname.h"
>   
>   /*
> --- 47,60 ----
>   #include "regex/regex.h"
>   #include "regex/utils.h"
>   #include "regex/regex2.h"
> ! struct cclass
> ! {
> !     char *name;
> !     char *chars;
> !     char *multis;
> ! };
> ! static struct cclass* cclasses = NULL;
> ! static struct cclass* cclass_init(void);
>   #include "regex/cname.h"
>   
>   /*
> ***************
> *** 174,179 ****
> --- 181,189 ----
>       pg_wchar   *wcp;
>   #endif
>   
> +     if ( cclasses == NULL )
> +         cclasses = cclass_init();
> +     
>   #ifdef REDEBUG
>   #define  GOODFLAGS(f)     (f)
>   #else
> ***************
> *** 884,890 ****
>       struct cclass *cp;
>       size_t        len;
>       char       *u;
> !     char        c;
>   
>       while (MORE() && pg_isalpha(PEEK()))
>           NEXT();
> --- 894,900 ----
>       struct cclass *cp;
>       size_t        len;
>       char       *u;
> !     unsigned char        c;
>   
>       while (MORE() && pg_isalpha(PEEK()))
>           NEXT();
> ***************
> *** 905,911 ****
>   
>       u = cp->chars;
>       while ((c = *u++) != '\0')
> !         CHadd(cs, c);
>       for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
>           MCadd(p, cs, u);
>   }
> --- 915,921 ----
>   
>       u = cp->chars;
>       while ((c = *u++) != '\0')
> !         CHadd(cs, c);   
>       for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
>           MCadd(p, cs, u);
>   }
> ***************
> *** 1715,1718 ****
> --- 1725,1788 ----
>   #else
>       return (islower((unsigned char) c));
>   #endif
> + }
> + 
> + static struct cclass *
> + cclass_init(void)
> + {
> +     struct cclass *cp = NULL;
> +     struct cclass *classes = NULL;
> +     struct cclass_factory
> +     {
> +         char *name;
> +         int (*func)(int);
> +         char *chars;
> +     } cclass_factories [] =
> +         {
> +             { "alnum", isalnum, NULL },
> +             { "alpha", isalpha, NULL },
> +             { "blank", NULL, " \t" },
> +             { "cntrl", iscntrl, NULL },
> +             { "digit", NULL, "0123456789" },
> +             { "graph", isgraph, NULL },
> +             { "lower", islower, NULL },
> +             { "print", isprint, NULL },
> +             { "punct", ispunct, NULL },
> +             { "space", NULL, "\t\n\v\f\r " },
> +             { "upper", isupper, NULL },
> +             { "xdigit", isxdigit, NULL },
> +             { NULL, NULL, NULL }
> +         };
> +     struct cclass_factory *cf = NULL;
> + 
> +     classes = malloc(sizeof(struct cclass) * (sizeof(cclass_factories) / sizeof(struct cclass_factory)));
> +     if (classes == NULL)
> +         elog(ERROR,"cclass_init: out of memory");
> +     
> +     cp = classes;
> +     for(cf = cclass_factories; cf->name != NULL; cf++)
> +         {
> +             cp->name = strdup(cf->name);
> +             if ( cf->chars )
> +                 cp->chars = strdup(cf->chars);
> +             else
> +                 {
> +                     int x = 0, y = 0;
> +                     cp->chars = malloc(sizeof(char) * 256);
> +                     if (cp->chars == NULL)
> +                         elog(ERROR,"cclass_init: out of memory");
> +                     for (x = 0; x < 256; x++)
> +                         {
> +                             if((cf->func)(x))
> +                                 *(cp->chars + y++) = x;                            
> +                         }
> +                     *(cp->chars + y) = '\0';
> +                 }
> +             cp->multis = "";
> +             cp++;
> +         }
> +     cp->name = cp->chars = NULL;
> +     cp->multis = "";
> +     
> +     return classes;
>   }

> 
> ---------------------------(end of broadcast)---------------------------
> TIP 6: Have you searched our list archives?
> 
> http://archives.postgresql.org

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026
 


pgsql-hackers by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: Index Scans become Seq Scans after VACUUM ANALYSE
Next
From: Doug McNaught
Date:
Subject: Re: Index Scans become Seq Scans after VACUUM ANALYSE