regex + locale bug - Mailing list pgsql-hackers
From | Angelos Karageorgiou |
---|---|
Subject | regex + locale bug |
Date | |
Msg-id | Pine.BSI.4.05L.9902101934190.5912-100000@awesome.incredible.com Whole thread Raw |
List | pgsql-hackers |
I am using postgres 6.4.2 on BSD/OS 3.1 with a Greek locale that I have developed. I knew that regexes with postgres would not work because of something I did, but a posting from another fellow from Sweden gave me a clue that the problem must be with the regex package and not the locale. So I investigated the code and found out that pg_isdigit(int ch), pg_isalpha(int ch) and the associated functions do a comparison of characters as ints. I changed a few crucial points with a cast to (unsigned char) and voilà, regexes in Greek with full locale support. My guess is that an int != unsigned char when comparing; the sign bit is probably the culprit. Please test the patch on some other language too; Swedish or Finnish would be a nice touch. Patch follows, but it is trivial really. -------------------------------------------------------------------------------- *** regcomp.c Tue Sep 1 07:31:25 1998 --- regcomp.c.patched Wed Feb 10 19:57:11 1999 *************** *** 1038,1046 **** { assert(pg_isalpha(ch)); if (pg_isupper(ch)) ! return tolower(ch); else if (pg_islower(ch)) ! return toupper(ch); else /* peculiar, but could happen */ return ch; --- 1038,1046 ---- { assert(pg_isalpha(ch)); if (pg_isupper(ch)) ! return tolower((unsigned char)ch); else if (pg_islower(ch)) ! return toupper((unsigned char)ch); else /* peculiar, but could happen */ return ch; *************** *** 1055,1067 **** static void bothcases(p, ch) struct parse *p; ! int ch; { pg_wchar *oldnext = p->next; pg_wchar *oldend = p->end; pg_wchar bracket[3]; ! assert(othercase(ch) != ch);/* p_bracket() would recurse */ p->next = bracket; p->end = bracket + 2; bracket[0]= ch; --- 1055,1067 ---- static void bothcases(p, ch) struct parse *p; ! int ch; { pg_wchar *oldnext = p->next; pg_wchar *oldend = p->end; pg_wchar bracket[3]; ! 
assert(othercase(ch) != (unsigned char)ch);/* p_bracket() would recurse */ p->next = bracket; p->end = bracket+ 2; bracket[0] = ch; *************** *** 1084,1090 **** { cat_t *cap = p->g->categories; ! if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch) bothcases(p, ch); else { --- 1084,1090 ---- { cat_t *cap = p->g->categories; ! if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != (unsigned char)ch) bothcases(p, ch); else { *************** *** 1862,1868 **** #ifdef MULTIBYTE return (c >= 0 && c <= UCHAR_MAX && isdigit(c)); #else ! return (isdigit(c)); #endif } --- 1862,1868 ---- #ifdef MULTIBYTE return (c >= 0 && c <= UCHAR_MAX && isdigit(c)); #else ! return (isdigit((unsigned char)c)); #endif } *************** *** 1872,1878 **** #ifdef MULTIBYTE return (c >= 0 && c <= UCHAR_MAX && isalpha(c)); #else ! return (isalpha(c)); #endif } --- 1872,1878 ---- #ifdef MULTIBYTE return (c >= 0 && c <= UCHAR_MAX && isalpha(c)); #else ! return (isalpha((unsigned char)c)); #endif } *************** *** 1882,1888 **** #ifdef MULTIBYTE return (c >= 0 && c <= UCHAR_MAX && isupper(c)); #else ! return (isupper(c)); #endif } --- 1882,1888 ---- #ifdef MULTIBYTE return (c >= 0 && c <= UCHAR_MAX && isupper(c)); #else ! return (isupper((unsigned char)c)); #endif } *************** *** 1892,1897 **** #ifdef MULTIBYTE return (c >= 0 && c <= UCHAR_MAX && islower(c)); #else ! return (islower(c)); #endif } --- 1892,1897 ---- #ifdef MULTIBYTE return (c >= 0 && c <= UCHAR_MAX && islower(c)); #else ! return (islower((unsigned char)c)); #endif } -- Incredible Networks LTD Angelos Karageorgiou 20 Karea st, +30.1.92.12.312 (voice) 116 36 Athens, Greece. +30.1.92.12.314 (fax) http://www.incredible.com angelos@incredible.com (e-mail)
pgsql-hackers by date: