Re: [PORTS] Locale bug - Mailing list pgsql-patches
From | Bruce Momjian |
---|---|
Subject | Re: [PORTS] Locale bug |
Date | |
Msg-id | 200101241340.IAA11826@candle.pha.pa.us Whole thread Raw |
Responses |
Re: Re: [PORTS] Locale bug
Re: [PORTS] Locale bug |
List | pgsql-patches |
Is this still an issue on FreeBSD? Do we need to disable this problem somehow? > ============================================================================ > POSTGRESQL BUG REPORT TEMPLATE > ============================================================================ > > > Your name : Andriy I Pilipenko > Your email address : bamby@marka.net.ua > > > System Configuration > --------------------- > Architecture (example: Intel Pentium) : Intel Pentium > > Operating System (example: Linux 2.0.26 ELF) : FreeBSD 3.x, 4.0 > > PostgreSQL version (example: PostgreSQL-6.5.1): PostgreSQL-6.5.3, > PostgreSQL-7.0.beta5 > > Compiler used (example: gcc 2.8.0) : gcc 2.7.2.2, gcc 2.9.5 > > > Please enter a FULL description of your problem: > ------------------------------------------------ > > There is at least one FreeBSD-specific bug in PostgreSQL. If Postgres is > configured with locale support but without multibyte support one cannot > perform case-insensitive search using national language characters. > The problem comes from the declaration of pg_wchar as char for non-multibyte mode. > Character values above 127 are considered to be negative values and this > results in improper return values of functions isalpha(), isupper() etc. > Declaring pg_wchar as unsigned char eliminates this problem. > > This problem does not exist on Linux. On this system functions like isalpha(), > isupper() etc. successfully accept negative values as well as their > positive counterparts. > > > Please describe a way to repeat the problem. Please try to provide a > concise reproducible example, if at all possible: > ---------------------------------------------------------------------- > > Compile and install postgres with locale support enabled and multibyte > support disabled on FreeBSD. Create a table with a field of some character > type. Put in the table a couple of records with some character with code > above 127 in lower and upper case. 
Try query like this: > > SELECT * FROM table WHERE field ~* '<the_character>' > > where <the_character> is the mentioned character. You will receive only > one record with character exactly the same as in query. > > > If you know how this problem might be fixed, list the solution below: > --------------------------------------------------------------------- > > Here is the patch. I tried it on FreeBSD and Linux with success. This > patch applies to PostgreSQL 6.5.3 and 7.0.beta5. > > > Index: postgres/src/backend/regex/engine.c > diff -c postgres/src/backend/regex/engine.c:1.1.1.1 postgres/src/backend/regex/engine.c:1.2 > *** postgres/src/backend/regex/engine.c:1.1.1.1 Tue Apr 18 21:45:09 2000 > --- postgres/src/backend/regex/engine.c Wed Apr 19 09:46:38 2000 > *************** > *** 123,130 **** > #define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */ > #define NNONCHAR (CODEMAX-16777216) > #else > ! #define NONCHAR(c) ((c) > CHAR_MAX) > ! #define NNONCHAR (CODEMAX-CHAR_MAX) > #endif > > #ifdef REDEBUG > --- 123,130 ---- > #define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */ > #define NNONCHAR (CODEMAX-16777216) > #else > ! #define NONCHAR(c) ((c) > UCHAR_MAX) > ! #define NNONCHAR (CODEMAX-UCHAR_MAX) > #endif > > #ifdef REDEBUG > *************** > *** 958,965 **** > == #define BOW (BOL+4) > == #define EOW (BOL+5) > == #define CODEMAX (BOL+5) // highest code used > ! == #define NONCHAR(c) ((c) > CHAR_MAX) > ! == #define NNONCHAR (CODEMAX-CHAR_MAX) > */ > static states > step(g, start, stop, bef, ch, aft) > --- 958,965 ---- > == #define BOW (BOL+4) > == #define EOW (BOL+5) > == #define CODEMAX (BOL+5) // highest code used > ! == #define NONCHAR(c) ((c) > UCHAR_MAX) > ! 
== #define NNONCHAR (CODEMAX-UCHAR_MAX) > */ > static states > step(g, start, stop, bef, ch, aft) > Index: postgres/src/backend/regex/regcomp.c > diff -c postgres/src/backend/regex/regcomp.c:1.1.1.1 postgres/src/backend/regex/regcomp.c:1.2 > *** postgres/src/backend/regex/regcomp.c:1.1.1.1 Tue Apr 18 21:45:09 2000 > --- postgres/src/backend/regex/regcomp.c Wed Apr 19 09:46:38 2000 > *************** > *** 97,107 **** > static void p_b_eclass(struct parse * p, cset *cs); > static pg_wchar p_b_symbol(struct parse * p); > static char p_b_coll_elem(struct parse * p, int endc); > - #ifdef MULTIBYTE > static unsigned char othercase(int ch); > - #else > - static char othercase(int ch); > - #endif > static void bothcases(struct parse * p, int ch); > static void ordinary(struct parse * p, int ch); > static void nonnewline(struct parse * p); > --- 97,103 ---- > *************** > *** 224,232 **** > return REG_INVARG; > len = preg->re_endp - wcp; > #else > ! if (preg->re_endp < pattern) > return REG_INVARG; > ! len = preg->re_endp - pattern; > #endif > } > else > --- 220,228 ---- > return REG_INVARG; > len = preg->re_endp - wcp; > #else > ! if (preg->re_endp < (pg_wchar *) pattern) > return REG_INVARG; > ! 
len = preg->re_endp - (pg_wchar *) pattern; > #endif > } > else > *************** > *** 1038,1071 **** > - othercase - return the case counterpart of an alphabetic > == static char othercase(int ch); > */ > - #ifdef MULTIBYTE > static unsigned char /* if no counterpart, return ch */ > - #else > - static char /* if no counterpart, return ch */ > - #endif > othercase(ch) > int ch; > { > assert(pg_isalpha(ch)); > if (pg_isupper(ch)) > - #ifdef MULTIBYTE > - return (unsigned char) tolower(ch); > - #else > return tolower(ch); > - #endif > else if (pg_islower(ch)) > - #ifdef MULTIBYTE > - return (unsigned char) toupper(ch); > - #else > return toupper(ch); > - #endif > else > /* peculiar, but could happen */ > - #ifdef MULTIBYTE > - return (unsigned char) ch; > - #else > return ch; > - #endif > } > > /* > --- 1034,1051 ---- > Index: postgres/src/include/mb/pg_wchar.h > diff -c postgres/src/include/mb/pg_wchar.h:1.1.1.1 postgres/src/include/mb/pg_wchar.h:1.2 > *** postgres/src/include/mb/pg_wchar.h:1.1.1.1 Tue Apr 18 21:45:31 2000 > --- postgres/src/include/mb/pg_wchar.h Wed Apr 19 09:46:42 2000 > *************** > *** 34,40 **** > typedef unsigned int pg_wchar; > > #else > ! #define pg_wchar char > #endif > > /* > --- 34,40 ---- > typedef unsigned int pg_wchar; > > #else > ! typedef unsigned char pg_wchar; > #endif > > /* > Index: postgres/src/include/regex/regex2.h > diff -c postgres/src/include/regex/regex2.h:1.1.1.1 postgres/src/include/regex/regex2.h:1.2 > *** postgres/src/include/regex/regex2.h:1.1.1.1 Tue Apr 18 21:45:35 2000 > --- postgres/src/include/regex/regex2.h Wed Apr 19 09:46:47 2000 > *************** > *** 201,207 **** > #ifdef MULTIBYTE > #define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */ > #else > ! #define OUT (CHAR_MAX+1) /* a non-character value */ > #endif > > #ifdef MULTIBYTE > --- 201,207 ---- > #ifdef MULTIBYTE > #define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */ > #else > ! 
#define OUT (UCHAR_MAX+1) /* a non-character value */ > #endif > > #ifdef MULTIBYTE > > -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
pgsql-patches by date: