Locale bug - Mailing list pgsql-ports
From | Andriy I Pilipenko |
---|---|
Subject | Locale bug |
Date | |
Msg-id | Pine.BSF.4.21.0004191118560.87161-100000@bamby.marka.net.ua Whole thread Raw |
Responses |
Re: Locale bug
Re: Locale bug Re: Locale bug |
List | pgsql-ports |
============================================================================ POSTGRESQL BUG REPORT TEMPLATE ============================================================================ Your name : Andriy I Pilipenko Your email address : bamby@marka.net.ua System Configuration --------------------- Architecture (example: Intel Pentium) : Intel Pentium Operating System (example: Linux 2.0.26 ELF) : FreeBSD 3.x, 4.0 PostgreSQL version (example: PostgreSQL-6.5.1): PostgreSQL-6.5.3, PostgreSQL-7.0.beta5 Compiler used (example: gcc 2.8.0) : gcc 2.7.2.2, gcc 2.9.5 Please enter a FULL description of your problem: ------------------------------------------------ There is a bug in PostgreSQL that affects at least FreeBSD. If Postgres is configured with locale support but without multibyte support, one cannot perform a case-insensitive search using national language characters. The problem comes from the declaration of pg_wchar as char for non-multibyte mode. Character values above 127 are treated as negative values, and this results in improper return values from functions such as isalpha(), isupper() etc. Declaring pg_wchar as unsigned char eliminates this problem. This problem does not exist on Linux. On that system, functions like isalpha(), isupper() etc. successfully accept negative values as well as their positive counterparts. Please describe a way to repeat the problem. Please try to provide a concise reproducible example, if at all possible: ---------------------------------------------------------------------- Compile and install postgres with locale support enabled and multibyte support disabled on FreeBSD. Create a table with a field of some character type. Put into the table a couple of records containing some character with a code above 127, in lower and upper case. Try a query like this: SELECT * FROM table WHERE field ~* '<the_character>' where <the_character> is the mentioned character. You will receive only one record, containing the character exactly as it appears in the query. 
If you know how this problem might be fixed, list the solution below: --------------------------------------------------------------------- Here is the patch. I tried it on FreeBSD and Linux with success. This patch applies to PostgreSQL 6.5.3 and 7.0.beta5. Index: postgres/src/backend/regex/engine.c diff -c postgres/src/backend/regex/engine.c:1.1.1.1 postgres/src/backend/regex/engine.c:1.2 *** postgres/src/backend/regex/engine.c:1.1.1.1 Tue Apr 18 21:45:09 2000 --- postgres/src/backend/regex/engine.c Wed Apr 19 09:46:38 2000 *************** *** 123,130 **** #define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */ #define NNONCHAR (CODEMAX-16777216) #else ! #define NONCHAR(c) ((c) > CHAR_MAX) ! #define NNONCHAR (CODEMAX-CHAR_MAX) #endif #ifdef REDEBUG --- 123,130 ---- #define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */ #define NNONCHAR (CODEMAX-16777216) #else ! #define NONCHAR(c) ((c) > UCHAR_MAX) ! #define NNONCHAR (CODEMAX-UCHAR_MAX) #endif #ifdef REDEBUG *************** *** 958,965 **** == #define BOW (BOL+4) == #define EOW (BOL+5) == #define CODEMAX (BOL+5) // highest code used ! == #define NONCHAR(c) ((c) > CHAR_MAX) ! == #define NNONCHAR (CODEMAX-CHAR_MAX) */ static states step(g, start, stop, bef, ch, aft) --- 958,965 ---- == #define BOW (BOL+4) == #define EOW (BOL+5) == #define CODEMAX (BOL+5) // highest code used ! == #define NONCHAR(c) ((c) > UCHAR_MAX) ! 
== #define NNONCHAR (CODEMAX-UCHAR_MAX) */ static states step(g, start, stop, bef, ch, aft) Index: postgres/src/backend/regex/regcomp.c diff -c postgres/src/backend/regex/regcomp.c:1.1.1.1 postgres/src/backend/regex/regcomp.c:1.2 *** postgres/src/backend/regex/regcomp.c:1.1.1.1 Tue Apr 18 21:45:09 2000 --- postgres/src/backend/regex/regcomp.c Wed Apr 19 09:46:38 2000 *************** *** 97,107 **** static void p_b_eclass(struct parse * p, cset *cs); static pg_wchar p_b_symbol(struct parse * p); static char p_b_coll_elem(struct parse * p, int endc); - #ifdef MULTIBYTE static unsigned char othercase(int ch); - #else - static char othercase(int ch); - #endif static void bothcases(struct parse * p, int ch); static void ordinary(struct parse * p, int ch); static void nonnewline(struct parse * p); --- 97,103 ---- *************** *** 224,232 **** return REG_INVARG; len = preg->re_endp - wcp; #else ! if (preg->re_endp < pattern) return REG_INVARG; ! len = preg->re_endp - pattern; #endif } else --- 220,228 ---- return REG_INVARG; len = preg->re_endp - wcp; #else ! if (preg->re_endp < (pg_wchar *) pattern) return REG_INVARG; ! 
len = preg->re_endp - (pg_wchar *) pattern; #endif } else *************** *** 1038,1071 **** - othercase - return the case counterpart of an alphabetic == static char othercase(int ch); */ - #ifdef MULTIBYTE static unsigned char /* if no counterpart, return ch */ - #else - static char /* if no counterpart, return ch */ - #endif othercase(ch) int ch; { assert(pg_isalpha(ch)); if (pg_isupper(ch)) - #ifdef MULTIBYTE - return (unsigned char) tolower(ch); - #else return tolower(ch); - #endif else if (pg_islower(ch)) - #ifdef MULTIBYTE - return (unsigned char) toupper(ch); - #else return toupper(ch); - #endif else /* peculiar, but could happen */ - #ifdef MULTIBYTE - return (unsigned char) ch; - #else return ch; - #endif } /* --- 1034,1051 ---- Index: postgres/src/include/mb/pg_wchar.h diff -c postgres/src/include/mb/pg_wchar.h:1.1.1.1 postgres/src/include/mb/pg_wchar.h:1.2 *** postgres/src/include/mb/pg_wchar.h:1.1.1.1 Tue Apr 18 21:45:31 2000 --- postgres/src/include/mb/pg_wchar.h Wed Apr 19 09:46:42 2000 *************** *** 34,40 **** typedef unsigned int pg_wchar; #else ! #define pg_wchar char #endif /* --- 34,40 ---- typedef unsigned int pg_wchar; #else ! typedef unsigned char pg_wchar; #endif /* Index: postgres/src/include/regex/regex2.h diff -c postgres/src/include/regex/regex2.h:1.1.1.1 postgres/src/include/regex/regex2.h:1.2 *** postgres/src/include/regex/regex2.h:1.1.1.1 Tue Apr 18 21:45:35 2000 --- postgres/src/include/regex/regex2.h Wed Apr 19 09:46:47 2000 *************** *** 201,207 **** #ifdef MULTIBYTE #define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */ #else ! #define OUT (CHAR_MAX+1) /* a non-character value */ #endif #ifdef MULTIBYTE --- 201,207 ---- #ifdef MULTIBYTE #define OUT (16777216+1) /* 16777216 == 2^24 == 3 bytes */ #else ! #define OUT (UCHAR_MAX+1) /* a non-character value */ #endif #ifdef MULTIBYTE
pgsql-ports by date: