Re: [PORTS] Locale bug - Mailing list pgsql-patches

From Bruce Momjian
Subject Re: [PORTS] Locale bug
Date
Msg-id 200101241340.IAA11826@candle.pha.pa.us
Whole thread Raw
Responses Re: Re: [PORTS] Locale bug  (Tom Lane <tgl@sss.pgh.pa.us>)
Re: [PORTS] Locale bug  (bamby <bamby@marka.net.ua>)
List pgsql-patches
Is this still an issue on FreeBSD?  Do we need to address this problem
somehow?


> ============================================================================
>                         POSTGRESQL BUG REPORT TEMPLATE
> ============================================================================
>
>
> Your name        :    Andriy I Pilipenko
> Your email address    :    bamby@marka.net.ua
>
>
> System Configuration
> ---------------------
>   Architecture (example: Intel Pentium)      :  Intel Pentium
>
>   Operating System (example: Linux 2.0.26 ELF)     :  FreeBSD 3.x, 4.0
>
>   PostgreSQL version (example: PostgreSQL-6.5.1):  PostgreSQL-6.5.3,
>                                                    PostgreSQL-7.0.beta5
>
>   Compiler used (example:  gcc 2.8.0)        :  gcc 2.7.2.2, gcc 2.9.5
>
>
> Please enter a FULL description of your problem:
> ------------------------------------------------
>
> There is at least one FreeBSD-specific bug in PostgreSQL. If Postgres is
> configured with locale support but without multibyte support, one cannot
> perform case-insensitive searches using national language characters.
> The problem comes from the declaration of pg_wchar as char in non-multibyte
> mode. Character values above 127 are treated as negative values, and this
> results in improper return values from functions such as isalpha(),
> isupper(), etc. Declaring pg_wchar as unsigned char eliminates this problem.
>
> This problem does not exist on Linux. On that system, functions like
> isalpha(), isupper(), etc. successfully accept negative values as well as
> their positive counterparts.
>
>
> Please describe a way to repeat the problem.   Please try to provide a
> concise reproducible example, if at all possible:
> ----------------------------------------------------------------------
>
> Compile and install Postgres with locale support enabled and multibyte
> support disabled on FreeBSD. Create a table with a field of some character
> type. Put a couple of records into the table containing some character with
> a code above 127 in lower and upper case. Try a query like this:
>
>   SELECT * FROM table WHERE field ~* '<the_character>'
>
> where <the_character> is the mentioned character. You will receive only
> the one record whose character is exactly the same as in the query.
>
>
> If you know how this problem might be fixed, list the solution below:
> ---------------------------------------------------------------------
>
> Here is the patch. I tried it on FreeBSD and Linux with success. This
> patch applies to PostgreSQL 6.5.3 and 7.0.beta5.
>
>
> Index: postgres/src/backend/regex/engine.c
> diff -c postgres/src/backend/regex/engine.c:1.1.1.1 postgres/src/backend/regex/engine.c:1.2
> *** postgres/src/backend/regex/engine.c:1.1.1.1    Tue Apr 18 21:45:09 2000
> --- postgres/src/backend/regex/engine.c    Wed Apr 19 09:46:38 2000
> ***************
> *** 123,130 ****
>   #define NONCHAR(c)      ((c) > 16777216)    /* 16777216 == 2^24 == 3 bytes */
>   #define NNONCHAR  (CODEMAX-16777216)
>   #else
> ! #define NONCHAR(c)          ((c) > CHAR_MAX)
> ! #define NNONCHAR      (CODEMAX-CHAR_MAX)
>   #endif
>
>   #ifdef REDEBUG
> --- 123,130 ----
>   #define NONCHAR(c)      ((c) > 16777216)    /* 16777216 == 2^24 == 3 bytes */
>   #define NNONCHAR  (CODEMAX-16777216)
>   #else
> ! #define NONCHAR(c)          ((c) > UCHAR_MAX)
> ! #define NNONCHAR      (CODEMAX-UCHAR_MAX)
>   #endif
>
>   #ifdef REDEBUG
> ***************
> *** 958,965 ****
>    == #define        BOW        (BOL+4)
>    == #define        EOW        (BOL+5)
>    == #define        CODEMAX (BOL+5)            // highest code used
> !  == #define        NONCHAR(c)        ((c) > CHAR_MAX)
> !  == #define        NNONCHAR        (CODEMAX-CHAR_MAX)
>    */
>   static states
>   step(g, start, stop, bef, ch, aft)
> --- 958,965 ----
>    == #define        BOW        (BOL+4)
>    == #define        EOW        (BOL+5)
>    == #define        CODEMAX (BOL+5)            // highest code used
> !  == #define        NONCHAR(c)        ((c) > UCHAR_MAX)
> !  == #define        NNONCHAR        (CODEMAX-UCHAR_MAX)
>    */
>   static states
>   step(g, start, stop, bef, ch, aft)
> Index: postgres/src/backend/regex/regcomp.c
> diff -c postgres/src/backend/regex/regcomp.c:1.1.1.1 postgres/src/backend/regex/regcomp.c:1.2
> *** postgres/src/backend/regex/regcomp.c:1.1.1.1    Tue Apr 18 21:45:09 2000
> --- postgres/src/backend/regex/regcomp.c    Wed Apr 19 09:46:38 2000
> ***************
> *** 97,107 ****
>       static void p_b_eclass(struct parse * p, cset *cs);
>       static pg_wchar p_b_symbol(struct parse * p);
>       static char p_b_coll_elem(struct parse * p, int endc);
> - #ifdef MULTIBYTE
>       static unsigned char othercase(int ch);
> - #else
> -     static char othercase(int ch);
> - #endif
>       static void bothcases(struct parse * p, int ch);
>       static void ordinary(struct parse * p, int ch);
>       static void nonnewline(struct parse * p);
> --- 97,103 ----
> ***************
> *** 224,232 ****
>               return REG_INVARG;
>           len = preg->re_endp - wcp;
>   #else
> !         if (preg->re_endp < pattern)
>               return REG_INVARG;
> !         len = preg->re_endp - pattern;
>   #endif
>       }
>       else
> --- 220,228 ----
>               return REG_INVARG;
>           len = preg->re_endp - wcp;
>   #else
> !         if (preg->re_endp < (pg_wchar *) pattern)
>               return REG_INVARG;
> !         len = preg->re_endp - (pg_wchar *) pattern;
>   #endif
>       }
>       else
> ***************
> *** 1038,1071 ****
>    - othercase - return the case counterpart of an alphabetic
>    == static char othercase(int ch);
>    */
> - #ifdef MULTIBYTE
>   static unsigned char            /* if no counterpart, return ch */
> - #else
> - static char                        /* if no counterpart, return ch */
> - #endif
>   othercase(ch)
>   int            ch;
>   {
>       assert(pg_isalpha(ch));
>       if (pg_isupper(ch))
> - #ifdef MULTIBYTE
> -         return (unsigned char) tolower(ch);
> - #else
>           return tolower(ch);
> - #endif
>       else if (pg_islower(ch))
> - #ifdef MULTIBYTE
> -         return (unsigned char) toupper(ch);
> - #else
>           return toupper(ch);
> - #endif
>       else
>   /* peculiar, but could happen */
> - #ifdef MULTIBYTE
> -         return (unsigned char) ch;
> - #else
>           return ch;
> - #endif
>   }
>
>   /*
> --- 1034,1051 ----
> Index: postgres/src/include/mb/pg_wchar.h
> diff -c postgres/src/include/mb/pg_wchar.h:1.1.1.1 postgres/src/include/mb/pg_wchar.h:1.2
> *** postgres/src/include/mb/pg_wchar.h:1.1.1.1    Tue Apr 18 21:45:31 2000
> --- postgres/src/include/mb/pg_wchar.h    Wed Apr 19 09:46:42 2000
> ***************
> *** 34,40 ****
>   typedef unsigned int pg_wchar;
>
>   #else
> ! #define pg_wchar char
>   #endif
>
>   /*
> --- 34,40 ----
>   typedef unsigned int pg_wchar;
>
>   #else
> ! typedef unsigned char pg_wchar;
>   #endif
>
>   /*
> Index: postgres/src/include/regex/regex2.h
> diff -c postgres/src/include/regex/regex2.h:1.1.1.1 postgres/src/include/regex/regex2.h:1.2
> *** postgres/src/include/regex/regex2.h:1.1.1.1    Tue Apr 18 21:45:35 2000
> --- postgres/src/include/regex/regex2.h    Wed Apr 19 09:46:47 2000
> ***************
> *** 201,207 ****
>   #ifdef MULTIBYTE
>   #define OUT          (16777216+1)    /* 16777216 == 2^24 == 3 bytes */
>   #else
> ! #define OUT          (CHAR_MAX+1)    /* a non-character value */
>   #endif
>
>   #ifdef MULTIBYTE
> --- 201,207 ----
>   #ifdef MULTIBYTE
>   #define OUT          (16777216+1)    /* 16777216 == 2^24 == 3 bytes */
>   #else
> ! #define OUT          (UCHAR_MAX+1)    /* a non-character value */
>   #endif
>
>   #ifdef MULTIBYTE
>
>


--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Re: ODBC Patch for OJs/Large Querys & Rows
Next
From: Peter Eisentraut
Date:
Subject: Re: Small patch to replace 'idle' by 'trans' if transaction is still open