Re: [GENERAL] invalid multibyte character for locale - Mailing list pgsql-patches
From | Bruce Momjian |
---|---|
Subject | Re: [GENERAL] invalid multibyte character for locale |
Date | |
Msg-id | 200503152353.j2FNrFJ26428@candle.pha.pa.us |
List | pgsql-patches |
Tatsuo Ishii wrote: > Apparently your hack does not kill #define USE_WIDE_UPPER_LOWER. > > BTW, the current code for upper/lower etc. seems to be broken. The > exact problem you have is happening in Japanese encodings (EUC_JP) > too. PostgreSQL should not use the wide-character method if LC_CTYPE is C. Here is the patch I made. I will apply it to CVS HEAD and 8.0.X because it is a bug fix. --------------------------------------------------------------------------- > -- > Tatsuo Ishii > > > L.S. > > > > I have a database created on: > > > > db=# select version(); > > version > > --------------------------------------------------------------------- > > PostgreSQL 8.0.1 on i686-pc-linux-gnu, compiled by GCC egcs-2.91.66 > > (1 row) > > > > > > The initdb was done using no-locale and unicode as default encoding, the > > particular database itself is indeed encoded as UNICODE. > > > > > > Due to a buggy glibc, the following patch was applied to this install in order > > to avoid a crash on things like 'upper(<string>)': > > > > --- oracle_compat.c_orig Mon Dec 6 22:14:11 2004 > > +++ oracle_compat.c Mon Dec 6 22:14:24 2004 > > @@ -43,7 +43,7 @@ > > * We assume if we have these two functions, we have their friends too, and > > * can use the wide-character method. > > */ > > -#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER) > > +#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER) && FALSE > > #define USE_WIDE_UPPER_LOWER > > #endif > > > > > > The database on this machine was dumped and then restored on another, which > > has a more recent installation of Slack on it: > > > > > > db=# select version(); > > version > > ------------------------------------------------------------------------ > > PostgreSQL 8.0.1 on i586-pc-linux-gnu, compiled by GCC gcc (GCC) 3.2.3 > > (1 row) > > > > > > Again, the initdb on this machine was done using no-locale and unicode as > > default encoding, the particular database obviously is also encoded as > > UNICODE. 
> > > > > > > > On the second machine, I'm now getting the following: > > > > db=# select 'J?TERBOG'; > > ?column? > > ---------- > > J?TERBOG > > (1 row) > > > > db=# select lower('J?TERBOG'); > > ERROR: invalid multibyte character for locale > > HINT: The server's LC_CTYPE locale is probably incompatible with the database > > encoding. > > > > > > > > As far as I can tell, this didn't happen with v8.0.0, but I'm afraid I can't > > be totally sure about that. Obviously, the error doesn't occur on the first > > machine due to the hack needed for the buggy glibc. > > > > > > I'd appreciate a pointer as to what is causing this. It 'shouldn't' be the > > hack nor the dump/restore cycle, but.......? > > > > > > TIA. > > > > > > > > -- > > Best, > > > > > > > > > > Frank. > > > > ---------------------------(end of broadcast)--------------------------- > > TIP 7: don't forget to increase your free space map settings > > > > ---------------------------(end of broadcast)--------------------------- > TIP 2: you can get off all lists at once with the unregister command > (send "unregister YourEmailAddressHere" to majordomo@postgresql.org) > -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 359-1001 + If your life is a hard drive, | 13 Roberts Road + Christ can be your backup. | Newtown Square, Pennsylvania 19073 Index: src/backend/utils/adt/oracle_compat.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v retrieving revision 1.57 diff -c -c -r1.57 oracle_compat.c *** src/backend/utils/adt/oracle_compat.c 31 Dec 2004 22:01:22 -0000 1.57 --- src/backend/utils/adt/oracle_compat.c 15 Mar 2005 13:45:22 -0000 *************** *** 166,173 **** lower(PG_FUNCTION_ARGS) { #ifdef USE_WIDE_UPPER_LOWER ! /* use wide char code only when max encoding length > one */ ! 
if (pg_database_encoding_max_length() > 1) { text *string = PG_GETARG_TEXT_P(0); text *result; --- 166,173 ---- lower(PG_FUNCTION_ARGS) { #ifdef USE_WIDE_UPPER_LOWER ! /* use wide char code only when max encoding length > 1 and ctype != C */ ! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { text *string = PG_GETARG_TEXT_P(0); text *result; *************** *** 228,235 **** upper(PG_FUNCTION_ARGS) { #ifdef USE_WIDE_UPPER_LOWER ! /* use wide char code only when max encoding length > one */ ! if (pg_database_encoding_max_length() > 1) { text *string = PG_GETARG_TEXT_P(0); text *result; --- 228,235 ---- upper(PG_FUNCTION_ARGS) { #ifdef USE_WIDE_UPPER_LOWER ! /* use wide char code only when max encoding length > 1 and ctype != C */ ! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { text *string = PG_GETARG_TEXT_P(0); text *result; *************** *** 293,300 **** initcap(PG_FUNCTION_ARGS) { #ifdef USE_WIDE_UPPER_LOWER ! /* use wide char code only when max encoding length > one */ ! if (pg_database_encoding_max_length() > 1) { text *string = PG_GETARG_TEXT_P(0); text *result; --- 293,300 ---- initcap(PG_FUNCTION_ARGS) { #ifdef USE_WIDE_UPPER_LOWER ! /* use wide char code only when max encoding length > 1 and ctype != C */ ! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { text *string = PG_GETARG_TEXT_P(0); text *result; Index: src/backend/utils/adt/pg_locale.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/pg_locale.c,v retrieving revision 1.30 diff -c -c -r1.30 pg_locale.c *** src/backend/utils/adt/pg_locale.c 1 Jan 2005 05:43:07 -0000 1.30 --- src/backend/utils/adt/pg_locale.c 15 Mar 2005 13:45:22 -0000 *************** *** 197,202 **** --- 197,229 ---- /* + * We'd like to cache whether LC_CTYPE is C (or POSIX), so we can + * optimize a few code paths in various places. 
+ */ + bool + lc_ctype_is_c(void) + { + /* Cache result so we only have to compute it once */ + static int result = -1; + char *localeptr; + + if (result >= 0) + return (bool) result; + localeptr = setlocale(LC_CTYPE, NULL); + if (!localeptr) + elog(ERROR, "invalid LC_CTYPE setting"); + + if (strcmp(localeptr, "C") == 0) + result = true; + else if (strcmp(localeptr, "POSIX") == 0) + result = true; + else + result = false; + return (bool) result; + } + + + /* * Frees the malloced content of a struct lconv. (But not the struct * itself.) */ Index: src/include/utils/pg_locale.h =================================================================== RCS file: /cvsroot/pgsql/src/include/utils/pg_locale.h,v retrieving revision 1.19 diff -c -c -r1.19 pg_locale.h *** src/include/utils/pg_locale.h 1 Jan 2005 05:43:09 -0000 1.19 --- src/include/utils/pg_locale.h 15 Mar 2005 13:45:23 -0000 *************** *** 32,37 **** --- 32,38 ---- bool doit, GucSource source); extern bool lc_collate_is_c(void); + extern bool lc_ctype_is_c(void); /* * Return the POSIX lconv struct (contains number/money formatting
pgsql-patches by date: