Re: [HACKERS] [ADMIN] invalid multibyte character for locale - Mailing list pgsql-patches

From Bruce Momjian
Subject Re: [HACKERS] [ADMIN] invalid multibyte character for locale
Date
Msg-id 200503150526.j2F5QGw15413@candle.pha.pa.us
Whole thread Raw
Responses Re: [HACKERS] [ADMIN] invalid multibyte character for locale  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-patches
Tom Lane wrote:
> Bjoern Metzdorf <bm@turtle-entertainment.de> writes:
> > I assume I could just remove
> > #define USE_WIDE_UPPER_LOWER
> > from oracle_compat.c to emulate the old behaviour. But a cleaner fix
> > would be to check if we are using UNICODE and locale is C or POSIX and
> > only then skip USE_WIDE_UPPER_LOWER.
>
> Perhaps it would be reasonable to do something like this:
>
> #ifdef USE_WIDE_UPPER_LOWER
>     /*
>      * use wide char code only when max encoding length > one
>      * and we aren't in C locale
>      */
>     if (pg_database_encoding_max_length() > 1 &&
>         !lc_ctype_is_c())
>     {
>
> where lc_ctype_is_c() is the obvious clone of the existing
> lc_collate_is_c() routine.  We can reasonably assume that mbstowcs
> is going to be unable to offer any useful behavior in C locale.

Tom, is this the fix you were thinking of?  Seems like it would be a
good improvement.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.57
diff -c -c -r1.57 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c    31 Dec 2004 22:01:22 -0000    1.57
--- src/backend/utils/adt/oracle_compat.c    15 Mar 2005 05:24:11 -0000
***************
*** 166,173 ****
  lower(PG_FUNCTION_ARGS)
  {
  #ifdef USE_WIDE_UPPER_LOWER
!     /* use wide char code only when max encoding length > one */
!     if (pg_database_encoding_max_length() > 1)
      {
          text       *string = PG_GETARG_TEXT_P(0);
          text       *result;
--- 166,173 ----
  lower(PG_FUNCTION_ARGS)
  {
  #ifdef USE_WIDE_UPPER_LOWER
!     /* use wide char code only when max encoding length > 1 and not C locale */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          text       *string = PG_GETARG_TEXT_P(0);
          text       *result;
***************
*** 229,235 ****
  {
  #ifdef USE_WIDE_UPPER_LOWER
      /* use wide char code only when max encoding length > one */
!     if (pg_database_encoding_max_length() > 1)
      {
          text       *string = PG_GETARG_TEXT_P(0);
          text       *result;
--- 229,235 ----
  {
  #ifdef USE_WIDE_UPPER_LOWER
      /* use wide char code only when max encoding length > one */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          text       *string = PG_GETARG_TEXT_P(0);
          text       *result;
***************
*** 294,300 ****
  {
  #ifdef USE_WIDE_UPPER_LOWER
      /* use wide char code only when max encoding length > one */
!     if (pg_database_encoding_max_length() > 1)
      {
          text       *string = PG_GETARG_TEXT_P(0);
          text       *result;
--- 294,300 ----
  {
  #ifdef USE_WIDE_UPPER_LOWER
      /* use wide char code only when max encoding length > one */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          text       *string = PG_GETARG_TEXT_P(0);
          text       *result;
Index: src/backend/utils/adt/pg_locale.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/pg_locale.c,v
retrieving revision 1.30
diff -c -c -r1.30 pg_locale.c
*** src/backend/utils/adt/pg_locale.c    1 Jan 2005 05:43:07 -0000    1.30
--- src/backend/utils/adt/pg_locale.c    15 Mar 2005 05:24:11 -0000
***************
*** 197,202 ****
--- 197,229 ----


  /*
+  * We'd like to cache whether LC_CTYPE is C (or POSIX), so we can
+  * optimize a few code paths in various places.
+  */
+ bool
+ lc_ctype_is_c(void)
+ {
+     /* Cache result so we only have to compute it once */
+     static int    result = -1;
+     char       *localeptr;
+
+     if (result >= 0)
+         return (bool) result;
+     localeptr = setlocale(LC_CTYPE, NULL);
+     if (!localeptr)
+         elog(ERROR, "invalid LC_CTYPE setting");
+
+     if (strcmp(localeptr, "C") == 0)
+         result = true;
+     else if (strcmp(localeptr, "POSIX") == 0)
+         result = true;
+     else
+         result = false;
+     return (bool) result;
+ }
+
+
+ /*
   * Frees the malloced content of a struct lconv.  (But not the struct
   * itself.)
   */
Index: src/include/utils/pg_locale.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/pg_locale.h,v
retrieving revision 1.19
diff -c -c -r1.19 pg_locale.h
*** src/include/utils/pg_locale.h    1 Jan 2005 05:43:09 -0000    1.19
--- src/include/utils/pg_locale.h    15 Mar 2005 05:24:16 -0000
***************
*** 32,37 ****
--- 32,38 ----
                     bool doit, GucSource source);

  extern bool lc_collate_is_c(void);
+ extern bool lc_ctype_is_c(void);

  /*
   * Return the POSIX lconv struct (contains number/money formatting

pgsql-patches by date:

Previous
From: Christopher Kings-Lynne
Date:
Subject: Improvement to charset docs
Next
From: Tom Lane
Date:
Subject: Re: [HACKERS] [ADMIN] invalid multibyte character for locale