Re: Simplify formatting.c - Mailing list pgsql-patches

From Bruce Momjian
Subject Re: Simplify formatting.c
Date
Msg-id 200806231928.m5NJSTE01812@momjian.us
Whole thread Raw
In response to Re: Simplify formatting.c  (Bruce Momjian <bruce@momjian.us>)
List pgsql-patches
Bruce Momjian wrote:
> > I am starting to think that the simplest case is to keep the single-copy
> > version in there for single-byte encodings and not worry about the
> > overhead of the multi-byte case.
>
> My new idea is if we pass the length to str_initcap, we can eliminate
> the string copy from text to char *.  That leaves us with just one extra
> string copy from char * to text, which seems acceptable.  We still have
> the wide char copy but I don't see any easy way to eliminate that
> because the multi-byte code is complex and not something we want to
> duplicate.

I ended up going in this direction: str_initcap now takes the byte
length, and I did the same for upper and lower.  Patch attached and
applied.  I don't see any other cleanups in this area.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.142
diff -c -c -r1.142 formatting.c
*** src/backend/utils/adt/formatting.c    17 Jun 2008 16:09:06 -0000    1.142
--- src/backend/utils/adt/formatting.c    23 Jun 2008 19:24:35 -0000
***************
*** 925,933 ****
  static char *str_numth(char *dest, char *num, int type);
  static int    strspace_len(char *str);
  static int    strdigits_len(char *str);
- static char *str_toupper(char *buff);
- static char *str_tolower(char *buff);
- static char *str_initcap(char *buff);

  static int    seq_search(char *name, char **array, int type, int max, int *len);
  static void do_to_timestamp(text *date_txt, text *fmt,
--- 925,930 ----
***************
*** 1424,1435 ****
      return dest;
  }

  /* ----------
!  * Convert string to upper case. It is designed to be multibyte-aware.
   * ----------
   */
! static char *
! str_toupper(char *buff)
  {
      char        *result;

--- 1421,1444 ----
      return dest;
  }

+ /*
+  * If the system provides the needed functions for wide-character manipulation
+  * (which are all standardized by C99), then we implement upper/lower/initcap
+  * using wide-character functions, if necessary.  Otherwise we use the
+  * traditional <ctype.h> functions, which of course will not work as desired
+  * in multibyte character sets.  Note that in either case we are effectively
+  * assuming that the database character encoding matches the encoding implied
+  * by LC_CTYPE.
+  */
+
  /* ----------
!  * wide-character-aware lower function
!  * We pass the number of bytes so we can pass varlena and char*
!  * to this function.
   * ----------
   */
! char *
! str_tolower(char *buff, size_t nbytes)
  {
      char        *result;

***************
*** 1438,1464 ****

  #ifdef USE_WIDE_UPPER_LOWER
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!         result = wstring_upper(buff);
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pstrdup(buff);

          for (p = result; *p; p++)
!             *p = pg_toupper((unsigned char) *p);
      }

      return result;
  }

  /* ----------
!  * Convert string to lower case. It is designed to be multibyte-aware.
   * ----------
   */
! static char *
! str_tolower(char *buff)
  {
      char        *result;

--- 1447,1492 ----

  #ifdef USE_WIDE_UPPER_LOWER
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         wchar_t        *workspace;
!         int            curr_char = 0;
!
!         /* Output workspace cannot have more codes than input bytes */
!         workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
!
!         char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
!
!         for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
!             workspace[curr_char] = towlower(workspace[curr_char]);
!
!         /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);
!
!         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
!         pfree(workspace);
!     }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pnstrdup(buff, nbytes);

          for (p = result; *p; p++)
!             *p = pg_tolower((unsigned char) *p);
      }

      return result;
  }

  /* ----------
!  * wide-character-aware upper function
!  * We pass the number of bytes so we can pass varlena and char*
!  * to this function.
   * ----------
   */
! char *
! str_toupper(char *buff, size_t nbytes)
  {
      char        *result;

***************
*** 1467,1493 ****

  #ifdef USE_WIDE_UPPER_LOWER
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!         result = wstring_lower(buff);
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pstrdup(buff);

          for (p = result; *p; p++)
!             *p = pg_tolower((unsigned char) *p);
      }

      return result;
  }
!
  /* ----------
   * wide-character-aware initcap function
   * ----------
   */
! static char *
! str_initcap(char *buff)
  {
      char        *result;
      bool        wasalnum = false;
--- 1495,1540 ----

  #ifdef USE_WIDE_UPPER_LOWER
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         wchar_t        *workspace;
!         int            curr_char = 0;
!
!         /* Output workspace cannot have more codes than input bytes */
!         workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
!
!         char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
!
!         for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
!             workspace[curr_char] = towupper(workspace[curr_char]);
!
!         /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);
!
!         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
!         pfree(workspace);
!     }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pnstrdup(buff, nbytes);

          for (p = result; *p; p++)
!             *p = pg_toupper((unsigned char) *p);
      }

      return result;
  }
!
  /* ----------
   * wide-character-aware initcap function
+  * We pass the number of bytes so we can pass varlena and char*
+  * to this function.
   * ----------
   */
! char *
! str_initcap(char *buff, size_t nbytes)
  {
      char        *result;
      bool        wasalnum = false;
***************
*** 1499,1533 ****
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          wchar_t        *workspace;
!         text        *in_text;
!         text        *out_text;
!         int            i;

!         in_text = cstring_to_text(buff);
!         workspace = texttowcs(in_text);

!         for (i = 0; workspace[i] != 0; i++)
          {
              if (wasalnum)
!                 workspace[i] = towlower(workspace[i]);
              else
!                 workspace[i] = towupper(workspace[i]);
!             wasalnum = iswalnum(workspace[i]);
          }

!         out_text = wcstotext(workspace, i);
!         result = text_to_cstring(out_text);

          pfree(workspace);
-         pfree(in_text);
-         pfree(out_text);
      }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pstrdup(buff);

          for (p = result; *p; p++)
          {
--- 1546,1579 ----
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          wchar_t        *workspace;
!         int            curr_char = 0;
!
!         /* Output workspace cannot have more codes than input bytes */
!         workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

!         char2wchar(workspace, nbytes + 1, buff, nbytes + 1);

!         for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
          {
              if (wasalnum)
!                 workspace[curr_char] = towlower(workspace[curr_char]);
              else
!                 workspace[curr_char] = towupper(workspace[curr_char]);
!             wasalnum = iswalnum(workspace[curr_char]);
          }

!         /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);

+         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
          pfree(workspace);
      }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pnstrdup(buff, nbytes);

          for (p = result; *p; p++)
          {
***************
*** 1851,1857 ****
                  {
                      char       *p = pstrdup(tmtcTzn(in));

!                     strcpy(s, str_tolower(p));
                      pfree(p);
                      s += strlen(s);
                  }
--- 1897,1903 ----
                  {
                      char       *p = pstrdup(tmtcTzn(in));

!                     strcpy(s, str_tolower(p, strlen(p)));
                      pfree(p);
                      s += strlen(s);
                  }
***************
*** 1893,1903 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1]));
                  else
                  {
                      strcpy(workbuff, months_full[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
                  }
                  s += strlen(s);
                  break;
--- 1939,1951 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1],
!                                 strlen(localized_full_months[tm->tm_mon - 1])));
                  else
                  {
                      strcpy(workbuff, months_full[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
!                                 str_toupper(workbuff, strlen(workbuff)));
                  }
                  s += strlen(s);
                  break;
***************
*** 1906,1912 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1]));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
                  s += strlen(s);
--- 1954,1961 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1],
!                                 strlen(localized_full_months[tm->tm_mon - 1])));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
                  s += strlen(s);
***************
*** 1916,1922 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1]));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
--- 1965,1972 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1],
!                                 strlen(localized_full_months[tm->tm_mon - 1])));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
***************
*** 1929,1937 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1]));
                  else
!                     strcpy(s, str_toupper(months[tm->tm_mon - 1]));
                  s += strlen(s);
                  break;
              case DCH_Mon:
--- 1979,1989 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1],
!                                 strlen(localized_abbrev_months[tm->tm_mon - 1])));
                  else
!                     strcpy(s, str_toupper(months[tm->tm_mon - 1],
!                                 strlen(months[tm->tm_mon - 1])));
                  s += strlen(s);
                  break;
              case DCH_Mon:
***************
*** 1939,1945 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1]));
                  else
                      strcpy(s, months[tm->tm_mon - 1]);
                  s += strlen(s);
--- 1991,1998 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1],
!                                 strlen(localized_abbrev_months[tm->tm_mon - 1])));
                  else
                      strcpy(s, months[tm->tm_mon - 1]);
                  s += strlen(s);
***************
*** 1949,1955 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1]));
                  else
                  {
                      strcpy(s, months[tm->tm_mon - 1]);
--- 2002,2009 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1],
!                                 strlen(localized_abbrev_months[tm->tm_mon - 1])));
                  else
                  {
                      strcpy(s, months[tm->tm_mon - 1]);
***************
*** 1966,1983 ****
              case DCH_DAY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_days[tm->tm_wday]));
                  else
                  {
                      strcpy(workbuff, days[tm->tm_wday]);
!                     sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
                  }
                  s += strlen(s);
                  break;
              case DCH_Day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_days[tm->tm_wday]));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
                  s += strlen(s);
--- 2020,2040 ----
              case DCH_DAY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_days[tm->tm_wday],
!                                 strlen(localized_full_days[tm->tm_wday])));
                  else
                  {
                      strcpy(workbuff, days[tm->tm_wday]);
!                     sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
!                                 str_toupper(workbuff, strlen(workbuff)));
                  }
                  s += strlen(s);
                  break;
              case DCH_Day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_days[tm->tm_wday],
!                                 strlen(localized_full_days[tm->tm_wday])));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
                  s += strlen(s);
***************
*** 1985,1991 ****
              case DCH_day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_days[tm->tm_wday]));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
--- 2042,2049 ----
              case DCH_day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_days[tm->tm_wday],
!                                 strlen(localized_full_days[tm->tm_wday])));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
***************
*** 1996,2010 ****
              case DCH_DY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday]));
                  else
!                     strcpy(s, str_toupper(days_short[tm->tm_wday]));
                  s += strlen(s);
                  break;
              case DCH_Dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday]));
                  else
                      strcpy(s, days_short[tm->tm_wday]);
                  s += strlen(s);
--- 2054,2071 ----
              case DCH_DY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday],
!                                 strlen(localized_abbrev_days[tm->tm_wday])));
                  else
!                     strcpy(s, str_toupper(days_short[tm->tm_wday],
!                                 strlen(days_short[tm->tm_wday])));
                  s += strlen(s);
                  break;
              case DCH_Dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday],
!                                 strlen(localized_abbrev_days[tm->tm_wday])));
                  else
                      strcpy(s, days_short[tm->tm_wday]);
                  s += strlen(s);
***************
*** 2012,2018 ****
              case DCH_dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday]));
                  else
                  {
                      strcpy(s, days_short[tm->tm_wday]);
--- 2073,2080 ----
              case DCH_dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday],
!                                 strlen(localized_abbrev_days[tm->tm_wday])));
                  else
                  {
                      strcpy(s, days_short[tm->tm_wday]);
***************
*** 4277,4288 ****
                  case NUM_rn:
                      if (IS_FILLMODE(Np->Num))
                      {
!                         strcpy(Np->inout_p, str_tolower(Np->number_p));
                          Np->inout_p += strlen(Np->inout_p) - 1;
                      }
                      else
                      {
!                         sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p));
                          Np->inout_p += strlen(Np->inout_p) - 1;
                      }
                      break;
--- 4339,4352 ----
                  case NUM_rn:
                      if (IS_FILLMODE(Np->Num))
                      {
!                         strcpy(Np->inout_p, str_tolower(Np->number_p,
!                                 strlen(Np->number_p)));
                          Np->inout_p += strlen(Np->inout_p) - 1;
                      }
                      else
                      {
!                         sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p,
!                                 strlen(Np->number_p)));
                          Np->inout_p += strlen(Np->inout_p) - 1;
                      }
                      break;
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.80
diff -c -c -r1.80 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c    17 Jun 2008 16:09:06 -0000    1.80
--- src/backend/utils/adt/oracle_compat.c    23 Jun 2008 19:24:35 -0000
***************
*** 29,320 ****
  #endif

  #include "utils/builtins.h"
  #include "utils/pg_locale.h"
  #include "mb/pg_wchar.h"


- /*
-  * If the system provides the needed functions for wide-character manipulation
-  * (which are all standardized by C99), then we implement upper/lower/initcap
-  * using wide-character functions.    Otherwise we use the traditional <ctype.h>
-  * functions, which of course will not work as desired in multibyte character
-  * sets.  Note that in either case we are effectively assuming that the
-  * database character encoding matches the encoding implied by LC_CTYPE.
-  */
- #ifdef USE_WIDE_UPPER_LOWER
- char       *wstring_lower(char *str);
- char       *wstring_upper(char *str);
- wchar_t       *texttowcs(const text *txt);
- text       *wcstotext(const wchar_t *str, int ncodes);
- #endif
-
  static text *dotrim(const char *string, int stringlen,
         const char *set, int setlen,
         bool doltrim, bool dortrim);


- #ifdef USE_WIDE_UPPER_LOWER
-
- /*
-  * Convert a TEXT value into a palloc'd wchar string.
-  */
- wchar_t *
- texttowcs(const text *txt)
- {
-     int            nbytes = VARSIZE_ANY_EXHDR(txt);
-     char       *workstr;
-     wchar_t    *result;
-     size_t        ncodes;
-
-     /* Overflow paranoia */
-     if (nbytes < 0 ||
-         nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
-         ereport(ERROR,
-                 (errcode(ERRCODE_OUT_OF_MEMORY),
-                  errmsg("out of memory")));
-
-     /* Need a null-terminated version of the input */
-     workstr = text_to_cstring(txt);
-
-     /* Output workspace cannot have more codes than input bytes */
-     result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-     /* Do the conversion */
-     ncodes = mbstowcs(result, workstr, nbytes + 1);
-
-     if (ncodes == (size_t) -1)
-     {
-         /*
-          * Invalid multibyte character encountered.  We try to give a useful
-          * error message by letting pg_verifymbstr check the string.  But it's
-          * possible that the string is OK to us, and not OK to mbstowcs ---
-          * this suggests that the LC_CTYPE locale is different from the
-          * database encoding.  Give a generic error message if verifymbstr
-          * can't find anything wrong.
-          */
-         pg_verifymbstr(workstr, nbytes, false);
-         ereport(ERROR,
-                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                  errmsg("invalid multibyte character for locale"),
-                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-     }
-
-     Assert(ncodes <= (size_t) nbytes);
-
-     return result;
- }
-
-
- /*
-  * Convert a wchar string into a palloc'd TEXT value.  The wchar string
-  * must be zero-terminated, but we also require the caller to pass the string
-  * length, since it will know it anyway in current uses.
-  */
- text *
- wcstotext(const wchar_t *str, int ncodes)
- {
-     text       *result;
-     size_t        nbytes;
-
-     /* Overflow paranoia */
-     if (ncodes < 0 ||
-         ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
-         ereport(ERROR,
-                 (errcode(ERRCODE_OUT_OF_MEMORY),
-                  errmsg("out of memory")));
-
-     /* Make workspace certainly large enough for result */
-     result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
-
-     /* Do the conversion */
-     nbytes = wcstombs((char *) VARDATA(result), str,
-                       (ncodes + 1) * MB_CUR_MAX);
-
-     if (nbytes == (size_t) -1)
-     {
-         /* Invalid multibyte character encountered ... shouldn't happen */
-         ereport(ERROR,
-                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                  errmsg("invalid multibyte character for locale")));
-     }
-
-     Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
-
-     SET_VARSIZE(result, nbytes + VARHDRSZ);
-
-     return result;
- }
- #endif   /* USE_WIDE_UPPER_LOWER */
-
-
- /*
-  * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
-  * To make use of the upper/lower functionality, we need to map UTF8 to
-  * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
-  * This conversion layer takes care of it.
-  */
-
- #ifdef WIN32
-
- /* texttowcs for the case of UTF8 to UTF16 */
- static wchar_t *
- win32_utf8_texttowcs(const text *txt)
- {
-     int            nbytes = VARSIZE_ANY_EXHDR(txt);
-     wchar_t    *result;
-     int            r;
-
-     /* Overflow paranoia */
-     if (nbytes < 0 ||
-         nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
-         ereport(ERROR,
-                 (errcode(ERRCODE_OUT_OF_MEMORY),
-                  errmsg("out of memory")));
-
-     /* Output workspace cannot have more codes than input bytes */
-     result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-     /* stupid Microsloth API does not work for zero-length input */
-     if (nbytes == 0)
-         r = 0;
-     else
-     {
-         /* Do the conversion */
-         r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
-                                 result, nbytes);
-
-         if (r <= 0)                /* assume it's NO_UNICODE_TRANSLATION */
-         {
-             /* see notes above about error reporting */
-             pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
-             ereport(ERROR,
-                     (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                      errmsg("invalid multibyte character for locale"),
-                      errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-         }
-     }
-
-     /* Append trailing null wchar (MultiByteToWideChar won't have) */
-     Assert(r <= nbytes);
-     result[r] = 0;
-
-     return result;
- }
-
- /* wcstotext for the case of UTF16 to UTF8 */
- static text *
- win32_utf8_wcstotext(const wchar_t *str)
- {
-     text       *result;
-     int            nbytes;
-     int            r;
-
-     /* Compute size of output string (this *will* include trailing null) */
-     nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
-     if (nbytes <= 0)            /* shouldn't happen */
-         ereport(ERROR,
-                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                  errmsg("UTF-16 to UTF-8 translation failed: %lu",
-                         GetLastError())));
-
-     result = palloc(nbytes + VARHDRSZ);
-
-     r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
-                             NULL, NULL);
-     if (r != nbytes)            /* shouldn't happen */
-         ereport(ERROR,
-                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                  errmsg("UTF-16 to UTF-8 translation failed: %lu",
-                         GetLastError())));
-
-     SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */
-
-     return result;
- }
-
- /* interface layer to check which encoding is in use */
-
- static wchar_t *
- win32_texttowcs(const text *txt)
- {
-     if (GetDatabaseEncoding() == PG_UTF8)
-         return win32_utf8_texttowcs(txt);
-     else
-         return texttowcs(txt);
- }
-
- static text *
- win32_wcstotext(const wchar_t *str, int ncodes)
- {
-     if (GetDatabaseEncoding() == PG_UTF8)
-         return win32_utf8_wcstotext(str);
-     else
-         return wcstotext(str, ncodes);
- }
-
- /* use macros to cause routines below to call interface layer */
-
- #define texttowcs    win32_texttowcs
- #define wcstotext    win32_wcstotext
- #endif   /* WIN32 */
-
- #ifdef USE_WIDE_UPPER_LOWER
- /*
-  * string_upper and string_lower are used for correct multibyte upper/lower
-  * transformations localized strings. Returns pointers to transformated
-  * string.
-  */
- char *
- wstring_upper(char *str)
- {
-     wchar_t    *workspace;
-     text       *in_text;
-     text       *out_text;
-     char       *result;
-     int            i;
-
-     in_text = cstring_to_text(str);
-     workspace = texttowcs(in_text);
-
-     for (i = 0; workspace[i] != 0; i++)
-         workspace[i] = towupper(workspace[i]);
-
-     out_text = wcstotext(workspace, i);
-     result = text_to_cstring(out_text);
-
-     pfree(workspace);
-     pfree(in_text);
-     pfree(out_text);
-
-     return result;
- }
-
- char *
- wstring_lower(char *str)
- {
-     wchar_t    *workspace;
-     text       *in_text;
-     text       *out_text;
-     char       *result;
-     int            i;
-
-     in_text = cstring_to_text(str);
-     workspace = texttowcs(in_text);
-
-     for (i = 0; workspace[i] != 0; i++)
-         workspace[i] = towlower(workspace[i]);
-
-     out_text = wcstotext(workspace, i);
-     result = text_to_cstring(out_text);
-
-     pfree(workspace);
-     pfree(in_text);
-     pfree(out_text);
-
-     return result;
- }
- #endif   /* USE_WIDE_UPPER_LOWER */
-
  /********************************************************************
   *
   * lower
--- 29,44 ----
  #endif

  #include "utils/builtins.h"
+ #include "utils/formatting.h"
  #include "utils/pg_locale.h"
  #include "mb/pg_wchar.h"


  static text *dotrim(const char *string, int stringlen,
         const char *set, int setlen,
         bool doltrim, bool dortrim);


  /********************************************************************
   *
   * lower
***************
*** 332,383 ****
  Datum
  lower(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER
!
!     /*
!      * Use wide char code only when max encoding length > 1 and ctype != C.
!      * Some operating systems fail with multi-byte encodings and a C locale.
!      * Also, for a C locale there is no need to process as multibyte.
!      */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         text       *string = PG_GETARG_TEXT_PP(0);
!         text       *result;
!         wchar_t    *workspace;
!         int            i;
!
!         workspace = texttowcs(string);
!
!         for (i = 0; workspace[i] != 0; i++)
!             workspace[i] = towlower(workspace[i]);

!         result = wcstotext(workspace, i);
!
!         pfree(workspace);
!
!         PG_RETURN_TEXT_P(result);
!     }
!     else
! #endif   /* USE_WIDE_UPPER_LOWER */
!     {
!         text       *string = PG_GETARG_TEXT_P_COPY(0);
!         char       *ptr;
!         int            m;
!
!         /*
!          * Since we copied the string, we can scribble directly on the value
!          */
!         ptr = VARDATA(string);
!         m = VARSIZE(string) - VARHDRSZ;
!
!         while (m-- > 0)
!         {
!             *ptr = tolower((unsigned char) *ptr);
!             ptr++;
!         }
!
!         PG_RETURN_TEXT_P(string);
!     }
  }


--- 56,70 ----
  Datum
  lower(PG_FUNCTION_ARGS)
  {
!     text    *in_string = PG_GETARG_TEXT_PP(0);
!     char    *out_string;
!     text    *result;
!
!     out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
!     result = cstring_to_text(out_string);
!     pfree(out_string);

!     PG_RETURN_TEXT_P(result);
  }


***************
*** 398,449 ****
  Datum
  upper(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER

!     /*
!      * Use wide char code only when max encoding length > 1 and ctype != C.
!      * Some operating systems fail with multi-byte encodings and a C locale.
!      * Also, for a C locale there is no need to process as multibyte.
!      */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         text       *string = PG_GETARG_TEXT_PP(0);
!         text       *result;
!         wchar_t    *workspace;
!         int            i;
!
!         workspace = texttowcs(string);
!
!         for (i = 0; workspace[i] != 0; i++)
!             workspace[i] = towupper(workspace[i]);
!
!         result = wcstotext(workspace, i);
!
!         pfree(workspace);
!
!         PG_RETURN_TEXT_P(result);
!     }
!     else
! #endif   /* USE_WIDE_UPPER_LOWER */
!     {
!         text       *string = PG_GETARG_TEXT_P_COPY(0);
!         char       *ptr;
!         int            m;
!
!         /*
!          * Since we copied the string, we can scribble directly on the value
!          */
!         ptr = VARDATA(string);
!         m = VARSIZE(string) - VARHDRSZ;
!
!         while (m-- > 0)
!         {
!             *ptr = toupper((unsigned char) *ptr);
!             ptr++;
!         }
!
!         PG_RETURN_TEXT_P(string);
!     }
  }


--- 85,99 ----
  Datum
  upper(PG_FUNCTION_ARGS)
  {
!     text    *in_string = PG_GETARG_TEXT_PP(0);
!     char    *out_string;
!     text    *result;
!
!     out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
!     result = cstring_to_text(out_string);
!     pfree(out_string);

!     PG_RETURN_TEXT_P(result);
  }


***************
*** 467,530 ****
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER

!     /*
!      * Use wide char code only when max encoding length > 1 and ctype != C.
!      * Some operating systems fail with multi-byte encodings and a C locale.
!      * Also, for a C locale there is no need to process as multibyte.
!      */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         text       *string = PG_GETARG_TEXT_PP(0);
!         text       *result;
!         wchar_t    *workspace;
!         int            wasalnum = 0;
!         int            i;
!
!         workspace = texttowcs(string);
!
!         for (i = 0; workspace[i] != 0; i++)
!         {
!             if (wasalnum)
!                 workspace[i] = towlower(workspace[i]);
!             else
!                 workspace[i] = towupper(workspace[i]);
!             wasalnum = iswalnum(workspace[i]);
!         }
!
!         result = wcstotext(workspace, i);
!
!         pfree(workspace);
!
!         PG_RETURN_TEXT_P(result);
!     }
!     else
! #endif   /* USE_WIDE_UPPER_LOWER */
!     {
!         text       *string = PG_GETARG_TEXT_P_COPY(0);
!         int            wasalnum = 0;
!         char       *ptr;
!         int            m;
!
!         /*
!          * Since we copied the string, we can scribble directly on the value
!          */
!         ptr = VARDATA(string);
!         m = VARSIZE(string) - VARHDRSZ;
!
!         while (m-- > 0)
!         {
!             if (wasalnum)
!                 *ptr = tolower((unsigned char) *ptr);
!             else
!                 *ptr = toupper((unsigned char) *ptr);
!             wasalnum = isalnum((unsigned char) *ptr);
!             ptr++;
!         }
!
!         PG_RETURN_TEXT_P(string);
!     }
  }


--- 117,131 ----
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
!     text    *in_string = PG_GETARG_TEXT_PP(0);
!     char    *out_string;
!     text    *result;
!
!     out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
!     result = cstring_to_text(out_string);
!     pfree(out_string);

!     PG_RETURN_TEXT_P(result);
  }


Index: src/include/utils/formatting.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/formatting.h,v
retrieving revision 1.18
diff -c -c -r1.18 formatting.h
*** src/include/utils/formatting.h    1 Jan 2008 19:45:59 -0000    1.18
--- src/include/utils/formatting.h    23 Jun 2008 19:24:36 -0000
***************
*** 21,26 ****
--- 21,30 ----
  #include "fmgr.h"


+ extern char *str_tolower(char *buff, size_t nbytes);
+ extern char *str_toupper(char *buff, size_t nbytes);
+ extern char *str_initcap(char *buff, size_t nbytes);
+
  extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
  extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
  extern Datum interval_to_char(PG_FUNCTION_ARGS);

pgsql-patches by date:

Previous
From: Neil Conway
Date:
Subject: Re: A GUC variable to replace PGBE_ACTIVITY_SIZE
Next
From: Bruce Momjian
Date:
Subject: Re: Simplify formatting.c