Thread: Simplify formatting.c

Simplify formatting.c

From

Bruce Momjian

Date:

19 May 2008, 22:56:08

Now that upper/lower/initcap do not modify the passed string, I have
simplified formatting.c with the attached patch.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.140
diff -c -c -r1.140 formatting.c
*** src/backend/utils/adt/formatting.c    19 May 2008 18:08:15 -0000    1.140
--- src/backend/utils/adt/formatting.c    20 May 2008 01:37:23 -0000
***************
*** 1894,1903 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_full_months[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", 0, str_toupper(workbuff));
!                 }
                  else
                  {
                      strcpy(workbuff, months_full[tm->tm_mon - 1]);
--- 1894,1900 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1]));
                  else
                  {
                      strcpy(workbuff, months_full[tm->tm_mon - 1]);
***************
*** 1910,1923 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_full_months[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", 0, str_initcap(workbuff));
!                 }
                  else
-                 {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
-                 }
                  s += strlen(s);
                  break;
              case DCH_month:
--- 1907,1915 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1]));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
                  s += strlen(s);
                  break;
              case DCH_month:
***************
*** 1925,1934 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_full_months[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", 0, str_tolower(workbuff));
!                 }
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
--- 1917,1923 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1]));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
***************
*** 1941,1955 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_abbrev_months[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", 0, str_toupper(workbuff));
!                 }
                  else
!                 {
!                     strcpy(workbuff, months[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", 0, str_toupper(workbuff));
!                 }
                  s += strlen(s);
                  break;
              case DCH_Mon:
--- 1930,1938 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1]));
                  else
!                     strcpy(s, str_toupper(months[tm->tm_mon - 1]));
                  s += strlen(s);
                  break;
              case DCH_Mon:
***************
*** 1957,1970 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_abbrev_months[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", 0, str_initcap(workbuff));
!                 }
                  else
-                 {
                      strcpy(s, months[tm->tm_mon - 1]);
-                 }
                  s += strlen(s);
                  break;
              case DCH_mon:
--- 1940,1948 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1]));
                  else
                      strcpy(s, months[tm->tm_mon - 1]);
                  s += strlen(s);
                  break;
              case DCH_mon:
***************
*** 1972,1981 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_abbrev_months[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", 0, str_tolower(workbuff));
!                 }
                  else
                  {
                      strcpy(s, months[tm->tm_mon - 1]);
--- 1950,1956 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1]));
                  else
                  {
                      strcpy(s, months[tm->tm_mon - 1]);
***************
*** 1992,2001 ****
              case DCH_DAY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_full_days[tm->tm_wday]);
!                     sprintf(s, "%*s", 0, str_toupper(workbuff));
!                 }
                  else
                  {
                      strcpy(workbuff, days[tm->tm_wday]);
--- 1967,1973 ----
              case DCH_DAY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_days[tm->tm_wday]));
                  else
                  {
                      strcpy(workbuff, days[tm->tm_wday]);
***************
*** 2006,2028 ****
              case DCH_Day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_full_days[tm->tm_wday]);
!                     sprintf(s, "%*s", 0, str_initcap(workbuff));
!                 }
                  else
-                 {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
-                 }
                  s += strlen(s);
                  break;
              case DCH_day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_full_days[tm->tm_wday]);
!                     sprintf(s, "%*s", 0, str_tolower(workbuff));
!                 }
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
--- 1978,1992 ----
              case DCH_Day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_days[tm->tm_wday]));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
                  s += strlen(s);
                  break;
              case DCH_day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_days[tm->tm_wday]));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
***************
*** 2033,2069 ****
              case DCH_DY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_abbrev_days[tm->tm_wday]);
!                     sprintf(s, "%*s", 0, str_toupper(workbuff));
!                 }
                  else
!                 {
!                     strcpy(workbuff, days_short[tm->tm_wday]);
!                     sprintf(s, "%*s", 0, str_toupper(workbuff));
!                 }
                  s += strlen(s);
                  break;
              case DCH_Dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_abbrev_days[tm->tm_wday]);
!                     sprintf(s, "%*s", 0, str_initcap(workbuff));
!                 }
                  else
-                 {
                      strcpy(s, days_short[tm->tm_wday]);
-                 }
                  s += strlen(s);
                  break;
              case DCH_dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                 {
!                     strcpy(workbuff, localized_abbrev_days[tm->tm_wday]);
!                     sprintf(s, "%*s", 0, str_tolower(workbuff));
!                 }
                  else
                  {
                      strcpy(s, days_short[tm->tm_wday]);
--- 1997,2019 ----
              case DCH_DY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday]));
                  else
!                     strcpy(s, str_toupper(days_short[tm->tm_wday]));
                  s += strlen(s);
                  break;
              case DCH_Dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday]));
                  else
                      strcpy(s, days_short[tm->tm_wday]);
                  s += strlen(s);
                  break;
              case DCH_dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday]));
                  else
                  {
                      strcpy(s, days_short[tm->tm_wday]);

Re: Simplify formatting.c

From

Tom Lane

Date:

19 May 2008, 23:29:14

Bruce Momjian <bruce@momjian.us> writes:
> Now that upper/lower/initcap do not modify the passed string, I have
> simplified formatting.c with the attached patch.

I was thinking the same thing while reading the patch.  But please,
make str_toupper() and friends declare their argument "const" if you're
going to do this.

Another issue in this area is that these functions could do with some
refactoring to eliminate useless text/cstring conversions; I'm pretty
sure some code paths are doing cstring->text->cstring in direct
succession.  Also, it seems a bit inconsistent to be relying on
oracle_compat.c for upper/lower but not initcap.

            regards, tom lane

Re: Simplify formatting.c

From

Euler Taveira de Oliveira

Date:

20 May 2008, 11:57:30

Tom Lane wrote:

> Also, it seems a bit inconsistent to be relying on
> oracle_compat.c for upper/lower but not initcap.
>
I saw this inconsistency while I was doing the patch. What about moving
that upper/lower/initcap and wcs* code to another file, such as pg_locale.c?
BTW, formatting.c and oracle_compat.c already include pg_locale.h.


--
   Euler Taveira de Oliveira
   http://www.timbira.com/

Re: Simplify formatting.c

From

Tom Lane

Date:

20 May 2008, 20:09:44

Euler Taveira de Oliveira <euler@timbira.com> writes:
> Tom Lane wrote:
>> Also, it seems a bit inconsistent to be relying on
>> oracle_compat.c for upper/lower but not initcap.
>>
> I saw this inconsistency while I was doing the patch. What about moving
> that upper/lower/initcap and wcs* code to another file, such as pg_locale.c?

That doesn't seem a particularly appropriate place for them.  pg_locale
is about dealing with the locale state, not about doing actual
operations based on the locale data.

I was just thinking of having oracle_compat expose an initcap routine.

            regards, tom lane

Re: Simplify formatting.c

From

Bruce Momjian

Date:

13 June 2008, 21:16:52

Tom Lane wrote:
> Euler Taveira de Oliveira <euler@timbira.com> writes:
> > Tom Lane wrote:
> >> Also, it seems a bit inconsistent to be relying on
> >> oracle_compat.c for upper/lower but not initcap.
> >>
> > I saw this inconsistency while I was doing the patch. What about moving
> > that upper/lower/initcap and wcs* code to another file, such as pg_locale.c?
>
> That doesn't seem a particularly appropriate place for them.  pg_locale
> is about dealing with the locale state, not about doing actual
> operations based on the locale data.
>
> I was just thinking of having oracle_compat expose an initcap routine.

You mean like the attached?

I moved str_initcap() over into oracle_compat.c and then had initcap()
convert to/from TEXT to call it.  The code is a little weird because
str_initcap() needs to convert to text to use texttowcs(), so in
multibyte encodings initcap converts the string to text, then to char,
then to text to call texttowcs().  I didn't see a cleaner way to do
this.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.141
diff -c -c -r1.141 formatting.c
*** src/backend/utils/adt/formatting.c    20 May 2008 01:41:02 -0000    1.141
--- src/backend/utils/adt/formatting.c    13 Jun 2008 22:01:18 -0000
***************
*** 927,933 ****
  static int    strdigits_len(char *str);
  static char *str_toupper(char *buff);
  static char *str_tolower(char *buff);
- static char *str_initcap(char *buff);

  static int    seq_search(char *name, char **array, int type, int max, int *len);
  static void do_to_timestamp(text *date_txt, text *fmt,
--- 927,932 ----
***************
*** 1484,1549 ****
  }

  /* ----------
-  * wide-character-aware initcap function
-  * ----------
-  */
- static char *
- str_initcap(char *buff)
- {
-     char        *result;
-     bool        wasalnum = false;
-
-     if (!buff)
-         return NULL;
-
- #ifdef USE_WIDE_UPPER_LOWER
-     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
-     {
-         wchar_t        *workspace;
-         text        *in_text;
-         text        *out_text;
-         int            i;
-
-         in_text = cstring_to_text(buff);
-         workspace = texttowcs(in_text);
-
-         for (i = 0; workspace[i] != 0; i++)
-         {
-             if (wasalnum)
-                 workspace[i] = towlower(workspace[i]);
-             else
-                 workspace[i] = towupper(workspace[i]);
-             wasalnum = iswalnum(workspace[i]);
-         }
-
-         out_text = wcstotext(workspace, i);
-         result = text_to_cstring(out_text);
-
-         pfree(workspace);
-         pfree(in_text);
-         pfree(out_text);
-     }
-     else
- #endif        /* USE_WIDE_UPPER_LOWER */
-     {
-         char *p;
-
-         result = pstrdup(buff);
-
-         for (p = result; *p; p++)
-         {
-             if (wasalnum)
-                 *p = pg_tolower((unsigned char) *p);
-             else
-                 *p = pg_toupper((unsigned char) *p);
-             wasalnum = isalnum((unsigned char) *p);
-         }
-     }
-
-     return result;
- }
-
- /* ----------
   * Sequential search with to upper/lower conversion
   * ----------
   */
--- 1483,1488 ----
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.79
diff -c -c -r1.79 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c    19 May 2008 18:08:16 -0000    1.79
--- src/backend/utils/adt/oracle_compat.c    13 Jun 2008 22:01:18 -0000
***************
*** 471,478 ****
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER

      /*
       * Use wide char code only when max encoding length > 1 and ctype != C.
       * Some operating systems fail with multi-byte encodings and a C locale.
--- 471,496 ----
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
!     text       *string = PG_GETARG_TEXT_PP(0);
!     char       *str2;
!
!     str2 = str_initcap(DatumGetCString(string));
!     string = cstring_to_text(str2);
!     pfree(str2);
!     PG_RETURN_TEXT_P(string);
! }
!

+ char *
+ str_initcap(char *str)
+ {
+     char        *result;
+     int            wasalnum = 0;
+
+     if (!str)
+         return NULL;
+
+ #ifdef USE_WIDE_UPPER_LOWER
      /*
       * Use wide char code only when max encoding length > 1 and ctype != C.
       * Some operating systems fail with multi-byte encodings and a C locale.
***************
*** 480,492 ****
       */
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
-         text       *string = PG_GETARG_TEXT_PP(0);
-         text       *result;
          wchar_t    *workspace;
!         int            wasalnum = 0;
          int            i;

!         workspace = texttowcs(string);

          for (i = 0; workspace[i] != 0; i++)
          {
--- 498,510 ----
       */
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          wchar_t    *workspace;
!         text        *in_text;
!         text        *out_text;
          int            i;

!         in_text = cstring_to_text(str);
!         workspace = texttowcs(in_text);

          for (i = 0; workspace[i] != 0; i++)
          {
***************
*** 497,533 ****
              wasalnum = iswalnum(workspace[i]);
          }

!         result = wcstotext(workspace, i);

          pfree(workspace);

!         PG_RETURN_TEXT_P(result);
      }
      else
  #endif   /* USE_WIDE_UPPER_LOWER */
      {
-         text       *string = PG_GETARG_TEXT_P_COPY(0);
-         int            wasalnum = 0;
          char       *ptr;
-         int            m;

          /*
           * Since we copied the string, we can scribble directly on the value
           */
!         ptr = VARDATA(string);
!         m = VARSIZE(string) - VARHDRSZ;
!
!         while (m-- > 0)
          {
              if (wasalnum)
                  *ptr = tolower((unsigned char) *ptr);
              else
                  *ptr = toupper((unsigned char) *ptr);
              wasalnum = isalnum((unsigned char) *ptr);
-             ptr++;
          }

!         PG_RETURN_TEXT_P(string);
      }
  }

--- 515,548 ----
              wasalnum = iswalnum(workspace[i]);
          }

!         out_text = wcstotext(workspace, i);
!         result = text_to_cstring(out_text);

          pfree(workspace);
+         pfree(in_text);
+         pfree(out_text);

!         return result;
      }
      else
  #endif   /* USE_WIDE_UPPER_LOWER */
      {
          char       *ptr;

+         result = pstrdup(str);
          /*
           * Since we copied the string, we can scribble directly on the value
           */
!         for (ptr = result; *ptr; ptr++)
          {
              if (wasalnum)
                  *ptr = tolower((unsigned char) *ptr);
              else
                  *ptr = toupper((unsigned char) *ptr);
              wasalnum = isalnum((unsigned char) *ptr);
          }

!         return result;
      }
  }

Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.316
diff -c -c -r1.316 builtins.h
*** src/include/utils/builtins.h    27 May 2008 00:13:09 -0000    1.316
--- src/include/utils/builtins.h    13 Jun 2008 22:01:19 -0000
***************
*** 727,732 ****
--- 727,733 ----
  extern Datum lower(PG_FUNCTION_ARGS);
  extern Datum upper(PG_FUNCTION_ARGS);
  extern Datum initcap(PG_FUNCTION_ARGS);
+ extern char *str_initcap(char *str);
  extern Datum lpad(PG_FUNCTION_ARGS);
  extern Datum rpad(PG_FUNCTION_ARGS);
  extern Datum btrim(PG_FUNCTION_ARGS);

Re: Simplify formatting.c

From

Alvaro Herrera

Date:

14 June 2008, 18:30:11

Bruce Momjian wrote:

> I moved str_initcap() over into oracle_compat.c and then had initcap()
> convert to/from TEXT to call it.  The code is a little weird because
> str_initcap() needs to convert to text to use texttowcs(), so in
> multibyte encodings initcap converts the string to text, then to char,
> then to text to call texttowcs().  I didn't see a cleaner way to do
> this.

Why not use wchar2char?  It seems there's room for extra cleanup here.

Also, the prototype of str_initcap in builtins.h looks out of place.

--
Alvaro Herrera                                http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

Re: Simplify formatting.c

From

Bruce Momjian

Date:

17 June 2008, 13:10:54

Alvaro Herrera wrote:
> Bruce Momjian wrote:
>
> > I moved str_initcap() over into oracle_compat.c and then had initcap()
> > convert to/from TEXT to call it.  The code is a little weird because
> > str_initcap() needs to convert to text to use texttowcs(), so in
> > multibyte encodings initcap converts the string to text, then to char,
> > then to text to call texttowcs().  I didn't see a cleaner way to do
> > this.
>
> Why not use wchar2char?  It seems there's room for extra cleanup here.
>
> Also, the prototype of str_initcap in builtins.h looks out of place.

I talked to Alvaro on IM, and there is certainly much more cleanup to do
in this area. I will work from the bottom up.  The first step is moving the
USE_WIDE_UPPER_LOWER define to c.h, and removing TS_USE_WIDE and using
USE_WIDE_UPPER_LOWER instead.  Patch attached and applied.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/tsearch/regis.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/regis.c,v
retrieving revision 1.4
diff -c -c -r1.4 regis.c
*** src/backend/tsearch/regis.c    21 Jan 2008 02:46:10 -0000    1.4
--- src/backend/tsearch/regis.c    17 Jun 2008 16:06:54 -0000
***************
*** 178,184 ****
      r->node = NULL;
  }

! #ifdef TS_USE_WIDE
  static bool
  mb_strchr(char *str, char *c)
  {
--- 178,184 ----
      r->node = NULL;
  }

! #ifdef USE_WIDE_UPPER_LOWER
  static bool
  mb_strchr(char *str, char *c)
  {
Index: src/backend/tsearch/ts_locale.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/ts_locale.c,v
retrieving revision 1.7
diff -c -c -r1.7 ts_locale.c
*** src/backend/tsearch/ts_locale.c    1 Jan 2008 19:45:52 -0000    1.7
--- src/backend/tsearch/ts_locale.c    17 Jun 2008 16:06:54 -0000
***************
*** 17,23 ****
  #include "tsearch/ts_public.h"


! #ifdef TS_USE_WIDE

  /*
   * wchar2char --- convert wide characters to multibyte format
--- 17,23 ----
  #include "tsearch/ts_public.h"


! #ifdef USE_WIDE_UPPER_LOWER

  /*
   * wchar2char --- convert wide characters to multibyte format
***************
*** 190,196 ****

      return iswprint((wint_t) character[0]);
  }
! #endif   /* TS_USE_WIDE */


  /*
--- 190,196 ----

      return iswprint((wint_t) character[0]);
  }
! #endif   /* USE_WIDE_UPPER_LOWER */


  /*
***************
*** 260,266 ****
      if (len == 0)
          return pstrdup("");

! #ifdef TS_USE_WIDE

      /*
       * Use wide char code only when max encoding length > 1 and ctype != C.
--- 260,266 ----
      if (len == 0)
          return pstrdup("");

! #ifdef USE_WIDE_UPPER_LOWER

      /*
       * Use wide char code only when max encoding length > 1 and ctype != C.
***************
*** 307,313 ****
          Assert(wlen < len);
      }
      else
! #endif   /* TS_USE_WIDE */
      {
          const char *ptr = str;
          char       *outptr;
--- 307,313 ----
          Assert(wlen < len);
      }
      else
! #endif   /* USE_WIDE_UPPER_LOWER */
      {
          const char *ptr = str;
          char       *outptr;
Index: src/backend/tsearch/wparser_def.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/wparser_def.c,v
retrieving revision 1.14
diff -c -c -r1.14 wparser_def.c
*** src/backend/tsearch/wparser_def.c    1 Jan 2008 19:45:52 -0000    1.14
--- src/backend/tsearch/wparser_def.c    17 Jun 2008 16:06:54 -0000
***************
*** 238,244 ****
      /* string and position information */
      char       *str;            /* multibyte string */
      int            lenstr;            /* length of mbstring */
! #ifdef TS_USE_WIDE
      wchar_t    *wstr;            /* wide character string */
      int            lenwstr;        /* length of wsting */
  #endif
--- 238,244 ----
      /* string and position information */
      char       *str;            /* multibyte string */
      int            lenstr;            /* length of mbstring */
! #ifdef USE_WIDE_UPPER_LOWER
      wchar_t    *wstr;            /* wide character string */
      int            lenwstr;        /* length of wsting */
  #endif
***************
*** 291,297 ****
      prs->str = str;
      prs->lenstr = len;

! #ifdef TS_USE_WIDE

      /*
       * Use wide char code only when max encoding length > 1.
--- 291,297 ----
      prs->str = str;
      prs->lenstr = len;

! #ifdef USE_WIDE_UPPER_LOWER

      /*
       * Use wide char code only when max encoding length > 1.
***************
*** 328,334 ****
          prs->state = ptr;
      }

! #ifdef TS_USE_WIDE
      if (prs->wstr)
          pfree(prs->wstr);
  #endif
--- 328,334 ----
          prs->state = ptr;
      }

! #ifdef USE_WIDE_UPPER_LOWER
      if (prs->wstr)
          pfree(prs->wstr);
  #endif
***************
*** 344,350 ****
   * often are used for Asian languages
   */

! #ifdef TS_USE_WIDE

  #define p_iswhat(type)                                                        \
  static int                                                                    \
--- 344,350 ----
   * often are used for Asian languages
   */

! #ifdef USE_WIDE_UPPER_LOWER

  #define p_iswhat(type)                                                        \
  static int                                                                    \
***************
*** 439,445 ****
      Assert(prs->state);
      return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0;
  }
! #else                            /* TS_USE_WIDE */

  #define p_iswhat(type)                                                        \
  static int                                                                    \
--- 439,445 ----
      Assert(prs->state);
      return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0;
  }
! #else                            /* USE_WIDE_UPPER_LOWER */

  #define p_iswhat(type)                                                        \
  static int                                                                    \
***************
*** 463,469 ****

  p_iswhat(alnum)
  p_iswhat(alpha)
! #endif   /* TS_USE_WIDE */

  p_iswhat(digit)
  p_iswhat(lower)
--- 463,469 ----

  p_iswhat(alnum)
  p_iswhat(alpha)
! #endif   /* USE_WIDE_UPPER_LOWER */

  p_iswhat(digit)
  p_iswhat(lower)
Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.141
diff -c -c -r1.141 formatting.c
*** src/backend/utils/adt/formatting.c    20 May 2008 01:41:02 -0000    1.141
--- src/backend/utils/adt/formatting.c    17 Jun 2008 16:06:54 -0000
***************
*** 948,955 ****
  static NUMCacheEntry *NUM_cache_getnew(char *str);
  static void NUM_cache_remove(NUMCacheEntry *ent);

! #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
! #define USE_WIDE_UPPER_LOWER
  /* externs are in oracle_compat.c */
  extern char *wstring_upper(char *str);
  extern char *wstring_lower(char *str);
--- 948,954 ----
  static NUMCacheEntry *NUM_cache_getnew(char *str);
  static void NUM_cache_remove(NUMCacheEntry *ent);

! #ifdef USE_WIDE_UPPER_LOWER
  /* externs are in oracle_compat.c */
  extern char *wstring_upper(char *str);
  extern char *wstring_lower(char *str);
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.79
diff -c -c -r1.79 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c    19 May 2008 18:08:16 -0000    1.79
--- src/backend/utils/adt/oracle_compat.c    17 Jun 2008 16:06:54 -0000
***************
*** 40,51 ****
   * functions, which of course will not work as desired in multibyte character
   * sets.  Note that in either case we are effectively assuming that the
   * database character encoding matches the encoding implied by LC_CTYPE.
-  *
-  * We assume if we have these two functions, we have their friends too, and
-  * can use the wide-character method.
   */
! #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
! #define USE_WIDE_UPPER_LOWER
  char       *wstring_lower(char *str);
  char       *wstring_upper(char *str);
  wchar_t       *texttowcs(const text *txt);
--- 40,47 ----
   * functions, which of course will not work as desired in multibyte character
   * sets.  Note that in either case we are effectively assuming that the
   * database character encoding matches the encoding implied by LC_CTYPE.
   */
! #ifdef USE_WIDE_UPPER_LOWER
  char       *wstring_lower(char *str);
  char       *wstring_upper(char *str);
  wchar_t       *texttowcs(const text *txt);
Index: src/include/c.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/c.h,v
retrieving revision 1.226
diff -c -c -r1.226 c.h
*** src/include/c.h    21 Apr 2008 00:26:46 -0000    1.226
--- src/include/c.h    17 Jun 2008 16:06:55 -0000
***************
*** 813,818 ****
--- 813,826 ----
  #define HAVE_STRTOULL 1
  #endif

+ /*
+  * We assume if we have these two functions, we have their friends too, and
+  * can use the wide-character functions.
+  */
+ #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
+ #define USE_WIDE_UPPER_LOWER
+ #endif
+
  /* EXEC_BACKEND defines */
  #ifdef EXEC_BACKEND
  #define NON_EXEC_STATIC
Index: src/include/tsearch/ts_locale.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/tsearch/ts_locale.h,v
retrieving revision 1.5
diff -c -c -r1.5 ts_locale.h
*** src/include/tsearch/ts_locale.h    1 Jan 2008 19:45:59 -0000    1.5
--- src/include/tsearch/ts_locale.h    17 Jun 2008 16:06:55 -0000
***************
*** 29,41 ****
  #include <wctype.h>
  #endif

- #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
- #define TS_USE_WIDE
- #endif
-
  #define TOUCHAR(x)    (*((const unsigned char *) (x)))

! #ifdef TS_USE_WIDE

  extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
  extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
--- 29,37 ----
  #include <wctype.h>
  #endif

  #define TOUCHAR(x)    (*((const unsigned char *) (x)))

! #ifdef USE_WIDE_UPPER_LOWER

  extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
  extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
***************
*** 49,55 ****
  #define t_iseq(x,c)        (TOUCHAR(x) == (unsigned char) (c))

  #define COPYCHAR(d,s)    memcpy(d, s, pg_mblen(s))
! #else                            /* not TS_USE_WIDE */

  #define t_isdigit(x)    isdigit(TOUCHAR(x))
  #define t_isspace(x)    isspace(TOUCHAR(x))
--- 45,51 ----
  #define t_iseq(x,c)        (TOUCHAR(x) == (unsigned char) (c))

  #define COPYCHAR(d,s)    memcpy(d, s, pg_mblen(s))
! #else                            /* not USE_WIDE_UPPER_LOWER */

  #define t_isdigit(x)    isdigit(TOUCHAR(x))
  #define t_isspace(x)    isspace(TOUCHAR(x))
***************
*** 58,64 ****
  #define t_iseq(x,c)        (TOUCHAR(x) == (unsigned char) (c))

  #define COPYCHAR(d,s)    (*((unsigned char *) (d)) = TOUCHAR(s))
! #endif   /* TS_USE_WIDE */

  extern char *lowerstr(const char *str);
  extern char *lowerstr_with_len(const char *str, int len);
--- 54,60 ----
  #define t_iseq(x,c)        (TOUCHAR(x) == (unsigned char) (c))

  #define COPYCHAR(d,s)    (*((unsigned char *) (d)) = TOUCHAR(s))
! #endif   /* USE_WIDE_UPPER_LOWER */

  extern char *lowerstr(const char *str);
  extern char *lowerstr_with_len(const char *str, int len);

Re: Simplify formatting.c

From

Bruce Momjian

Date:

18 June 2008, 15:43:49

Bruce Momjian wrote:
> Alvaro Herrera wrote:
> > Bruce Momjian wrote:
> >
> > > I moved str_initcap() over into oracle_compat.c and then had initcap()
> > > convert to/from TEXT to call it.  The code is a little weird because
> > > str_initcap() needs to convert to text to use texttowcs(), so in
> > > multibyte encodings initcap converts the string to text, then to char,
> > > then to text to call texttowcs().  I didn't see a cleaner way to do
> > > this.
> >
> > Why not use wchar2char?  It seems there's room for extra cleanup here.
> >
> > Also, the prototype of str_initcap in builtins.h looks out of place.
>
> I talked to Alvaro on IM, and there is certainly much more cleanup to do
> in this area. I will work from the bottom up.  First, is moving the
> USE_WIDE_UPPER_LOWER define to c.h, and removing TS_USE_WIDE and using
> USE_WIDE_UPPER_LOWER instead.  Patch attached and applied.

The second step is to move wchar2char() and char2wchar() from tsearch
into /mb to be easier to use for other modules;  also move pnstrdup().

Patch attached and applied.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/tsearch/ts_locale.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/ts_locale.c,v
retrieving revision 1.8
diff -c -c -r1.8 ts_locale.c
*** src/backend/tsearch/ts_locale.c    17 Jun 2008 16:09:06 -0000    1.8
--- src/backend/tsearch/ts_locale.c    18 Jun 2008 18:37:02 -0000
***************
*** 16,140 ****
  #include "tsearch/ts_locale.h"
  #include "tsearch/ts_public.h"

-
  #ifdef USE_WIDE_UPPER_LOWER

- /*
-  * wchar2char --- convert wide characters to multibyte format
-  *
-  * This has the same API as the standard wcstombs() function; in particular,
-  * tolen is the maximum number of bytes to store at *to, and *from must be
-  * zero-terminated.  The output will be zero-terminated iff there is room.
-  */
- size_t
- wchar2char(char *to, const wchar_t *from, size_t tolen)
- {
-     if (tolen == 0)
-         return 0;
-
- #ifdef WIN32
-     if (GetDatabaseEncoding() == PG_UTF8)
-     {
-         int            r;
-
-         r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
-                                 NULL, NULL);
-
-         if (r <= 0)
-             return (size_t) -1;
-
-         Assert(r <= tolen);
-
-         /* Microsoft counts the zero terminator in the result */
-         return r - 1;
-     }
- #endif   /* WIN32 */
-
-     return wcstombs(to, from, tolen);
- }
-
- /*
-  * char2wchar --- convert multibyte characters to wide characters
-  *
-  * This has almost the API of mbstowcs(), except that *from need not be
-  * null-terminated; instead, the number of input bytes is specified as
-  * fromlen.  Also, we ereport() rather than returning -1 for invalid
-  * input encoding.    tolen is the maximum number of wchar_t's to store at *to.
-  * The output will be zero-terminated iff there is room.
-  */
- size_t
- char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
- {
-     if (tolen == 0)
-         return 0;
-
- #ifdef WIN32
-     if (GetDatabaseEncoding() == PG_UTF8)
-     {
-         int            r;
-
-         /* stupid Microsloth API does not work for zero-length input */
-         if (fromlen == 0)
-             r = 0;
-         else
-         {
-             r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
-
-             if (r <= 0)
-             {
-                 /* see notes in oracle_compat.c about error reporting */
-                 pg_verifymbstr(from, fromlen, false);
-                 ereport(ERROR,
-                         (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                          errmsg("invalid multibyte character for locale"),
-                          errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-             }
-         }
-
-         Assert(r < tolen);
-         to[r] = 0;
-
-         return r;
-     }
- #endif   /* WIN32 */
-
-     if (lc_ctype_is_c())
-     {
-         /*
-          * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
-          * allocated with sufficient space
-          */
-         return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
-     }
-     else
-     {
-         /*
-          * mbstowcs requires ending '\0'
-          */
-         char       *str = pnstrdup(from, fromlen);
-         size_t        result;
-
-         result = mbstowcs(to, str, tolen);
-
-         pfree(str);
-
-         if (result == (size_t) -1)
-         {
-             pg_verifymbstr(from, fromlen, false);
-             ereport(ERROR,
-                     (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                      errmsg("invalid multibyte character for locale"),
-                      errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-         }
-
-         if (result < tolen)
-             to[result] = 0;
-
-         return result;
-     }
- }
-
-
  int
  t_isdigit(const char *ptr)
  {
--- 16,23 ----
Index: src/backend/tsearch/ts_utils.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/ts_utils.c,v
retrieving revision 1.9
diff -c -c -r1.9 ts_utils.c
*** src/backend/tsearch/ts_utils.c    1 Jan 2008 19:45:52 -0000    1.9
--- src/backend/tsearch/ts_utils.c    18 Jun 2008 18:37:02 -0000
***************
*** 153,165 ****
              bsearch(&key, s->stop, s->len,
                      sizeof(char *), comparestr)) ? true : false;
  }
-
- char *
- pnstrdup(const char *in, int len)
- {
-     char       *out = palloc(len + 1);
-
-     memcpy(out, in, len);
-     out[len] = '\0';
-     return out;
- }
--- 153,155 ----
Index: src/backend/utils/mb/mbutils.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v
retrieving revision 1.71
diff -c -c -r1.71 mbutils.c
*** src/backend/utils/mb/mbutils.c    27 May 2008 12:24:42 -0000    1.71
--- src/backend/utils/mb/mbutils.c    18 Jun 2008 18:37:02 -0000
***************
*** 555,560 ****
--- 555,688 ----
      return result;
  }

+
+
+ #ifdef USE_WIDE_UPPER_LOWER
+
+ /*
+  * wchar2char --- convert wide characters to multibyte format
+  *
+  * This has the same API as the standard wcstombs() function; in particular,
+  * tolen is the maximum number of bytes to store at *to, and *from must be
+  * zero-terminated.  The output will be zero-terminated iff there is room.
+  */
+ size_t
+ wchar2char(char *to, const wchar_t *from, size_t tolen)
+ {
+     size_t result;
+
+     if (tolen == 0)
+         return 0;
+
+ #ifdef WIN32
+     /*
+      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
+      * and for some reason mbstowcs and wcstombs won't do this for us,
+      * so we use MultiByteToWideChar().
+      */
+     if (GetDatabaseEncoding() == PG_UTF8)
+     {
+         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
+                                 NULL, NULL);
+         /* A zero return is failure */
+         if (result <= 0)
+             result = -1;
+         else
+         {
+             Assert(result <= tolen);
+             /* Microsoft counts the zero terminator in the result */
+             result--;
+         }
+     }
+     else
+ #endif   /* WIN32 */
+         result = wcstombs(to, from, tolen);
+     return result;
+ }
+
+ /*
+  * char2wchar --- convert multibyte characters to wide characters
+  *
+  * This has almost the API of mbstowcs(), except that *from need not be
+  * null-terminated; instead, the number of input bytes is specified as
+  * fromlen.  Also, we ereport() rather than returning -1 for invalid
+  * input encoding.    tolen is the maximum number of wchar_t's to store at *to.
+  * The output will be zero-terminated iff there is room.
+  */
+ size_t
+ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
+ {
+     size_t        result;
+
+     if (tolen == 0)
+         return 0;
+
+ #ifdef WIN32
+     /* See WIN32 "Unicode" comment above */
+     if (GetDatabaseEncoding() == PG_UTF8)
+     {
+         /* Win32 API does not work for zero-length input */
+         if (fromlen == 0)
+             result = 0;
+         else
+         {
+             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
+             /* A zero return is failure */
+             if (result == 0)
+                 result = -1;
+         }
+
+         if (result != -1)
+         {
+             Assert(result < tolen);
+             /* Append trailing null wchar (MultiByteToWideChar() does not) */
+             to[result] = 0;
+         }
+     }
+     else
+ #endif   /* WIN32 */
+     {
+         if (lc_ctype_is_c())
+         {
+             /*
+              * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
+              * allocated with sufficient space
+              */
+             result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
+         }
+         else
+         {
+             /* mbstowcs requires ending '\0' */
+             char       *str = pnstrdup(from, fromlen);
+
+             result = mbstowcs(to, str, tolen);
+             pfree(str);
+         }
+     }
+
+     if (result == -1)
+     {
+         /*
+          * Invalid multibyte character encountered.  We try to give a useful
+          * error message by letting pg_verifymbstr check the string.  But it's
+          * possible that the string is OK to us, and not OK to mbstowcs ---
+          * this suggests that the LC_CTYPE locale is different from the
+          * database encoding.  Give a generic error message if verifymbstr
+          * can't find anything wrong.
+          */
+         pg_verifymbstr(from, fromlen, false);    /* might not return */
+         /* but if it does ... */
+         ereport(ERROR,
+                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+                  errmsg("invalid multibyte character for locale"),
+                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+     }
+
+     return result;
+ }
+
+ #endif
+
  /* convert a multibyte string to a wchar */
  int
  pg_mb2wchar(const char *from, pg_wchar *to)
Index: src/backend/utils/mmgr/mcxt.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/mmgr/mcxt.c,v
retrieving revision 1.63
diff -c -c -r1.63 mcxt.c
*** src/backend/utils/mmgr/mcxt.c    1 Jan 2008 19:45:55 -0000    1.63
--- src/backend/utils/mmgr/mcxt.c    18 Jun 2008 18:37:05 -0000
***************
*** 624,629 ****
--- 624,641 ----
                                                   pointer, size);
  }

+ /* Like pstrdup(), but append null byte */
+ char *
+ pnstrdup(const char *in, int len)
+ {
+     char       *out = palloc(len + 1);
+
+     memcpy(out, in, len);
+     out[len] = '\0';
+     return out;
+ }
+
+
  /*
   * MemoryContextSwitchTo
   *        Returns the current context; installs the given context.
Index: src/include/mb/pg_wchar.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/mb/pg_wchar.h,v
retrieving revision 1.78
diff -c -c -r1.78 pg_wchar.h
*** src/include/mb/pg_wchar.h    1 Jan 2008 19:45:58 -0000    1.78
--- src/include/mb/pg_wchar.h    18 Jun 2008 18:37:05 -0000
***************
*** 362,367 ****
--- 362,372 ----
  extern int    pg_encoding_max_length(int encoding);
  extern int    pg_database_encoding_max_length(void);

+ #ifdef USE_WIDE_UPPER_LOWER
+ extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
+ extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
+ #endif
+
  extern void SetDefaultClientEncoding(void);
  extern int    SetClientEncoding(int encoding, bool doit);
  extern void InitializeClientEncoding(void);
Index: src/include/tsearch/ts_locale.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/tsearch/ts_locale.h,v
retrieving revision 1.6
diff -c -c -r1.6 ts_locale.h
*** src/include/tsearch/ts_locale.h    17 Jun 2008 16:09:06 -0000    1.6
--- src/include/tsearch/ts_locale.h    18 Jun 2008 18:37:05 -0000
***************
*** 33,41 ****

  #ifdef USE_WIDE_UPPER_LOWER

- extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
- extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
-
  extern int    t_isdigit(const char *ptr);
  extern int    t_isspace(const char *ptr);
  extern int    t_isalpha(const char *ptr);
--- 33,38 ----
Index: src/include/tsearch/ts_public.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/tsearch/ts_public.h,v
retrieving revision 1.9
diff -c -c -r1.9 ts_public.h
*** src/include/tsearch/ts_public.h    16 May 2008 16:31:02 -0000    1.9
--- src/include/tsearch/ts_public.h    18 Jun 2008 18:37:05 -0000
***************
*** 62,69 ****
  extern char *get_tsearch_config_filename(const char *basename,
                              const char *extension);

- extern char *pnstrdup(const char *in, int len);
-
  /*
   * Often useful stopword list management
   */
--- 62,67 ----
Index: src/include/utils/palloc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/palloc.h,v
retrieving revision 1.38
diff -c -c -r1.38 palloc.h
*** src/include/utils/palloc.h    1 Jan 2008 19:45:59 -0000    1.38
--- src/include/utils/palloc.h    18 Jun 2008 18:37:05 -0000
***************
*** 70,75 ****
--- 70,77 ----

  extern void *repalloc(void *pointer, Size size);

+ extern char *pnstrdup(const char *in, int len);
+
  /*
   * MemoryContextSwitchTo can't be a macro in standard C compilers.
   * But we can make it an inline function when using GCC.

Re: Simplify formatting.c

From

Bruce Momjian

Date:

21 June 2008, 17:06:51

Bruce Momjian wrote:
> Bruce Momjian wrote:
> > Alvaro Herrera wrote:
> > > Bruce Momjian wrote:
> > >
> > > > I moved str_initcap() over into oracle_compat.c and then had initcap()
> > > > convert to/from TEXT to call it.  The code is a little weird because
> > > > str_initcap() needs to convert to text to use texttowcs(), so in
> > > > multibyte encodings initcap converts the string to text, then to char,
> > > > then to text to call texttowcs().  I didn't see a cleaner way to do
> > > > this.
> > >
> > > Why not use wchar2char?  It seems there's room for extra cleanup here.
> > >
> > > Also, the prototype of str_initcap in builtins.h looks out of place.
> >
> > I talked to Alvaro on IM, and there is certainly much more cleanup to do
> > in this area. I will work from the bottom up.  First, is moving the
> > USE_WIDE_UPPER_LOWER define to c.h, and removing TS_USE_WIDE and using
> > USE_WIDE_UPPER_LOWER instead.  Patch attached and applied.
>
> The second step is to move wchar2char() and char2wchar() from tsearch
> into /mb to be easier to use for other modules;  also move pnstrdup().

The third step is for oracle_compat.c::initcap() to use
formatting.c::str_initcap().  You can see the result;  patch attached
(not applied).

This greatly reduces the size of initcap(), with the downside that we
are making two extra copies of the string to convert it to/from char*.

Is this acceptable?  If it is, I will do the same for upper()/lower()
with similar code size reduction and modularity.

If not perhaps I should keep the non-multibyte code in initcap() and
have only the multi-byte use str_initcap().

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.142
diff -c -c -r1.142 formatting.c
*** src/backend/utils/adt/formatting.c    17 Jun 2008 16:09:06 -0000    1.142
--- src/backend/utils/adt/formatting.c    21 Jun 2008 20:00:45 -0000
***************
*** 1499,1526 ****
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          wchar_t        *workspace;
!         text        *in_text;
!         text        *out_text;
!         int            i;

!         in_text = cstring_to_text(buff);
!         workspace = texttowcs(in_text);

!         for (i = 0; workspace[i] != 0; i++)
          {
              if (wasalnum)
!                 workspace[i] = towlower(workspace[i]);
              else
!                 workspace[i] = towupper(workspace[i]);
!             wasalnum = iswalnum(workspace[i]);
          }

!         out_text = wcstotext(workspace, i);
!         result = text_to_cstring(out_text);

          pfree(workspace);
-         pfree(in_text);
-         pfree(out_text);
      }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
--- 1499,1525 ----
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          wchar_t        *workspace;
!         int            curr_char = 0;

!         /* Output workspace cannot have more codes than input bytes */
!         workspace = (wchar_t *) palloc((strlen(buff) + 1) * sizeof(wchar_t));

!         char2wchar(workspace, strlen(buff) + 1, buff, strlen(buff) + 1);
!
!         for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
          {
              if (wasalnum)
!                 workspace[curr_char] = towlower(workspace[curr_char]);
              else
!                 workspace[curr_char] = towupper(workspace[curr_char]);
!             wasalnum = iswalnum(workspace[curr_char]);
          }

!         /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);

+         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
          pfree(workspace);
      }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.80
diff -c -c -r1.80 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c    17 Jun 2008 16:09:06 -0000    1.80
--- src/backend/utils/adt/oracle_compat.c    21 Jun 2008 20:00:45 -0000
***************
*** 467,530 ****
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER

!     /*
!      * Use wide char code only when max encoding length > 1 and ctype != C.
!      * Some operating systems fail with multi-byte encodings and a C locale.
!      * Also, for a C locale there is no need to process as multibyte.
!      */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         text       *string = PG_GETARG_TEXT_PP(0);
!         text       *result;
!         wchar_t    *workspace;
!         int            wasalnum = 0;
!         int            i;
!
!         workspace = texttowcs(string);
!
!         for (i = 0; workspace[i] != 0; i++)
!         {
!             if (wasalnum)
!                 workspace[i] = towlower(workspace[i]);
!             else
!                 workspace[i] = towupper(workspace[i]);
!             wasalnum = iswalnum(workspace[i]);
!         }
!
!         result = wcstotext(workspace, i);
!
!         pfree(workspace);
!
!         PG_RETURN_TEXT_P(result);
!     }
!     else
! #endif   /* USE_WIDE_UPPER_LOWER */
!     {
!         text       *string = PG_GETARG_TEXT_P_COPY(0);
!         int            wasalnum = 0;
!         char       *ptr;
!         int            m;
!
!         /*
!          * Since we copied the string, we can scribble directly on the value
!          */
!         ptr = VARDATA(string);
!         m = VARSIZE(string) - VARHDRSZ;

!         while (m-- > 0)
!         {
!             if (wasalnum)
!                 *ptr = tolower((unsigned char) *ptr);
!             else
!                 *ptr = toupper((unsigned char) *ptr);
!             wasalnum = isalnum((unsigned char) *ptr);
!             ptr++;
!         }
!
!         PG_RETURN_TEXT_P(string);
!     }
  }


--- 467,482 ----
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
!     char    *in_string, *out_string;
!     text    *result;

!     in_string = text_to_cstring(PG_GETARG_TEXT_PP(0));
!     out_string = str_initcap(in_string);
!     pfree(in_string);
!     result = cstring_to_text(out_string);
!     pfree(out_string);

!     PG_RETURN_TEXT_P(result);
  }

Re: Simplify formatting.c

From

Tom Lane

Date:

21 June 2008, 21:54:19

Bruce Momjian <bruce@momjian.us> writes:
> The third step is for oracle_compat.c::initcap() to use
> formatting.c::str_initcap().  You can see the result;  patch attached
> (not applied).

> This greatly reduces the size of initcap(), with the downside that we
> are making two extra copies of the string to convert it to/from char*.

> Is this acceptable?

I'd say not.  Can't we do some more refactoring and avoid so many
useless conversions?  Seems like str_initcap is the wrong primitive API
--- the work ought to be done by a function that takes a char pointer
and a length.  That would be a suitable basis for functions operating
on both text datums and C strings.

(Perhaps what I should be asking is whether the performance of upper()
and lower() is equally bad.  Certainly all three should have comparable
code, so maybe they all need refactoring.)

            regards, tom lane

Re: Simplify formatting.c

From

Bruce Momjian

Date:

21 June 2008, 22:49:38

Tom Lane wrote:
> Bruce Momjian <bruce@momjian.us> writes:
> > The third step is for oracle_compat.c::initcap() to use
> > formatting.c::str_initcap().  You can see the result;  patch attached
> > (not applied).
>
> > This greatly reduces the size of initcap(), with the downside that we
> > are making two extra copies of the string to convert it to/from char*.
>
> > Is this acceptable?
>
> I'd say not.  Can't we do some more refactoring and avoid so many
> useless conversions?  Seems like str_initcap is the wrong primitive API
> --- the work ought to be done by a function that takes a char pointer
> and a length.  That would be a suitable basis for functions operating
> on both text datums and C strings.

Yea, I thought about that idea too, but it is going to add strlen()
calls in some places, but not in critical ones.

> (Perhaps what I should be asking is whether the performance of upper()
> and lower() is equally bad.  Certainly all three should have comparable
> code, so maybe they all need refactoring.)

Yes, they do.  I will work on the length idea and see how that goes.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +

Re: Simplify formatting.c

From

Tom Lane

Date:

21 June 2008, 22:59:32

Bruce Momjian <bruce@momjian.us> writes:
> Tom Lane wrote:
>> I'd say not.  Can't we do some more refactoring and avoid so many
>> useless conversions?  Seems like str_initcap is the wrong primitive API
>> --- the work ought to be done by a function that takes a char pointer
>> and a length.  That would be a suitable basis for functions operating
>> on both text datums and C strings.

> Yea, I thought about that idea too but it is going to add a strlen()
> calls in some places, but not in critical ones.

Sure, but the cost-per-byte of the strlen should be a good bit less than
the cost-per-byte of the actual conversion, so that doesn't bother me
too much.

Actually it seems like the hard part is not so much the input
representation as the output representation --- what should the
base-level initcap routine return, to be reasonably efficient for
both cases?

            regards, tom lane

Re: Simplify formatting.c

From

Bruce Momjian

Date:

21 June 2008, 23:43:31

Tom Lane wrote:
> Bruce Momjian <bruce@momjian.us> writes:
> > Tom Lane wrote:
> >> I'd say not.  Can't we do some more refactoring and avoid so many
> >> useless conversions?  Seems like str_initcap is the wrong primitive API
> >> --- the work ought to be done by a function that takes a char pointer
> >> and a length.  That would be a suitable basis for functions operating
> >> on both text datums and C strings.
>
> > Yea, I thought about that idea too but it is going to add a strlen()
> > calls in some places, but not in critical ones.
>
> Sure, but the cost-per-byte of the strlen should be a good bit less than
> the cost-per-byte of the actual conversion, so that doesn't bother me
> too much.
>
> Actually it seems like the hard part is not so much the input
> representation as the output representation --- what should the
> base-level initcap routine return, to be reasonably efficient for
> both cases?

I hadn't gotten to trying it out yet, but I can see the output being a
problem.  You can't even really pre-allocate the storage before passing
it because you don't know the length after case change.  You could pass
back a char* and repalloc to get the varlena header in there but that is
very messy.

Add to that that the multi-byte case also has to be converted to wide
characters, so you have text -> char * -> wide chars -> char * -> text
for the most complex case.

I am starting to think that the simplest case is to keep the single-copy
version in there for single-byte encodings and not worry about the
overhead of the multi-byte case.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +

Re: Simplify formatting.c

From

Bruce Momjian

Date:

22 June 2008, 23:40:39

Bruce Momjian wrote:
> > Actually it seems like the hard part is not so much the input
> > representation as the output representation --- what should the
> > base-level initcap routine return, to be reasonably efficient for
> > both cases?
>
> I hadn't gotten to trying it out yet, but I can see the output being a
> problem.  You can't even really pre-allocate the storage before passing
> it because you don't know the length after case change.  You could pass
> back a char* and repalloc to get the varlena header in there but that is
> very messy.
>
> Add to that that the multi-byte case also has to be converted to wide
> characters, so you have text -> char * -> wide chars -> char * -> text
> for the most complex case.
>
> I am starting to think that the simplest case is to keep the single-copy
> version in there for single-byte encodings and not worry about the
> overhead of the multi-byte case.

My new idea is if we pass the length to str_initcap, we can eliminate
the string copy from text to char *.  That leaves us with just one extra
string copy from char * to text, which seems acceptable.  We still have
the wide char copy but I don't see any easy way to eliminate that
because the multi-byte code is complex and not something we want to
duplicate.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +

Re: Simplify formatting.c

From

Bruce Momjian

Date:

23 June 2008, 16:28:50

Bruce Momjian wrote:
> > I am starting to think that the simplest case is to keep the single-copy
> > version in there for single-byte encodings and not worry about the
> > overhead of the multi-byte case.
>
> My new idea is if we pass the length to str_initcap, we can eliminate
> the string copy from text to char *.  That leaves us with just one extra
> string copy from char * to text, which seems acceptable.  We still have
> the wide char copy but I don't see any easy way to eliminate that
> because the multi-byte code is complex and not something we want to
> duplicate.

I ended up going in this direction, and did the same for upper and
lower.  Patch attached and applied.   I don't see any other cleanups in
this area.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.142
diff -c -c -r1.142 formatting.c
*** src/backend/utils/adt/formatting.c    17 Jun 2008 16:09:06 -0000    1.142
--- src/backend/utils/adt/formatting.c    23 Jun 2008 19:24:35 -0000
***************
*** 925,933 ****
  static char *str_numth(char *dest, char *num, int type);
  static int    strspace_len(char *str);
  static int    strdigits_len(char *str);
- static char *str_toupper(char *buff);
- static char *str_tolower(char *buff);
- static char *str_initcap(char *buff);

  static int    seq_search(char *name, char **array, int type, int max, int *len);
  static void do_to_timestamp(text *date_txt, text *fmt,
--- 925,930 ----
***************
*** 1424,1435 ****
      return dest;
  }

  /* ----------
!  * Convert string to upper case. It is designed to be multibyte-aware.
   * ----------
   */
! static char *
! str_toupper(char *buff)
  {
      char        *result;

--- 1421,1444 ----
      return dest;
  }

+ /*
+  * If the system provides the needed functions for wide-character manipulation
+  * (which are all standardized by C99), then we implement upper/lower/initcap
+  * using wide-character functions, if necessary.  Otherwise we use the
+  * traditional <ctype.h> functions, which of course will not work as desired
+  * in multibyte character sets.  Note that in either case we are effectively
+  * assuming that the database character encoding matches the encoding implied
+  * by LC_CTYPE.
+  */
+
  /* ----------
!  * wide-character-aware lower function
!  * We pass the number of bytes so we can pass varlena and char*
!  * to this function.
   * ----------
   */
! char *
! str_tolower(char *buff, size_t nbytes)
  {
      char        *result;

***************
*** 1438,1464 ****

  #ifdef USE_WIDE_UPPER_LOWER
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!         result = wstring_upper(buff);
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pstrdup(buff);

          for (p = result; *p; p++)
!             *p = pg_toupper((unsigned char) *p);
      }

      return result;
  }

  /* ----------
!  * Convert string to lower case. It is designed to be multibyte-aware.
   * ----------
   */
! static char *
! str_tolower(char *buff)
  {
      char        *result;

--- 1447,1492 ----

  #ifdef USE_WIDE_UPPER_LOWER
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         wchar_t        *workspace;
!         int            curr_char = 0;
!
!         /* Output workspace cannot have more codes than input bytes */
!         workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
!
!         char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
!
!         for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
!             workspace[curr_char] = towlower(workspace[curr_char]);
!
!         /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);
!
!         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
!         pfree(workspace);
!     }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pnstrdup(buff, nbytes);

          for (p = result; *p; p++)
!             *p = pg_tolower((unsigned char) *p);
      }

      return result;
  }

  /* ----------
!  * wide-character-aware upper function
!  * We pass the number of bytes so we can pass varlena and char*
!  * to this function.
   * ----------
   */
! char *
! str_toupper(char *buff, size_t nbytes)
  {
      char        *result;

***************
*** 1467,1493 ****

  #ifdef USE_WIDE_UPPER_LOWER
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!         result = wstring_lower(buff);
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pstrdup(buff);

          for (p = result; *p; p++)
!             *p = pg_tolower((unsigned char) *p);
      }

      return result;
  }
!
  /* ----------
   * wide-character-aware initcap function
   * ----------
   */
! static char *
! str_initcap(char *buff)
  {
      char        *result;
      bool        wasalnum = false;
--- 1495,1540 ----

  #ifdef USE_WIDE_UPPER_LOWER
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         wchar_t        *workspace;
!         int            curr_char = 0;
!
!         /* Output workspace cannot have more codes than input bytes */
!         workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
!
!         char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
!
!         for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
!             workspace[curr_char] = towupper(workspace[curr_char]);
!
!         /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);
!
!         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
!         pfree(workspace);
!     }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pnstrdup(buff, nbytes);

          for (p = result; *p; p++)
!             *p = pg_toupper((unsigned char) *p);
      }

      return result;
  }
!
  /* ----------
   * wide-character-aware initcap function
+  * We pass the number of bytes so we can pass varlena and char*
+  * to this function.
   * ----------
   */
! char *
! str_initcap(char *buff, size_t nbytes)
  {
      char        *result;
      bool        wasalnum = false;
***************
*** 1499,1533 ****
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          wchar_t        *workspace;
!         text        *in_text;
!         text        *out_text;
!         int            i;

!         in_text = cstring_to_text(buff);
!         workspace = texttowcs(in_text);

!         for (i = 0; workspace[i] != 0; i++)
          {
              if (wasalnum)
!                 workspace[i] = towlower(workspace[i]);
              else
!                 workspace[i] = towupper(workspace[i]);
!             wasalnum = iswalnum(workspace[i]);
          }

!         out_text = wcstotext(workspace, i);
!         result = text_to_cstring(out_text);

          pfree(workspace);
-         pfree(in_text);
-         pfree(out_text);
      }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pstrdup(buff);

          for (p = result; *p; p++)
          {
--- 1546,1579 ----
      if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
      {
          wchar_t        *workspace;
!         int            curr_char = 0;
!
!         /* Output workspace cannot have more codes than input bytes */
!         workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

!         char2wchar(workspace, nbytes + 1, buff, nbytes + 1);

!         for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
          {
              if (wasalnum)
!                 workspace[curr_char] = towlower(workspace[curr_char]);
              else
!                 workspace[curr_char] = towupper(workspace[curr_char]);
!             wasalnum = iswalnum(workspace[curr_char]);
          }

!         /* Make result large enough; case change might change number of bytes */
!         result = palloc(curr_char * MB_CUR_MAX + 1);

+         wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
          pfree(workspace);
      }
      else
  #endif        /* USE_WIDE_UPPER_LOWER */
      {
          char *p;

!         result = pnstrdup(buff, nbytes);

          for (p = result; *p; p++)
          {
***************
*** 1851,1857 ****
                  {
                      char       *p = pstrdup(tmtcTzn(in));

!                     strcpy(s, str_tolower(p));
                      pfree(p);
                      s += strlen(s);
                  }
--- 1897,1903 ----
                  {
                      char       *p = pstrdup(tmtcTzn(in));

!                     strcpy(s, str_tolower(p, strlen(p)));
                      pfree(p);
                      s += strlen(s);
                  }
***************
*** 1893,1903 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1]));
                  else
                  {
                      strcpy(workbuff, months_full[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
                  }
                  s += strlen(s);
                  break;
--- 1939,1951 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1],
!                                 strlen(localized_full_months[tm->tm_mon - 1])));
                  else
                  {
                      strcpy(workbuff, months_full[tm->tm_mon - 1]);
!                     sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
!                                 str_toupper(workbuff, strlen(workbuff)));
                  }
                  s += strlen(s);
                  break;
***************
*** 1906,1912 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1]));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
                  s += strlen(s);
--- 1954,1961 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1],
!                                 strlen(localized_full_months[tm->tm_mon - 1])));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
                  s += strlen(s);
***************
*** 1916,1922 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1]));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
--- 1965,1972 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1],
!                                 strlen(localized_full_months[tm->tm_mon - 1])));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
***************
*** 1929,1937 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1]));
                  else
!                     strcpy(s, str_toupper(months[tm->tm_mon - 1]));
                  s += strlen(s);
                  break;
              case DCH_Mon:
--- 1979,1989 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1],
!                                 strlen(localized_abbrev_months[tm->tm_mon - 1])));
                  else
!                     strcpy(s, str_toupper(months[tm->tm_mon - 1],
!                                 strlen(months[tm->tm_mon - 1])));
                  s += strlen(s);
                  break;
              case DCH_Mon:
***************
*** 1939,1945 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1]));
                  else
                      strcpy(s, months[tm->tm_mon - 1]);
                  s += strlen(s);
--- 1991,1998 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1],
!                                 strlen(localized_abbrev_months[tm->tm_mon - 1])));
                  else
                      strcpy(s, months[tm->tm_mon - 1]);
                  s += strlen(s);
***************
*** 1949,1955 ****
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1]));
                  else
                  {
                      strcpy(s, months[tm->tm_mon - 1]);
--- 2002,2009 ----
                  if (!tm->tm_mon)
                      break;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1],
!                                 strlen(localized_abbrev_months[tm->tm_mon - 1])));
                  else
                  {
                      strcpy(s, months[tm->tm_mon - 1]);
***************
*** 1966,1983 ****
              case DCH_DAY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_days[tm->tm_wday]));
                  else
                  {
                      strcpy(workbuff, days[tm->tm_wday]);
!                     sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
                  }
                  s += strlen(s);
                  break;
              case DCH_Day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_days[tm->tm_wday]));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
                  s += strlen(s);
--- 2020,2040 ----
              case DCH_DAY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_full_days[tm->tm_wday],
!                                 strlen(localized_full_days[tm->tm_wday])));
                  else
                  {
                      strcpy(workbuff, days[tm->tm_wday]);
!                     sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
!                                 str_toupper(workbuff, strlen(workbuff)));
                  }
                  s += strlen(s);
                  break;
              case DCH_Day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_full_days[tm->tm_wday],
!                                 strlen(localized_full_days[tm->tm_wday])));
                  else
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
                  s += strlen(s);
***************
*** 1985,1991 ****
              case DCH_day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_days[tm->tm_wday]));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
--- 2042,2049 ----
              case DCH_day:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_full_days[tm->tm_wday],
!                                 strlen(localized_full_days[tm->tm_wday])));
                  else
                  {
                      sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
***************
*** 1996,2010 ****
              case DCH_DY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday]));
                  else
!                     strcpy(s, str_toupper(days_short[tm->tm_wday]));
                  s += strlen(s);
                  break;
              case DCH_Dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday]));
                  else
                      strcpy(s, days_short[tm->tm_wday]);
                  s += strlen(s);
--- 2054,2071 ----
              case DCH_DY:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday],
!                                 strlen(localized_abbrev_days[tm->tm_wday])));
                  else
!                     strcpy(s, str_toupper(days_short[tm->tm_wday],
!                                 strlen(days_short[tm->tm_wday])));
                  s += strlen(s);
                  break;
              case DCH_Dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday],
!                                 strlen(localized_abbrev_days[tm->tm_wday])));
                  else
                      strcpy(s, days_short[tm->tm_wday]);
                  s += strlen(s);
***************
*** 2012,2018 ****
              case DCH_dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday]));
                  else
                  {
                      strcpy(s, days_short[tm->tm_wday]);
--- 2073,2080 ----
              case DCH_dy:
                  INVALID_FOR_INTERVAL;
                  if (S_TM(n->suffix))
!                     strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday],
!                                 strlen(localized_abbrev_days[tm->tm_wday])));
                  else
                  {
                      strcpy(s, days_short[tm->tm_wday]);
***************
*** 4277,4288 ****
                  case NUM_rn:
                      if (IS_FILLMODE(Np->Num))
                      {
!                         strcpy(Np->inout_p, str_tolower(Np->number_p));
                          Np->inout_p += strlen(Np->inout_p) - 1;
                      }
                      else
                      {
!                         sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p));
                          Np->inout_p += strlen(Np->inout_p) - 1;
                      }
                      break;
--- 4339,4352 ----
                  case NUM_rn:
                      if (IS_FILLMODE(Np->Num))
                      {
!                         strcpy(Np->inout_p, str_tolower(Np->number_p,
!                                 strlen(Np->number_p)));
                          Np->inout_p += strlen(Np->inout_p) - 1;
                      }
                      else
                      {
!                         sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p,
!                                 strlen(Np->number_p)));
                          Np->inout_p += strlen(Np->inout_p) - 1;
                      }
                      break;
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.80
diff -c -c -r1.80 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c    17 Jun 2008 16:09:06 -0000    1.80
--- src/backend/utils/adt/oracle_compat.c    23 Jun 2008 19:24:35 -0000
***************
*** 29,320 ****
  #endif

  #include "utils/builtins.h"
  #include "utils/pg_locale.h"
  #include "mb/pg_wchar.h"


- /*
-  * If the system provides the needed functions for wide-character manipulation
-  * (which are all standardized by C99), then we implement upper/lower/initcap
-  * using wide-character functions.    Otherwise we use the traditional <ctype.h>
-  * functions, which of course will not work as desired in multibyte character
-  * sets.  Note that in either case we are effectively assuming that the
-  * database character encoding matches the encoding implied by LC_CTYPE.
-  */
- #ifdef USE_WIDE_UPPER_LOWER
- char       *wstring_lower(char *str);
- char       *wstring_upper(char *str);
- wchar_t       *texttowcs(const text *txt);
- text       *wcstotext(const wchar_t *str, int ncodes);
- #endif
-
  static text *dotrim(const char *string, int stringlen,
         const char *set, int setlen,
         bool doltrim, bool dortrim);


- #ifdef USE_WIDE_UPPER_LOWER
-
- /*
-  * Convert a TEXT value into a palloc'd wchar string.
-  */
- wchar_t *
- texttowcs(const text *txt)
- {
-     int            nbytes = VARSIZE_ANY_EXHDR(txt);
-     char       *workstr;
-     wchar_t    *result;
-     size_t        ncodes;
-
-     /* Overflow paranoia */
-     if (nbytes < 0 ||
-         nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
-         ereport(ERROR,
-                 (errcode(ERRCODE_OUT_OF_MEMORY),
-                  errmsg("out of memory")));
-
-     /* Need a null-terminated version of the input */
-     workstr = text_to_cstring(txt);
-
-     /* Output workspace cannot have more codes than input bytes */
-     result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-     /* Do the conversion */
-     ncodes = mbstowcs(result, workstr, nbytes + 1);
-
-     if (ncodes == (size_t) -1)
-     {
-         /*
-          * Invalid multibyte character encountered.  We try to give a useful
-          * error message by letting pg_verifymbstr check the string.  But it's
-          * possible that the string is OK to us, and not OK to mbstowcs ---
-          * this suggests that the LC_CTYPE locale is different from the
-          * database encoding.  Give a generic error message if verifymbstr
-          * can't find anything wrong.
-          */
-         pg_verifymbstr(workstr, nbytes, false);
-         ereport(ERROR,
-                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                  errmsg("invalid multibyte character for locale"),
-                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-     }
-
-     Assert(ncodes <= (size_t) nbytes);
-
-     return result;
- }
-
-
- /*
-  * Convert a wchar string into a palloc'd TEXT value.  The wchar string
-  * must be zero-terminated, but we also require the caller to pass the string
-  * length, since it will know it anyway in current uses.
-  */
- text *
- wcstotext(const wchar_t *str, int ncodes)
- {
-     text       *result;
-     size_t        nbytes;
-
-     /* Overflow paranoia */
-     if (ncodes < 0 ||
-         ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
-         ereport(ERROR,
-                 (errcode(ERRCODE_OUT_OF_MEMORY),
-                  errmsg("out of memory")));
-
-     /* Make workspace certainly large enough for result */
-     result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
-
-     /* Do the conversion */
-     nbytes = wcstombs((char *) VARDATA(result), str,
-                       (ncodes + 1) * MB_CUR_MAX);
-
-     if (nbytes == (size_t) -1)
-     {
-         /* Invalid multibyte character encountered ... shouldn't happen */
-         ereport(ERROR,
-                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                  errmsg("invalid multibyte character for locale")));
-     }
-
-     Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
-
-     SET_VARSIZE(result, nbytes + VARHDRSZ);
-
-     return result;
- }
- #endif   /* USE_WIDE_UPPER_LOWER */
-
-
- /*
-  * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
-  * To make use of the upper/lower functionality, we need to map UTF8 to
-  * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
-  * This conversion layer takes care of it.
-  */
-
- #ifdef WIN32
-
- /* texttowcs for the case of UTF8 to UTF16 */
- static wchar_t *
- win32_utf8_texttowcs(const text *txt)
- {
-     int            nbytes = VARSIZE_ANY_EXHDR(txt);
-     wchar_t    *result;
-     int            r;
-
-     /* Overflow paranoia */
-     if (nbytes < 0 ||
-         nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
-         ereport(ERROR,
-                 (errcode(ERRCODE_OUT_OF_MEMORY),
-                  errmsg("out of memory")));
-
-     /* Output workspace cannot have more codes than input bytes */
-     result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-     /* stupid Microsloth API does not work for zero-length input */
-     if (nbytes == 0)
-         r = 0;
-     else
-     {
-         /* Do the conversion */
-         r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
-                                 result, nbytes);
-
-         if (r <= 0)                /* assume it's NO_UNICODE_TRANSLATION */
-         {
-             /* see notes above about error reporting */
-             pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
-             ereport(ERROR,
-                     (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                      errmsg("invalid multibyte character for locale"),
-                      errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-         }
-     }
-
-     /* Append trailing null wchar (MultiByteToWideChar won't have) */
-     Assert(r <= nbytes);
-     result[r] = 0;
-
-     return result;
- }
-
- /* wcstotext for the case of UTF16 to UTF8 */
- static text *
- win32_utf8_wcstotext(const wchar_t *str)
- {
-     text       *result;
-     int            nbytes;
-     int            r;
-
-     /* Compute size of output string (this *will* include trailing null) */
-     nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
-     if (nbytes <= 0)            /* shouldn't happen */
-         ereport(ERROR,
-                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                  errmsg("UTF-16 to UTF-8 translation failed: %lu",
-                         GetLastError())));
-
-     result = palloc(nbytes + VARHDRSZ);
-
-     r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
-                             NULL, NULL);
-     if (r != nbytes)            /* shouldn't happen */
-         ereport(ERROR,
-                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                  errmsg("UTF-16 to UTF-8 translation failed: %lu",
-                         GetLastError())));
-
-     SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */
-
-     return result;
- }
-
- /* interface layer to check which encoding is in use */
-
- static wchar_t *
- win32_texttowcs(const text *txt)
- {
-     if (GetDatabaseEncoding() == PG_UTF8)
-         return win32_utf8_texttowcs(txt);
-     else
-         return texttowcs(txt);
- }
-
- static text *
- win32_wcstotext(const wchar_t *str, int ncodes)
- {
-     if (GetDatabaseEncoding() == PG_UTF8)
-         return win32_utf8_wcstotext(str);
-     else
-         return wcstotext(str, ncodes);
- }
-
- /* use macros to cause routines below to call interface layer */
-
- #define texttowcs    win32_texttowcs
- #define wcstotext    win32_wcstotext
- #endif   /* WIN32 */
-
- #ifdef USE_WIDE_UPPER_LOWER
- /*
-  * string_upper and string_lower are used for correct multibyte upper/lower
-  * transformations localized strings. Returns pointers to transformated
-  * string.
-  */
- char *
- wstring_upper(char *str)
- {
-     wchar_t    *workspace;
-     text       *in_text;
-     text       *out_text;
-     char       *result;
-     int            i;
-
-     in_text = cstring_to_text(str);
-     workspace = texttowcs(in_text);
-
-     for (i = 0; workspace[i] != 0; i++)
-         workspace[i] = towupper(workspace[i]);
-
-     out_text = wcstotext(workspace, i);
-     result = text_to_cstring(out_text);
-
-     pfree(workspace);
-     pfree(in_text);
-     pfree(out_text);
-
-     return result;
- }
-
- char *
- wstring_lower(char *str)
- {
-     wchar_t    *workspace;
-     text       *in_text;
-     text       *out_text;
-     char       *result;
-     int            i;
-
-     in_text = cstring_to_text(str);
-     workspace = texttowcs(in_text);
-
-     for (i = 0; workspace[i] != 0; i++)
-         workspace[i] = towlower(workspace[i]);
-
-     out_text = wcstotext(workspace, i);
-     result = text_to_cstring(out_text);
-
-     pfree(workspace);
-     pfree(in_text);
-     pfree(out_text);
-
-     return result;
- }
- #endif   /* USE_WIDE_UPPER_LOWER */
-
  /********************************************************************
   *
   * lower
--- 29,44 ----
  #endif

  #include "utils/builtins.h"
+ #include "utils/formatting.h"
  #include "utils/pg_locale.h"
  #include "mb/pg_wchar.h"


  static text *dotrim(const char *string, int stringlen,
         const char *set, int setlen,
         bool doltrim, bool dortrim);


  /********************************************************************
   *
   * lower
***************
*** 332,383 ****
  Datum
  lower(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER
!
!     /*
!      * Use wide char code only when max encoding length > 1 and ctype != C.
!      * Some operating systems fail with multi-byte encodings and a C locale.
!      * Also, for a C locale there is no need to process as multibyte.
!      */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         text       *string = PG_GETARG_TEXT_PP(0);
!         text       *result;
!         wchar_t    *workspace;
!         int            i;
!
!         workspace = texttowcs(string);
!
!         for (i = 0; workspace[i] != 0; i++)
!             workspace[i] = towlower(workspace[i]);

!         result = wcstotext(workspace, i);
!
!         pfree(workspace);
!
!         PG_RETURN_TEXT_P(result);
!     }
!     else
! #endif   /* USE_WIDE_UPPER_LOWER */
!     {
!         text       *string = PG_GETARG_TEXT_P_COPY(0);
!         char       *ptr;
!         int            m;
!
!         /*
!          * Since we copied the string, we can scribble directly on the value
!          */
!         ptr = VARDATA(string);
!         m = VARSIZE(string) - VARHDRSZ;
!
!         while (m-- > 0)
!         {
!             *ptr = tolower((unsigned char) *ptr);
!             ptr++;
!         }
!
!         PG_RETURN_TEXT_P(string);
!     }
  }


--- 56,70 ----
  Datum
  lower(PG_FUNCTION_ARGS)
  {
!     text    *in_string = PG_GETARG_TEXT_PP(0);
!     char    *out_string;
!     text    *result;
!
!     out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
!     result = cstring_to_text(out_string);
!     pfree(out_string);

!     PG_RETURN_TEXT_P(result);
  }


***************
*** 398,449 ****
  Datum
  upper(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER

!     /*
!      * Use wide char code only when max encoding length > 1 and ctype != C.
!      * Some operating systems fail with multi-byte encodings and a C locale.
!      * Also, for a C locale there is no need to process as multibyte.
!      */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         text       *string = PG_GETARG_TEXT_PP(0);
!         text       *result;
!         wchar_t    *workspace;
!         int            i;
!
!         workspace = texttowcs(string);
!
!         for (i = 0; workspace[i] != 0; i++)
!             workspace[i] = towupper(workspace[i]);
!
!         result = wcstotext(workspace, i);
!
!         pfree(workspace);
!
!         PG_RETURN_TEXT_P(result);
!     }
!     else
! #endif   /* USE_WIDE_UPPER_LOWER */
!     {
!         text       *string = PG_GETARG_TEXT_P_COPY(0);
!         char       *ptr;
!         int            m;
!
!         /*
!          * Since we copied the string, we can scribble directly on the value
!          */
!         ptr = VARDATA(string);
!         m = VARSIZE(string) - VARHDRSZ;
!
!         while (m-- > 0)
!         {
!             *ptr = toupper((unsigned char) *ptr);
!             ptr++;
!         }
!
!         PG_RETURN_TEXT_P(string);
!     }
  }


--- 85,99 ----
  Datum
  upper(PG_FUNCTION_ARGS)
  {
!     text    *in_string = PG_GETARG_TEXT_PP(0);
!     char    *out_string;
!     text    *result;
!
!     out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
!     result = cstring_to_text(out_string);
!     pfree(out_string);

!     PG_RETURN_TEXT_P(result);
  }


***************
*** 467,530 ****
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER

!     /*
!      * Use wide char code only when max encoding length > 1 and ctype != C.
!      * Some operating systems fail with multi-byte encodings and a C locale.
!      * Also, for a C locale there is no need to process as multibyte.
!      */
!     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
!     {
!         text       *string = PG_GETARG_TEXT_PP(0);
!         text       *result;
!         wchar_t    *workspace;
!         int            wasalnum = 0;
!         int            i;
!
!         workspace = texttowcs(string);
!
!         for (i = 0; workspace[i] != 0; i++)
!         {
!             if (wasalnum)
!                 workspace[i] = towlower(workspace[i]);
!             else
!                 workspace[i] = towupper(workspace[i]);
!             wasalnum = iswalnum(workspace[i]);
!         }
!
!         result = wcstotext(workspace, i);
!
!         pfree(workspace);
!
!         PG_RETURN_TEXT_P(result);
!     }
!     else
! #endif   /* USE_WIDE_UPPER_LOWER */
!     {
!         text       *string = PG_GETARG_TEXT_P_COPY(0);
!         int            wasalnum = 0;
!         char       *ptr;
!         int            m;
!
!         /*
!          * Since we copied the string, we can scribble directly on the value
!          */
!         ptr = VARDATA(string);
!         m = VARSIZE(string) - VARHDRSZ;
!
!         while (m-- > 0)
!         {
!             if (wasalnum)
!                 *ptr = tolower((unsigned char) *ptr);
!             else
!                 *ptr = toupper((unsigned char) *ptr);
!             wasalnum = isalnum((unsigned char) *ptr);
!             ptr++;
!         }
!
!         PG_RETURN_TEXT_P(string);
!     }
  }


--- 117,131 ----
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
!     text    *in_string = PG_GETARG_TEXT_PP(0);    /* SQL argument 0: the text to convert */
!     char    *out_string;    /* str_initcap() output, handled as a C string */
!     text    *result;        /* out_string repackaged as a text value */
!
!     out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
!     result = cstring_to_text(out_string);
!     pfree(out_string);    /* intermediate C string is not used after result is built */

!     PG_RETURN_TEXT_P(result);
  }


Index: src/include/utils/formatting.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/formatting.h,v
retrieving revision 1.18
diff -c -c -r1.18 formatting.h
*** src/include/utils/formatting.h    1 Jan 2008 19:45:59 -0000    1.18
--- src/include/utils/formatting.h    23 Jun 2008 19:24:36 -0000
***************
*** 21,26 ****
--- 21,30 ----
  #include "fmgr.h"


+ extern char *str_tolower(char *buff, size_t nbytes);
+ extern char *str_toupper(char *buff, size_t nbytes);
+ extern char *str_initcap(char *buff, size_t nbytes);
+
  extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
  extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
  extern Datum interval_to_char(PG_FUNCTION_ARGS);

Re: Simplify formatting.c

From

Bruce Momjian

Date:

23 June 2008, 16:30:33

Euler Taveira de Oliveira wrote:
> Tom Lane wrote:
>
> > Also, it seems a bit inconsistent to be relying on
> > oracle_compat.c for upper/lower but not initcap.
> >
> I saw this inconsistence while I'm doing the patch. What about moving
> that upper/lower/initcap and wcs* code to another file. pg_locale.c?
> BTW, formatting.c and oracle_compat.c already include pg_locale.h.

I researched this idea, but it seems pg_locale.c contains only
locale-specific stuff, while these functions have locale and non-locale
versions;  I ended up moving the common stuff into formatting.c.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +