Re: Simplify formatting.c - Mailing list pgsql-patches

From Bruce Momjian
Subject Re: Simplify formatting.c
Date
Msg-id 200806181843.m5IIhgm03448@momjian.us
Whole thread Raw
In response to Re: Simplify formatting.c  (Bruce Momjian <bruce@momjian.us>)
Responses Re: Simplify formatting.c  (Bruce Momjian <bruce@momjian.us>)
List pgsql-patches
Bruce Momjian wrote:
> Alvaro Herrera wrote:
> > Bruce Momjian wrote:
> >
> > > I moved str_initcap() over into oracle_compat.c and then had initcap()
> > > convert to/from TEXT to call it.  The code is a little weird because
> > > str_initcap() needs to convert to text to use texttowcs(), so in
> > > multibyte encodings initcap converts the string to text, then to char,
> > > then to text to call texttowcs().  I didn't see a cleaner way to do
> > > this.
> >
> > Why not use wchar2char?  It seems there's room for extra cleanup here.
> >
> > Also, the prototype of str_initcap in builtins.h looks out of place.
>
> I talked to Alvaro on IM, and there is certainly much more cleanup to do
> in this area. I will work from the bottom up.  The first step is moving
> the USE_WIDE_UPPER_LOWER define to c.h, removing TS_USE_WIDE, and using
> USE_WIDE_UPPER_LOWER instead.  Patch attached and applied.

The second step is to move wchar2char() and char2wchar() from tsearch
into utils/mb (mbutils.c) so that other modules can use them more easily;
also move pnstrdup() from ts_utils.c into mcxt.c.

Patch attached and applied.

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/tsearch/ts_locale.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/ts_locale.c,v
retrieving revision 1.8
diff -c -c -r1.8 ts_locale.c
*** src/backend/tsearch/ts_locale.c    17 Jun 2008 16:09:06 -0000    1.8
--- src/backend/tsearch/ts_locale.c    18 Jun 2008 18:37:02 -0000
***************
*** 16,140 ****
  #include "tsearch/ts_locale.h"
  #include "tsearch/ts_public.h"

-
  #ifdef USE_WIDE_UPPER_LOWER

- /*
-  * wchar2char --- convert wide characters to multibyte format
-  *
-  * This has the same API as the standard wcstombs() function; in particular,
-  * tolen is the maximum number of bytes to store at *to, and *from must be
-  * zero-terminated.  The output will be zero-terminated iff there is room.
-  */
- size_t
- wchar2char(char *to, const wchar_t *from, size_t tolen)
- {
-     if (tolen == 0)
-         return 0;
-
- #ifdef WIN32
-     if (GetDatabaseEncoding() == PG_UTF8)
-     {
-         int            r;
-
-         r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
-                                 NULL, NULL);
-
-         if (r <= 0)
-             return (size_t) -1;
-
-         Assert(r <= tolen);
-
-         /* Microsoft counts the zero terminator in the result */
-         return r - 1;
-     }
- #endif   /* WIN32 */
-
-     return wcstombs(to, from, tolen);
- }
-
- /*
-  * char2wchar --- convert multibyte characters to wide characters
-  *
-  * This has almost the API of mbstowcs(), except that *from need not be
-  * null-terminated; instead, the number of input bytes is specified as
-  * fromlen.  Also, we ereport() rather than returning -1 for invalid
-  * input encoding.    tolen is the maximum number of wchar_t's to store at *to.
-  * The output will be zero-terminated iff there is room.
-  */
- size_t
- char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
- {
-     if (tolen == 0)
-         return 0;
-
- #ifdef WIN32
-     if (GetDatabaseEncoding() == PG_UTF8)
-     {
-         int            r;
-
-         /* stupid Microsloth API does not work for zero-length input */
-         if (fromlen == 0)
-             r = 0;
-         else
-         {
-             r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
-
-             if (r <= 0)
-             {
-                 /* see notes in oracle_compat.c about error reporting */
-                 pg_verifymbstr(from, fromlen, false);
-                 ereport(ERROR,
-                         (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                          errmsg("invalid multibyte character for locale"),
-                          errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-             }
-         }
-
-         Assert(r < tolen);
-         to[r] = 0;
-
-         return r;
-     }
- #endif   /* WIN32 */
-
-     if (lc_ctype_is_c())
-     {
-         /*
-          * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
-          * allocated with sufficient space
-          */
-         return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
-     }
-     else
-     {
-         /*
-          * mbstowcs requires ending '\0'
-          */
-         char       *str = pnstrdup(from, fromlen);
-         size_t        result;
-
-         result = mbstowcs(to, str, tolen);
-
-         pfree(str);
-
-         if (result == (size_t) -1)
-         {
-             pg_verifymbstr(from, fromlen, false);
-             ereport(ERROR,
-                     (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                      errmsg("invalid multibyte character for locale"),
-                      errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-         }
-
-         if (result < tolen)
-             to[result] = 0;
-
-         return result;
-     }
- }
-
-
  int
  t_isdigit(const char *ptr)
  {
--- 16,23 ----
Index: src/backend/tsearch/ts_utils.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/ts_utils.c,v
retrieving revision 1.9
diff -c -c -r1.9 ts_utils.c
*** src/backend/tsearch/ts_utils.c    1 Jan 2008 19:45:52 -0000    1.9
--- src/backend/tsearch/ts_utils.c    18 Jun 2008 18:37:02 -0000
***************
*** 153,165 ****
              bsearch(&key, s->stop, s->len,
                      sizeof(char *), comparestr)) ? true : false;
  }
-
- char *
- pnstrdup(const char *in, int len)
- {
-     char       *out = palloc(len + 1);
-
-     memcpy(out, in, len);
-     out[len] = '\0';
-     return out;
- }
--- 153,155 ----
Index: src/backend/utils/mb/mbutils.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v
retrieving revision 1.71
diff -c -c -r1.71 mbutils.c
*** src/backend/utils/mb/mbutils.c    27 May 2008 12:24:42 -0000    1.71
--- src/backend/utils/mb/mbutils.c    18 Jun 2008 18:37:02 -0000
***************
*** 555,560 ****
--- 555,688 ----
      return result;
  }

+
+
+ #ifdef USE_WIDE_UPPER_LOWER
+
+ /*
+  * wchar2char --- convert wide characters to multibyte format
+  *
+  * This has the same API as the standard wcstombs() function; in particular,
+  * tolen is the maximum number of bytes to store at *to, and *from must be
+  * zero-terminated.  The output will be zero-terminated iff there is room.
+  */
+ size_t
+ wchar2char(char *to, const wchar_t *from, size_t tolen)
+ {
+     size_t result;
+
+     if (tolen == 0)
+         return 0;
+
+ #ifdef WIN32
+     /*
+      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
+      * and for some reason mbstowcs and wcstombs won't do this for us, so we
+      * use WideCharToMultiByte() here and MultiByteToWideChar() in char2wchar().
+      */
+     if (GetDatabaseEncoding() == PG_UTF8)
+     {
+         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
+                                 NULL, NULL);
+         /* A zero return is failure */
+         if (result <= 0)
+             result = -1;
+         else
+         {
+             Assert(result <= tolen);
+             /* Microsoft counts the zero terminator in the result */
+             result--;
+         }
+     }
+     else
+ #endif   /* WIN32 */
+         result = wcstombs(to, from, tolen);
+     return result;
+ }
+
+ /*
+  * char2wchar --- convert multibyte characters to wide characters
+  *
+  * This has almost the API of mbstowcs(), except that *from need not be
+  * null-terminated; instead, the number of input bytes is specified as
+  * fromlen.  Also, we ereport() rather than returning -1 for invalid
+  * input encoding.    tolen is the maximum number of wchar_t's to store at *to.
+  * The output will be zero-terminated iff there is room.
+  */
+ size_t
+ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
+ {
+     size_t        result;
+
+     if (tolen == 0)
+         return 0;
+
+ #ifdef WIN32
+     /* See WIN32 "Unicode" comment above */
+     if (GetDatabaseEncoding() == PG_UTF8)
+     {
+         /* Win32 API does not work for zero-length input */
+         if (fromlen == 0)
+             result = 0;
+         else
+         {
+             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
+             /* A zero return is failure */
+             if (result == 0)
+                 result = -1;
+         }
+
+         if (result != -1)
+         {
+             Assert(result < tolen);
+             /* Append trailing null wchar (MultiByteToWideChar() does not) */
+             to[result] = 0;
+         }
+     }
+     else
+ #endif   /* WIN32 */
+     {
+         if (lc_ctype_is_c())
+         {
+             /*
+              * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
+              * allocated with sufficient space
+              */
+             result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
+         }
+         else
+         {
+             /* mbstowcs requires ending '\0' */
+             char       *str = pnstrdup(from, fromlen);
+
+             result = mbstowcs(to, str, tolen);
+             pfree(str);
+         }
+     }
+
+     if (result == -1)
+     {
+         /*
+          * Invalid multibyte character encountered.  We try to give a useful
+          * error message by letting pg_verifymbstr check the string.  But it's
+          * possible that the string is OK to us, and not OK to mbstowcs ---
+          * this suggests that the LC_CTYPE locale is different from the
+          * database encoding.  Give a generic error message if verifymbstr
+          * can't find anything wrong.
+          */
+         pg_verifymbstr(from, fromlen, false);    /* might not return */
+         /* but if it does ... */
+         ereport(ERROR,
+                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+                  errmsg("invalid multibyte character for locale"),
+                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+     }
+
+     return result;
+ }
+
+ #endif
+
  /* convert a multibyte string to a wchar */
  int
  pg_mb2wchar(const char *from, pg_wchar *to)
Index: src/backend/utils/mmgr/mcxt.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/mmgr/mcxt.c,v
retrieving revision 1.63
diff -c -c -r1.63 mcxt.c
*** src/backend/utils/mmgr/mcxt.c    1 Jan 2008 19:45:55 -0000    1.63
--- src/backend/utils/mmgr/mcxt.c    18 Jun 2008 18:37:05 -0000
***************
*** 624,629 ****
--- 624,641 ----
                                                   pointer, size);
  }

+ /* Like pstrdup(), but copies exactly len bytes and appends a null byte */
+ char *
+ pnstrdup(const char *in, int len)
+ {
+     char       *out = palloc(len + 1);
+
+     memcpy(out, in, len);
+     out[len] = '\0';
+     return out;
+ }
+
+
  /*
   * MemoryContextSwitchTo
   *        Returns the current context; installs the given context.
Index: src/include/mb/pg_wchar.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/mb/pg_wchar.h,v
retrieving revision 1.78
diff -c -c -r1.78 pg_wchar.h
*** src/include/mb/pg_wchar.h    1 Jan 2008 19:45:58 -0000    1.78
--- src/include/mb/pg_wchar.h    18 Jun 2008 18:37:05 -0000
***************
*** 362,367 ****
--- 362,372 ----
  extern int    pg_encoding_max_length(int encoding);
  extern int    pg_database_encoding_max_length(void);

+ #ifdef USE_WIDE_UPPER_LOWER
+ extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
+ extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
+ #endif
+
  extern void SetDefaultClientEncoding(void);
  extern int    SetClientEncoding(int encoding, bool doit);
  extern void InitializeClientEncoding(void);
Index: src/include/tsearch/ts_locale.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/tsearch/ts_locale.h,v
retrieving revision 1.6
diff -c -c -r1.6 ts_locale.h
*** src/include/tsearch/ts_locale.h    17 Jun 2008 16:09:06 -0000    1.6
--- src/include/tsearch/ts_locale.h    18 Jun 2008 18:37:05 -0000
***************
*** 33,41 ****

  #ifdef USE_WIDE_UPPER_LOWER

- extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
- extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
-
  extern int    t_isdigit(const char *ptr);
  extern int    t_isspace(const char *ptr);
  extern int    t_isalpha(const char *ptr);
--- 33,38 ----
Index: src/include/tsearch/ts_public.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/tsearch/ts_public.h,v
retrieving revision 1.9
diff -c -c -r1.9 ts_public.h
*** src/include/tsearch/ts_public.h    16 May 2008 16:31:02 -0000    1.9
--- src/include/tsearch/ts_public.h    18 Jun 2008 18:37:05 -0000
***************
*** 62,69 ****
  extern char *get_tsearch_config_filename(const char *basename,
                              const char *extension);

- extern char *pnstrdup(const char *in, int len);
-
  /*
   * Often useful stopword list management
   */
--- 62,67 ----
Index: src/include/utils/palloc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/palloc.h,v
retrieving revision 1.38
diff -c -c -r1.38 palloc.h
*** src/include/utils/palloc.h    1 Jan 2008 19:45:59 -0000    1.38
--- src/include/utils/palloc.h    18 Jun 2008 18:37:05 -0000
***************
*** 70,75 ****
--- 70,77 ----

  extern void *repalloc(void *pointer, Size size);

+ extern char *pnstrdup(const char *in, int len);
+
  /*
   * MemoryContextSwitchTo can't be a macro in standard C compilers.
   * But we can make it an inline function when using GCC.

pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Rewrite sinval messaging to reduce contention
Next
From: Simon Riggs
Date:
Subject: Re: [HACKERS] Hint Bits and Write I/O