Re: Simplify formatting.c - Mailing list pgsql-patches
From | Bruce Momjian |
---|---|
Subject | Re: Simplify formatting.c |
Date | |
Msg-id | 200806212006.m5LK6jX07815@momjian.us Whole thread Raw |
In response to | Re: Simplify formatting.c (Bruce Momjian <bruce@momjian.us>) |
Responses |
Re: Simplify formatting.c
|
List | pgsql-patches |
Bruce Momjian wrote: > Bruce Momjian wrote: > > Alvaro Herrera wrote: > > > Bruce Momjian wrote: > > > > > > > I moved str_initcap() over into oracle_compat.c and then had initcap() > > > > convert to/from TEXT to call it. The code is a little weird because > > > > str_initcap() needs to convert to text to use texttowcs(), so in > > > > multibyte encodings initcap converts the string to text, then to char, > > > > then to text to call texttowcs(). I didn't see a cleaner way to do > > > > this. > > > > > > Why not use wchar2char? It seems there's room for extra cleanup here. > > > > > > Also, the prototype of str_initcap in builtins.h looks out of place. > > > > I talked to Alvaro on IM, and there is certainly much more cleanup to do > > in this area. I will work from the bottom up. First, is moving the > > USE_WIDE_UPPER_LOWER define to c.h, and removing TS_USE_WIDE and using > > USE_WIDE_UPPER_LOWER instead. Patch attached and applied. > > The second step is to move wchar2char() and char2wchar() from tsearch > into /mb to be easier to use for other modules; also move pnstrdup(). The third step is for oracle_compat.c::initcap() to use formatting.c::str_initcap(). You can see the result; patch attached (not applied). This greatly reduces the size of initcap(), with the downside that we are making two extra copies of the string to convert it to/from char*. Is this acceptable? If it is, I will do the same for upper()/lower() with similar code size reduction and modularity. If not, perhaps I should keep the non-multibyte code in initcap() and have only the multi-byte use str_initcap(). -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + If your life is a hard drive, Christ can be your backup. 
+ Index: src/backend/utils/adt/formatting.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v retrieving revision 1.142 diff -c -c -r1.142 formatting.c *** src/backend/utils/adt/formatting.c 17 Jun 2008 16:09:06 -0000 1.142 --- src/backend/utils/adt/formatting.c 21 Jun 2008 20:00:45 -0000 *************** *** 1499,1526 **** if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { wchar_t *workspace; ! text *in_text; ! text *out_text; ! int i; ! in_text = cstring_to_text(buff); ! workspace = texttowcs(in_text); ! for (i = 0; workspace[i] != 0; i++) { if (wasalnum) ! workspace[i] = towlower(workspace[i]); else ! workspace[i] = towupper(workspace[i]); ! wasalnum = iswalnum(workspace[i]); } ! out_text = wcstotext(workspace, i); ! result = text_to_cstring(out_text); pfree(workspace); - pfree(in_text); - pfree(out_text); } else #endif /* USE_WIDE_UPPER_LOWER */ --- 1499,1525 ---- if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { wchar_t *workspace; ! int curr_char = 0; ! /* Output workspace cannot have more codes than input bytes */ ! workspace = (wchar_t *) palloc((strlen(buff) + 1) * sizeof(wchar_t)); ! char2wchar(workspace, strlen(buff) + 1, buff, strlen(buff) + 1); ! ! for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { if (wasalnum) ! workspace[curr_char] = towlower(workspace[curr_char]); else ! workspace[curr_char] = towupper(workspace[curr_char]); ! wasalnum = iswalnum(workspace[curr_char]); } ! /* Make result large enough; case change might change number of bytes */ ! 
result = palloc(curr_char * MB_CUR_MAX + 1); + wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); pfree(workspace); } else #endif /* USE_WIDE_UPPER_LOWER */ Index: src/backend/utils/adt/oracle_compat.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v retrieving revision 1.80 diff -c -c -r1.80 oracle_compat.c *** src/backend/utils/adt/oracle_compat.c 17 Jun 2008 16:09:06 -0000 1.80 --- src/backend/utils/adt/oracle_compat.c 21 Jun 2008 20:00:45 -0000 *************** *** 467,530 **** Datum initcap(PG_FUNCTION_ARGS) { ! #ifdef USE_WIDE_UPPER_LOWER ! /* ! * Use wide char code only when max encoding length > 1 and ctype != C. ! * Some operating systems fail with multi-byte encodings and a C locale. ! * Also, for a C locale there is no need to process as multibyte. ! */ ! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) ! { ! text *string = PG_GETARG_TEXT_PP(0); ! text *result; ! wchar_t *workspace; ! int wasalnum = 0; ! int i; ! ! workspace = texttowcs(string); ! ! for (i = 0; workspace[i] != 0; i++) ! { ! if (wasalnum) ! workspace[i] = towlower(workspace[i]); ! else ! workspace[i] = towupper(workspace[i]); ! wasalnum = iswalnum(workspace[i]); ! } ! ! result = wcstotext(workspace, i); ! ! pfree(workspace); ! ! PG_RETURN_TEXT_P(result); ! } ! else ! #endif /* USE_WIDE_UPPER_LOWER */ ! { ! text *string = PG_GETARG_TEXT_P_COPY(0); ! int wasalnum = 0; ! char *ptr; ! int m; ! ! /* ! * Since we copied the string, we can scribble directly on the value ! */ ! ptr = VARDATA(string); ! m = VARSIZE(string) - VARHDRSZ; ! while (m-- > 0) ! { ! if (wasalnum) ! *ptr = tolower((unsigned char) *ptr); ! else ! *ptr = toupper((unsigned char) *ptr); ! wasalnum = isalnum((unsigned char) *ptr); ! ptr++; ! } ! ! PG_RETURN_TEXT_P(string); ! } } --- 467,482 ---- Datum initcap(PG_FUNCTION_ARGS) { ! char *in_string, *out_string; ! text *result; ! 
in_string = text_to_cstring(PG_GETARG_TEXT_PP(0)); ! out_string = str_initcap(in_string); ! pfree(in_string); ! result = cstring_to_text(out_string); ! pfree(out_string); ! PG_RETURN_TEXT_P(result); }
pgsql-patches by date: