Re: Collation rules and multi-lingual databases - Mailing list pgsql-hackers

From Joe Conway
Subject Re: Collation rules and multi-lingual databases
Date
Msg-id 3F47925F.1040305@joeconway.com
Whole thread Raw
In response to Re: Collation rules and multi-lingual databases  (Greg Stark <gsstark@mit.edu>)
Responses Re: Collation rules and multi-lingual databases
List pgsql-hackers
Greg Stark wrote:
> Yeah I thought of that. But if making it a critical section is cheap then it's
> probably a better approach. The problem with restoring the locale for the
> palloc is that if the user is unlucky he might sort a table of thousands of
> strings that all trigger the exception case.
> 

What about something like this?
8<--------------------------------

#include "postgres.h"

#include <locale.h>
#include <setjmp.h>
#include <string.h>

#include "fmgr.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"

/* Run a text value through textout and hand back the resulting C string. */
#define GET_STR(textp) \
	DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))

/*
 * Run a C string through byteain and hand back the resulting bytea pointer.
 * Note that byteain parses its input, so this is only appropriate for
 * strings that are already in bytea input syntax.
 */
#define GET_BYTEA(str_) \
	DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))

/* Upper bound on the size of a transformed string we are willing to build. */
#define MAX_BYTEA_LEN	0x3fffffff

/* * pg_strxfrm - Function to convert string similar to the strxfrm C * function using a specified locale. */
extern Datum pg_strxfrm(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(pg_strxfrm);

Datum
pg_strxfrm(PG_FUNCTION_ARGS)
{  char       *str = GET_STR(PG_GETARG_TEXT_P(0));  size_t      str_len = strlen(str);  char       *localestr =
GET_STR(PG_GETARG_TEXT_P(1)); size_t      approx_trans_len = 4 + (str_len * 3);  char       *trans = (char *)
palloc(approx_trans_len+ 1);  size_t      actual_trans_len;  char       *oldlocale;  char       *newlocale;  sigjmp_buf
save_restart;
 
  if (approx_trans_len > MAX_BYTEA_LEN)    elog(ERROR, "source string too long to transform");
  oldlocale = setlocale(LC_COLLATE, NULL);  if (!oldlocale)    elog(ERROR, "setlocale failed to return a locale");
  /* catch elog while locale is set other than the default */  memcpy(&save_restart, &Warn_restart,
sizeof(save_restart)); if (sigsetjmp(Warn_restart, 1) != 0)  {    memcpy(&Warn_restart, &save_restart,
sizeof(Warn_restart));   newlocale = setlocale(LC_COLLATE, oldlocale);    if (!newlocale)      elog(PANIC, "setlocale
failedto reset locale: %s", localestr);    siglongjmp(Warn_restart, 1);  }
 
  newlocale = setlocale(LC_COLLATE, localestr);  if (!newlocale)    elog(ERROR, "setlocale failed to set a locale: %s",
localestr);
  actual_trans_len = strxfrm(trans, str, approx_trans_len + 1);
  /* if the buffer was not large enough, resize it and try again */  if (actual_trans_len >= approx_trans_len)  {
approx_trans_len= actual_trans_len + 1;    if (approx_trans_len > MAX_BYTEA_LEN)      elog(ERROR, "source string too
longto transform");
 
    trans = (char *) repalloc(trans, approx_trans_len + 1);    actual_trans_len = strxfrm(trans, str, approx_trans_len
+1);
 
    /* if the buffer still not large enough, punt */    if (actual_trans_len >= approx_trans_len)      elog(ERROR,
"strxfrmfailed, buffer insufficient");  }
 
  newlocale = setlocale(LC_COLLATE, oldlocale);  if (!newlocale)    elog(PANIC, "setlocale failed to reset locale: %s",
localestr);
  PG_RETURN_BYTEA_P(GET_BYTEA(trans));
}

8<--------------------------------

Joe



pgsql-hackers by date:

Previous
From: Stephan Szabo
Date:
Subject: Re: Collation rules and multi-lingual databases
Next
From: Andrew Dunstan
Date:
Subject: sequence generator for sysids