unknownin/out patch (was [HACKERS] PQescapeBytea is not multibyte aware) - Mailing list pgsql-patches

From Joe Conway
Subject unknownin/out patch (was [HACKERS] PQescapeBytea is not multibyte aware)
Date
Msg-id 3CB0D3E1.4010508@joeconway.com
Whole thread Raw
Responses Re: unknownin/out patch (was [HACKERS] PQescapeBytea is not multibyte aware)  (Tom Lane <tgl@sss.pgh.pa.us>)
Re: unknownin/out patch (was [HACKERS] PQescapeBytea is not  (Bruce Momjian <pgman@candle.pha.pa.us>)
Re: unknownin/out patch (was [HACKERS] PQescapeBytea is not  (Bruce Momjian <pgman@candle.pha.pa.us>)
List pgsql-patches
Tom Lane wrote:
> Joe Conway <mail@joeconway.com> writes:
>
>>I think you're correct that in a client/database encoding mismatch
>>scenario, there would be bigger problems. Thoughts on this?
>
>
> This scenario is probably why Tatsuo wants PQescapeBytea to octalize
> everything with the high bit set; I'm not sure there's any lesser way
> out.  Nonetheless, if UNKNOWN conversion introduces additional failures
> then it makes sense to fix that.
>
>             regards, tom lane
>

Here's a patch to add unknownin/unknownout support. I also poked around
looking for places that assume UNKNOWN == TEXT. One of those was the
"SET" type in pg_type.h, which was using textin/textout. This one I took
care of in this patch. The other suspicious place was in
string_to_datum (which is defined in both selfuncs.c and indxpath.c). I
wasn't too sure about those, so I left them be.

Regression tests all pass with the exception of horology, which also
fails on CVS tip. It looks like that is a daylight saving time issue
though.

Also as a side note, I can't get make check to get past initdb if I
configure with --enable-multibyte on CVS tip. Is there a known problem
or am I just being clueless... wait, let's qualify that -- am I being
clueless on this one issue? ;-)

Joe
diff -Ncr pgsql.orig/src/backend/utils/adt/varlena.c pgsql/src/backend/utils/adt/varlena.c
*** pgsql.orig/src/backend/utils/adt/varlena.c    Sun Apr  7 10:21:25 2002
--- pgsql/src/backend/utils/adt/varlena.c    Sun Apr  7 11:44:54 2002
***************
*** 228,233 ****
--- 228,273 ----
  }


+ /*
+  *        unknownin            - converts "..." to internal representation
+  */
+ Datum
+ unknownin(PG_FUNCTION_ARGS)
+ {
+     char       *inputStr = PG_GETARG_CSTRING(0);
+     unknown       *result;
+     int            len;
+
+     len = strlen(inputStr) + VARHDRSZ;
+
+     result = (unknown *) palloc(len);
+     VARATT_SIZEP(result) = len;
+
+     memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
+
+     PG_RETURN_UNKNOWN_P(result);
+ }
+
+
+ /*
+  *        unknownout            - converts internal representation to "..."
+  */
+ Datum
+ unknownout(PG_FUNCTION_ARGS)
+ {
+     unknown       *t = PG_GETARG_UNKNOWN_P(0);
+     int            len;
+     char       *result;
+
+     len = VARSIZE(t) - VARHDRSZ;
+     result = (char *) palloc(len + 1);
+     memcpy(result, VARDATA(t), len);
+     result[len] = '\0';
+
+     PG_RETURN_CSTRING(result);
+ }
+
+
  /* ========== PUBLIC ROUTINES ========== */

  /*
diff -Ncr pgsql.orig/src/include/c.h pgsql/src/include/c.h
*** pgsql.orig/src/include/c.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/c.h    Sun Apr  7 11:40:59 2002
***************
*** 389,394 ****
--- 389,395 ----
   */
  typedef struct varlena bytea;
  typedef struct varlena text;
+ typedef struct varlena unknown;
  typedef struct varlena BpChar;    /* blank-padded char, ie SQL char(n) */
  typedef struct varlena VarChar; /* var-length char, ie SQL varchar(n) */

diff -Ncr pgsql.orig/src/include/catalog/pg_proc.h pgsql/src/include/catalog/pg_proc.h
*** pgsql.orig/src/include/catalog/pg_proc.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/catalog/pg_proc.h    Sun Apr  7 11:56:09 2002
***************
*** 235,240 ****
--- 235,245 ----
  DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 f t t s 3 f 701 "0 26 0" 100 0 0 100  scalargtjoinsel -
_null_)); 
  DESCR("join selectivity of > and related operators on scalar datatypes");

+ DATA(insert OID =  109 (  unknownin               PGNSP PGUID 12 f t t i 1 f 705 "0" 100 0 0 100    unknownin -
_null_)); 
+ DESCR("(internal)");
+ DATA(insert OID =  110 (  unknownout           PGNSP PGUID 12 f t t i 1 f 23  "0" 100 0 0 100    unknownout - _null_
));
+ DESCR("(internal)");
+
  DATA(insert OID = 112 (  text               PGNSP PGUID 12 f t t i 1 f  25 "23" 100 0 0 100  int4_text - _null_ ));
  DESCR("convert int4 to text");
  DATA(insert OID = 113 (  text               PGNSP PGUID 12 f t t i 1 f  25 "21" 100 0 0 100  int2_text - _null_ ));
diff -Ncr pgsql.orig/src/include/catalog/pg_type.h pgsql/src/include/catalog/pg_type.h
*** pgsql.orig/src/include/catalog/pg_type.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/catalog/pg_type.h    Sun Apr  7 11:57:36 2002
***************
*** 302,308 ****
  DESCR("array of INDEX_MAX_KEYS oids, used in system tables");
  #define OIDVECTOROID    30

! DATA(insert OID = 32 (    SET           PGNSP PGUID -1  -1 f b t \054 0   0 textin textout textin textout i p f 0 -1
0_null_ _null_ )); 
  DESCR("set of tuples");

  DATA(insert OID = 71 (    pg_type         PGNSP PGUID 4 4 t c t \054 1247 0 int4in int4out int4in int4out i p f 0 -1
0_null_ _null_ )); 
--- 302,308 ----
  DESCR("array of INDEX_MAX_KEYS oids, used in system tables");
  #define OIDVECTOROID    30

! DATA(insert OID = 32 (    SET           PGNSP PGUID -1  -1 f b t \054 0   0 unknownin unknownout unknownin unknownout
ip f 0 -1 0 _null_ _null_ )); 
  DESCR("set of tuples");

  DATA(insert OID = 71 (    pg_type         PGNSP PGUID 4 4 t c t \054 1247 0 int4in int4out int4in int4out i p f 0 -1
0_null_ _null_ )); 
***************
*** 366,372 ****
  DATA(insert OID = 704 (  tinterval PGNSP PGUID 12  47 f b t \054 0   0 tintervalin tintervalout tintervalin
tintervalouti p f 0 -1 0 _null_ _null_ )); 
  DESCR("(abstime,abstime), time interval");
  #define TINTERVALOID    704
! DATA(insert OID = 705 (  unknown   PGNSP PGUID -1  -1 f b t \054 0   0 textin textout textin textout i p f 0 -1 0
_null__null_ )); 
  DESCR("");
  #define UNKNOWNOID        705

--- 366,372 ----
  DATA(insert OID = 704 (  tinterval PGNSP PGUID 12  47 f b t \054 0   0 tintervalin tintervalout tintervalin
tintervalouti p f 0 -1 0 _null_ _null_ )); 
  DESCR("(abstime,abstime), time interval");
  #define TINTERVALOID    704
! DATA(insert OID = 705 (  unknown   PGNSP PGUID -1  -1 f b t \054 0   0 unknownin unknownout unknownin unknownout i p
f0 -1 0 _null_ _null_ )); 
  DESCR("");
  #define UNKNOWNOID        705

diff -Ncr pgsql.orig/src/include/fmgr.h pgsql/src/include/fmgr.h
*** pgsql.orig/src/include/fmgr.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/fmgr.h    Sun Apr  7 12:11:30 2002
***************
*** 185,190 ****
--- 185,191 ----
  /* DatumGetFoo macros for varlena types will typically look like this: */
  #define DatumGetByteaP(X)            ((bytea *) PG_DETOAST_DATUM(X))
  #define DatumGetTextP(X)            ((text *) PG_DETOAST_DATUM(X))
+ #define DatumGetUnknownP(X)            ((unknown *) PG_DETOAST_DATUM(X))
  #define DatumGetBpCharP(X)            ((BpChar *) PG_DETOAST_DATUM(X))
  #define DatumGetVarCharP(X)            ((VarChar *) PG_DETOAST_DATUM(X))
  /* And we also offer variants that return an OK-to-write copy */
***************
*** 200,205 ****
--- 201,207 ----
  /* GETARG macros for varlena types will typically look like this: */
  #define PG_GETARG_BYTEA_P(n)        DatumGetByteaP(PG_GETARG_DATUM(n))
  #define PG_GETARG_TEXT_P(n)            DatumGetTextP(PG_GETARG_DATUM(n))
+ #define PG_GETARG_UNKNOWN_P(n)        DatumGetUnknownP(PG_GETARG_DATUM(n))
  #define PG_GETARG_BPCHAR_P(n)        DatumGetBpCharP(PG_GETARG_DATUM(n))
  #define PG_GETARG_VARCHAR_P(n)        DatumGetVarCharP(PG_GETARG_DATUM(n))
  /* And we also offer variants that return an OK-to-write copy */
***************
*** 239,244 ****
--- 241,247 ----
  /* RETURN macros for other pass-by-ref types will typically look like this: */
  #define PG_RETURN_BYTEA_P(x)   PG_RETURN_POINTER(x)
  #define PG_RETURN_TEXT_P(x)    PG_RETURN_POINTER(x)
+ #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
  #define PG_RETURN_BPCHAR_P(x)  PG_RETURN_POINTER(x)
  #define PG_RETURN_VARCHAR_P(x) PG_RETURN_POINTER(x)

diff -Ncr pgsql.orig/src/include/utils/builtins.h pgsql/src/include/utils/builtins.h
*** pgsql.orig/src/include/utils/builtins.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/utils/builtins.h    Sun Apr  7 12:26:17 2002
***************
*** 414,419 ****
--- 414,422 ----
  extern bool SplitIdentifierString(char *rawstring, char separator,
                                    List **namelist);

+ extern Datum unknownin(PG_FUNCTION_ARGS);
+ extern Datum unknownout(PG_FUNCTION_ARGS);
+
  extern Datum byteain(PG_FUNCTION_ARGS);
  extern Datum byteaout(PG_FUNCTION_ARGS);
  extern Datum byteaoctetlen(PG_FUNCTION_ARGS);

pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Re: Completed pg_depend support
Next
From: Joe Conway
Date:
Subject: Re: unknownin/out patch (was [HACKERS] PQescapeBytea is