Thread: unknownin/out patch (was [HACKERS] PQescapeBytea is not multibyte aware)

unknownin/out patch (was [HACKERS] PQescapeBytea is not multibyte aware)

From
Joe Conway
Date:
Tom Lane wrote:
> Joe Conway <mail@joeconway.com> writes:
>
>>I think you're correct that in a client/database encoding mismatch
>>scenario, there would be bigger problems. Thoughts on this?
>
>
> This scenario is probably why Tatsuo wants PQescapeBytea to octalize
> everything with the high bit set; I'm not sure there's any lesser way
> out.  Nonetheless, if UNKNOWN conversion introduces additional failures
> then it makes sense to fix that.
>
>             regards, tom lane
>

Here's a patch to add unknownin/unknownout support. I also poked around
looking for places that assume UNKNOWN == TEXT. One of those was the
"SET" type in pg_type.h, which was using textin/textout. This one I took
care of in this patch. The other suspicious place was in
string_to_dataum (which is defined in both selfuncs.c and indxpath.c). I
wasn't too sure about those, so I left them be.

Regression tests all pass with the exception of horology, which also
fails on CVS tip. It looks like that is a daylight savings time issue
though.

Also as a side note, I can't get make check to get past initdb if I
configure with --enable-multibyte on CVS tip. Is there a known problem
or am I just being clueless . . .wait, let's qualify that -- am I being
clueless on this one issue? ;-)

Joe
diff -Ncr pgsql.orig/src/backend/utils/adt/varlena.c pgsql/src/backend/utils/adt/varlena.c
*** pgsql.orig/src/backend/utils/adt/varlena.c    Sun Apr  7 10:21:25 2002
--- pgsql/src/backend/utils/adt/varlena.c    Sun Apr  7 11:44:54 2002
***************
*** 228,233 ****
--- 228,273 ----
  }


+ /*
+  *        unknownin            - converts "..." to internal representation
+  */
+ Datum
+ unknownin(PG_FUNCTION_ARGS)
+ {
+     char       *inputStr = PG_GETARG_CSTRING(0);
+     unknown       *result;
+     int            len;
+
+     len = strlen(inputStr) + VARHDRSZ;
+
+     result = (unknown *) palloc(len);
+     VARATT_SIZEP(result) = len;
+
+     memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
+
+     PG_RETURN_UNKNOWN_P(result);
+ }
+
+
+ /*
+  *        unknownout            - converts internal representation to "..."
+  */
+ Datum
+ unknownout(PG_FUNCTION_ARGS)
+ {
+     unknown       *t = PG_GETARG_UNKNOWN_P(0);
+     int            len;
+     char       *result;
+
+     len = VARSIZE(t) - VARHDRSZ;
+     result = (char *) palloc(len + 1);
+     memcpy(result, VARDATA(t), len);
+     result[len] = '\0';
+
+     PG_RETURN_CSTRING(result);
+ }
+
+
  /* ========== PUBLIC ROUTINES ========== */

  /*
diff -Ncr pgsql.orig/src/include/c.h pgsql/src/include/c.h
*** pgsql.orig/src/include/c.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/c.h    Sun Apr  7 11:40:59 2002
***************
*** 389,394 ****
--- 389,395 ----
   */
  typedef struct varlena bytea;
  typedef struct varlena text;
+ typedef struct varlena unknown;
  typedef struct varlena BpChar;    /* blank-padded char, ie SQL char(n) */
  typedef struct varlena VarChar; /* var-length char, ie SQL varchar(n) */

diff -Ncr pgsql.orig/src/include/catalog/pg_proc.h pgsql/src/include/catalog/pg_proc.h
*** pgsql.orig/src/include/catalog/pg_proc.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/catalog/pg_proc.h    Sun Apr  7 11:56:09 2002
***************
*** 235,240 ****
--- 235,245 ----
  DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 f t t s 3 f 701 "0 26 0" 100 0 0 100  scalargtjoinsel -
_null_)); 
  DESCR("join selectivity of > and related operators on scalar datatypes");

+ DATA(insert OID =  109 (  unknownin               PGNSP PGUID 12 f t t i 1 f 705 "0" 100 0 0 100    unknownin -
_null_)); 
+ DESCR("(internal)");
+ DATA(insert OID =  110 (  unknownout           PGNSP PGUID 12 f t t i 1 f 23  "0" 100 0 0 100    unknownout - _null_
));
+ DESCR("(internal)");
+
  DATA(insert OID = 112 (  text               PGNSP PGUID 12 f t t i 1 f  25 "23" 100 0 0 100  int4_text - _null_ ));
  DESCR("convert int4 to text");
  DATA(insert OID = 113 (  text               PGNSP PGUID 12 f t t i 1 f  25 "21" 100 0 0 100  int2_text - _null_ ));
diff -Ncr pgsql.orig/src/include/catalog/pg_type.h pgsql/src/include/catalog/pg_type.h
*** pgsql.orig/src/include/catalog/pg_type.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/catalog/pg_type.h    Sun Apr  7 11:57:36 2002
***************
*** 302,308 ****
  DESCR("array of INDEX_MAX_KEYS oids, used in system tables");
  #define OIDVECTOROID    30

! DATA(insert OID = 32 (    SET           PGNSP PGUID -1  -1 f b t \054 0   0 textin textout textin textout i p f 0 -1
0_null_ _null_ )); 
  DESCR("set of tuples");

  DATA(insert OID = 71 (    pg_type         PGNSP PGUID 4 4 t c t \054 1247 0 int4in int4out int4in int4out i p f 0 -1
0_null_ _null_ )); 
--- 302,308 ----
  DESCR("array of INDEX_MAX_KEYS oids, used in system tables");
  #define OIDVECTOROID    30

! DATA(insert OID = 32 (    SET           PGNSP PGUID -1  -1 f b t \054 0   0 unknownin unknownout unknownin unknownout
ip f 0 -1 0 _null_ _null_ )); 
  DESCR("set of tuples");

  DATA(insert OID = 71 (    pg_type         PGNSP PGUID 4 4 t c t \054 1247 0 int4in int4out int4in int4out i p f 0 -1
0_null_ _null_ )); 
***************
*** 366,372 ****
  DATA(insert OID = 704 (  tinterval PGNSP PGUID 12  47 f b t \054 0   0 tintervalin tintervalout tintervalin
tintervalouti p f 0 -1 0 _null_ _null_ )); 
  DESCR("(abstime,abstime), time interval");
  #define TINTERVALOID    704
! DATA(insert OID = 705 (  unknown   PGNSP PGUID -1  -1 f b t \054 0   0 textin textout textin textout i p f 0 -1 0
_null__null_ )); 
  DESCR("");
  #define UNKNOWNOID        705

--- 366,372 ----
  DATA(insert OID = 704 (  tinterval PGNSP PGUID 12  47 f b t \054 0   0 tintervalin tintervalout tintervalin
tintervalouti p f 0 -1 0 _null_ _null_ )); 
  DESCR("(abstime,abstime), time interval");
  #define TINTERVALOID    704
! DATA(insert OID = 705 (  unknown   PGNSP PGUID -1  -1 f b t \054 0   0 unknownin unknownout unknownin unknownout i p
f0 -1 0 _null_ _null_ )); 
  DESCR("");
  #define UNKNOWNOID        705

diff -Ncr pgsql.orig/src/include/fmgr.h pgsql/src/include/fmgr.h
*** pgsql.orig/src/include/fmgr.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/fmgr.h    Sun Apr  7 12:11:30 2002
***************
*** 185,190 ****
--- 185,191 ----
  /* DatumGetFoo macros for varlena types will typically look like this: */
  #define DatumGetByteaP(X)            ((bytea *) PG_DETOAST_DATUM(X))
  #define DatumGetTextP(X)            ((text *) PG_DETOAST_DATUM(X))
+ #define DatumGetUnknownP(X)            ((unknown *) PG_DETOAST_DATUM(X))
  #define DatumGetBpCharP(X)            ((BpChar *) PG_DETOAST_DATUM(X))
  #define DatumGetVarCharP(X)            ((VarChar *) PG_DETOAST_DATUM(X))
  /* And we also offer variants that return an OK-to-write copy */
***************
*** 200,205 ****
--- 201,207 ----
  /* GETARG macros for varlena types will typically look like this: */
  #define PG_GETARG_BYTEA_P(n)        DatumGetByteaP(PG_GETARG_DATUM(n))
  #define PG_GETARG_TEXT_P(n)            DatumGetTextP(PG_GETARG_DATUM(n))
+ #define PG_GETARG_UNKNOWN_P(n)        DatumGetUnknownP(PG_GETARG_DATUM(n))
  #define PG_GETARG_BPCHAR_P(n)        DatumGetBpCharP(PG_GETARG_DATUM(n))
  #define PG_GETARG_VARCHAR_P(n)        DatumGetVarCharP(PG_GETARG_DATUM(n))
  /* And we also offer variants that return an OK-to-write copy */
***************
*** 239,244 ****
--- 241,247 ----
  /* RETURN macros for other pass-by-ref types will typically look like this: */
  #define PG_RETURN_BYTEA_P(x)   PG_RETURN_POINTER(x)
  #define PG_RETURN_TEXT_P(x)    PG_RETURN_POINTER(x)
+ #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
  #define PG_RETURN_BPCHAR_P(x)  PG_RETURN_POINTER(x)
  #define PG_RETURN_VARCHAR_P(x) PG_RETURN_POINTER(x)

diff -Ncr pgsql.orig/src/include/utils/builtins.h pgsql/src/include/utils/builtins.h
*** pgsql.orig/src/include/utils/builtins.h    Sun Apr  7 10:21:29 2002
--- pgsql/src/include/utils/builtins.h    Sun Apr  7 12:26:17 2002
***************
*** 414,419 ****
--- 414,422 ----
  extern bool SplitIdentifierString(char *rawstring, char separator,
                                    List **namelist);

+ extern Datum unknownin(PG_FUNCTION_ARGS);
+ extern Datum unknownout(PG_FUNCTION_ARGS);
+
  extern Datum byteain(PG_FUNCTION_ARGS);
  extern Datum byteaout(PG_FUNCTION_ARGS);
  extern Datum byteaoctetlen(PG_FUNCTION_ARGS);

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Joe Conway
Date:
Joe Conway wrote:
> Here's a patch to add unknownin/unknownout support. I also poked around
> looking for places that assume UNKNOWN == TEXT. One of those was the
> "SET" type in pg_type.h, which was using textin/textout. This one I took
> care of in this patch. The other suspicious place was in
> string_to_dataum (which is defined in both selfuncs.c and indxpath.c). I
> wasn't too sure about those, so I left them be.
>

I found three other suspicious spots in the source, where UNKNOWN ==
TEXT is assumed. The first looks like it needs to be changed for sure,
the other two I'm less sure about. Feedback would be most appreciated
(on this and the patch itself).

(line numbers based on CVS from earlier today)
parse_node.c
line 428
parse_coerce.c
line 85
parse_coerce.c
line 403

Joe



Joe Conway <mail@joeconway.com> writes:
> Regression tests all pass with the exception of horology, which also
> fails on CVS tip. It looks like that is a daylight savings time issue
> though.

Yup, ye olde DST-transition-makes-for-funny-day-length issue.  This is
mentioned in the docs at
http://www.ca.postgresql.org/users-lounge/docs/7.2/postgres/regress-evaluation.html#AEN18363
although I see the troublesome tests are now in horology not timestamp.
(Docs fixed...)

> Also as a side note, I can't get make check to get past initdb if I
> configure with --enable-multibyte on CVS tip. Is there a known problem

News to me --- anyone else seeing that?

            regards, tom lane

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is not multibyte aware)

From
"Christopher Kings-Lynne"
Date:
> Yup, ye olde DST-transition-makes-for-funny-day-length issue.  This is
> mentioned in the docs at
> http://www.ca.postgresql.org/users-lounge/docs/7.2/postgres/regres
> s-evaluation.html#AEN18363
> although I see the troublesome tests are now in horology not timestamp.
> (Docs fixed...)
>
> > Also as a side note, I can't get make check to get past initdb if I
> > configure with --enable-multibyte on CVS tip. Is there a known problem
>
> News to me --- anyone else seeing that?

I get initdb failures all the time when building CVS.  You need to gmake
clean to fix some things.  Try doing a gmake clean && gmake check

Chris


>> Also as a side note, I can't get make check to get past initdb if I
>> configure with --enable-multibyte on CVS tip. Is there a known problem

> News to me --- anyone else seeing that?

FWIW, CVS tip with --enable-multibyte builds and passes regression tests
here (modulo the horology thing).  I concur with Chris' suggestion that
you may not have done a clean reconfiguration.  If you're not using
--enable-depend then a "make clean" is certainly needed.

            regards, tom lane

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is not

From
Tatsuo Ishii
Date:
> >> Also as a side note, I can't get make check to get past initdb if I
> >> configure with --enable-multibyte on CVS tip. Is there a known problem
>
> > News to me --- anyone else seeing that?
>
> FWIW, CVS tip with --enable-multibyte builds and passes regression tests
> here (modulo the horology thing).  I concur with Chris' suggestion that
> you may not have done a clean reconfiguration.  If you're not using
> --enable-depend then a "make clean" is certainly needed.

Try a multibyte encoding database. For example,

$ createdb -E EUC_JP test
$ psql -c 'SELECT SUBSTRING('1234567890' FROM 3)' test
 substring
-----------
 3456
(1 row)

Apparently this is wrong.
--
Tatsuo Ishii

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Joe Conway
Date:
Tom Lane wrote:
> FWIW, CVS tip with --enable-multibyte builds and passes regression tests
> here (modulo the horology thing).  I concur with Chris' suggestion that
> you may not have done a clean reconfiguration.  If you're not using
> --enable-depend then a "make clean" is certainly needed.
>


--enable-depend did the trick.

Patch now passes all tests but horology with:

./configure --enable-locale  --enable-debug --enable-cassert
--enable-multibyte --enable-syslog --enable-nls --enable-depend

Thanks!

Joe


Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Joe Conway
Date:
Tatsuo Ishii wrote:
>
>
> Try a multibyte encoding database. For example,
>
> $ createdb -E EUC_JP test
> $ psql -c 'SELECT SUBSTRING('1234567890' FROM 3)' test
>  substring
> -----------
>  3456
> (1 row)
>
> Apparently this is wrong.
> --
> Tatsuo Ishii

This problem exists in CVS tip *without* the unknownin/out patch:

# psql -U postgres testjp
Welcome to psql, the PostgreSQL interactive terminal.

Type:  \copyright for distribution terms
        \h for help with SQL commands
        \? for help on internal slash commands
        \g or terminate with semicolon to execute query
        \q to quit

testjp=# SELECT SUBSTRING('1234567890' FROM 3);
  substring
-----------
  3456
(1 row)

testjp=# select * from pg_type where typname = 'unknown';
  typname | typnamespace | typowner | typlen | typprtlen | typbyval |
typtype | typisdefined | typdelim | typrelid | typelem | typinput |
typoutput | typreceive | typsend | typalign | typstorage | typnotnull |
typbasetype | typtypmod | typndims | typdefaultbin | typdefault

---------+--------------+----------+--------+-----------+----------+---------+--------------+----------+----------+---------+----------+-----------+------------+---------+----------+------------+------------+-------------+-----------+----------+---------------+------------
  unknown |           11 |        1 |     -1 |        -1 | f        | b
       | t            | ,        |        0 |       0 | textin   |
textout   | textin     | textout | i        | p          | f          |
           0 |        -1 |        0 |               |
(1 row)

This is built from source with:
#define CATALOG_VERSION_NO      200204031

./configure --enable-locale  --enable-debug --enable-cassert
--enable-multibyte --enable-syslog --enable-nls --enable-depend

Joe



Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Tatsuo Ishii
Date:
> Tatsuo Ishii wrote:
> >
> >
> > Try a multibyte encoding database. For example,
> >
> > $ createdb -E EUC_JP test
> > $ psql -c 'SELECT SUBSTRING('1234567890' FROM 3)' test
> >  substring
> > -----------
> >  3456
> > (1 row)
> >
> > Apparently this is wrong.
> > --
> > Tatsuo Ishii
>
> This problem exists in CVS tip *without* the unknownin/out patch:

Sure. That has been broken for a while.
--
Tatsuo Ishii


Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Tatsuo Ishii
Date:
> > Tatsuo Ishii wrote:
> > >
> > >
> > > Try a multibyte encoding database. For example,
> > >
> > > $ createdb -E EUC_JP test
> > > $ psql -c 'SELECT SUBSTRING('1234567890' FROM 3)' test
> > >  substring
> > > -----------
> > >  3456
> > > (1 row)
> > >
> > > Apparently this is wrong.
> > > --
> > > Tatsuo Ishii
> >
> > This problem exists in CVS tip *without* the unknownin/out patch:
>
> Sure. That has been broken for a while.

I guess this actually happened in 1.79 of varlena.c:

---------------------------------------------------------------------------
revision 1.79
date: 2002/03/05 05:33:19;  author: momjian;  state: Exp;  lines: +45 -42
I attach a version of my toast-slicing patch, against current CVS
(current as of a few hours ago.)

This patch:

1. Adds PG_GETARG_xxx_P_SLICE() macros and associated support routines.

2. Adds routines in src/backend/access/tuptoaster.c for fetching only
necessary chunks of a toasted value. (Modelled on latest changes to
assume chunks are returned in order).

3. Amends text_substr and bytea_substr to use new methods. It now
handles multibyte cases -and should still lead to a performance
improvement in the multibyte case where the substring is near the
beginning of the string.

4. Added new command: ALTER TABLE tabname ALTER COLUMN colname SET
STORAGE {PLAIN | EXTERNAL | EXTENDED | MAIN} to parser and documented in
alter-table.sgml. (NB I used ColId as the item type for the storage
mode string, rather than a new production - I hope this makes sense!).
All this does is sets attstorage for the specified column.

4. AlterTableAlterColumnStatistics is now AlterTableAlterColumnFlags and
handles both statistics and storage (it uses the subtype code to
distinguish). The previous version of my patch also re-arranged other
code in backend/commands/command.c but I have dropped that from this
patch.(I plan to return to it separately).

5. Documented new macros (and also the PG_GETARG_xxx_P_COPY macros) in
xfunc.sgml. ref/alter_table.sgml also contains documentation for ALTER
COLUMN SET STORAGE.

John Gray
---------------------------------------------------------------------------

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Joe Conway
Date:
Tatsuo Ishii wrote:
 >>> Tatsuo Ishii wrote:
 >>>
 >>>>
 >>>> Try a multibyte encoding database. For example,
 >>>>
 >>>> $ createdb -E EUC_JP test $ psql -c 'SELECT
 >>>> SUBSTRING('1234567890' FROM 3)' test substring ----------- 3456
 >>>>

 >>>> (1 row)
 >>>>
 >>>> Apparently this is wrong. -- Tatsuo Ishii
 >>>
 >>> This problem exists in CVS tip *without* the unknownin/out
 >>> patch:
 >>
 >> Sure. That has been broken for a while.
 >
 >
 > I guess this actually happened in 1.79 of varlena.c:
 >
Yes, I was just looking at that also. It doesn't consider the case of n
= -1 for MB. See the lines:

#ifdef MULTIBYTE
    eml = pg_database_encoding_max_length ();

    if (eml > 1)
    {
       sm = 0;
       sn = (m + n) * eml + 3;
    }
#endif

When n = -1 this does the wrong thing. And also a few lines later:

#ifdef MULTIBYTE
    len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);

I think both places need to test for n = -1. Do you agree?


Joe


Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Joe Conway
Date:
Joe Conway wrote:
> Tatsuo Ishii wrote:
>  >>> Tatsuo Ishii wrote:
>  >>>
>  >>>>
>  >>>> Try a multibyte encoding database. For example,
>  >>>>
>  >>>> $ createdb -E EUC_JP test $ psql -c 'SELECT
>  >>>> SUBSTRING('1234567890' FROM 3)' test substring ----------- 3456
>  >>>>
>
>  >>>> (1 row)
>  >>>>
>  >>>> Apparently this is wrong. -- Tatsuo Ishii
>  >>>
>  >>> This problem exists in CVS tip *without* the unknownin/out
>  >>> patch:
>  >>
>  >> Sure. That has been broken for a while.
>  >
>  >
>  > I guess this actually happened in 1.79 of varlena.c:
>  >
> Yes, I was just looking at that also. It doesn't consider the case of n
> = -1 for MB. See the lines:
>
> #ifdef MULTIBYTE
>    eml = pg_database_encoding_max_length ();
>
>    if (eml > 1)
>    {
>       sm = 0;
>       sn = (m + n) * eml + 3;
>    }
> #endif
>
> When n = -1 this does the wrong thing. And also a few lines later:
>
> #ifdef MULTIBYTE
>    len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);
>
> I think both places need to test for n = -1. Do you agree?
>
>
> Joe
>

The attached patch should fix the bug reported by Tatsuo.

# psql -U postgres testjp
Welcome to psql, the PostgreSQL interactive terminal.

Type:  \copyright for distribution terms
        \h for help with SQL commands
        \? for help on internal slash commands
        \g or terminate with semicolon to execute query
        \q to quit

testjp=# SELECT SUBSTRING('1234567890' FROM 3);
  substring
------------
  34567890
(1 row)

Joe
*** pgsql/src/backend/utils/adt/varlena.c    Sun Apr  7 11:44:54 2002
--- pgsql.orig/src/backend/utils/adt/varlena.c    Mon Apr  8 22:32:16 2002
***************
*** 413,419 ****
      if (eml > 1)
      {
          sm = 0;
!         sn = (m + n) * eml + 3; /* +3 to avoid mb characters overhanging slice end */
      }
  #endif

--- 373,382 ----
      if (eml > 1)
      {
          sm = 0;
!         if (n > -1)
!             sn = (m + n) * eml + 3; /* +3 to avoid mb characters overhanging slice end */
!         else
!             sn = n;        /* n < 0 is special-cased by heap_tuple_untoast_attr_slice */
      }
  #endif

***************
*** 427,433 ****
      PG_RETURN_NULL();   /* notreached: suppress compiler warning */
  #endif
  #ifdef MULTIBYTE
!     len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);

      if (m > len)
      {
--- 390,399 ----
      PG_RETURN_NULL();   /* notreached: suppress compiler warning */
  #endif
  #ifdef MULTIBYTE
!     if (n > -1)
!         len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);
!     else    /* n < 0 is special-cased; need full string length */
!         len = pg_mbstrlen (VARDATA (string));

      if (m > len)
      {

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
John Gray
Date:
On Tue, 2002-04-09 at 06:57, Joe Conway wrote:
[snipped]
> > Yes, I was just looking at that also. It doesn't consider the case of n
> > = -1 for MB. See the lines:
> >
> > #ifdef MULTIBYTE
> >    eml = pg_database_encoding_max_length ();
> >
> >    if (eml > 1)
> >    {
> >       sm = 0;
> >       sn = (m + n) * eml + 3;
> >    }
> > #endif
> >
> > When n = -1 this does the wrong thing. And also a few lines later:
> >
> > #ifdef MULTIBYTE
> >    len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);
> >
> > I think both places need to test for n = -1. Do you agree?
> >

Sorry folks! I hadn't thought through the logic of that in the n = -1
and multibyte case. The patch looks OK to me.

John




Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Peter Eisentraut
Date:
Tom Lane writes:

> FWIW, CVS tip with --enable-multibyte builds and passes regression tests
> here (modulo the horology thing).  I concur with Chris' suggestion that
> you may not have done a clean reconfiguration.  If you're not using
> --enable-depend then a "make clean" is certainly needed.

Maybe we should turn on dependency tracking by default?  This is about the
(enough + 1)th time I'm seeing this.

--
Peter Eisentraut   peter_e@gmx.net


Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Bruce Momjian
Date:
Peter Eisentraut wrote:
> Tom Lane writes:
>
> > FWIW, CVS tip with --enable-multibyte builds and passes regression tests
> > here (modulo the horology thing).  I concur with Chris' suggestion that
> > you may not have done a clean reconfiguration.  If you're not using
> > --enable-depend then a "make clean" is certainly needed.
>
> Maybe we should turn on dependency tracking by default?  This is about the
> (enough + 1)th time I'm seeing this.

What is the downside to turning it on?  I can't think of one.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Tom Lane
Date:
Bruce Momjian <pgman@candle.pha.pa.us> writes:
> Peter Eisentraut wrote:
>> Maybe we should turn on dependency tracking by default?  This is about the
>> (enough + 1)th time I'm seeing this.

> What is the downside to turning it on?  I can't think of one.

Well, we'll still see the same kinds of reports from developers using
non-GCC compilers (surely there are some) ... so enable-depend isn't
going to magically make the issue go away.

Personally I tend to rebuild from "make clean" whenever I've done
anything nontrivial, and certainly after a CVS sync; so I have no use
for enable-depend.  But as long as I can turn it off, I don't object
to changing the default.

            regards, tom lane

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is

From
Tatsuo Ishii
Date:
I'm about to commit your patches with a small fix.
--
Tatsuo Ishii

From: Joe Conway <mail@joeconway.com>
Subject: Re: [PATCHES] unknownin/out patch (was [HACKERS] PQescapeBytea is
Date: Mon, 08 Apr 2002 22:57:47 -0700
Message-ID: <3CB282DB.4050708@joeconway.com>

> Joe Conway wrote:
> > Tatsuo Ishii wrote:
> >  >>> Tatsuo Ishii wrote:
> >  >>>
> >  >>>>
> >  >>>> Try a multibyte encoding database. For example,
> >  >>>>
> >  >>>> $ createdb -E EUC_JP test $ psql -c 'SELECT
> >  >>>> SUBSTRING('1234567890' FROM 3)' test substring ----------- 3456
> >  >>>>
> >
> >  >>>> (1 row)
> >  >>>>
> >  >>>> Apparently this is wrong. -- Tatsuo Ishii
> >  >>>
> >  >>> This problem exists in CVS tip *without* the unknownin/out
> >  >>> patch:
> >  >>
> >  >> Sure. That has been broken for a while.
> >  >
> >  >
> >  > I guess this actually happened in 1.79 of varlena.c:
> >  >
> > Yes, I was just looking at that also. It doesn't consider the case of n
> > = -1 for MB. See the lines:
> >
> > #ifdef MULTIBYTE
> >    eml = pg_database_encoding_max_length ();
> >
> >    if (eml > 1)
> >    {
> >       sm = 0;
> >       sn = (m + n) * eml + 3;
> >    }
> > #endif
> >
> > When n = -1 this does the wrong thing. And also a few lines later:
> >
> > #ifdef MULTIBYTE
> >    len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);
> >
> > I think both places need to test for n = -1. Do you agree?
> >
> >
> > Joe
> >
>
> The attached patch should fix the bug reported by Tatsuo.
>
> # psql -U postgres testjp
> Welcome to psql, the PostgreSQL interactive terminal.
>
> Type:  \copyright for distribution terms
>         \h for help with SQL commands
>         \? for help on internal slash commands
>         \g or terminate with semicolon to execute query
>         \q to quit
>
> testjp=# SELECT SUBSTRING('1234567890' FROM 3);
>   substring
> ------------
>   34567890
> (1 row)
>
> Joe

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is not

From
Bruce Momjian
Date:
Your patch has been added to the PostgreSQL unapplied patches list at:

    http://candle.pha.pa.us/cgi-bin/pgpatches

I will try to apply it within the next 48 hours.

---------------------------------------------------------------------------


Joe Conway wrote:
> Tom Lane wrote:
> > Joe Conway <mail@joeconway.com> writes:
> >
> >>I think you're correct that in a client/database encoding mismatch
> >>scenario, there would be bigger problems. Thoughts on this?
> >
> >
> > This scenario is probably why Tatsuo wants PQescapeBytea to octalize
> > everything with the high bit set; I'm not sure there's any lesser way
> > out.  Nonetheless, if UNKNOWN conversion introduces additional failures
> > then it makes sense to fix that.
> >
> >             regards, tom lane
> >
>
> Here's a patch to add unknownin/unknownout support. I also poked around
> looking for places that assume UNKNOWN == TEXT. One of those was the
> "SET" type in pg_type.h, which was using textin/textout. This one I took
> care of in this patch. The other suspicious place was in
> string_to_dataum (which is defined in both selfuncs.c and indxpath.c). I
> wasn't too sure about those, so I left them be.
>
> Regression tests all pass with the exception of horology, which also
> fails on CVS tip. It looks like that is a daylight savings time issue
> though.
>
> Also as a side note, I can't get make check to get past initdb if I
> configure with --enable-multibyte on CVS tip. Is there a known problem
> or am I just being clueless . . .wait, let's qualify that -- am I being
> clueless on this one issue? ;-)
>
> Joe

> diff -Ncr pgsql.orig/src/backend/utils/adt/varlena.c pgsql/src/backend/utils/adt/varlena.c
> *** pgsql.orig/src/backend/utils/adt/varlena.c    Sun Apr  7 10:21:25 2002
> --- pgsql/src/backend/utils/adt/varlena.c    Sun Apr  7 11:44:54 2002
> ***************
> *** 228,233 ****
> --- 228,273 ----
>   }
>
>
> + /*
> +  *        unknownin            - converts "..." to internal representation
> +  */
> + Datum
> + unknownin(PG_FUNCTION_ARGS)
> + {
> +     char       *inputStr = PG_GETARG_CSTRING(0);
> +     unknown       *result;
> +     int            len;
> +
> +     len = strlen(inputStr) + VARHDRSZ;
> +
> +     result = (unknown *) palloc(len);
> +     VARATT_SIZEP(result) = len;
> +
> +     memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
> +
> +     PG_RETURN_UNKNOWN_P(result);
> + }
> +
> +
> + /*
> +  *        unknownout            - converts internal representation to "..."
> +  */
> + Datum
> + unknownout(PG_FUNCTION_ARGS)
> + {
> +     unknown       *t = PG_GETARG_UNKNOWN_P(0);
> +     int            len;
> +     char       *result;
> +
> +     len = VARSIZE(t) - VARHDRSZ;
> +     result = (char *) palloc(len + 1);
> +     memcpy(result, VARDATA(t), len);
> +     result[len] = '\0';
> +
> +     PG_RETURN_CSTRING(result);
> + }
> +
> +
>   /* ========== PUBLIC ROUTINES ========== */
>
>   /*
> diff -Ncr pgsql.orig/src/include/c.h pgsql/src/include/c.h
> *** pgsql.orig/src/include/c.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/c.h    Sun Apr  7 11:40:59 2002
> ***************
> *** 389,394 ****
> --- 389,395 ----
>    */
>   typedef struct varlena bytea;
>   typedef struct varlena text;
> + typedef struct varlena unknown;
>   typedef struct varlena BpChar;    /* blank-padded char, ie SQL char(n) */
>   typedef struct varlena VarChar; /* var-length char, ie SQL varchar(n) */
>
> diff -Ncr pgsql.orig/src/include/catalog/pg_proc.h pgsql/src/include/catalog/pg_proc.h
> *** pgsql.orig/src/include/catalog/pg_proc.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/catalog/pg_proc.h    Sun Apr  7 11:56:09 2002
> ***************
> *** 235,240 ****
> --- 235,245 ----
>   DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 f t t s 3 f 701 "0 26 0" 100 0 0 100  scalargtjoinsel -
_null_)); 
>   DESCR("join selectivity of > and related operators on scalar datatypes");
>
> + DATA(insert OID =  109 (  unknownin               PGNSP PGUID 12 f t t i 1 f 705 "0" 100 0 0 100    unknownin -
_null_)); 
> + DESCR("(internal)");
> + DATA(insert OID =  110 (  unknownout           PGNSP PGUID 12 f t t i 1 f 23  "0" 100 0 0 100    unknownout -
_null_)); 
> + DESCR("(internal)");
> +
>   DATA(insert OID = 112 (  text               PGNSP PGUID 12 f t t i 1 f  25 "23" 100 0 0 100  int4_text - _null_ ));
>   DESCR("convert int4 to text");
>   DATA(insert OID = 113 (  text               PGNSP PGUID 12 f t t i 1 f  25 "21" 100 0 0 100  int2_text - _null_ ));
> diff -Ncr pgsql.orig/src/include/catalog/pg_type.h pgsql/src/include/catalog/pg_type.h
> *** pgsql.orig/src/include/catalog/pg_type.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/catalog/pg_type.h    Sun Apr  7 11:57:36 2002
> ***************
> *** 302,308 ****
>   DESCR("array of INDEX_MAX_KEYS oids, used in system tables");
>   #define OIDVECTOROID    30
>
> ! DATA(insert OID = 32 (    SET           PGNSP PGUID -1  -1 f b t \054 0   0 textin textout textin textout i p f 0
-10 _null_ _null_ )); 
>   DESCR("set of tuples");
>
>   DATA(insert OID = 71 (    pg_type         PGNSP PGUID 4 4 t c t \054 1247 0 int4in int4out int4in int4out i p f 0
-10 _null_ _null_ )); 
> --- 302,308 ----
>   DESCR("array of INDEX_MAX_KEYS oids, used in system tables");
>   #define OIDVECTOROID    30
>
> ! DATA(insert OID = 32 (    SET           PGNSP PGUID -1  -1 f b t \054 0   0 unknownin unknownout unknownin
unknownouti p f 0 -1 0 _null_ _null_ )); 
>   DESCR("set of tuples");
>
>   DATA(insert OID = 71 (    pg_type         PGNSP PGUID 4 4 t c t \054 1247 0 int4in int4out int4in int4out i p f 0
-10 _null_ _null_ )); 
> ***************
> *** 366,372 ****
>   DATA(insert OID = 704 (  tinterval PGNSP PGUID 12  47 f b t \054 0   0 tintervalin tintervalout tintervalin
tintervalouti p f 0 -1 0 _null_ _null_ )); 
>   DESCR("(abstime,abstime), time interval");
>   #define TINTERVALOID    704
> ! DATA(insert OID = 705 (  unknown   PGNSP PGUID -1  -1 f b t \054 0   0 textin textout textin textout i p f 0 -1 0
_null__null_ )); 
>   DESCR("");
>   #define UNKNOWNOID        705
>
> --- 366,372 ----
>   DATA(insert OID = 704 (  tinterval PGNSP PGUID 12  47 f b t \054 0   0 tintervalin tintervalout tintervalin
tintervalouti p f 0 -1 0 _null_ _null_ )); 
>   DESCR("(abstime,abstime), time interval");
>   #define TINTERVALOID    704
> ! DATA(insert OID = 705 (  unknown   PGNSP PGUID -1  -1 f b t \054 0   0 unknownin unknownout unknownin unknownout i
pf 0 -1 0 _null_ _null_ )); 
>   DESCR("");
>   #define UNKNOWNOID        705
>
> diff -Ncr pgsql.orig/src/include/fmgr.h pgsql/src/include/fmgr.h
> *** pgsql.orig/src/include/fmgr.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/fmgr.h    Sun Apr  7 12:11:30 2002
> ***************
> *** 185,190 ****
> --- 185,191 ----
>   /* DatumGetFoo macros for varlena types will typically look like this: */
>   #define DatumGetByteaP(X)            ((bytea *) PG_DETOAST_DATUM(X))
>   #define DatumGetTextP(X)            ((text *) PG_DETOAST_DATUM(X))
> + #define DatumGetUnknownP(X)            ((unknown *) PG_DETOAST_DATUM(X))
>   #define DatumGetBpCharP(X)            ((BpChar *) PG_DETOAST_DATUM(X))
>   #define DatumGetVarCharP(X)            ((VarChar *) PG_DETOAST_DATUM(X))
>   /* And we also offer variants that return an OK-to-write copy */
> ***************
> *** 200,205 ****
> --- 201,207 ----
>   /* GETARG macros for varlena types will typically look like this: */
>   #define PG_GETARG_BYTEA_P(n)        DatumGetByteaP(PG_GETARG_DATUM(n))
>   #define PG_GETARG_TEXT_P(n)            DatumGetTextP(PG_GETARG_DATUM(n))
> + #define PG_GETARG_UNKNOWN_P(n)        DatumGetUnknownP(PG_GETARG_DATUM(n))
>   #define PG_GETARG_BPCHAR_P(n)        DatumGetBpCharP(PG_GETARG_DATUM(n))
>   #define PG_GETARG_VARCHAR_P(n)        DatumGetVarCharP(PG_GETARG_DATUM(n))
>   /* And we also offer variants that return an OK-to-write copy */
> ***************
> *** 239,244 ****
> --- 241,247 ----
>   /* RETURN macros for other pass-by-ref types will typically look like this: */
>   #define PG_RETURN_BYTEA_P(x)   PG_RETURN_POINTER(x)
>   #define PG_RETURN_TEXT_P(x)    PG_RETURN_POINTER(x)
> + #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
>   #define PG_RETURN_BPCHAR_P(x)  PG_RETURN_POINTER(x)
>   #define PG_RETURN_VARCHAR_P(x) PG_RETURN_POINTER(x)
>
> diff -Ncr pgsql.orig/src/include/utils/builtins.h pgsql/src/include/utils/builtins.h
> *** pgsql.orig/src/include/utils/builtins.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/utils/builtins.h    Sun Apr  7 12:26:17 2002
> ***************
> *** 414,419 ****
> --- 414,422 ----
>   extern bool SplitIdentifierString(char *rawstring, char separator,
>                                     List **namelist);
>
> + extern Datum unknownin(PG_FUNCTION_ARGS);
> + extern Datum unknownout(PG_FUNCTION_ARGS);
> +
>   extern Datum byteain(PG_FUNCTION_ARGS);
>   extern Datum byteaout(PG_FUNCTION_ARGS);
>   extern Datum byteaoctetlen(PG_FUNCTION_ARGS);

>
> ---------------------------(end of broadcast)---------------------------
> TIP 5: Have you checked our extensive FAQ?
>
> http://www.postgresql.org/users-lounge/docs/faq.html

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: unknownin/out patch (was [HACKERS] PQescapeBytea is not

From
Bruce Momjian
Date:
Patch applied.  Thanks.

Catalog version updated.

---------------------------------------------------------------------------



Joe Conway wrote:
> Tom Lane wrote:
> > Joe Conway <mail@joeconway.com> writes:
> >
> >>I think you're correct that in a client/database encoding mismatch
> >>scenario, there would be bigger problems. Thoughts on this?
> >
> >
> > This scenario is probably why Tatsuo wants PQescapeBytea to octalize
> > everything with the high bit set; I'm not sure there's any lesser way
> > out.  Nonetheless, if UNKNOWN conversion introduces additional failures
> > then it makes sense to fix that.
> >
> >             regards, tom lane
> >
>
> Here's a patch to add unknownin/unknownout support. I also poked around
> looking for places that assume UNKNOWN == TEXT. One of those was the
> "SET" type in pg_type.h, which was using textin/textout. This one I took
> care of in this patch. The other suspicious place was in
> string_to_dataum (which is defined in both selfuncs.c and indxpath.c). I
> wasn't too sure about those, so I left them be.
>
> Regression tests all pass with the exception of horology, which also
> fails on CVS tip. It looks like that is a daylight savings time issue
> though.
>
> Also as a side note, I can't get make check to get past initdb if I
> configure with --enable-multibyte on CVS tip. Is there a known problem
> or am I just being clueless . . .wait, let's qualify that -- am I being
> clueless on this one issue? ;-)
>
> Joe

> diff -Ncr pgsql.orig/src/backend/utils/adt/varlena.c pgsql/src/backend/utils/adt/varlena.c
> *** pgsql.orig/src/backend/utils/adt/varlena.c    Sun Apr  7 10:21:25 2002
> --- pgsql/src/backend/utils/adt/varlena.c    Sun Apr  7 11:44:54 2002
> ***************
> *** 228,233 ****
> --- 228,273 ----
>   }
>
>
> + /*
> +  *        unknownin            - converts "..." to internal representation
> +  */
> + Datum
> + unknownin(PG_FUNCTION_ARGS)
> + {
> +     char       *inputStr = PG_GETARG_CSTRING(0);
> +     unknown       *result;
> +     int            len;
> +
> +     len = strlen(inputStr) + VARHDRSZ;
> +
> +     result = (unknown *) palloc(len);
> +     VARATT_SIZEP(result) = len;
> +
> +     memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
> +
> +     PG_RETURN_UNKNOWN_P(result);
> + }
> +
> +
> + /*
> +  *        unknownout            - converts internal representation to "..."
> +  */
> + Datum
> + unknownout(PG_FUNCTION_ARGS)
> + {
> +     unknown       *t = PG_GETARG_UNKNOWN_P(0);
> +     int            len;
> +     char       *result;
> +
> +     len = VARSIZE(t) - VARHDRSZ;
> +     result = (char *) palloc(len + 1);
> +     memcpy(result, VARDATA(t), len);
> +     result[len] = '\0';
> +
> +     PG_RETURN_CSTRING(result);
> + }
> +
> +
>   /* ========== PUBLIC ROUTINES ========== */
>
>   /*
> diff -Ncr pgsql.orig/src/include/c.h pgsql/src/include/c.h
> *** pgsql.orig/src/include/c.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/c.h    Sun Apr  7 11:40:59 2002
> ***************
> *** 389,394 ****
> --- 389,395 ----
>    */
>   typedef struct varlena bytea;
>   typedef struct varlena text;
> + typedef struct varlena unknown;
>   typedef struct varlena BpChar;    /* blank-padded char, ie SQL char(n) */
>   typedef struct varlena VarChar; /* var-length char, ie SQL varchar(n) */
>
> diff -Ncr pgsql.orig/src/include/catalog/pg_proc.h pgsql/src/include/catalog/pg_proc.h
> *** pgsql.orig/src/include/catalog/pg_proc.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/catalog/pg_proc.h    Sun Apr  7 11:56:09 2002
> ***************
> *** 235,240 ****
> --- 235,245 ----
>   DATA(insert OID = 108 (  scalargtjoinsel   PGNSP PGUID 12 f t t s 3 f 701 "0 26 0" 100 0 0 100  scalargtjoinsel -
_null_)); 
>   DESCR("join selectivity of > and related operators on scalar datatypes");
>
> + DATA(insert OID =  109 (  unknownin               PGNSP PGUID 12 f t t i 1 f 705 "0" 100 0 0 100    unknownin -
_null_)); 
> + DESCR("(internal)");
> + DATA(insert OID =  110 (  unknownout           PGNSP PGUID 12 f t t i 1 f 23  "0" 100 0 0 100    unknownout -
_null_)); 
> + DESCR("(internal)");
> +
>   DATA(insert OID = 112 (  text               PGNSP PGUID 12 f t t i 1 f  25 "23" 100 0 0 100  int4_text - _null_ ));
>   DESCR("convert int4 to text");
>   DATA(insert OID = 113 (  text               PGNSP PGUID 12 f t t i 1 f  25 "21" 100 0 0 100  int2_text - _null_ ));
> diff -Ncr pgsql.orig/src/include/catalog/pg_type.h pgsql/src/include/catalog/pg_type.h
> *** pgsql.orig/src/include/catalog/pg_type.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/catalog/pg_type.h    Sun Apr  7 11:57:36 2002
> ***************
> *** 302,308 ****
>   DESCR("array of INDEX_MAX_KEYS oids, used in system tables");
>   #define OIDVECTOROID    30
>
> ! DATA(insert OID = 32 (    SET           PGNSP PGUID -1  -1 f b t \054 0   0 textin textout textin textout i p f 0
-10 _null_ _null_ )); 
>   DESCR("set of tuples");
>
>   DATA(insert OID = 71 (    pg_type         PGNSP PGUID 4 4 t c t \054 1247 0 int4in int4out int4in int4out i p f 0
-10 _null_ _null_ )); 
> --- 302,308 ----
>   DESCR("array of INDEX_MAX_KEYS oids, used in system tables");
>   #define OIDVECTOROID    30
>
> ! DATA(insert OID = 32 (    SET           PGNSP PGUID -1  -1 f b t \054 0   0 unknownin unknownout unknownin
unknownouti p f 0 -1 0 _null_ _null_ )); 
>   DESCR("set of tuples");
>
>   DATA(insert OID = 71 (    pg_type         PGNSP PGUID 4 4 t c t \054 1247 0 int4in int4out int4in int4out i p f 0
-10 _null_ _null_ )); 
> ***************
> *** 366,372 ****
>   DATA(insert OID = 704 (  tinterval PGNSP PGUID 12  47 f b t \054 0   0 tintervalin tintervalout tintervalin
tintervalouti p f 0 -1 0 _null_ _null_ )); 
>   DESCR("(abstime,abstime), time interval");
>   #define TINTERVALOID    704
> ! DATA(insert OID = 705 (  unknown   PGNSP PGUID -1  -1 f b t \054 0   0 textin textout textin textout i p f 0 -1 0
_null__null_ )); 
>   DESCR("");
>   #define UNKNOWNOID        705
>
> --- 366,372 ----
>   DATA(insert OID = 704 (  tinterval PGNSP PGUID 12  47 f b t \054 0   0 tintervalin tintervalout tintervalin
tintervalouti p f 0 -1 0 _null_ _null_ )); 
>   DESCR("(abstime,abstime), time interval");
>   #define TINTERVALOID    704
> ! DATA(insert OID = 705 (  unknown   PGNSP PGUID -1  -1 f b t \054 0   0 unknownin unknownout unknownin unknownout i
pf 0 -1 0 _null_ _null_ )); 
>   DESCR("");
>   #define UNKNOWNOID        705
>
> diff -Ncr pgsql.orig/src/include/fmgr.h pgsql/src/include/fmgr.h
> *** pgsql.orig/src/include/fmgr.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/fmgr.h    Sun Apr  7 12:11:30 2002
> ***************
> *** 185,190 ****
> --- 185,191 ----
>   /* DatumGetFoo macros for varlena types will typically look like this: */
>   #define DatumGetByteaP(X)            ((bytea *) PG_DETOAST_DATUM(X))
>   #define DatumGetTextP(X)            ((text *) PG_DETOAST_DATUM(X))
> + #define DatumGetUnknownP(X)            ((unknown *) PG_DETOAST_DATUM(X))
>   #define DatumGetBpCharP(X)            ((BpChar *) PG_DETOAST_DATUM(X))
>   #define DatumGetVarCharP(X)            ((VarChar *) PG_DETOAST_DATUM(X))
>   /* And we also offer variants that return an OK-to-write copy */
> ***************
> *** 200,205 ****
> --- 201,207 ----
>   /* GETARG macros for varlena types will typically look like this: */
>   #define PG_GETARG_BYTEA_P(n)        DatumGetByteaP(PG_GETARG_DATUM(n))
>   #define PG_GETARG_TEXT_P(n)            DatumGetTextP(PG_GETARG_DATUM(n))
> + #define PG_GETARG_UNKNOWN_P(n)        DatumGetUnknownP(PG_GETARG_DATUM(n))
>   #define PG_GETARG_BPCHAR_P(n)        DatumGetBpCharP(PG_GETARG_DATUM(n))
>   #define PG_GETARG_VARCHAR_P(n)        DatumGetVarCharP(PG_GETARG_DATUM(n))
>   /* And we also offer variants that return an OK-to-write copy */
> ***************
> *** 239,244 ****
> --- 241,247 ----
>   /* RETURN macros for other pass-by-ref types will typically look like this: */
>   #define PG_RETURN_BYTEA_P(x)   PG_RETURN_POINTER(x)
>   #define PG_RETURN_TEXT_P(x)    PG_RETURN_POINTER(x)
> + #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
>   #define PG_RETURN_BPCHAR_P(x)  PG_RETURN_POINTER(x)
>   #define PG_RETURN_VARCHAR_P(x) PG_RETURN_POINTER(x)
>
> diff -Ncr pgsql.orig/src/include/utils/builtins.h pgsql/src/include/utils/builtins.h
> *** pgsql.orig/src/include/utils/builtins.h    Sun Apr  7 10:21:29 2002
> --- pgsql/src/include/utils/builtins.h    Sun Apr  7 12:26:17 2002
> ***************
> *** 414,419 ****
> --- 414,422 ----
>   extern bool SplitIdentifierString(char *rawstring, char separator,
>                                     List **namelist);
>
> + extern Datum unknownin(PG_FUNCTION_ARGS);
> + extern Datum unknownout(PG_FUNCTION_ARGS);
> +
>   extern Datum byteain(PG_FUNCTION_ARGS);
>   extern Datum byteaout(PG_FUNCTION_ARGS);
>   extern Datum byteaoctetlen(PG_FUNCTION_ARGS);

>
> ---------------------------(end of broadcast)---------------------------
> TIP 5: Have you checked our extensive FAQ?
>
> http://www.postgresql.org/users-lounge/docs/faq.html

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

Re: unknownin/out patch

From
Tom Lane
Date:
Joe Conway <mail@joeconway.com> writes:
>> Here's a patch to add unknownin/unknownout support. I also poked around
>> looking for places that assume UNKNOWN == TEXT. One of those was the
>> "SET" type in pg_type.h, which was using textin/textout. This one I took
>> care of in this patch. The other suspicious place was in
>> string_to_dataum (which is defined in both selfuncs.c and indxpath.c). I
>> wasn't too sure about those, so I left them be.

I do not think string_to_datum is a problem.  UNKNOWN constants should
never get past the parse analysis stage, so the planner doesn't have to
deal with them.  Certainly, it won't be looking at them in the context
of making any interesting selectivity decisions.

> I found three other suspicious spots in the source, where UNKNOWN ==
> TEXT is assumed. The first looks like it needs to be changed for sure,
> the other two I'm less sure about. Feedback would be most appreciated
> (on this and the patch itself).

> (line numbers based on CVS from earlier today)
> parse_node.c
> line 428
> parse_coerce.c
> line 85
> parse_coerce.c
> line 403

The first two of these certainly need to be changed --- these are
exactly the places where we convert literal strings to and (later)
from UNKNOWN-constant representation.  The third is okay as-is;
it's a type resolution rule, not code that is touching any literal
constants directly.  Will fix these in an upcoming commit.

The patch looks okay otherwise, except that I'm moving the typedef
unknown and the fmgr macros for it into varlena.c.  These two routines
are the only routines that will ever need them, so there's no need to
clutter the system-wide headers with 'em.  (Also, I am uncomfortable
with having a globally-visible typedef with such a generic name as
"unknown"; strikes me as a recipe for name conflicts.)

            regards, tom lane