Thread: TODO Item - Return compressed length of TOAST datatypes (WIP)

TODO Item - Return compressed length of TOAST datatypes (WIP)

From
Mark Kirkwood
Date:
I thought I would have a look at:

(Datatypes) Add function to return compressed length of TOAST data values.

A WIP patch is attached for comment (wanted to check I hadn't bitten off
more than I could chew *before* asking questions!).

A few questions come to mind:

1) The name - I have called it 'toast_compressed_length'. Seems longish
- I'm wondering if just 'compressed_length' is ok?

2) What should be returned for toasted data that is not compressed (or
plain stored data for that matter)? The WIP patch just gives the
uncompressed size (I notice I may need to subtract VARHDRSZ in some cases).

3) What should be returned for non-varlena types? The WIP patch is
treating everything as a varlena, so is returning incorrect information
for that case.

4) The builtin is declared as immutable - I am not so sure about that (I
am wondering if altering a column's storage from MAIN -> EXTENDED and
then updating the column to be itself will fool it).

5) Any multi-byte locale considerations?

regards

Mark








diff -Nacr src/include/catalog/pg_proc.h.orig src/include/catalog/pg_proc.h
*** src/include/catalog/pg_proc.h.orig    Fri Jun 17 15:30:17 2005
--- src/include/catalog/pg_proc.h    Fri Jun 17 17:08:18 2005
***************
*** 3655,3660 ****
--- 3655,3664 ----
  DATA(insert OID = 2560 (  pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 1184 "" _null_ _null_ _null_
pgsql_postmaster_start_time- _null_ )); 
  DESCR("postmaster start time");

+ /* Toast compressed length */
+ DATA(insert OID = 2561 (  toast_compressed_length       PGNSP PGUID 12 f f t f i 1 23 "25" _null_ _null_ _null_
toast_compressed_length- _null_ )); 
+ DESCR("toast compressed length");
+

  /*
   * Symbolic values for provolatile column: these indicate whether the result
diff -Nacr src/include/access/tuptoaster.h.orig src/include/access/tuptoaster.h
*** src/include/access/tuptoaster.h.orig    Thu Jun 16 21:12:57 2005
--- src/include/access/tuptoaster.h    Thu Jun 16 21:14:06 2005
***************
*** 138,141 ****
--- 138,149 ----
   */
  extern Size toast_raw_datum_size(Datum value);

+ /* ----------
+  * toast_compressed_datum_size -
+  *
+  *    Return the compressed (toasted) size of a varlena datum
+  * ----------
+  */
+ extern Size toast_compressed_datum_size(Datum value);
+
  #endif   /* TUPTOASTER_H */
diff -Nacr src/include/utils/pg_lzcompress.h.orig src/include/utils/pg_lzcompress.h
*** src/include/utils/pg_lzcompress.h.orig    Thu Jun 16 21:21:37 2005
--- src/include/utils/pg_lzcompress.h    Thu Jun 16 21:21:11 2005
***************
*** 228,231 ****
--- 228,238 ----
  extern int    pglz_get_next_decomp_char_from_lzdata(PGLZ_DecompState *dstate);
  extern int    pglz_get_next_decomp_char_from_plain(PGLZ_DecompState *dstate);

+ /* ----------
+  * Function to get compressed size.
+  * Internal use only.
+  * ----------
+  */
+ extern int    pglz_fetch_size(PGLZ_Header *source);
+
  #endif   /* _PG_LZCOMPRESS_H_ */
diff -Nacr src/include/utils/builtins.h.orig src/include/utils/builtins.h
*** src/include/utils/builtins.h.orig    Fri Jun 17 15:25:01 2005
--- src/include/utils/builtins.h    Fri Jun 17 15:27:30 2005
***************
*** 828,831 ****
--- 828,834 ----
  /* catalog/pg_conversion.c */
  extern Datum pg_convert_using(PG_FUNCTION_ARGS);

+ /* toastfuncs.c */
+ Datum   toast_compressed_length(PG_FUNCTION_ARGS);
+
  #endif   /* BUILTINS_H */
diff -Nacr src/backend/access/heap/tuptoaster.c.orig src/backend/access/heap/tuptoaster.c
*** src/backend/access/heap/tuptoaster.c.orig    Thu Jun 16 20:56:59 2005
--- src/backend/access/heap/tuptoaster.c    Fri Jun 17 15:12:30 2005
***************
*** 1436,1438 ****
--- 1436,1499 ----

      return result;
  }
+
+ /* ----------
+  * toast_compressed_datum_size
+  *
+  *    Show the compressed size of a datum
+  * ----------
+  */
+ Size
+ toast_compressed_datum_size(Datum value)
+ {
+
+
+     Size        size;
+     varattrib    *attr = (varattrib *) DatumGetPointer(value);
+
+     if (!PointerIsValid(attr))
+     {
+         /*
+          * No storage or NULL.
+          */
+         size = 0;
+     }
+     else if (VARATT_IS_EXTERNAL(attr))
+     {
+         /*
+          * Attribute is stored externally
+          * If  it is compressed too, then we need to get the external datum
+          * and interrogate *its* compressed size
+          * otherwise just use the external rawsize (i.e. no compression)
+          */
+         if (VARATT_IS_COMPRESSED(attr))
+         {
+             varattrib        *attrext = toast_fetch_datum(attr);
+             size = pglz_fetch_size((PGLZ_Header *)attrext);
+             pfree(attrext);
+         }
+         else
+         {
+
+             size = attr->va_content.va_external.va_rawsize;
+         }
+     }
+     else if (VARATT_IS_COMPRESSED(attr))
+     {
+         /*
+          * Attribute is stored compressed inline, so calculate
+          * compressed size on the datum itself.
+          */
+         size = pglz_fetch_size((PGLZ_Header *)attr);
+     }
+     else
+     {
+         /*
+          * Attribute is stored inline, no compression.
+          */
+         size = VARSIZE(attr);
+     }
+
+     return size;
+
+ }
diff -Nacr src/backend/utils/adt/Makefile.orig src/backend/utils/adt/Makefile
*** src/backend/utils/adt/Makefile.orig    Fri Jun 17 15:26:44 2005
--- src/backend/utils/adt/Makefile    Fri Jun 17 16:39:04 2005
***************
*** 24,30 ****
      tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
      network.o mac.o inet_net_ntop.o inet_net_pton.o \
      ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
!     ascii.o quote.o pgstatfuncs.o encode.o

  like.o: like.c like_match.c

--- 24,30 ----
      tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
      network.o mac.o inet_net_ntop.o inet_net_pton.o \
      ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
!     ascii.o quote.o pgstatfuncs.o encode.o toastfuncs.o

  like.o: like.c like_match.c

diff -Nacr src/backend/utils/adt/pg_lzcompress.c.orig src/backend/utils/adt/pg_lzcompress.c
*** src/backend/utils/adt/pg_lzcompress.c.orig    Thu Jun 16 21:14:42 2005
--- src/backend/utils/adt/pg_lzcompress.c    Fri Jun 17 16:30:49 2005
***************
*** 904,907 ****
--- 899,930 ----
          return EOF;

      return (int) (*(dstate->cp_in++));
+ }
+
+ /* ----------
+  * pglz_fetch_size -
+  *
+  *        Actual calculation to get the compressed size.
+  *
+  * ----------
+  */
+ int
+ pglz_fetch_size(PGLZ_Header *source)
+ {
+
+     int        size;
+
+
+     if (VARATT_SIZE(source) == source->rawsize + sizeof(PGLZ_Header))
+     {
+         /* Compression was not attempted or not effective.*/
+         size = source->rawsize;
+     }
+     else
+     {
+         /* Compressed attribute. */
+         size = VARATT_SIZE(source) - sizeof(PGLZ_Header);
+     }
+
+     return size;
  }
diff -Nacr src/backend/utils/adt/toastfuncs.c.orig src/backend/utils/adt/toastfuncs.c
*** src/backend/utils/adt/toastfuncs.c.orig    Fri Jun 17 17:29:34 2005
--- src/backend/utils/adt/toastfuncs.c    Fri Jun 17 16:39:18 2005
***************
*** 0 ****
--- 1,36 ----
+ /*-------------------------------------------------------------------------
+  *
+  * toastfuncs.c
+  *      Functions for accessing information about toasted data.
+  *
+  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *
+  * IDENTIFICATION
+  *      $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+
+ #include "postgres.h"
+ #include "fmgr.h"
+
+ #include "access/xact.h"
+ #include "access/tuptoaster.h"
+ #include "utils/builtins.h"
+ #include "utils/pg_lzcompress.h"
+
+ Datum
+ toast_compressed_length(PG_FUNCTION_ARGS)
+ {
+
+     Datum            value = PG_GETARG_DATUM(0);
+     int                size;
+
+
+     size = toast_compressed_datum_size(value);
+
+     PG_RETURN_INT32(size);
+ }
+




Re: TODO Item - Return compressed length of TOAST datatypes (WIP)

From
Tom Lane
Date:
Mark Kirkwood <markir@paradise.net.nz> writes:
> I thought I would have a look at:
> (Datatypes) Add function to return compressed length of TOAST data values.

My recollection of that discussion is that we just wanted something
that would return the actual VARSIZE() of the datum.  You're building
something way too confusing ...

A more interesting point is that the way you've declared the function,
it will only work on text values, which is pretty limiting.  Ideally
we'd define this thing as "pg_datum_length(any-varlena-type) returns int"
but there is no pseudotype corresponding to "any varlena type".

I was thinking about this the other day in connection with my proposal
to make something that could return the TOAST value OID of an
out-of-line datum.  I think the only non-restrictive way to do it would
be to declare the function as taking "any", and then add a runtime check
using the get_fn_expr_argtype() mechanism to test that what you've been
given is in fact a varlena datatype.

            regards, tom lane

Re: TODO Item - Return compressed length of TOAST datatypes

From
Mark Kirkwood
Date:
Tom Lane wrote:
> Mark Kirkwood <markir@paradise.net.nz> writes:
>
>>I thought I would have a look at:
>>(Datatypes) Add function to return compressed length of TOAST data values.
>
>
> My recollection of that discussion is that we just wanted something
> that would return the actual VARSIZE() of the datum.  You're building
> something way too confusing ...
>

I was guessing a little about exactly what was wanted - for some reason
   I couldn't access the mail archives for the last few days (however,
can now).

> A more interesting point is that the way you've declared the function,
> it will only work on text values, which is pretty limiting.  Ideally
> we'd define this thing as "pg_datum_length(any-varlena-type) returns int"
> but there is no pseudotype corresponding to "any varlena type".
>
> I was thinking about this the other day in connection with my proposal
> to make something that could return the TOAST value OID of an
> out-of-line datum.  I think the only non-restrictive way to do it would
> be to declare the function as taking "any", and then add a runtime check
> using the get_fn_expr_argtype() mechanism to test that what you've been
> given is in fact a varlena datatype.
>

Yes - was thinking I needed to check if the Datum was a varlena (and
didn't know how to do it...), so thanks for the pointer!

With these thoughts in mind, I'll have another go :-)

Cheers

Mark


Re: TODO Item - Return compressed length of TOAST datatypes

From
Mark Kirkwood
Date:
The next iteration -

Hopefully I have got the idea basically right.

I wonder if I have done the "am I a varlena" the long way.., pls advise
if so!

Cheers

Mark

Tom Lane wrote:
>
> My recollection of that discussion is that we just wanted something
> that would return the actual VARSIZE() of the datum.  You're building
> something way too confusing ...
>
> A more interesting point is that the way you've declared the function,
> it will only work on text values, which is pretty limiting.  Ideally
> we'd define this thing as "pg_datum_length(any-varlena-type) returns int"
> but there is no pseudotype corresponding to "any varlena type".
>
> I was thinking about this the other day in connection with my proposal
> to make something that could return the TOAST value OID of an
> out-of-line datum.  I think the only non-restrictive way to do it would
> be to declare the function as taking "any", and then add a runtime check
> using the get_fn_expr_argtype() mechanism to test that what you've been
> given is in fact a varlena datatype.
>
>


diff -Nacr ./src/backend/access/heap/tuptoaster.c.orig ./src/backend/access/heap/tuptoaster.c
*** ./src/backend/access/heap/tuptoaster.c.orig    Mon Mar 21 13:23:58 2005
--- ./src/backend/access/heap/tuptoaster.c    Sun Jun 19 14:24:43 2005
***************
*** 1436,1438 ****
--- 1436,1482 ----

      return result;
  }
+
+ /* ----------
+  * toast_datum_size
+  *
+  *    Show the (possibly compressed) size of a datum
+  * ----------
+  */
+ Size
+ toast_datum_size(Datum value)
+ {
+
+     varattrib    *attr = (varattrib *) DatumGetPointer(value);
+     Size        result;
+
+     if (VARATT_IS_EXTERNAL(attr))
+     {
+         /*
+          * Attribute is stored externally - If it is compressed too,
+          * then we need to get the external datum and calculate its size,
+          * otherwise we just use the external rawsize.
+          */
+         if (VARATT_IS_COMPRESSED(attr))
+         {
+             varattrib        *attrext = toast_fetch_datum(attr);
+             result = VARSIZE(attrext);
+             pfree(attrext);
+         }
+         else
+         {
+             result = attr->va_content.va_external.va_rawsize;
+         }
+     }
+     else
+     {
+         /*
+          * Attribute is stored inline either compressed or not, just
+          * calculate the size of the datum in either case.
+          */
+         result = VARSIZE(attr);
+     }
+
+     return result;
+
+ }
diff -Nacr ./src/backend/utils/adt/Makefile.orig ./src/backend/utils/adt/Makefile
*** ./src/backend/utils/adt/Makefile.orig    Fri Apr  2 09:28:45 2004
--- ./src/backend/utils/adt/Makefile    Fri Jun 17 17:52:09 2005
***************
*** 24,30 ****
      tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
      network.o mac.o inet_net_ntop.o inet_net_pton.o \
      ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
!     ascii.o quote.o pgstatfuncs.o encode.o

  like.o: like.c like_match.c

--- 24,30 ----
      tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
      network.o mac.o inet_net_ntop.o inet_net_pton.o \
      ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
!     ascii.o quote.o pgstatfuncs.o encode.o toastfuncs.o

  like.o: like.c like_match.c

diff -Nacr ./src/backend/utils/adt/toastfuncs.c.orig ./src/backend/utils/adt/toastfuncs.c
*** ./src/backend/utils/adt/toastfuncs.c.orig    Fri Jun 17 17:52:09 2005
--- ./src/backend/utils/adt/toastfuncs.c    Sun Jun 19 17:35:26 2005
***************
*** 0 ****
--- 1,74 ----
+ /*-------------------------------------------------------------------------
+  *
+  * toastfuncs.c
+  *      Functions for accessing information about toasted data.
+  *
+  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *
+  * IDENTIFICATION
+  *      $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+
+ #include "postgres.h"
+ #include "fmgr.h"
+
+ #include "catalog/pg_type.h"
+ #include "access/xact.h"
+ #include "access/tuptoaster.h"
+ #include "utils/builtins.h"
+ #include "utils/syscache.h"
+
+ Datum
+ pg_datum_length(PG_FUNCTION_ARGS)
+ {
+
+     Datum            value = PG_GETARG_DATUM(0);
+     int                size;
+
+
+     if (fcinfo->flinfo->fn_extra == NULL)
+     {
+         /*
+          * On the first call check lookup the datatype of the supplied argument
+          * and check if is a varlena .
+          */
+         Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+         HeapTuple        tp;
+         int                typlen;
+
+         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+                                                      sizeof(int));
+
+         tp = SearchSysCache(TYPEOID,
+                             ObjectIdGetDatum(argtypeid),
+                             0, 0, 0);
+         if (HeapTupleIsValid(tp))
+         {
+             Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp);
+             typlen = typtup->typlen;
+             ReleaseSysCache(tp);
+         }
+         else
+         {
+             /* Oid not in pg_type, should never happen. */
+             elog(ERROR, "invalid typid %u", argtypeid);
+         }
+
+
+
+         if ( typlen != -1 )
+         {
+             elog(ERROR, "this function is only applicable to varlena types");
+         }
+     }
+
+
+     size = toast_datum_size(value) - VARHDRSZ;
+
+     PG_RETURN_INT32(size);
+ }
+
diff -Nacr ./src/include/access/tuptoaster.h.orig ./src/include/access/tuptoaster.h
*** ./src/include/access/tuptoaster.h.orig    Mon Mar 21 13:24:04 2005
--- ./src/include/access/tuptoaster.h    Sun Jun 19 14:45:18 2005
***************
*** 138,141 ****
--- 138,149 ----
   */
  extern Size toast_raw_datum_size(Datum value);

+ /* ----------
+  * toast_datum_size -
+  *
+  *    Return the (possibly compressed) size of a varlena datum
+  * ----------
+  */
+ extern Size toast_datum_size(Datum value);
+
  #endif   /* TUPTOASTER_H */
diff -Nacr ./src/include/catalog/pg_proc.h.orig ./src/include/catalog/pg_proc.h
*** ./src/include/catalog/pg_proc.h.orig    Wed Jun 15 09:04:41 2005
--- ./src/include/catalog/pg_proc.h    Sun Jun 19 17:08:35 2005
***************
*** 3655,3660 ****
--- 3655,3664 ----
  DATA(insert OID = 2560 (  pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 1184 "" _null_ _null_ _null_
pgsql_postmaster_start_time- _null_ )); 
  DESCR("postmaster start time");

+ /* Toast compressed length */
+ DATA(insert OID = 2561 (  pg_datum_length       PGNSP PGUID 12 f f t f i 1 23 "2276" _null_ _null_ _null_
pg_datum_length- _null_ )); 
+ DESCR("length (possibly compressed) of varlena types");
+

  /*
   * Symbolic values for provolatile column: these indicate whether the result
diff -Nacr ./src/include/utils/builtins.h.orig ./src/include/utils/builtins.h
*** ./src/include/utils/builtins.h.orig    Fri May 27 12:57:49 2005
--- ./src/include/utils/builtins.h    Sun Jun 19 12:48:35 2005
***************
*** 828,831 ****
--- 828,834 ----
  /* catalog/pg_conversion.c */
  extern Datum pg_convert_using(PG_FUNCTION_ARGS);

+ /* toastfuncs.c */
+ extern Datum pg_datum_length(PG_FUNCTION_ARGS);
+
  #endif   /* BUILTINS_H */

Re: TODO Item - Return compressed length of TOAST datatypes

From
Mark Kirkwood
Date:
I did a few cleanups on the last patch. Please examine this one instead.
The changes are:

1. Add documentation for pg_datum_length builtin.
2. Correct some typos in the code comments.
3. Move the code in toastfuncs.c to varlena.c as it is probably the
correct place.
4. Use ereport instead of elog.
5  Quiet compiler warning in pg_datum_length.

Best wishes

Mark

Mark Kirkwood wrote:
> The next iteration -
>
> Hopefully I have got the idea basically right.
>
> I wonder if I have done the "am I a varlena" the long way.., pls advise
> if so!
>


diff -Nacr ./doc/src/sgml/func.sgml.orig ./doc/src/sgml/func.sgml
*** ./doc/src/sgml/func.sgml.orig    Mon Jun 20 15:38:23 2005
--- ./doc/src/sgml/func.sgml    Mon Jun 20 15:45:51 2005
***************
*** 2187,2192 ****
--- 2187,2200 ----
        </row>

        <row>
+        <entry><literal><function>pg_datum_length</function>(<parameter>string</parameter>)</literal></entry>
+        <entry><type>integer</type></entry>
+        <entry>Number of bytes (before toast decompression) in string</entry>
+        <entry><literal>pg_datum_length( 'jo\\000se'::bytea)</literal></entry>
+        <entry><literal>5</literal></entry>
+       </row>
+
+       <row>
         <entry><literal><function>position</function>(<parameter>substring</parameter> in
<parameter>string</parameter>)</literal></entry>
         <entry><type>integer</type></entry>
         <entry>Location of specified substring</entry>
diff -Nacr ./src/backend/access/heap/tuptoaster.c.orig ./src/backend/access/heap/tuptoaster.c
*** ./src/backend/access/heap/tuptoaster.c.orig    Mon Jun 20 17:11:37 2005
--- ./src/backend/access/heap/tuptoaster.c    Mon Jun 20 17:11:44 2005
***************
*** 1436,1438 ****
--- 1436,1482 ----

      return result;
  }
+
+ /* ----------
+  * toast_datum_size
+  *
+  *    Show the (possibly compressed) size of a datum
+  * ----------
+  */
+ Size
+ toast_datum_size(Datum value)
+ {
+
+     varattrib    *attr = (varattrib *) DatumGetPointer(value);
+     Size        result;
+
+     if (VARATT_IS_EXTERNAL(attr))
+     {
+         /*
+          * Attribute is stored externally - If it is compressed too,
+          * then we need to get the external datum and calculate its size,
+          * otherwise we just use the external rawsize.
+          */
+         if (VARATT_IS_COMPRESSED(attr))
+         {
+             varattrib        *attrext = toast_fetch_datum(attr);
+             result = VARSIZE(attrext);
+             pfree(attrext);
+         }
+         else
+         {
+             result = attr->va_content.va_external.va_rawsize;
+         }
+     }
+     else
+     {
+         /*
+          * Attribute is stored inline either compressed or not, just
+          * calculate the size of the datum in either case.
+          */
+         result = VARSIZE(attr);
+     }
+
+     return result;
+
+ }
diff -Nacr ./src/backend/utils/adt/varlena.c.orig ./src/backend/utils/adt/varlena.c
*** ./src/backend/utils/adt/varlena.c.orig    Mon Jun 20 14:28:03 2005
--- ./src/backend/utils/adt/varlena.c    Mon Jun 20 17:17:58 2005
***************
*** 28,33 ****
--- 28,34 ----
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
  #include "utils/pg_locale.h"
+ #include "utils/syscache.h"


  typedef struct varlena unknown;
***************
*** 2330,2333 ****
--- 2331,2396 ----

      result_text = PG_STR_GET_TEXT(hexsum);
      PG_RETURN_TEXT_P(result_text);
+ }
+
+ /*
+  * Show the (possibly compressed) length of a datum.
+  */
+ Datum
+ pg_datum_length(PG_FUNCTION_ARGS)
+ {
+
+     Datum            value = PG_GETARG_DATUM(0);
+     int                result;
+
+
+     if (fcinfo->flinfo->fn_extra == NULL)
+     {
+         /*
+          * On the first call lookup the datatype of the supplied argument
+          * and check if is a varlena.
+          */
+         Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+         HeapTuple        tp;
+         int                typlen = 0;
+
+
+         tp = SearchSysCache(TYPEOID,
+                             ObjectIdGetDatum(argtypeid),
+                             0, 0, 0);
+         if (HeapTupleIsValid(tp))
+         {
+             Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp);
+             typlen = typtup->typlen;
+             ReleaseSysCache(tp);
+         }
+         else
+         {
+             /* Oid not in pg_type, should never happen. */
+             ereport(ERROR,
+                     (errcode(ERRCODE_INTERNAL_ERROR),
+                      errmsg("invalid typid: %u", argtypeid)));
+         }
+
+
+         if ( typlen != -1 )
+         {
+             /* Not a varlena. */
+             ereport(ERROR,
+                     (errcode(ERRCODE_DATATYPE_MISMATCH),
+                      errmsg("function is only applicable to varlena types")));
+         }
+
+
+         /*
+          * Allocate fn_extra so we don't go here again!
+          */
+         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+                                                      sizeof(int));
+     }
+
+
+     result = toast_datum_size(value) - VARHDRSZ;
+
+     PG_RETURN_INT32(result);
  }
diff -Nacr ./src/include/access/tuptoaster.h.orig ./src/include/access/tuptoaster.h
*** ./src/include/access/tuptoaster.h.orig    Mon Mar 21 13:24:04 2005
--- ./src/include/access/tuptoaster.h    Sun Jun 19 14:45:18 2005
***************
*** 138,141 ****
--- 138,149 ----
   */
  extern Size toast_raw_datum_size(Datum value);

+ /* ----------
+  * toast_datum_size -
+  *
+  *    Return the (possibly compressed) size of a varlena datum
+  * ----------
+  */
+ extern Size toast_datum_size(Datum value);
+
  #endif   /* TUPTOASTER_H */
diff -Nacr ./src/include/catalog/pg_proc.h.orig ./src/include/catalog/pg_proc.h
*** ./src/include/catalog/pg_proc.h.orig    Wed Jun 15 09:04:41 2005
--- ./src/include/catalog/pg_proc.h    Sun Jun 19 17:08:35 2005
***************
*** 3655,3660 ****
--- 3655,3664 ----
  DATA(insert OID = 2560 (  pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 1184 "" _null_ _null_ _null_
pgsql_postmaster_start_time- _null_ )); 
  DESCR("postmaster start time");

+ /* Toast compressed length */
+ DATA(insert OID = 2561 (  pg_datum_length       PGNSP PGUID 12 f f t f i 1 23 "2276" _null_ _null_ _null_
pg_datum_length- _null_ )); 
+ DESCR("length (possibly compressed) of varlena types");
+

  /*
   * Symbolic values for provolatile column: these indicate whether the result
diff -Nacr ./src/include/utils/builtins.h.orig ./src/include/utils/builtins.h
*** ./src/include/utils/builtins.h.orig    Fri May 27 12:57:49 2005
--- ./src/include/utils/builtins.h    Mon Jun 20 15:59:09 2005
***************
*** 601,606 ****
--- 601,607 ----
  extern Datum byteapos(PG_FUNCTION_ARGS);
  extern Datum bytea_substr(PG_FUNCTION_ARGS);
  extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS);
+ extern Datum pg_datum_length(PG_FUNCTION_ARGS);

  /* version.c */
  extern Datum pgsql_version(PG_FUNCTION_ARGS);


Re: TODO Item - Return compressed length of TOAST datatypes

From
Bruce Momjian
Date:
Mark Kirkwood wrote:
> I did a few cleanups on the last patch. Please examine this one instead.
> The changes are:
>
> 1. Add documentation for pg_datum_length builtin.
> 2. Correct some typos in the code comments.
> 3. Move the code in toastfuncs.c to varlena.c as it is probably the
> correct place.
> 4. Use ereport instead of elog.
> 5  Quiet compiler warning in pg_datum_length.

I have modified your patch to simplify the logic, and renamed it to
pg_column_size(), to be consistent with our soon-to-be-added
pg_relation/tablespace/database functions from dbsize.

Here is a sample usage:

    test=> CREATE TABLE test (x INT, y TEXT);
    CREATE TABLE
    test=> INSERT INTO test VALUES (4, repeat('x', 10000));
    INSERT 0 1
    test=> INSERT INTO test VALUES (4, repeat('x', 100000));
    INSERT 0 1
    test=> SELECT pg_column_size(x), pg_column_size(y) FROM test;
     pg_column_size | pg_column_size
    ----------------+----------------
                  4 |            121
                  4 |           1152
    (2 rows)

Interesting the 10-times larger column is 10-times larger in storage.
Do we have some limit on how many repeated values we can record?

Patch attached and applied.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.262
diff -c -c -r1.262 func.sgml
*** doc/src/sgml/func.sgml    29 Jun 2005 01:52:56 -0000    1.262
--- doc/src/sgml/func.sgml    6 Jul 2005 18:55:34 -0000
***************
*** 2187,2192 ****
--- 2187,2200 ----
        </row>

        <row>
+        <entry><literal><function>pg_column_size</function>(<parameter>string</parameter>)</literal></entry>
+        <entry><type>integer</type></entry>
+        <entry>Number of bytes required to store the value, which might be compressed</entry>
+        <entry><literal>pg_column_size('jo\\000se'::bytea)</literal></entry>
+        <entry><literal>5</literal></entry>
+       </row>
+
+       <row>
         <entry><literal><function>position</function>(<parameter>substring</parameter> in
<parameter>string</parameter>)</literal></entry>
         <entry><type>integer</type></entry>
         <entry>Location of specified substring</entry>
Index: src/backend/access/heap/tuptoaster.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v
retrieving revision 1.49
diff -c -c -r1.49 tuptoaster.c
*** src/backend/access/heap/tuptoaster.c    21 Mar 2005 01:23:58 -0000    1.49
--- src/backend/access/heap/tuptoaster.c    6 Jul 2005 18:55:35 -0000
***************
*** 1436,1438 ****
--- 1436,1480 ----

      return result;
  }
+
+ /* ----------
+  * toast_datum_size
+  *
+  *    Show the (possibly compressed) size of a datum
+  * ----------
+  */
+ Size
+ toast_datum_size(Datum value)
+ {
+
+     varattrib    *attr = (varattrib *) DatumGetPointer(value);
+     Size        result;
+
+     if (VARATT_IS_EXTERNAL(attr))
+     {
+         /*
+          * Attribute is stored externally - If it is compressed too,
+          * then we need to get the external datum and calculate its size,
+          * otherwise we just use the external rawsize.
+          */
+         if (VARATT_IS_COMPRESSED(attr))
+         {
+             varattrib        *attrext = toast_fetch_datum(attr);
+             result = VARSIZE(attrext);
+             pfree(attrext);
+         }
+         else
+             result = attr->va_content.va_external.va_rawsize;
+     }
+     else
+     {
+         /*
+          * Attribute is stored inline either compressed or not, just
+          * calculate the size of the datum in either case.
+          */
+         result = VARSIZE(attr);
+     }
+
+     return result;
+
+ }
Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v
retrieving revision 1.124
diff -c -c -r1.124 varlena.c
*** src/backend/utils/adt/varlena.c    4 Jul 2005 18:56:44 -0000    1.124
--- src/backend/utils/adt/varlena.c    6 Jul 2005 18:55:36 -0000
***************
*** 28,33 ****
--- 28,34 ----
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
  #include "utils/pg_locale.h"
+ #include "utils/syscache.h"


  typedef struct varlena unknown;
***************
*** 2348,2350 ****
--- 2349,2395 ----
      result_text = PG_STR_GET_TEXT(hexsum);
      PG_RETURN_TEXT_P(result_text);
  }
+
+ /*
+  * Return the length of a datum, possibly compressed
+  */
+ Datum
+ pg_column_size(PG_FUNCTION_ARGS)
+ {
+     Datum            value = PG_GETARG_DATUM(0);
+     int                result;
+
+     /*    fn_extra stores the fixed column length, or -1 for varlena. */
+     if (fcinfo->flinfo->fn_extra == NULL)    /* first call? */
+     {
+         /* On the first call lookup the datatype of the supplied argument */
+         Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+         HeapTuple        tp;
+         int                typlen;
+
+         tp = SearchSysCache(TYPEOID,
+                             ObjectIdGetDatum(argtypeid),
+                             0, 0, 0);
+         if (!HeapTupleIsValid(tp))
+         {
+             /* Oid not in pg_type, should never happen. */
+             ereport(ERROR,
+                     (errcode(ERRCODE_INTERNAL_ERROR),
+                      errmsg("invalid typid: %u", argtypeid)));
+         }
+
+         typlen = ((Form_pg_type)GETSTRUCT(tp))->typlen;
+         ReleaseSysCache(tp);
+         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+                                                       sizeof(int));
+         *(int *)fcinfo->flinfo->fn_extra = typlen;
+     }
+
+     if (*(int *)fcinfo->flinfo->fn_extra != -1)
+         PG_RETURN_INT32(*(int *)fcinfo->flinfo->fn_extra);
+     else
+     {
+         result = toast_datum_size(value) - VARHDRSZ;
+         PG_RETURN_INT32(result);
+     }
+ }
Index: src/include/access/tuptoaster.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/access/tuptoaster.h,v
retrieving revision 1.22
diff -c -c -r1.22 tuptoaster.h
*** src/include/access/tuptoaster.h    21 Mar 2005 01:24:04 -0000    1.22
--- src/include/access/tuptoaster.h    6 Jul 2005 18:55:37 -0000
***************
*** 138,141 ****
--- 138,149 ----
   */
  extern Size toast_raw_datum_size(Datum value);

+ /* ----------
+  * toast_datum_size -
+  *
+  *    Return the storage size of a varlena datum
+  * ----------
+  */
+ extern Size toast_datum_size(Datum value);
+
  #endif   /* TUPTOASTER_H */
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.373
diff -c -c -r1.373 pg_proc.h
*** src/include/catalog/pg_proc.h    1 Jul 2005 19:19:03 -0000    1.373
--- src/include/catalog/pg_proc.h    6 Jul 2005 18:55:43 -0000
***************
*** 3658,3663 ****
--- 3658,3667 ----
  DATA(insert OID = 2560 (  pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 1184 "" _null_ _null_ _null_
pgsql_postmaster_start_time- _null_ )); 
  DESCR("postmaster start time");

+ /* Column storage size */
+ DATA(insert OID = 1269 (  pg_column_size       PGNSP PGUID 12 f f t f i 1 23 "2276" _null_ _null_ _null_
pg_column_size- _null_ )); 
+ DESCR("bytes required to store the value, perhaps with compression");
+
  /* new functions for Y-direction rtree opclasses */
  DATA(insert OID = 2562 (  box_below           PGNSP PGUID 12 f f t f i 2 16 "603 603" _null_ _null_ _null_
box_below- _null_ )); 
  DESCR("is below");
Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.258
diff -c -c -r1.258 builtins.h
*** src/include/utils/builtins.h    17 Jun 2005 22:32:50 -0000    1.258
--- src/include/utils/builtins.h    6 Jul 2005 18:55:43 -0000
***************
*** 601,606 ****
--- 601,607 ----
  extern Datum byteapos(PG_FUNCTION_ARGS);
  extern Datum bytea_substr(PG_FUNCTION_ARGS);
  extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS);
+ extern Datum pg_column_size(PG_FUNCTION_ARGS);

  /* version.c */
  extern Datum pgsql_version(PG_FUNCTION_ARGS);

Re: TODO Item - Return compressed length of TOAST datatypes

From
Neil Conway
Date:
Bruce Momjian wrote:
> + /*
> +  * Return the length of a datum, possibly compressed
> +  */
> + Datum
> + pg_column_size(PG_FUNCTION_ARGS)
> + {
> +     Datum            value = PG_GETARG_DATUM(0);
> +     int                result;
> +
> +     /*    fn_extra stores the fixed column length, or -1 for varlena. */
> +     if (fcinfo->flinfo->fn_extra == NULL)    /* first call? */
> +     {
> +         /* On the first call lookup the datatype of the supplied argument */

[...]

Is this optimization worth the code complexity?

> +         tp = SearchSysCache(TYPEOID,
> +                             ObjectIdGetDatum(argtypeid),
> +                             0, 0, 0);
> +         if (!HeapTupleIsValid(tp))
> +         {
> +             /* Oid not in pg_type, should never happen. */
> +             ereport(ERROR,
> +                     (errcode(ERRCODE_INTERNAL_ERROR),
> +                      errmsg("invalid typid: %u", argtypeid)));
> +         }

elog(ERROR) is usually used for "can't happen" errors; also, the usual
error message in this scenario is "cache lookup failed [...]". Perhaps
better to use get_typlen() here, anyway.

-Neil

Re: TODO Item - Return compressed length of TOAST datatypes

From
Mark Kirkwood
Date:
Neil Conway wrote:
> Bruce Momjian wrote:
>
>> + /* +  * Return the length of a datum, possibly compressed
>> +  */
>> + Datum
>> + pg_column_size(PG_FUNCTION_ARGS)
>> + {
>> +     Datum            value = PG_GETARG_DATUM(0);
>> +     int                result;
>> + +     /*    fn_extra stores the fixed column length, or -1 for
>> varlena. */
>> +     if (fcinfo->flinfo->fn_extra == NULL)    /* first call? */
>> +     {
>> +         /* On the first call lookup the datatype of the supplied
>> argument */
>
>
> [...]
>
> Is this optimization worth the code complexity?
>

I didn't performance test it, but the idea of hammering the catalogs for
each value to be processed seemed a bad thing.

>> +         tp = SearchSysCache(TYPEOID,
>> +                             ObjectIdGetDatum(argtypeid),
>> +                             0, 0, 0);
>> +         if (!HeapTupleIsValid(tp))
>> +         {
>> +             /* Oid not in pg_type, should never happen. */
>> +             ereport(ERROR,
>> +                     (errcode(ERRCODE_INTERNAL_ERROR),
>> +                      errmsg("invalid typid: %u", argtypeid)));
>> +         }
>
>
> elog(ERROR) is usually used for "can't happen" errors; also, the usual
> error message in this scenario is "cache lookup failed [...]". Perhaps
> better to use get_typlen() here, anyway.
>

Ahem,..ah yes, get_typlen()! (ducks)- that is clearly much better!

I have attached a little change to varlena.c that uses it. I left the
ereport as it was, but am not fussed about it either way.

BTW - Bruce, I like the changes, makes the beast more friendly to use!

Best wishes

Mark


Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varlena.c,v
retrieving revision 1.125
diff -c -r1.125 varlena.c
*** src/backend/utils/adt/varlena.c    6 Jul 2005 19:02:52 -0000    1.125
--- src/backend/utils/adt/varlena.c    7 Jul 2005 02:52:50 -0000
***************
*** 28,34 ****
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
  #include "utils/pg_locale.h"
- #include "utils/syscache.h"


  typedef struct varlena unknown;
--- 28,33 ----
***************
*** 2364,2385 ****
      {
          /* On the first call lookup the datatype of the supplied argument */
          Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
!         HeapTuple        tp;
!         int                typlen;

!         tp = SearchSysCache(TYPEOID,
!                             ObjectIdGetDatum(argtypeid),
!                             0, 0, 0);
!         if (!HeapTupleIsValid(tp))
          {
              /* Oid not in pg_type, should never happen. */
              ereport(ERROR,
                      (errcode(ERRCODE_INTERNAL_ERROR),
!                      errmsg("invalid typid: %u", argtypeid)));
          }
!
!         typlen = ((Form_pg_type)GETSTRUCT(tp))->typlen;
!         ReleaseSysCache(tp);
          fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                                                        sizeof(int));
          *(int *)fcinfo->flinfo->fn_extra = typlen;
--- 2363,2379 ----
      {
          /* On the first call lookup the datatype of the supplied argument */
          Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
!         int                typlen    = get_typlen(argtypeid);

!
!         if (typlen == 0)
          {
              /* Oid not in pg_type, should never happen. */
              ereport(ERROR,
                      (errcode(ERRCODE_INTERNAL_ERROR),
!                      errmsg("cache lookup failed for type %u", argtypeid)));
          }
!
          fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                                                        sizeof(int));
          *(int *)fcinfo->flinfo->fn_extra = typlen;


Re: TODO Item - Return compressed length of TOAST datatypes

From
Neil Conway
Date:
Mark Kirkwood wrote:
> I didn't performance test it, but the idea of hammering the catalogs for
> each value to be processed seemed a bad thing.

Well, the syscache already sits in front of the catalogs themselves. I'd
be curious to see what the performance difference actually is...

-Neil

Re: TODO Item - Return compressed length of TOAST datatypes

From
Alvaro Herrera
Date:
On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote:
> Neil Conway wrote:

> >elog(ERROR) is usually used for "can't happen" errors; also, the usual
> >error message in this scenario is "cache lookup failed [...]". Perhaps
> >better to use get_typlen() here, anyway.
>
> I have attached a little change to varlena.c that uses it. I left the
> ereport as it was, but am not fussed about it either way.

I am, because it gives useless messages to the translators to work on.
elog parameters are not marked for translation, ereport are (errmsg and
friends, really).  So please don't do that.

--
Alvaro Herrera (<alvherre[a]alvh.no-ip.org>)
"¿Cómo puedes confiar en algo que pagas y que no ves,
y no confiar en algo que te dan y te lo muestran?" (Germán Poo)

Re: TODO Item - Return compressed length of TOAST datatypes

From
Mark Kirkwood
Date:
Alvaro Herrera wrote:
> On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote:
>
>>Neil Conway wrote:
>
>
>>>elog(ERROR) is usually used for "can't happen" errors.
>>
>>I have attached a little change to varlena.c that uses it. I left the
>>ereport as it was, but am not fussed about it either way.
>
>
> I am, because it gives useless messages to the translators to work on.
> elog parameters are not marked for translation, ereport are (errmsg and
> friends, really).  So please don't do that.
>

Ok, didn't realize the difference! Revised patch attached that uses elog.

Neil, I will runs some tests to see if there is any performance saving
with the first-call-only business.

Cheers

Mark

Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varlena.c,v
retrieving revision 1.125
diff -c -r1.125 varlena.c
*** src/backend/utils/adt/varlena.c    6 Jul 2005 19:02:52 -0000    1.125
--- src/backend/utils/adt/varlena.c    7 Jul 2005 03:40:44 -0000
***************
*** 28,34 ****
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
  #include "utils/pg_locale.h"
- #include "utils/syscache.h"


  typedef struct varlena unknown;
--- 28,33 ----
***************
*** 2364,2385 ****
      {
          /* On the first call lookup the datatype of the supplied argument */
          Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
!         HeapTuple        tp;
!         int                typlen;

!         tp = SearchSysCache(TYPEOID,
!                             ObjectIdGetDatum(argtypeid),
!                             0, 0, 0);
!         if (!HeapTupleIsValid(tp))
          {
              /* Oid not in pg_type, should never happen. */
!             ereport(ERROR,
!                     (errcode(ERRCODE_INTERNAL_ERROR),
!                      errmsg("invalid typid: %u", argtypeid)));
          }
!
!         typlen = ((Form_pg_type)GETSTRUCT(tp))->typlen;
!         ReleaseSysCache(tp);
          fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                                                        sizeof(int));
          *(int *)fcinfo->flinfo->fn_extra = typlen;
--- 2363,2377 ----
      {
          /* On the first call lookup the datatype of the supplied argument */
          Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
!         int                typlen    = get_typlen(argtypeid);

!
!         if (typlen == 0)
          {
              /* Oid not in pg_type, should never happen. */
!             elog(ERROR, "cache lookup failed for type %u", argtypeid);
          }
!
          fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                                                        sizeof(int));
          *(int *)fcinfo->flinfo->fn_extra = typlen;


Re: TODO Item - Return compressed length of TOAST datatypes

From
Tom Lane
Date:
Alvaro Herrera <alvherre@alvh.no-ip.org> writes:
> On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote:
>> I have attached a little change to varlena.c that uses it. I left the
>> ereport as it was, but am not fussed about it either way.

> I am, because it gives useless messages to the translators to work on.

Exactly.  I had already made a private note to fix that patch ---
inventing your own random wording and punctuation for an extremely
standard error message is simply Not Acceptable.

            regards, tom lane

Re: TODO Item - Return compressed length of TOAST datatypes

From
Mark Kirkwood
Date:
Neil Conway wrote:
> Mark Kirkwood wrote:
>
>> I didn't performance test it, but the idea of hammering the catalogs for
>> each value to be processed seemed a bad thing.
>
>
> Well, the syscache already sits in front of the catalogs themselves. I'd
> be curious to see what the performance difference actually is...
>
>

I did some tests with a two tables, one small and one large, I am seeing
a consistent difference favoring the first-call-only type checking:

              Table "public.dim1"
  Column |          Type          | Modifiers
--------+------------------------+-----------
  d1key  | integer                | not null
  dat    | character varying(100) | not null
  dattyp | character varying(20)  | not null
  dfill  | character varying(100) | not null

INFO:  "dim1": scanned 24 of 24 pages, containing 1001 live rows and 0
dead rows; 1001 rows in sample, 1001 estimated total rows


SELECT max(pg_column_size(dfill)) FROM dim1


Run      1st call only check?     elapsed(ms)
1        y                        5.5
2        y                        3.1
3        n                        11.3
4        n                        4.1

Now try a bigger table:

             Table "public.fact0"
  Column |          Type          | Modifiers
--------+------------------------+-----------
  d0key  | integer                | not null
  d1key  | integer                | not null
  d2key  | integer                | not null
  fval   | integer                | not null
  ffill  | character varying(100) | not null


INFO:  "fact0": scanned 3000 of 18182 pages, containing 165000 live rows
and 0 dead rows; 3000 rows in sample, 1000010 estimated total rows

SELECT max(pg_column_size(ffill)) FROM fact0

Run      1st call only check?     elapsed(ms)
1        y                        2497
2        y                        2484
3        y                        2496
4        y                        2480
5        n                        3572
6        n                        3570
7        n                        3558
8        n                        3457


Re: TODO Item - Return compressed length of TOAST datatypes

From
Tom Lane
Date:
Mark Kirkwood <markir@paradise.net.nz> writes:
> I did some tests with a two tables, one small and one large, I am seeing
> a consistent difference favoring the first-call-only type checking:

We had come to a similar conclusion in regards to making array-related
functions cache lookup info across calls.  It's worth checking the
assumption every so often, but the result surprises me not at all.

            regards, tom lane

Re: TODO Item - Return compressed length of TOAST datatypes

From
Mark Kirkwood
Date:
Tom Lane wrote:
> Alvaro Herrera <alvherre@alvh.no-ip.org> writes:
>
>>On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote:
>>
>>>I have attached a little change to varlena.c that uses it. I left the
>>>ereport as it was, but am not fussed about it either way.
>
>
>>I am, because it gives useless messages to the translators to work on.
>
>
> Exactly.  I had already made a private note to fix that patch ---
> inventing your own random wording and punctuation for an extremely
> standard error message is simply Not Acceptable.
>

Apologies all, my "not fussed about it" was highly misleading - I had
already changed the error message as per Neil's suggestion in the
revised patch, but that was not obvious from my comment :-(

regards

Mark

Re: TODO Item - Return compressed length of TOAST datatypes

From
Bruce Momjian
Date:
Patch applied.  Thanks.

---------------------------------------------------------------------------


Mark Kirkwood wrote:
> Alvaro Herrera wrote:
> > On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote:
> >
> >>Neil Conway wrote:
> >
> >
> >>>elog(ERROR) is usually used for "can't happen" errors.
> >>
> >>I have attached a little change to varlena.c that uses it. I left the
> >>ereport as it was, but am not fussed about it either way.
> >
> >
> > I am, because it gives useless messages to the translators to work on.
> > elog parameters are not marked for translation, ereport are (errmsg and
> > friends, really).  So please don't do that.
> >
>
> Ok, didn't realize the difference! Revised patch attached that uses elog.
>
> Neil, I will runs some tests to see if there is any performance saving
> with the first-call-only business.
>
> Cheers
>
> Mark
>

> Index: src/backend/utils/adt/varlena.c
> ===================================================================
> RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varlena.c,v
> retrieving revision 1.125
> diff -c -r1.125 varlena.c
> *** src/backend/utils/adt/varlena.c    6 Jul 2005 19:02:52 -0000    1.125
> --- src/backend/utils/adt/varlena.c    7 Jul 2005 03:40:44 -0000
> ***************
> *** 28,34 ****
>   #include "utils/builtins.h"
>   #include "utils/lsyscache.h"
>   #include "utils/pg_locale.h"
> - #include "utils/syscache.h"
>
>
>   typedef struct varlena unknown;
> --- 28,33 ----
> ***************
> *** 2364,2385 ****
>       {
>           /* On the first call lookup the datatype of the supplied argument */
>           Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
> !         HeapTuple        tp;
> !         int                typlen;
>
> !         tp = SearchSysCache(TYPEOID,
> !                             ObjectIdGetDatum(argtypeid),
> !                             0, 0, 0);
> !         if (!HeapTupleIsValid(tp))
>           {
>               /* Oid not in pg_type, should never happen. */
> !             ereport(ERROR,
> !                     (errcode(ERRCODE_INTERNAL_ERROR),
> !                      errmsg("invalid typid: %u", argtypeid)));
>           }
> !
> !         typlen = ((Form_pg_type)GETSTRUCT(tp))->typlen;
> !         ReleaseSysCache(tp);
>           fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
>                                                         sizeof(int));
>           *(int *)fcinfo->flinfo->fn_extra = typlen;
> --- 2363,2377 ----
>       {
>           /* On the first call lookup the datatype of the supplied argument */
>           Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
> !         int                typlen    = get_typlen(argtypeid);
>
> !
> !         if (typlen == 0)
>           {
>               /* Oid not in pg_type, should never happen. */
> !             elog(ERROR, "cache lookup failed for type %u", argtypeid);
>           }
> !
>           fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
>                                                         sizeof(int));
>           *(int *)fcinfo->flinfo->fn_extra = typlen;
>

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073