Re: TODO Item - Return compressed length of TOAST datatypes - Mailing list pgsql-patches

From Bruce Momjian
Subject Re: TODO Item - Return compressed length of TOAST datatypes
Date
Msg-id 200507061909.j66J9J628254@candle.pha.pa.us
Whole thread Raw
In response to Re: TODO Item - Return compressed length of TOAST datatypes  (Mark Kirkwood <markir@paradise.net.nz>)
Responses Re: TODO Item - Return compressed length of TOAST datatypes
List pgsql-patches
Mark Kirkwood wrote:
> I did a few cleanups on the last patch. Please examine this one instead.
> The changes are:
>
> 1. Add documentation for pg_datum_length builtin.
> 2. Correct some typos in the code comments.
> 3. Move the code in toastfuncs.c to varlena.c as it is probably the
> correct place.
> 4. Use ereport instead of elog.
> 5. Quiet compiler warning in pg_datum_length.

I have modified your patch to simplify the logic, and renamed it to
pg_column_size(), to be consistent with our soon-to-be-added
pg_relation/tablespace/database functions from dbsize.

Here is a sample usage:

    test=> CREATE TABLE test (x INT, y TEXT);
    CREATE TABLE
    test=> INSERT INTO test VALUES (4, repeat('x', 10000));
    INSERT 0 1
    test=> INSERT INTO test VALUES (4, repeat('x', 100000));
    INSERT 0 1
    test=> SELECT pg_column_size(x), pg_column_size(y) FROM test;
     pg_column_size | pg_column_size
    ----------------+----------------
                  4 |            121
                  4 |           1152
    (2 rows)

Interestingly, the 10-times larger column is also roughly 10 times larger
in storage.  Do we have some limit on how many repeated values we can
record?

Patch attached and applied.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.262
diff -c -c -r1.262 func.sgml
*** doc/src/sgml/func.sgml    29 Jun 2005 01:52:56 -0000    1.262
--- doc/src/sgml/func.sgml    6 Jul 2005 18:55:34 -0000
***************
*** 2187,2192 ****
--- 2187,2200 ----
        </row>

        <row>
+        <entry><literal><function>pg_column_size</function>(<parameter>string</parameter>)</literal></entry>
+        <entry><type>integer</type></entry>
+        <entry>Number of bytes required to store the value, which might be compressed</entry>
+        <entry><literal>pg_column_size('jo\\000se'::bytea)</literal></entry>
+        <entry><literal>5</literal></entry>
+       </row>
+
+       <row>
         <entry><literal><function>position</function>(<parameter>substring</parameter> in
<parameter>string</parameter>)</literal></entry>
         <entry><type>integer</type></entry>
         <entry>Location of specified substring</entry>
Index: src/backend/access/heap/tuptoaster.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v
retrieving revision 1.49
diff -c -c -r1.49 tuptoaster.c
*** src/backend/access/heap/tuptoaster.c    21 Mar 2005 01:23:58 -0000    1.49
--- src/backend/access/heap/tuptoaster.c    6 Jul 2005 18:55:35 -0000
***************
*** 1436,1438 ****
--- 1436,1480 ----

      return result;
  }
+
+ /* ----------
+  * toast_datum_size
+  *
+  *    Show the (possibly compressed) size of a datum
+  * ----------
+  */
+ Size
+ toast_datum_size(Datum value)
+ {
+
+     varattrib    *attr = (varattrib *) DatumGetPointer(value);
+     Size        result;
+
+     if (VARATT_IS_EXTERNAL(attr))
+     {
+         /*
+          * Attribute is stored externally - If it is compressed too,
+          * then we need to get the external datum and calculate its size,
+          * otherwise we just use the external rawsize.
+          */
+         if (VARATT_IS_COMPRESSED(attr))
+         {
+             varattrib        *attrext = toast_fetch_datum(attr);
+             result = VARSIZE(attrext);
+             pfree(attrext);
+         }
+         else
+             result = attr->va_content.va_external.va_rawsize;
+     }
+     else
+     {
+         /*
+          * Attribute is stored inline either compressed or not, just
+          * calculate the size of the datum in either case.
+          */
+         result = VARSIZE(attr);
+     }
+
+     return result;
+
+ }
Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v
retrieving revision 1.124
diff -c -c -r1.124 varlena.c
*** src/backend/utils/adt/varlena.c    4 Jul 2005 18:56:44 -0000    1.124
--- src/backend/utils/adt/varlena.c    6 Jul 2005 18:55:36 -0000
***************
*** 28,33 ****
--- 28,34 ----
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
  #include "utils/pg_locale.h"
+ #include "utils/syscache.h"


  typedef struct varlena unknown;
***************
*** 2348,2350 ****
--- 2349,2395 ----
      result_text = PG_STR_GET_TEXT(hexsum);
      PG_RETURN_TEXT_P(result_text);
  }
+
+ /*
+  * Return the length of a datum, possibly compressed
+  */
+ Datum
+ pg_column_size(PG_FUNCTION_ARGS)
+ {
+     Datum            value = PG_GETARG_DATUM(0);
+     int                result;
+
+     /*    fn_extra stores the fixed column length, or -1 for varlena. */
+     if (fcinfo->flinfo->fn_extra == NULL)    /* first call? */
+     {
+         /* On the first call lookup the datatype of the supplied argument */
+         Oid                argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+         HeapTuple        tp;
+         int                typlen;
+
+         tp = SearchSysCache(TYPEOID,
+                             ObjectIdGetDatum(argtypeid),
+                             0, 0, 0);
+         if (!HeapTupleIsValid(tp))
+         {
+             /* Oid not in pg_type, should never happen. */
+             ereport(ERROR,
+                     (errcode(ERRCODE_INTERNAL_ERROR),
+                      errmsg("invalid typid: %u", argtypeid)));
+         }
+
+         typlen = ((Form_pg_type)GETSTRUCT(tp))->typlen;
+         ReleaseSysCache(tp);
+         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+                                                       sizeof(int));
+         *(int *)fcinfo->flinfo->fn_extra = typlen;
+     }
+
+     if (*(int *)fcinfo->flinfo->fn_extra != -1)
+         PG_RETURN_INT32(*(int *)fcinfo->flinfo->fn_extra);
+     else
+     {
+         result = toast_datum_size(value) - VARHDRSZ;
+         PG_RETURN_INT32(result);
+     }
+ }
Index: src/include/access/tuptoaster.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/access/tuptoaster.h,v
retrieving revision 1.22
diff -c -c -r1.22 tuptoaster.h
*** src/include/access/tuptoaster.h    21 Mar 2005 01:24:04 -0000    1.22
--- src/include/access/tuptoaster.h    6 Jul 2005 18:55:37 -0000
***************
*** 138,141 ****
--- 138,149 ----
   */
  extern Size toast_raw_datum_size(Datum value);

+ /* ----------
+  * toast_datum_size -
+  *
+  *    Return the storage size of a varlena datum
+  * ----------
+  */
+ extern Size toast_datum_size(Datum value);
+
  #endif   /* TUPTOASTER_H */
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.373
diff -c -c -r1.373 pg_proc.h
*** src/include/catalog/pg_proc.h    1 Jul 2005 19:19:03 -0000    1.373
--- src/include/catalog/pg_proc.h    6 Jul 2005 18:55:43 -0000
***************
*** 3658,3663 ****
--- 3658,3667 ----
  DATA(insert OID = 2560 (  pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 1184 "" _null_ _null_ _null_
pgsql_postmaster_start_time- _null_ )); 
  DESCR("postmaster start time");

+ /* Column storage size */
+ DATA(insert OID = 1269 (  pg_column_size       PGNSP PGUID 12 f f t f i 1 23 "2276" _null_ _null_ _null_
pg_column_size- _null_ )); 
+ DESCR("bytes required to store the value, perhaps with compression");
+
  /* new functions for Y-direction rtree opclasses */
  DATA(insert OID = 2562 (  box_below           PGNSP PGUID 12 f f t f i 2 16 "603 603" _null_ _null_ _null_
box_below- _null_ )); 
  DESCR("is below");
Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.258
diff -c -c -r1.258 builtins.h
*** src/include/utils/builtins.h    17 Jun 2005 22:32:50 -0000    1.258
--- src/include/utils/builtins.h    6 Jul 2005 18:55:43 -0000
***************
*** 601,606 ****
--- 601,607 ----
  extern Datum byteapos(PG_FUNCTION_ARGS);
  extern Datum bytea_substr(PG_FUNCTION_ARGS);
  extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS);
+ extern Datum pg_column_size(PG_FUNCTION_ARGS);

  /* version.c */
  extern Datum pgsql_version(PG_FUNCTION_ARGS);

pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Re: More to Bad link Makefile patch
Next
From: Alvaro Herrera
Date:
Subject: Re: Autovacuum integration