TODO Item - Return compressed length of TOAST datatypes (WIP) - Mailing list pgsql-patches

From Mark Kirkwood
Subject TODO Item - Return compressed length of TOAST datatypes (WIP)
Date
Msg-id 42B35B49.4000904@paradise.net.nz
Whole thread Raw
Responses Re: TODO Item - Return compressed length of TOAST datatypes (WIP)
List pgsql-patches
I thought I would have a look at:

(Datatypes) Add function to return compressed length of TOAST data values.

A WIP patch is attached for comment (wanted to check I hadn't bitten off
more than I could chew *before* asking questions!).

A few questions come to mind:

1) The name - I have called it 'toast_compressed_length'. Seems longish -
I'm wondering if just 'compressed_length' is ok?

2) What should be returned for toasted data that is not compressed (or
plain stored data for that matter)? The WIP patch just gives the
uncompressed size (I notice I may need to subtract VARHDRSZ in some cases).

3) What should be returned for non-varlena types? The WIP patch
treats everything as a varlena, so it returns incorrect information
for that case.

4) The builtin is declared as immutable - I am not so sure about that (I
am wondering if altering a column's storage from MAIN -> EXTENDED and
then updating the column to be itself will fool it).

5) Any multi-byte locale considerations?

regards

Mark








diff -Nacr src/include/catalog/pg_proc.h.orig src/include/catalog/pg_proc.h
*** src/include/catalog/pg_proc.h.orig    Fri Jun 17 15:30:17 2005
--- src/include/catalog/pg_proc.h    Fri Jun 17 17:08:18 2005
***************
*** 3655,3660 ****
--- 3655,3664 ----
  DATA(insert OID = 2560 (  pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 1184 "" _null_ _null_ _null_
pgsql_postmaster_start_time- _null_ )); 
  DESCR("postmaster start time");

+ /* Toast compressed length */
+ DATA(insert OID = 2561 (  toast_compressed_length       PGNSP PGUID 12 f f t f i 1 23 "25" _null_ _null_ _null_
toast_compressed_length- _null_ )); 
+ DESCR("toast compressed length");
+

  /*
   * Symbolic values for provolatile column: these indicate whether the result
diff -Nacr src/include/access/tuptoaster.h.orig src/include/access/tuptoaster.h
*** src/include/access/tuptoaster.h.orig    Thu Jun 16 21:12:57 2005
--- src/include/access/tuptoaster.h    Thu Jun 16 21:14:06 2005
***************
*** 138,141 ****
--- 138,149 ----
   */
  extern Size toast_raw_datum_size(Datum value);

+ /* ----------
+  * toast_compressed_datum_size -
+  *
+  *    Return the compressed (toasted) size of a varlena datum
+  * ----------
+  */
+ extern Size toast_compressed_datum_size(Datum value);
+
  #endif   /* TUPTOASTER_H */
diff -Nacr src/include/utils/pg_lzcompress.h.orig src/include/utils/pg_lzcompress.h
*** src/include/utils/pg_lzcompress.h.orig    Thu Jun 16 21:21:37 2005
--- src/include/utils/pg_lzcompress.h    Thu Jun 16 21:21:11 2005
***************
*** 228,231 ****
--- 228,238 ----
  extern int    pglz_get_next_decomp_char_from_lzdata(PGLZ_DecompState *dstate);
  extern int    pglz_get_next_decomp_char_from_plain(PGLZ_DecompState *dstate);

+ /* ----------
+  * Function to get compressed size.
+  * Internal use only.
+  * ----------
+  */
+ extern int    pglz_fetch_size(PGLZ_Header *source);
+
  #endif   /* _PG_LZCOMPRESS_H_ */
diff -Nacr src/include/utils/builtins.h.orig src/include/utils/builtins.h
*** src/include/utils/builtins.h.orig    Fri Jun 17 15:25:01 2005
--- src/include/utils/builtins.h    Fri Jun 17 15:27:30 2005
***************
*** 828,831 ****
--- 828,834 ----
  /* catalog/pg_conversion.c */
  extern Datum pg_convert_using(PG_FUNCTION_ARGS);

+ /* toastfuncs.c */
+ Datum   toast_compressed_length(PG_FUNCTION_ARGS);
+
  #endif   /* BUILTINS_H */
diff -Nacr src/backend/access/heap/tuptoaster.c.orig src/backend/access/heap/tuptoaster.c
*** src/backend/access/heap/tuptoaster.c.orig    Thu Jun 16 20:56:59 2005
--- src/backend/access/heap/tuptoaster.c    Fri Jun 17 15:12:30 2005
***************
*** 1436,1438 ****
--- 1436,1499 ----

      return result;
  }
+
+ /* ----------
+  * toast_compressed_datum_size
+  *
+  *    Show the compressed size of a datum
+  * ----------
+  */
+ Size
+ toast_compressed_datum_size(Datum value)
+ {
+
+
+     Size        size;
+     varattrib    *attr = (varattrib *) DatumGetPointer(value);
+
+     if (!PointerIsValid(attr))
+     {
+         /*
+          * No storage or NULL.
+          */
+         size = 0;
+     }
+     else if (VARATT_IS_EXTERNAL(attr))
+     {
+         /*
+          * Attribute is stored externally
+          * If  it is compressed too, then we need to get the external datum
+          * and interrogate *its* compressed size
+          * otherwise just use the external rawsize (i.e. no compression)
+          */
+         if (VARATT_IS_COMPRESSED(attr))
+         {
+             varattrib        *attrext = toast_fetch_datum(attr);
+             size = pglz_fetch_size((PGLZ_Header *)attrext);
+             pfree(attrext);
+         }
+         else
+         {
+
+             size = attr->va_content.va_external.va_rawsize;
+         }
+     }
+     else if (VARATT_IS_COMPRESSED(attr))
+     {
+         /*
+          * Attribute is stored compressed inline, so calculate
+          * compressed size on the datum itself.
+          */
+         size = pglz_fetch_size((PGLZ_Header *)attr);
+     }
+     else
+     {
+         /*
+          * Attribute is stored inline, no compression.
+          */
+         size = VARSIZE(attr);
+     }
+
+     return size;
+
+ }
diff -Nacr src/backend/utils/adt/Makefile.orig src/backend/utils/adt/Makefile
*** src/backend/utils/adt/Makefile.orig    Fri Jun 17 15:26:44 2005
--- src/backend/utils/adt/Makefile    Fri Jun 17 16:39:04 2005
***************
*** 24,30 ****
      tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
      network.o mac.o inet_net_ntop.o inet_net_pton.o \
      ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
!     ascii.o quote.o pgstatfuncs.o encode.o

  like.o: like.c like_match.c

--- 24,30 ----
      tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
      network.o mac.o inet_net_ntop.o inet_net_pton.o \
      ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
!     ascii.o quote.o pgstatfuncs.o encode.o toastfuncs.o

  like.o: like.c like_match.c

diff -Nacr src/backend/utils/adt/pg_lzcompress.c.orig src/backend/utils/adt/pg_lzcompress.c
*** src/backend/utils/adt/pg_lzcompress.c.orig    Thu Jun 16 21:14:42 2005
--- src/backend/utils/adt/pg_lzcompress.c    Fri Jun 17 16:30:49 2005
***************
*** 904,907 ****
--- 899,930 ----
          return EOF;

      return (int) (*(dstate->cp_in++));
+ }
+
+ /* ----------
+  * pglz_fetch_size -
+  *
+  *        Actual calculation to get the compressed size.
+  *
+  * ----------
+  */
+ int
+ pglz_fetch_size(PGLZ_Header *source)
+ {
+
+     int        size;
+
+
+     if (VARATT_SIZE(source) == source->rawsize + sizeof(PGLZ_Header))
+     {
+         /* Compression was not attempted or not effective.*/
+         size = source->rawsize;
+     }
+     else
+     {
+         /* Compressed attribute. */
+         size = VARATT_SIZE(source) - sizeof(PGLZ_Header);
+     }
+
+     return size;
  }
diff -Nacr src/backend/utils/adt/toastfuncs.c.orig src/backend/utils/adt/toastfuncs.c
*** src/backend/utils/adt/toastfuncs.c.orig    Fri Jun 17 17:29:34 2005
--- src/backend/utils/adt/toastfuncs.c    Fri Jun 17 16:39:18 2005
***************
*** 0 ****
--- 1,36 ----
+ /*-------------------------------------------------------------------------
+  *
+  * toastfuncs.c
+  *      Functions for accessing information about toasted data.
+  *
+  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *
+  * IDENTIFICATION
+  *      $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+
+ #include "postgres.h"
+ #include "fmgr.h"
+
+ #include "access/xact.h"
+ #include "access/tuptoaster.h"
+ #include "utils/builtins.h"
+ #include "utils/pg_lzcompress.h"
+
+ Datum
+ toast_compressed_length(PG_FUNCTION_ARGS)
+ {
+
+     Datum            value = PG_GETARG_DATUM(0);
+     int                size;
+
+
+     size = toast_compressed_datum_size(value);
+
+     PG_RETURN_INT32(size);
+ }
+




pgsql-patches by date:

Previous
From: Bruce Momjian
Date:
Subject: Re: Quick little \h enhancement for psql
Next
From: Andreas Pflug
Date:
Subject: default database creation with initdb