TODO Item - Return compressed length of TOAST datatypes (WIP) - Mailing list pgsql-patches
| From | Mark Kirkwood |
|---|---|
| Subject | TODO Item - Return compressed length of TOAST datatypes (WIP) |
| Date | |
| Msg-id | 42B35B49.4000904@paradise.net.nz Whole thread Raw |
| Responses |
Re: TODO Item - Return compressed length of TOAST datatypes (WIP)
|
| List | pgsql-patches |
I thought I would have a look at:
(Datatypes) Add function to return compressed length of TOAST data values.
A WIP patch is attached for comment (wanted to check I hadn't bitten off
more than I could chew *before* asking questions!).
A few questions come to mind:
1) The name - I have called it 'toast_compressed_length'. That seems
longish - I'm wondering whether just 'compressed_length' would be OK?
2) What should be returned for toasted data that is not compressed (or
for plain stored data, for that matter)? The WIP patch just gives the
uncompressed size (I notice I may need to subtract VARHDRSZ in some cases).
3) What should be returned for non-varlena types? The WIP patch
treats everything as a varlena, so it returns incorrect information
in that case.
4) The builtin is declared as immutable - I am not so sure about that (I
am wondering whether altering a column's storage from MAIN -> EXTENDED and
then updating the column to itself will fool it).
5) Any multi-byte locale considerations?
regards
Mark
diff -Nacr src/include/catalog/pg_proc.h.orig src/include/catalog/pg_proc.h
*** src/include/catalog/pg_proc.h.orig Fri Jun 17 15:30:17 2005
--- src/include/catalog/pg_proc.h Fri Jun 17 17:08:18 2005
***************
*** 3655,3660 ****
--- 3655,3664 ----
DATA(insert OID = 2560 ( pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 1184 "" _null_ _null_ _null_
pgsql_postmaster_start_time- _null_ ));
DESCR("postmaster start time");
+ /* Toast compressed length */
+ DATA(insert OID = 2561 ( toast_compressed_length PGNSP PGUID 12 f f t f i 1 23 "25" _null_ _null_ _null_
toast_compressed_length- _null_ ));
+ DESCR("toast compressed length");
+
/*
* Symbolic values for provolatile column: these indicate whether the result
diff -Nacr src/include/access/tuptoaster.h.orig src/include/access/tuptoaster.h
*** src/include/access/tuptoaster.h.orig Thu Jun 16 21:12:57 2005
--- src/include/access/tuptoaster.h Thu Jun 16 21:14:06 2005
***************
*** 138,141 ****
--- 138,149 ----
*/
extern Size toast_raw_datum_size(Datum value);
+ /* ----------
+ * toast_compressed_datum_size -
+ *
+ * Return the compressed (toasted) size of a varlena datum
+ * ----------
+ */
+ extern Size toast_compressed_datum_size(Datum value);
+
#endif /* TUPTOASTER_H */
diff -Nacr src/include/utils/pg_lzcompress.h.orig src/include/utils/pg_lzcompress.h
*** src/include/utils/pg_lzcompress.h.orig Thu Jun 16 21:21:37 2005
--- src/include/utils/pg_lzcompress.h Thu Jun 16 21:21:11 2005
***************
*** 228,231 ****
--- 228,238 ----
extern int pglz_get_next_decomp_char_from_lzdata(PGLZ_DecompState *dstate);
extern int pglz_get_next_decomp_char_from_plain(PGLZ_DecompState *dstate);
+ /* ----------
+ * Function to get compressed size.
+ * Internal use only.
+ * ----------
+ */
+ extern int pglz_fetch_size(PGLZ_Header *source);
+
#endif /* _PG_LZCOMPRESS_H_ */
diff -Nacr src/include/utils/builtins.h.orig src/include/utils/builtins.h
*** src/include/utils/builtins.h.orig Fri Jun 17 15:25:01 2005
--- src/include/utils/builtins.h Fri Jun 17 15:27:30 2005
***************
*** 828,831 ****
--- 828,834 ----
/* catalog/pg_conversion.c */
extern Datum pg_convert_using(PG_FUNCTION_ARGS);
+ /* toastfuncs.c */
+ Datum toast_compressed_length(PG_FUNCTION_ARGS);
+
#endif /* BUILTINS_H */
diff -Nacr src/backend/access/heap/tuptoaster.c.orig src/backend/access/heap/tuptoaster.c
*** src/backend/access/heap/tuptoaster.c.orig Thu Jun 16 20:56:59 2005
--- src/backend/access/heap/tuptoaster.c Fri Jun 17 15:12:30 2005
***************
*** 1436,1438 ****
--- 1436,1499 ----
return result;
}
+
+ /* ----------
+ * toast_compressed_datum_size
+ *
+ * Show the compressed size of a datum.
+ *
+ * value is a Datum whose pointer is interpreted as a varattrib.
+ *
+ * Returns:
+ *   - 0 when the pointer is not valid;
+ *   - for a compressed datum (inline or external) whatever
+ *     pglz_fetch_size() reports for it;
+ *   - for an external, uncompressed datum its va_rawsize;
+ *   - otherwise VARSIZE() of the datum.
+ *
+ * NOTE(review): the last case uses VARSIZE(), which presumably
+ * counts the varlena header, whereas the other cases look like
+ * payload-only sizes -- confirm and make the branches consistent
+ * (VARHDRSZ may need subtracting).
+ * ----------
+ */
+ Size
+ toast_compressed_datum_size(Datum value)
+ {


+ Size size;
+ varattrib *attr = (varattrib *) DatumGetPointer(value);
+
+ if (!PointerIsValid(attr))
+ {
+ /*
+ * No storage or NULL: there is nothing to measure, report 0.
+ */
+ size = 0;
+ }
+ else if (VARATT_IS_EXTERNAL(attr))
+ {
+ /*
+ * Attribute is stored externally.
+ * If it is compressed too, then we need to get the external datum
+ * and interrogate *its* compressed size;
+ * otherwise just use the external rawsize (i.e. no compression).
+ *
+ * NOTE(review): toast_fetch_datum() is called here only so that a
+ * size can be read from the result, which is pfree'd straight
+ * away. Check whether the external pointer already records the
+ * stored size so the fetch can be avoided.
+ */
+ if (VARATT_IS_COMPRESSED(attr))
+ {
+ varattrib *attrext = toast_fetch_datum(attr);
+ size = pglz_fetch_size((PGLZ_Header *)attrext);
+ pfree(attrext);
+ }
+ else
+ {

+ size = attr->va_content.va_external.va_rawsize;
+ }
+ }
+ else if (VARATT_IS_COMPRESSED(attr))
+ {
+ /*
+ * Attribute is stored compressed inline, so calculate
+ * compressed size on the datum itself (no fetch needed).
+ */
+ size = pglz_fetch_size((PGLZ_Header *)attr);
+ }
+ else
+ {
+ /*
+ * Attribute is stored inline, no compression: fall back to the
+ * datum's own VARSIZE().
+ */
+ size = VARSIZE(attr);
+ }
+
+ return size;
+
+ }
diff -Nacr src/backend/utils/adt/Makefile.orig src/backend/utils/adt/Makefile
*** src/backend/utils/adt/Makefile.orig Fri Jun 17 15:26:44 2005
--- src/backend/utils/adt/Makefile Fri Jun 17 16:39:04 2005
***************
*** 24,30 ****
tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
network.o mac.o inet_net_ntop.o inet_net_pton.o \
ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
! ascii.o quote.o pgstatfuncs.o encode.o
like.o: like.c like_match.c
--- 24,30 ----
tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
network.o mac.o inet_net_ntop.o inet_net_pton.o \
ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
! ascii.o quote.o pgstatfuncs.o encode.o toastfuncs.o
like.o: like.c like_match.c
diff -Nacr src/backend/utils/adt/pg_lzcompress.c.orig src/backend/utils/adt/pg_lzcompress.c
*** src/backend/utils/adt/pg_lzcompress.c.orig Thu Jun 16 21:14:42 2005
--- src/backend/utils/adt/pg_lzcompress.c Fri Jun 17 16:30:49 2005
***************
*** 904,907 ****
--- 899,930 ----
return EOF;
return (int) (*(dstate->cp_in++));
+ }
+
+ /* ----------
+ * pglz_fetch_size -
+ *
+ * Actual calculation to get the compressed size.
+ *
+ * source points at a PGLZ_Header; its VARATT_SIZE() and rawsize
+ * field are the only things read.
+ *
+ * Returns source->rawsize when the total size equals rawsize plus
+ * the header size, otherwise the total size minus the header size.
+ *
+ * NOTE(review): when the first condition holds, rawsize is by
+ * definition VARATT_SIZE(source) - sizeof(PGLZ_Header), so both
+ * branches appear to yield the same number (barring integer
+ * conversion effects). The branch may be unnecessary -- confirm.
+ * ----------
+ */
+ int
+ pglz_fetch_size(PGLZ_Header *source)
+ {
+
+ int size;
+
+
+ if (VARATT_SIZE(source) == source->rawsize + sizeof(PGLZ_Header))
+ {
+ /* Compression was not attempted or not effective.*/
+ size = source->rawsize;
+ }
+ else
+ {
+ /* Compressed attribute: stored size less the PGLZ header. */
+ size = VARATT_SIZE(source) - sizeof(PGLZ_Header);
+ }
+
+ return size;
+ }
diff -Nacr src/backend/utils/adt/toastfuncs.c.orig src/backend/utils/adt/toastfuncs.c
*** src/backend/utils/adt/toastfuncs.c.orig Fri Jun 17 17:29:34 2005
--- src/backend/utils/adt/toastfuncs.c Fri Jun 17 16:39:18 2005
***************
*** 0 ****
--- 1,36 ----
+ /*-------------------------------------------------------------------------
+ *
+ * toastfuncs.c
+ * Functions for accessing information about toasted data.
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ #include "postgres.h"
+ #include "fmgr.h"
+
+ #include "access/xact.h"
+ #include "access/tuptoaster.h"
+ #include "utils/builtins.h"
+ #include "utils/pg_lzcompress.h"
+
+ Datum
+ toast_compressed_length(PG_FUNCTION_ARGS)
+ { /*
+ * Function-manager entry point: takes argument 0 as a raw Datum,
+ * asks toast_compressed_datum_size() for its size and returns the
+ * result through PG_RETURN_INT32.
+ */

+ Datum value = PG_GETARG_DATUM(0); /* argument 0, passed on unchanged */
+ int size; /* NOTE(review): callee returns Size; narrowed to int here -- confirm range */
+

+ size = toast_compressed_datum_size(value); /* all size logic lives in the callee */
+
+ PG_RETURN_INT32(size);
+ }
+
pgsql-patches by date: