Re: TODO Item - Return compressed length of TOAST datatypes - Mailing list pgsql-patches
From | Bruce Momjian |
---|---|
Subject | Re: TODO Item - Return compressed length of TOAST datatypes |
Date | |
Msg-id | 200507061909.j66J9J628254@candle.pha.pa.us Whole thread Raw |
In response to | Re: TODO Item - Return compressed length of TOAST datatypes (Mark Kirkwood <markir@paradise.net.nz>) |
Responses |
Re: TODO Item - Return compressed length of TOAST datatypes
|
List | pgsql-patches |
Mark Kirkwood wrote:
> I did a few cleanups on the last patch. Please examine this one instead.
> The changes are:
>
> 1. Add documentation for pg_datum_length builtin.
> 2. Correct some typos in the code comments.
> 3. Move the code in toastfuncs.c to varlena.c as it is probably the
>    correct place.
> 4. Use ereport instead of elog.
> 5. Quiet compiler warning in pg_datum_length.

I have modified your patch to simplify the logic, and renamed it to pg_column_size(), to be consistent with our soon-to-be-added pg_relation/tablespace/database functions from dbsize. Here is a sample usage:

test=> CREATE TABLE test (x INT, y TEXT);
CREATE TABLE
test=> INSERT INTO test VALUES (4, repeat('x', 10000));
INSERT 0 1
test=> INSERT INTO test VALUES (4, repeat('x', 100000));
INSERT 0 1
test=> SELECT pg_column_size(x), pg_column_size(y) FROM test;
 pg_column_size | pg_column_size
----------------+----------------
              4 |            121
              4 |           1152
(2 rows)

Interestingly, the 10-times larger column is 10-times larger in storage. Do we have some limit on how many repeated values we can record?

Patch attached and applied.

--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 359-1001
+ If your life is a hard drive, | 13 Roberts Road
+ Christ can be your backup.
| Newtown Square, Pennsylvania 19073 Index: doc/src/sgml/func.sgml =================================================================== RCS file: /cvsroot/pgsql/doc/src/sgml/func.sgml,v retrieving revision 1.262 diff -c -c -r1.262 func.sgml *** doc/src/sgml/func.sgml 29 Jun 2005 01:52:56 -0000 1.262 --- doc/src/sgml/func.sgml 6 Jul 2005 18:55:34 -0000 *************** *** 2187,2192 **** --- 2187,2200 ---- </row> <row> + <entry><literal><function>pg_column_size</function>(<parameter>string</parameter>)</literal></entry> + <entry><type>integer</type></entry> + <entry>Number of bytes required to store the value, which might be compressed</entry> + <entry><literal>pg_column_size('jo\\000se'::bytea)</literal></entry> + <entry><literal>5</literal></entry> + </row> + + <row> <entry><literal><function>position</function>(<parameter>substring</parameter> in <parameter>string</parameter>)</literal></entry> <entry><type>integer</type></entry> <entry>Location of specified substring</entry> Index: src/backend/access/heap/tuptoaster.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v retrieving revision 1.49 diff -c -c -r1.49 tuptoaster.c *** src/backend/access/heap/tuptoaster.c 21 Mar 2005 01:23:58 -0000 1.49 --- src/backend/access/heap/tuptoaster.c 6 Jul 2005 18:55:35 -0000 *************** *** 1436,1438 **** --- 1436,1480 ---- return result; } + + /* ---------- + * toast_datum_size + * + * Show the (possibly compressed) size of a datum + * ---------- + */ + Size + toast_datum_size(Datum value) + { + + varattrib *attr = (varattrib *) DatumGetPointer(value); + Size result; + + if (VARATT_IS_EXTERNAL(attr)) + { + /* + * Attribute is stored externally - If it is compressed too, + * then we need to get the external datum and calculate its size, + * otherwise we just use the external rawsize. 
+ */ + if (VARATT_IS_COMPRESSED(attr)) + { + varattrib *attrext = toast_fetch_datum(attr); + result = VARSIZE(attrext); + pfree(attrext); + } + else + result = attr->va_content.va_external.va_rawsize; + } + else + { + /* + * Attribute is stored inline either compressed or not, just + * calculate the size of the datum in either case. + */ + result = VARSIZE(attr); + } + + return result; + + } Index: src/backend/utils/adt/varlena.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v retrieving revision 1.124 diff -c -c -r1.124 varlena.c *** src/backend/utils/adt/varlena.c 4 Jul 2005 18:56:44 -0000 1.124 --- src/backend/utils/adt/varlena.c 6 Jul 2005 18:55:36 -0000 *************** *** 28,33 **** --- 28,34 ---- #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" + #include "utils/syscache.h" typedef struct varlena unknown; *************** *** 2348,2350 **** --- 2349,2395 ---- result_text = PG_STR_GET_TEXT(hexsum); PG_RETURN_TEXT_P(result_text); } + + /* + * Return the length of a datum, possibly compressed + */ + Datum + pg_column_size(PG_FUNCTION_ARGS) + { + Datum value = PG_GETARG_DATUM(0); + int result; + + /* fn_extra stores the fixed column length, or -1 for varlena. */ + if (fcinfo->flinfo->fn_extra == NULL) /* first call? */ + { + /* On the first call lookup the datatype of the supplied argument */ + Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0); + HeapTuple tp; + int typlen; + + tp = SearchSysCache(TYPEOID, + ObjectIdGetDatum(argtypeid), + 0, 0, 0); + if (!HeapTupleIsValid(tp)) + { + /* Oid not in pg_type, should never happen. 
*/ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("invalid typid: %u", argtypeid))); + } + + typlen = ((Form_pg_type)GETSTRUCT(tp))->typlen; + ReleaseSysCache(tp); + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(int)); + *(int *)fcinfo->flinfo->fn_extra = typlen; + } + + if (*(int *)fcinfo->flinfo->fn_extra != -1) + PG_RETURN_INT32(*(int *)fcinfo->flinfo->fn_extra); + else + { + result = toast_datum_size(value) - VARHDRSZ; + PG_RETURN_INT32(result); + } + } Index: src/include/access/tuptoaster.h =================================================================== RCS file: /cvsroot/pgsql/src/include/access/tuptoaster.h,v retrieving revision 1.22 diff -c -c -r1.22 tuptoaster.h *** src/include/access/tuptoaster.h 21 Mar 2005 01:24:04 -0000 1.22 --- src/include/access/tuptoaster.h 6 Jul 2005 18:55:37 -0000 *************** *** 138,141 **** --- 138,149 ---- */ extern Size toast_raw_datum_size(Datum value); + /* ---------- + * toast_datum_size - + * + * Return the storage size of a varlena datum + * ---------- + */ + extern Size toast_datum_size(Datum value); + #endif /* TUPTOASTER_H */ Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v retrieving revision 1.373 diff -c -c -r1.373 pg_proc.h *** src/include/catalog/pg_proc.h 1 Jul 2005 19:19:03 -0000 1.373 --- src/include/catalog/pg_proc.h 6 Jul 2005 18:55:43 -0000 *************** *** 3658,3663 **** --- 3658,3667 ---- DATA(insert OID = 2560 ( pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 1184 "" _null_ _null_ _null_ pgsql_postmaster_start_time- _null_ )); DESCR("postmaster start time"); + /* Column storage size */ + DATA(insert OID = 1269 ( pg_column_size PGNSP PGUID 12 f f t f i 1 23 "2276" _null_ _null_ _null_ pg_column_size- _null_ )); + DESCR("bytes required to store the value, perhaps with compression"); + /* new functions for Y-direction rtree opclasses 
*/ DATA(insert OID = 2562 ( box_below PGNSP PGUID 12 f f t f i 2 16 "603 603" _null_ _null_ _null_ box_below- _null_ )); DESCR("is below"); Index: src/include/utils/builtins.h =================================================================== RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v retrieving revision 1.258 diff -c -c -r1.258 builtins.h *** src/include/utils/builtins.h 17 Jun 2005 22:32:50 -0000 1.258 --- src/include/utils/builtins.h 6 Jul 2005 18:55:43 -0000 *************** *** 601,606 **** --- 601,607 ---- extern Datum byteapos(PG_FUNCTION_ARGS); extern Datum bytea_substr(PG_FUNCTION_ARGS); extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); + extern Datum pg_column_size(PG_FUNCTION_ARGS); /* version.c */ extern Datum pgsql_version(PG_FUNCTION_ARGS);
pgsql-patches by date: