From 79d501b31c967e2e01424ba10cd0d0e14d175c08 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Veldanda Date: Mon, 21 Apr 2025 13:12:30 +0000 Subject: [PATCH v13 8/8] initial draft to address datum leak problem --- src/backend/access/heap/heapam.c | 3 +- src/backend/access/table/toast_helper.c | 227 +++++++++++++++++- src/backend/catalog/pg_zstd_dictionaries.c | 4 +- src/test/regress/expected/compression.out | 8 +- .../regress/expected/compression_zstd_1.out | 60 ++--- 5 files changed, 264 insertions(+), 38 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index c1a4de14a59..0348161432a 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2278,7 +2278,7 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, Assert(!HeapTupleHasExternal(tup)); return tup; } - else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD) + else if (HeapTupleHasExternal(tup) || HeapTupleHasVarWidth(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD) return heap_toast_insert_or_update(relation, tup, NULL, options); else return tup; @@ -3776,6 +3776,7 @@ l2: else need_toast = (HeapTupleHasExternal(&oldtup) || HeapTupleHasExternal(newtup) || + HeapTupleHasVarWidth(newtup) || newtup->t_len > TOAST_TUPLE_THRESHOLD); pagefree = PageGetHeapFreeSpace(page); diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c index f4b1cbe494e..2a90ebf77c9 100644 --- a/src/backend/access/table/toast_helper.c +++ b/src/backend/access/table/toast_helper.c @@ -19,7 +19,17 @@ #include "access/toast_internals.h" #include "catalog/pg_type_d.h" #include "varatt.h" - +#include "utils/lsyscache.h" +#include "access/htup_details.h" +#include "catalog/pg_type.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/rangetypes.h" +#include "utils/multirangetypes.h" +#include "utils/typcache.h" +#include "miscadmin.h" + +static Datum flatten_datum(Datum value, Oid typid); /* * Prepare to TOAST a tuple. @@ -151,6 +161,25 @@ toast_tuple_init(ToastTupleContext *ttc) ttc->ttc_flags |= (TOAST_NEEDS_CHANGE | TOAST_NEEDS_FREE); } + if (!(IsCatalogNamespace(ttc->ttc_rel->rd_rel->relnamespace) || IsToastNamespace(ttc->ttc_rel->rd_rel->relnamespace))) + { + if (!VARATT_IS_SHORT(new_value)) + { + Datum oldd = PointerGetDatum(new_value); + Datum clean = flatten_datum(oldd, att->atttypid); + + if (DatumGetPointer(clean) != DatumGetPointer(oldd)) + { + if (ttc->ttc_attr[i].tai_oldexternal != NULL) + pfree(new_value); + new_value = (struct varlena *) DatumGetPointer(clean); + ttc->ttc_values[i] = clean; + ttc->ttc_attr[i].tai_colflags |= TOASTCOL_NEEDS_FREE; + ttc->ttc_flags |= (TOAST_NEEDS_CHANGE | TOAST_NEEDS_FREE); + } + } + } + /* * Remember the size of this attribute */ @@ -346,3 +375,199 @@ toast_delete_external(Relation rel, const Datum *values, const bool *isnull, } } } + +static Datum +flatten_datum(Datum value, Oid typid) +{ + Oid basetypid; + char typtype; + + /* initialize at top of this block */ + basetypid = getBaseType(typid); /* Get basetype for Domain. */ + typtype = get_typtype(basetypid); + + /* ---------- BASE / ENUM / PSEUDO ----------------------- */ + if (typtype == TYPTYPE_BASE || + typtype == TYPTYPE_ENUM || + typtype == TYPTYPE_PSEUDO) + { + if (get_typlen(basetypid) > 0 || + get_typbyval(basetypid)) + return value; /* fixed‑len or pass‑by‑val */ + + /* ---------- ARRAY ------------------------------------------------ */ + if (type_is_array(typid)) + { + ArrayType *arr; + TypeCacheEntry *elemCache; + Datum *elems; + bool *nulls; + int nitems; + int i; + ArrayType *new_arr; + + arr = DatumGetArrayTypeP(value); + elemCache = lookup_type_cache(ARR_ELEMTYPE(arr), 0); + + if (elemCache->typbyval || elemCache->typlen > 0) + return PointerGetDatum(arr); + + deconstruct_array(arr, + ARR_ELEMTYPE(arr), + elemCache->typlen, + elemCache->typbyval, + elemCache->typalign, + &elems, &nulls, &nitems); + + if (nitems == 0) + return PointerGetDatum(arr); + + for (i = 0; i < nitems; i++) + { + if (!nulls[i]) + elems[i] = flatten_datum(elems[i], + ARR_ELEMTYPE(arr)); + } + + new_arr = construct_md_array(elems, nulls, + ARR_NDIM(arr), + ARR_DIMS(arr), + ARR_LBOUND(arr), + ARR_ELEMTYPE(arr), + elemCache->typlen, + elemCache->typbyval, + elemCache->typalign); + + pfree(elems); + pfree(nulls); + + return PointerGetDatum(new_arr); + } + + return PointerGetDatum(PG_DETOAST_DATUM(value)); + } + + /* ---------- COMPOSITE ------------------------------------------- */ + if (typtype == TYPTYPE_COMPOSITE) + { + HeapTupleHeader hdr; + TupleDesc td; + int natts; + Datum *vals; + bool *nulls; + HeapTupleData t; + int i; + Form_pg_attribute att; + HeapTuple newt; + HeapTupleHeader copy; + + hdr = DatumGetHeapTupleHeader(value); + + if (!(hdr->t_infomask & HEAP_HASVARWIDTH)) + return PointerGetDatum(hdr); + + td = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(hdr), + HeapTupleHeaderGetTypMod(hdr)); + natts = td->natts; + + vals = palloc(sizeof(Datum) * natts); + nulls = palloc(sizeof(bool) * natts); + + t.t_len = VARSIZE(hdr); /* ⚑ portable */ + t.t_tableOid = InvalidOid; + t.t_data = hdr; + ItemPointerSetInvalid(&t.t_self); + + heap_deform_tuple(&t, td, vals, nulls); + + for (i = 0; i < natts; i++) + { + att = TupleDescAttr(td, i); + if (att->attisdropped || att->atthasmissing) + continue; + if (!nulls[i] && att->attlen == -1) + vals[i] = flatten_datum(vals[i], att->atttypid); + } + + newt = heap_form_tuple(td, vals, nulls); + copy = (HeapTupleHeader) palloc(newt->t_len); + memcpy(copy, newt->t_data, newt->t_len); + + ReleaseTupleDesc(td); + pfree(vals); + pfree(nulls); + heap_freetuple(newt); + + return PointerGetDatum(copy); + } + + /* ---------- RANGE ----------------------------------------------- */ + if (typtype == TYPTYPE_RANGE) + { + RangeType *r; + TypeCacheEntry *tc; + RangeBound l; + RangeBound u; + bool empty; + + r = DatumGetRangeTypeP(value); + tc = lookup_type_cache(basetypid, TYPECACHE_RANGE_INFO); + + range_deserialize(tc, r, &l, &u, &empty); + + if (!empty && + !(tc->rngelemtype->typbyval || + tc->rngelemtype->typlen > 0)) + { + if (!l.infinite) + l.val = flatten_datum(l.val, + tc->rngelemtype->type_id); + if (!u.infinite) + u.val = flatten_datum(u.val, + tc->rngelemtype->type_id); + return PointerGetDatum(make_range(tc, &l, &u, empty, NULL)); + } + + return PointerGetDatum(r); + } + + /* ---------- MULTIRANGE ---------------------------- */ + if (typtype == TYPTYPE_MULTIRANGE) + { + MultirangeType *mr; + TypeCacheEntry *tc; + int32 rangeCount; + RangeType **ranges; + RangeType **out; + MultirangeType *newmr; + int i; + + mr = DatumGetMultirangeTypeP(value); + tc = lookup_type_cache(basetypid, + TYPECACHE_MULTIRANGE_INFO); + + multirange_deserialize(tc->rngtype, mr, + &rangeCount, &ranges); + + out = palloc(sizeof(RangeType *) * rangeCount); + + for (i = 0; i < rangeCount; i++) + { + out[i] = DatumGetRangeTypeP( + flatten_datum(PointerGetDatum(ranges[i]), + RangeTypeGetOid(ranges[i]))); + } + + newmr = make_multirange(MultirangeTypeGetOid(mr), + tc->rngtype, + rangeCount, out); + pfree(out); + + return PointerGetDatum(newmr); + } + + elog(ERROR, "flatten_datum: unsupported type %u", typid); + pg_unreachable(); + /* keep compiler happy: */ + return (Datum) 0; +} diff --git a/src/backend/catalog/pg_zstd_dictionaries.c b/src/backend/catalog/pg_zstd_dictionaries.c index 08a6883ecd4..63c2a34190a 100644 --- a/src/backend/catalog/pg_zstd_dictionaries.c +++ b/src/backend/catalog/pg_zstd_dictionaries.c @@ -410,7 +410,7 @@ range_typzstdsampling(PG_FUNCTION_ARGS) bool empty; /* Get information about range type; note column might be a domain */ - TypeCacheEntry *typcache = range_get_typcache(fcinfo, getBaseType(range->rangetypid)); + TypeCacheEntry *typcache = range_get_typcache(fcinfo, RangeTypeGetOid(range)); /* If the type does not supply a builder, skip */ if (!OidIsValid(typcache->rngelemtype->typzstdsampling)) @@ -436,7 +436,7 @@ multirange_typzstdsampling(PG_FUNCTION_ARGS) RangeType **ranges; /* Get information about multirange type; note column might be a domain */ - TypeCacheEntry *typcache = multirange_get_typcache(fcinfo, getBaseType(mrange->multirangetypid)); + TypeCacheEntry *typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mrange)); /* If the type does not supply a builder, skip */ if (!OidIsValid(typcache->rngtype->typzstdsampling)) diff --git a/src/test/regress/expected/compression.out b/src/test/regress/expected/compression.out index 94495388ade..29ef885e516 100644 --- a/src/test/regress/expected/compression.out +++ b/src/test/regress/expected/compression.out @@ -69,7 +69,7 @@ SELECT pg_column_compression(f1) FROM cmmove3; pg_column_compression ----------------------- pglz - lz4 + pglz (2 rows) -- test LIKE INCLUDING COMPRESSION @@ -97,7 +97,7 @@ UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; SELECT pg_column_compression(f1) FROM cmmove2; pg_column_compression ----------------------- - lz4 + pglz (1 row) -- test externally stored compressed data @@ -200,8 +200,8 @@ SELECT pg_column_compression(f1) FROM cmdata1; SELECT pg_column_compression(x) FROM compressmv; pg_column_compression ----------------------- - lz4 - lz4 + pglz + pglz (2 rows) -- test compression with partition diff --git a/src/test/regress/expected/compression_zstd_1.out b/src/test/regress/expected/compression_zstd_1.out index c1a7936e574..260b9fd2b17 100644 --- a/src/test/regress/expected/compression_zstd_1.out +++ b/src/test/regress/expected/compression_zstd_1.out @@ -139,36 +139,36 @@ SELECT pg_column_compression(f1) AS mv_compression FROM compressmv_zstd; mv_compression ---------------- - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd - zstd + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz + pglz (30 rows) SELECT objid::regclass, refobjid from pg_depend where refclassid = 9946; -- 2.47.1