From 3043cfc51dc345683b5f2aac6b0c431680a476b6 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Veldanda Date: Sat, 3 May 2025 01:57:15 +0000 Subject: [PATCH v20 1/2] Design proposal: extend varattrib_4b to support multiple compression algorithms. --- contrib/amcheck/verify_heapam.c | 2 +- src/backend/access/brin/brin_tuple.c | 4 +- src/backend/access/common/detoast.c | 6 +- src/backend/access/common/indextuple.c | 5 +- src/backend/access/common/toast_compression.c | 38 ++++++++- src/backend/access/common/toast_internals.c | 18 ++-- src/backend/access/table/toast_helper.c | 4 +- src/include/access/toast_compression.h | 85 ++++++++++++++++--- src/include/access/toast_internals.h | 38 ++++++--- src/include/varatt.h | 75 +++++++++++++++- src/tools/pgindent/typedefs.list | 2 + 11 files changed, 232 insertions(+), 45 deletions(-) diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index aa9cccd1da4..d7c2ac6951a 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -1786,7 +1786,7 @@ check_tuple_attribute(HeapCheckContext *ctx) bool valid = false; /* Compressed attributes should have a valid compression method */ - cmid = TOAST_COMPRESS_METHOD(&toast_pointer); + cmid = toast_get_compression_id(attr); switch (cmid) { /* List of all valid compression method IDs */ diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c index 861f397e6db..9c1e22e98c6 100644 --- a/src/backend/access/brin/brin_tuple.c +++ b/src/backend/access/brin/brin_tuple.c @@ -223,6 +223,7 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, { Datum cvalue; char compression; + CompressionInfo cmp; Form_pg_attribute att = TupleDescAttr(brdesc->bd_tupdesc, keyno); @@ -237,7 +238,8 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, else compression = InvalidCompressionMethod; - cvalue = toast_compress_datum(value, compression); + cmp = 
setup_compression_info(compression, att); + cvalue = toast_compress_datum(value, cmp); if (DatumGetPointer(cvalue) != NULL) { diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index 62651787742..01419d1c65f 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -478,7 +478,7 @@ toast_decompress_datum(struct varlena *attr) * Fetch the compression method id stored in the compression header and * decompress the data using the appropriate decompression routine. */ - cmid = TOAST_COMPRESS_METHOD(attr); + cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); switch (cmid) { case TOAST_PGLZ_COMPRESSION_ID: @@ -514,14 +514,14 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) * have been seen to give wrong results if passed an output size that is * more than the data's true decompressed size. */ - if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr)) + if ((uint32) slicelength >= VARDATA_COMPRESSED_GET_EXTSIZE(attr)) return toast_decompress_datum(attr); /* * Fetch the compression method id stored in the compression header and * decompress the data slice using the appropriate decompression routine. 
*/ - cmid = TOAST_COMPRESS_METHOD(attr); + cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); switch (cmid) { case TOAST_PGLZ_COMPRESSION_ID: diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index 1986b943a28..0386f5a1491 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -123,9 +123,10 @@ index_form_tuple_context(TupleDesc tupleDescriptor, att->attstorage == TYPSTORAGE_MAIN)) { Datum cvalue; + CompressionInfo cmp; - cvalue = toast_compress_datum(untoasted_values[i], - att->attcompression); + cmp = setup_compression_info(att->attcompression, att); + cvalue = toast_compress_datum(untoasted_values[i], cmp); if (DatumGetPointer(cvalue) != NULL) { diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c index 21f2f4af97e..5e5d42d80ef 100644 --- a/src/backend/access/common/toast_compression.c +++ b/src/backend/access/common/toast_compression.c @@ -21,6 +21,7 @@ #include "access/toast_compression.h" #include "common/pg_lzcompress.h" #include "varatt.h" +#include "utils/attoptcache.h" /* GUC */ int default_toast_compression = TOAST_PGLZ_COMPRESSION; @@ -266,7 +267,10 @@ toast_get_compression_id(struct varlena *attr) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); - if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) + && VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == VARATT_4BCE_MASK) + cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(detoast_external_attr(attr)); + else cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer); } else if (VARATT_IS_COMPRESSED(attr)) @@ -314,3 +318,35 @@ GetCompressionMethodName(char method) return NULL; /* keep compiler quiet */ } } + +CompressionInfo +setup_compression_info(char cmethod, Form_pg_attribute att) +{ + CompressionInfo info; + + /* initialize from the attribute’s default settings */ + info.cmethod = cmethod; + info.cmp_ext = 
NULL; + + /* If the compression method is not valid, use the current default */ + if (!CompressionMethodIsValid(cmethod)) + info.cmethod = default_toast_compression; + + switch (info.cmethod) + { + case TOAST_PGLZ_COMPRESSION: + case TOAST_LZ4_COMPRESSION: + break; + default: + elog(ERROR, "invalid compression method %c", info.cmethod); + } + + return info; +} + +void +free_compression_info(CompressionInfo *info) +{ + if (info->cmp_ext != NULL) + pfree(info->cmp_ext); +} diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 7d8be8346ce..83b537d51bf 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -43,25 +43,22 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid); * ---------- */ Datum -toast_compress_datum(Datum value, char cmethod) +toast_compress_datum(Datum value, CompressionInfo cmp) { struct varlena *tmp = NULL; int32 valsize; ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; + varatt_cmp_extended *cmp_ext = cmp.cmp_ext; Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value))); Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); - /* If the compression method is not valid, use the current default */ - if (!CompressionMethodIsValid(cmethod)) - cmethod = default_toast_compression; - /* * Call appropriate compression routine for the compression method. 
*/ - switch (cmethod) + switch (cmp.cmethod) { case TOAST_PGLZ_COMPRESSION: tmp = pglz_compress_datum((const struct varlena *) value); @@ -72,11 +69,14 @@ toast_compress_datum(Datum value, char cmethod) cmid = TOAST_LZ4_COMPRESSION_ID; break; default: - elog(ERROR, "invalid compression method %c", cmethod); + elog(ERROR, "invalid compression method %c", cmp.cmethod); } if (tmp == NULL) + { + free_compression_info(&cmp); return PointerGetDatum(NULL); + } /* * We recheck the actual size even if compression reports success, because @@ -92,13 +92,15 @@ toast_compress_datum(Datum value, char cmethod) { /* successful compression */ Assert(cmid != TOAST_INVALID_COMPRESSION_ID); - TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(tmp, valsize, cmid); + TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD_INFO(tmp, valsize, cmid, cmp_ext); + free_compression_info(&cmp); return PointerGetDatum(tmp); } else { /* incompressible data */ pfree(tmp); + free_compression_info(&cmp); return PointerGetDatum(NULL); } } diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c index b60fab0a4d2..ba5af5db404 100644 --- a/src/backend/access/table/toast_helper.c +++ b/src/backend/access/table/toast_helper.c @@ -229,8 +229,10 @@ toast_tuple_try_compression(ToastTupleContext *ttc, int attribute) Datum *value = &ttc->ttc_values[attribute]; Datum new_value; ToastAttrInfo *attr = &ttc->ttc_attr[attribute]; + Form_pg_attribute att = TupleDescAttr(ttc->ttc_rel->rd_att, attribute); + CompressionInfo cmp = setup_compression_info(attr->tai_compression, att); - new_value = toast_compress_datum(*value, attr->tai_compression); + new_value = toast_compress_datum(*value, cmp); if (DatumGetPointer(new_value) != NULL) { diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h index 13c4612ceed..1aef65cde99 100644 --- a/src/include/access/toast_compression.h +++ b/src/include/access/toast_compression.h @@ -13,6 +13,9 @@ #ifndef TOAST_COMPRESSION_H 
#define TOAST_COMPRESSION_H +#include "varatt.h" +#include "catalog/pg_attribute.h" + /* * GUC support. * @@ -23,24 +26,80 @@ extern PGDLLIMPORT int default_toast_compression; /* - * Built-in compression method ID. The toast compression header will store - * this in the first 2 bits of the raw length. These built-in compression - * method IDs are directly mapped to the built-in compression methods. + * Stub errors if someone tries to query metadata size + * for an algorithm that doesn’t support it. + */ +static inline uint32 +unsupported_meta_size(const varatt_cmp_extended *hdr) +{ + elog(ERROR, "toast_cmpid_meta_size called for unsupported compression algorithm"); + return 0; /* unreachable */ +} + +/* + * TOAST compression methods enumeration. * - * Don't use these values for anything other than understanding the meaning - * of the raw bits from a varlena; in particular, if the goal is to identify - * a compression method, use the constants TOAST_PGLZ_COMPRESSION, etc. - * below. We might someday support more than 4 compression methods, but - * we can never have more than 4 values in this enum, because there are - * only 2 bits available in the places where this is stored. + * NAME : algorithm identifier + * VALUE : enum value + * META-SIZE-FN : Calculates algorithm metadata size. 
*/ +#define TOAST_COMPRESSION_LIST \ + X(PGLZ, 0, unsupported_meta_size) \ + X(LZ4, 1, unsupported_meta_size) \ + X(INVALID, 2, unsupported_meta_size) /* sentinel */ + +/* Compression algorithm identifiers */ typedef enum ToastCompressionId { - TOAST_PGLZ_COMPRESSION_ID = 0, - TOAST_LZ4_COMPRESSION_ID = 1, - TOAST_INVALID_COMPRESSION_ID = 2, +#define X(name,val,fn) TOAST_##name##_COMPRESSION_ID = (val), + TOAST_COMPRESSION_LIST +#undef X } ToastCompressionId; +/* lookup table to check if compression method uses extended format */ +static const bool toast_cmpid_extended[] = { +#define X(name,val,fn) \ + /* PGLZ, LZ4 don't use extended format */ \ + [TOAST_##name##_COMPRESSION_ID] = \ + ((val) != TOAST_PGLZ_COMPRESSION_ID && \ + (val) != TOAST_LZ4_COMPRESSION_ID && \ + (val) != TOAST_INVALID_COMPRESSION_ID), + TOAST_COMPRESSION_LIST +#undef X +}; + +#define TOAST_CMPID_EXTENDED(alg) \ + (toast_cmpid_extended[alg]) + +/* + * Prototype for a per-datum metadata-size callback: + * given a pointer to the extended header, return + * how many metadata bytes follow it. + */ +typedef uint32 (*ToastMetaSizeFn) (const varatt_cmp_extended *hdr); + +/* Callback table—indexed by ToastCompressionId */ +static const ToastMetaSizeFn toast_meta_size_fns[] = { +#define X(name,val,fn) [TOAST_##name##_COMPRESSION_ID] = fn, + TOAST_COMPRESSION_LIST +#undef X +}; + +/* Calculates algorithm metadata size */ +static inline uint32 +toast_cmpid_meta_size(const varatt_cmp_extended *hdr) +{ + Assert(hdr != NULL); + return toast_meta_size_fns[hdr->cmp_alg] (hdr); +} + +typedef struct CompressionInfo +{ + char cmethod; + varatt_cmp_extended *cmp_ext; /* non-NULL only if uses extended + * compression methods */ +} CompressionInfo; + /* * Built-in compression methods. pg_attribute will store these in the * attcompression column. 
In attcompression, InvalidCompressionMethod @@ -69,5 +128,7 @@ extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value, extern ToastCompressionId toast_get_compression_id(struct varlena *attr); extern char CompressionNameToMethod(const char *compression); extern const char *GetCompressionMethodName(char method); +extern CompressionInfo setup_compression_info(char cmethod, Form_pg_attribute att); +extern void free_compression_info(CompressionInfo *info); #endif /* TOAST_COMPRESSION_H */ diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index 06ae8583c1e..f4a4829ad17 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -31,21 +31,33 @@ typedef struct toast_compress_header * Utilities for manipulation of header information for compressed * toast entries. */ -#define TOAST_COMPRESS_EXTSIZE(ptr) \ - (((toast_compress_header *) (ptr))->tcinfo & VARLENA_EXTSIZE_MASK) -#define TOAST_COMPRESS_METHOD(ptr) \ - (((toast_compress_header *) (ptr))->tcinfo >> VARLENA_EXTSIZE_BITS) - -#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \ - do { \ - Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ - Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm_method) == TOAST_LZ4_COMPRESSION_ID); \ - ((toast_compress_header *) (ptr))->tcinfo = \ - (len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \ +#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD_INFO(ptr, len, cm_method, cmp_ext) \ + do { \ + Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ + Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ + (cm_method) == TOAST_LZ4_COMPRESSION_ID); \ + if (!TOAST_CMPID_EXTENDED((cm_method))) \ + { \ + ((toast_compress_header *)(ptr))->tcinfo = \ + ((uint32)(len)) | ((uint32)(cm_method) << VARLENA_EXTSIZE_BITS); \ + } \ + else \ + { \ + /* extended path: mark EXT flag in tcinfo */ \ + ((toast_compress_header *)(ptr))->tcinfo = \ + ((uint32)(len)) | \ + 
((uint32)(VARATT_4BCE_MASK) << VARLENA_EXTSIZE_BITS); \ + Assert((cmp_ext) != NULL); \ + /* copy header + algorithm-specific metadata */ \ + memcpy( \ + VARATT_4BCE_HDR_PTR(ptr), \ + (const void *)(cmp_ext), sizeof(varatt_cmp_extended) + \ + toast_cmpid_meta_size((const varatt_cmp_extended *)(cmp_ext)) \ + ); \ + } \ } while (0) -extern Datum toast_compress_datum(Datum value, char cmethod); +extern Datum toast_compress_datum(Datum value, CompressionInfo cmp); extern Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock); extern void toast_delete_datum(Relation rel, Datum value, bool is_speculative); diff --git a/src/include/varatt.h b/src/include/varatt.h index 2e8564d4998..91460f313c5 100644 --- a/src/include/varatt.h +++ b/src/include/varatt.h @@ -328,7 +328,8 @@ typedef struct #define VARDATA_COMPRESSED_GET_EXTSIZE(PTR) \ (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK) #define VARDATA_COMPRESSED_GET_COMPRESS_METHOD(PTR) \ - (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS) + ( (VARATT_IS_4BCE(PTR)) ? (VARATT_4BCE_CMP_METHOD(PTR)) \ + : (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS)) /* Same for external Datums; but note argument is a struct varatt_external */ #define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \ @@ -340,8 +341,17 @@ typedef struct do { \ Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ (cm) == TOAST_LZ4_COMPRESSION_ID); \ - ((toast_pointer).va_extinfo = \ - (len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \ + if (!TOAST_CMPID_EXTENDED((cm))) \ + { \ + /* Store the actual method in va_extinfo */ \ + ((toast_pointer).va_extinfo = \ + (len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \ + } \ + else \ + { \ + /* Store 11 in the top 2 bits, meaning "extended" method. 
*/ \ + (toast_pointer).va_extinfo = (uint32)(len) | (VARATT_4BCE_MASK << VARLENA_EXTSIZE_BITS ); \ + } \ } while (0) /* @@ -355,4 +365,63 @@ typedef struct (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \ (toast_pointer).va_rawsize - VARHDRSZ) +/* + * varatt_cmp_extended: an optional per‐datum header for extended compression method. + * Only used when va_tcinfo’s top two bits are “11”. + */ +typedef struct varatt_cmp_extended +{ + uint8 cmp_alg; /* algorithm id (0–255) */ + char cmp_meta[FLEXIBLE_ARRAY_MEMBER]; /* algorithm‐specific + * metadata */ +} varatt_cmp_extended; + +/* + * 1) Detect the extended‐compression flag in va_tcinfo + * (top 2 bits = 0b11 indicate “cmp_ext” path) + */ +#define VARATT_4BCE_MASK 0x3 + +#define VARATT_IS_4BCE(PTR) \ + ((((varattrib_4b *)(PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS) == VARATT_4BCE_MASK) + +/* + * 2) Pointer to varatt_cmp_extended header (just after the 8-byte varattrib_4b compressed headers) + */ +#define VARATT_4BCE_HDR_PTR(PTR) \ + ((varatt_cmp_extended *) ((char *)(PTR) + VARHDRSZ_COMPRESSED)) + +/* get the algorithm ID */ +#define VARATT_4BCE_CMP_METHOD(PTR) \ + ((ToastCompressionId)VARATT_4BCE_HDR_PTR(PTR)->cmp_alg) + +/* set the algorithm ID */ +#define VARATT_4BCE_SET_HDR(EXT_PTR, alg) ((varatt_cmp_extended*)(EXT_PTR))->cmp_alg = (uint8)(alg); + +/* + * 3) Helpers to find metadata vs payload: + * – cmp_meta[] pointer + * – compressed‐bytes pointer + * – compressed-bytes size + * – total header size + */ + +/* pointer to compression algorithm's metadata */ +#define VARATT_4BCE_META_PTR(PTR) \ + ((void *)VARATT_4BCE_HDR_PTR(PTR)->cmp_meta) + +/* pointer to compressed bytes (after metadata) */ +#define VARATT_4BCE_PAYLOAD_PTR(PTR) \ + ((void *) ((char *)VARATT_4BCE_META_PTR(PTR) + toast_cmpid_meta_size(VARATT_4BCE_HDR_PTR(PTR)))) + +/* number of compressed‐payload bytes */ +#define VARATT_4BCE_PAYLOAD_SIZE(PTR) \ + ( VARSIZE_4B(PTR) - VARHDRSZ_COMPRESSED - sizeof(varatt_cmp_extended) \ + - 
toast_cmpid_meta_size(VARATT_4BCE_HDR_PTR(PTR))) + +/* total header+meta size before payload */ +#define VARATT_4BCE_HDRSZ(EXT_PTR) \ + ( VARHDRSZ_COMPRESSED + sizeof(varatt_cmp_extended) \ + + toast_cmpid_meta_size(EXT_PTR)) + #endif diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e5879e00dff..ea28675e0c9 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -482,6 +482,7 @@ CompositeIOData CompositeTypeStmt CompoundAffixFlag CompressFileHandle +CompressionInfo CompressionLocation CompressorState ComputeXidHorizonsResult @@ -4153,6 +4154,7 @@ uuid_t va_list vacuumingOptions validate_string_relopt +varatt_cmp_extended varatt_expanded varattrib_1b varattrib_1b_e base-commit: a675149e87706d01e4007150a0124b89bdef08be -- 2.47.1