From 96448839f1cac8ad7dc390eb2f8be74d71bab9f4 Mon Sep 17 00:00:00 2001
From: reshke
Date: Wed, 22 Oct 2025 15:29:26 +0000
Subject: [PATCH v20251218 1/2] Move `normalize tuple` logic from nbtcheck to verify_common

Preparatory patch to reuse the index tuple normalization logic in GiST
amcheck.
---
 contrib/amcheck/verify_common.c | 126 ++++++++++++++++++++++++++++++++
 contrib/amcheck/verify_common.h |   2 +
 contrib/amcheck/verify_nbtree.c | 107 +--------------------------
 3 files changed, 129 insertions(+), 106 deletions(-)

diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
index a31ce06ed99..e9b4887f65e 100644
--- a/contrib/amcheck/verify_common.c
+++ b/contrib/amcheck/verify_common.c
@@ -13,6 +13,7 @@
 #include "postgres.h"
 
 #include "access/genam.h"
+#include "access/heaptoast.h"
 #include "access/table.h"
 #include "access/tableam.h"
 #include "verify_common.h"
@@ -189,3 +190,128 @@ index_checkable(Relation rel, Oid am_id)
 
 	return amcheck_index_mainfork_expected(rel);
 }
+
+/*
+ * amcheck_normalize_tuple
+ *		Produce a normalized representation of an index tuple.
+ *
+ * Compressed varlena datums are decompressed, and datums that are eligible
+ * for a 1B varlena header but carry a 4B header are converted to the 1B
+ * form.  An uncompressed datum larger than TOAST_INDEX_TARGET in an
+ * attribute with EXTENDED or MAIN storage also forces the tuple to be
+ * re-formed.
+ *
+ * Returns 'itup' itself when nothing needs to change; otherwise returns a
+ * new tuple built by index_form_tuple() that carries the same t_tid.
+ * Raises an error if an external varlena datum is encountered.
+ */
+IndexTuple
+amcheck_normalize_tuple(Relation irel, IndexTuple itup)
+{
+	TupleDesc	tupleDescriptor = RelationGetDescr(irel);
+	Datum		normalized[INDEX_MAX_KEYS];
+	bool		isnull[INDEX_MAX_KEYS];
+	bool		need_free[INDEX_MAX_KEYS];
+	bool		formnewtup = false;
+	IndexTuple	reformed;
+	int			i;
+
+	/* Easy case: It's immediately clear that tuple has no varlena datums */
+	if (!IndexTupleHasVarwidths(itup))
+		return itup;
+
+	for (i = 0; i < tupleDescriptor->natts; i++)
+	{
+		Form_pg_attribute att;
+
+		att = TupleDescAttr(tupleDescriptor, i);
+
+		/* Assume untoasted/already normalized datum initially */
+		need_free[i] = false;
+		normalized[i] = index_getattr(itup, att->attnum,
+									  tupleDescriptor,
+									  &isnull[i]);
+		if (att->attbyval || att->attlen != -1 || isnull[i])
+			continue;
+
+		/*
+		 * Callers always pass a tuple that could safely be inserted into the
+		 * index without further processing, so an external varlena header
+		 * should never be encountered here
+		 */
+		if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+			ereport(ERROR,
+					(errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+							ItemPointerGetBlockNumber(&(itup->t_tid)),
+							ItemPointerGetOffsetNumber(&(itup->t_tid)),
+							RelationGetRelationName(irel))));
+		else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
+				 VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
+				 (att->attstorage == TYPSTORAGE_EXTENDED ||
+				  att->attstorage == TYPSTORAGE_MAIN))
+		{
+			/*
+			 * This value will be compressed by index_form_tuple() with the
+			 * current storage settings.  We may be here because this tuple
+			 * was formed with different storage settings.  So, force forming.
+			 */
+			formnewtup = true;
+		}
+		else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+		{
+			formnewtup = true;
+			normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+			need_free[i] = true;
+		}
+
+		/*
+		 * Short tuples may have 1B or 4B header. Convert 4B header of short
+		 * tuples to 1B
+		 */
+		else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+		{
+			/* convert to short varlena */
+			Size		len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+			char	   *data = palloc(len);
+
+			SET_VARSIZE_SHORT(data, len);
+			memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+			formnewtup = true;
+			normalized[i] = PointerGetDatum(data);
+			need_free[i] = true;
+		}
+	}
+
+	/*
+	 * Easier case: Tuple has varlena datums, none of which are compressed or
+	 * short with 4B header
+	 */
+	if (!formnewtup)
+		return itup;
+
+	/*
+	 * Hard case: Tuple had compressed varlena datums that necessitate
+	 * creating normalized version of the tuple from uncompressed input datums
+	 * (normalized input datums).  This is rather naive, but shouldn't be
+	 * necessary too often.
+	 *
+	 * In the heap, tuples may contain short varlena datums with both 1B
+	 * header and 4B headers.  But the corresponding index tuple should always
+	 * have such varlena's with 1B headers.  So, if there is a short varlena
+	 * with 4B header, we need to convert it for fingerprinting.
+	 *
+	 * Note that we rely on deterministic index_form_tuple() TOAST compression
+	 * of normalized input.
+	 */
+	reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+	reformed->t_tid = itup->t_tid;
+
+	/* Cannot leak memory here */
+	for (i = 0; i < tupleDescriptor->natts; i++)
+		if (need_free[i])
+			pfree(DatumGetPointer(normalized[i]));
+
+	return reformed;
+}
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
index 3fa63d2121a..ffe0d30beb3 100644
--- a/contrib/amcheck/verify_common.h
+++ b/contrib/amcheck/verify_common.h
@@ -26,3 +26,5 @@ extern void amcheck_lock_relation_and_check(Oid indrelid,
 											Oid am_id,
 											IndexDoCheckCallback check,
 											LOCKMODE lockmode, void *state);
+
+extern IndexTuple amcheck_normalize_tuple(Relation irel, IndexTuple itup);
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index f91392a3a49..2ad27eb5c2b 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2845,115 +2845,10 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
 static IndexTuple
 bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 {
-	TupleDesc	tupleDescriptor = RelationGetDescr(state->rel);
-	Datum		normalized[INDEX_MAX_KEYS];
-	bool		isnull[INDEX_MAX_KEYS];
-	bool		need_free[INDEX_MAX_KEYS];
-	bool		formnewtup = false;
-	IndexTuple	reformed;
-	int			i;
-
 	/* Caller should only pass "logical" non-pivot tuples here */
 	Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup));
 
-	/* Easy case: It's immediately clear that tuple has no varlena datums */
-	if (!IndexTupleHasVarwidths(itup))
-		return itup;
-
-	for (i = 0; i < tupleDescriptor->natts; i++)
-	{
-		Form_pg_attribute att;
-
-		att = TupleDescAttr(tupleDescriptor, i);
-
-		/* Assume untoasted/already normalized datum initially */
-		need_free[i] = false;
-		normalized[i] = index_getattr(itup, att->attnum,
-									  tupleDescriptor,
-									  &isnull[i]);
-		if (att->attbyval || att->attlen != -1 || isnull[i])
-			continue;
-
-		/*
-		 * Callers always pass a tuple that could safely be inserted into the
-		 * index without further processing, so an external varlena header
-		 * should never be encountered here
-		 */
-		if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
-			ereport(ERROR,
-					(errcode(ERRCODE_INDEX_CORRUPTED),
-					 errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
-							ItemPointerGetBlockNumber(&(itup->t_tid)),
-							ItemPointerGetOffsetNumber(&(itup->t_tid)),
-							RelationGetRelationName(state->rel))));
-		else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
-				 VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
-				 (att->attstorage == TYPSTORAGE_EXTENDED ||
-				  att->attstorage == TYPSTORAGE_MAIN))
-		{
-			/*
-			 * This value will be compressed by index_form_tuple() with the
-			 * current storage settings.  We may be here because this tuple
-			 * was formed with different storage settings.  So, force forming.
-			 */
-			formnewtup = true;
-		}
-		else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
-		{
-			formnewtup = true;
-			normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
-			need_free[i] = true;
-		}
-
-		/*
-		 * Short tuples may have 1B or 4B header. Convert 4B header of short
-		 * tuples to 1B
-		 */
-		else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
-		{
-			/* convert to short varlena */
-			Size		len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
-			char	   *data = palloc(len);
-
-			SET_VARSIZE_SHORT(data, len);
-			memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
-
-			formnewtup = true;
-			normalized[i] = PointerGetDatum(data);
-			need_free[i] = true;
-		}
-	}
-
-	/*
-	 * Easier case: Tuple has varlena datums, none of which are compressed or
-	 * short with 4B header
-	 */
-	if (!formnewtup)
-		return itup;
-
-	/*
-	 * Hard case: Tuple had compressed varlena datums that necessitate
-	 * creating normalized version of the tuple from uncompressed input datums
-	 * (normalized input datums).  This is rather naive, but shouldn't be
-	 * necessary too often.
-	 *
-	 * In the heap, tuples may contain short varlena datums with both 1B
-	 * header and 4B headers.  But the corresponding index tuple should always
-	 * have such varlena's with 1B headers.  So, if there is a short varlena
-	 * with 4B header, we need to convert it for fingerprinting.
-	 *
-	 * Note that we rely on deterministic index_form_tuple() TOAST compression
-	 * of normalized input.
-	 */
-	reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
-	reformed->t_tid = itup->t_tid;
-
-	/* Cannot leak memory here */
-	for (i = 0; i < tupleDescriptor->natts; i++)
-		if (need_free[i])
-			pfree(DatumGetPointer(normalized[i]));
-
-	return reformed;
+	return amcheck_normalize_tuple(state->rel, itup);
 }
 
 /*
-- 
2.43.0