From 1c4781ff30b5f4d9e10c6dc03c755d34881338b1 Mon Sep 17 00:00:00 2001 From: Mark Dilger Date: Mon, 29 Mar 2021 14:31:13 -0700 Subject: [PATCH v14 2/3] Replacing implementation of check_tuple_visibility Using a modified version of HeapTupleSatisfiesVacuumHorizon. --- contrib/amcheck/verify_heapam.c | 479 +++++++++++++++++++++++++------- 1 file changed, 371 insertions(+), 108 deletions(-) diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 9172b5fd81..be22b491d6 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -73,6 +73,8 @@ typedef struct HeapCheckContext TransactionId oldest_xid; /* ShmemVariableCache->oldestXid */ FullTransactionId oldest_fxid; /* 64-bit version of oldest_xid, computed * relative to next_fxid */ + TransactionId safe_xmin; /* this XID and newer ones can't become + * all-visible while we're running */ /* * Cached copy of value from MultiXactState @@ -114,6 +116,9 @@ typedef struct HeapCheckContext uint32 offset; /* offset in tuple data */ AttrNumber attnum; + /* True if toast for this tuple could be vacuumed away */ + bool tuple_can_be_pruned; + /* Values for iterating over toast for the attribute */ int32 chunkno; int32 attrsize; @@ -249,6 +254,12 @@ verify_heapam(PG_FUNCTION_ARGS) memset(&ctx, 0, sizeof(HeapCheckContext)); ctx.cached_xid = InvalidTransactionId; + /* + * Any xmin newer than the xmin of our snapshot can't become all-visible + * while we're running. + */ + ctx.safe_xmin = GetTransactionSnapshot()->xmin; + /* * If we report corruption when not examining some individual attribute, * we need attnum to be reported as NULL. Set that up before any @@ -640,189 +651,441 @@ check_tuple_header(HeapCheckContext *ctx) } /* - * Checks whether a tuple is visible for checking. 
+ * Checks whether a tuple is visible to our transaction for checking, which is + * not a question of whether we should be able to see the tuple relative to any + * particular snapshot, but rather a question of whether it is safe and + * reasonable to check the tuple attributes. The caller should already have + * checked that the tuple is sufficiently sensible for us to evaluate. * - * Since we do not hold a snapshot, tuple visibility is not a question of - * whether we should be able to see the tuple relative to any particular - * snapshot, but rather a question of whether it is safe and reasonable to - * check the tuple attributes. + * If a tuple could have been inserted by a transaction that also added a + * column to the table, but which ultimately did not commit, or which has not + * yet committed, then the table's current TupleDesc might differ from the one + * used to construct this tuple, so we must not check it. * - * For visibility determination not specifically related to corruption, what we - * want to know is if a tuple is potentially visible to any running - * transaction. If you are tempted to replace this function's visibility logic - * with a call to another visibility checking function, keep in mind that this - * function does not update hint bits, as it seems imprudent to write hint bits - * (or anything at all) to a table during a corruption check. Nor does this - * function bother classifying tuple visibility beyond a boolean visible vs. - * not visible. + * As a special case, if our own transaction inserted the tuple, even if we + * added a column to the table, our TupleDesc should match. We could check the + * tuple, but choose not to do so. * - * Returns whether the tuple is visible for checking. + * If a tuple has been updated or deleted, we can still read the old tuple for + * corruption checking purposes, as long as we are careful about concurrent + * vacuums. 
The main table tuple itself cannot be vacuumed away because we + * hold a buffer lock on the page, but if the deleting transaction is older + * than our transaction snapshot's xmin, then vacuum could remove the toast at + * any time, so we must not check the toast. + * + * If xmin or xmax values are older than can be checked against clog, or appear + * to be in the future (possibly due to wrap-around), then we cannot make a + * determination about the visibility of the tuple, so we must not check it. + * + * Returns true if the tuple should be checked, false otherwise. Sets + * ctx->tuple_can_be_pruned true if the toast might be vacuumed away, false + * otherwise. */ static bool check_tuple_visibility(HeapCheckContext *ctx) { + TransactionId xmin; + TransactionId xvac; + TransactionId xmax; + XidCommitStatus xmin_status; + XidCommitStatus xvac_status; + XidCommitStatus xmax_status; HeapTupleHeader tuphdr = ctx->tuphdr; - uint16 infomask = tuphdr->t_infomask; - if (!HeapTupleHeaderXminCommitted(tuphdr)) + ctx->tuple_can_be_pruned = true; /* have not yet proven otherwise */ + + /* If xmin is normal, it should be within valid range */ + xmin = HeapTupleHeaderGetXmin(tuphdr); + switch (get_xid_status(xmin, ctx, &xmin_status)) { - TransactionId raw_xmin = HeapTupleHeaderGetRawXmin(tuphdr); + case XID_INVALID: + case XID_BOUNDS_OK: + break; + case XID_IN_FUTURE: + report_corruption(ctx, + psprintf("xmin %u equals or exceeds next valid transaction ID %u:%u", + xmin, + EpochFromFullTransactionId(ctx->next_fxid), + XidFromFullTransactionId(ctx->next_fxid))); + return false; /* corrupt */ + case XID_PRECEDES_CLUSTERMIN: + report_corruption(ctx, + psprintf("xmin %u precedes oldest valid transaction ID %u:%u", + xmin, + EpochFromFullTransactionId(ctx->oldest_fxid), + XidFromFullTransactionId(ctx->oldest_fxid))); + return false; /* corrupt */ + case XID_PRECEDES_RELMIN: + report_corruption(ctx, + psprintf("xmin %u precedes relation freeze threshold %u:%u", + xmin, 
EpochFromFullTransactionId(ctx->relfrozenfxid), + XidFromFullTransactionId(ctx->relfrozenfxid))); + return false; /* corrupt */ + } + /* + * Has inserting transaction committed? + */ + if (!HeapTupleHeaderXminCommitted(tuphdr)) + { if (HeapTupleHeaderXminInvalid(tuphdr)) - return false; /* HEAPTUPLE_DEAD */ + + /* + * The inserting transaction aborted. The structure of the tuple + * may not match our relation description, so we cannot check it. + */ + return false; /* uncheckable */ /* Used by pre-9.0 binary upgrades */ - else if (infomask & HEAP_MOVED_OFF || - infomask & HEAP_MOVED_IN) + else if (tuphdr->t_infomask & HEAP_MOVED_OFF) { - XidCommitStatus status; - TransactionId xvac = HeapTupleHeaderGetXvac(tuphdr); + xvac = HeapTupleHeaderGetXvac(tuphdr); - switch (get_xid_status(xvac, ctx, &status)) + switch (get_xid_status(xvac, ctx, &xvac_status)) { case XID_INVALID: report_corruption(ctx, - pstrdup("old-style VACUUM FULL transaction ID is invalid")); + pstrdup("old-style VACUUM FULL transaction ID for moved off tuple is invalid")); return false; /* corrupt */ case XID_IN_FUTURE: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u equals or exceeds next valid transaction ID %u:%u", + psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple equals or exceeds next valid transaction ID %u:%u", xvac, EpochFromFullTransactionId(ctx->next_fxid), XidFromFullTransactionId(ctx->next_fxid))); return false; /* corrupt */ case XID_PRECEDES_RELMIN: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u precedes relation freeze threshold %u:%u", + psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes relation freeze threshold %u:%u", xvac, EpochFromFullTransactionId(ctx->relfrozenfxid), XidFromFullTransactionId(ctx->relfrozenfxid))); return false; /* corrupt */ - break; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u precedes oldest valid 
transaction ID %u:%u", + psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple precedes oldest valid transaction ID %u:%u", xvac, EpochFromFullTransactionId(ctx->oldest_fxid), XidFromFullTransactionId(ctx->oldest_fxid))); return false; /* corrupt */ - break; case XID_BOUNDS_OK: - switch (status) - { - case XID_IN_PROGRESS: - case XID_IS_CURRENT_XID: - return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */ - case XID_COMMITTED: - case XID_ABORTED: - return false; /* HEAPTUPLE_DEAD */ - } + break; } - } - else - { - XidCommitStatus status; - switch (get_xid_status(raw_xmin, ctx, &status)) + switch (xvac_status) { - case XID_INVALID: - report_corruption(ctx, - pstrdup("raw xmin is invalid")); - return false; - case XID_IN_FUTURE: + case XID_IS_CURRENT_XID: report_corruption(ctx, - psprintf("raw xmin %u equals or exceeds next valid transaction ID %u:%u", - raw_xmin, - EpochFromFullTransactionId(ctx->next_fxid), - XidFromFullTransactionId(ctx->next_fxid))); + psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple matches our current transaction ID", + xvac)); return false; /* corrupt */ - case XID_PRECEDES_RELMIN: + case XID_IN_PROGRESS: report_corruption(ctx, - psprintf("raw xmin %u precedes relation freeze threshold %u:%u", - raw_xmin, - EpochFromFullTransactionId(ctx->relfrozenfxid), - XidFromFullTransactionId(ctx->relfrozenfxid))); + psprintf("old-style VACUUM FULL transaction ID %u for moved off tuple appears to be in progress", + xvac)); return false; /* corrupt */ - case XID_PRECEDES_CLUSTERMIN: - report_corruption(ctx, - psprintf("raw xmin %u precedes oldest valid transaction ID %u:%u", - raw_xmin, - EpochFromFullTransactionId(ctx->oldest_fxid), - XidFromFullTransactionId(ctx->oldest_fxid))); - return false; /* corrupt */ - case XID_BOUNDS_OK: - switch (status) - { - case XID_COMMITTED: - break; - case XID_IN_PROGRESS: - case XID_IS_CURRENT_XID: - return true; /* insert or delete in progress */ - case XID_ABORTED: - return false; /* 
HEAPTUPLE_DEAD */ - } + + case XID_COMMITTED: + + /* + * It should be impossible for xvac to still be running, + * since we've removed all that code, but even if it were, + * it ought to be safe to read the tuple, since the + * original inserter must have committed. But, if the + * xvac transaction committed, this tuple (and its + * associated TOAST tuples) could be pruned at any time. + */ + return true; /* checkable */ + + case XID_ABORTED: + break; } } - } - - if (!(infomask & HEAP_XMAX_INVALID) && !HEAP_XMAX_IS_LOCKED_ONLY(infomask)) - { - if (infomask & HEAP_XMAX_IS_MULTI) + /* Used by pre-9.0 binary upgrades */ + else if (tuphdr->t_infomask & HEAP_MOVED_IN) { - XidCommitStatus status; - TransactionId xmax = HeapTupleGetUpdateXid(tuphdr); + xvac = HeapTupleHeaderGetXvac(tuphdr); - switch (get_xid_status(xmax, ctx, &status)) + switch (get_xid_status(xvac, ctx, &xvac_status)) { - /* not LOCKED_ONLY, so it has to have an xmax */ case XID_INVALID: report_corruption(ctx, - pstrdup("xmax is invalid")); + pstrdup("old-style VACUUM FULL transaction ID for moved in tuple is invalid")); return false; /* corrupt */ case XID_IN_FUTURE: report_corruption(ctx, - psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u", - xmax, + psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple equals or exceeds next valid transaction ID %u:%u", + xvac, EpochFromFullTransactionId(ctx->next_fxid), XidFromFullTransactionId(ctx->next_fxid))); return false; /* corrupt */ case XID_PRECEDES_RELMIN: report_corruption(ctx, - psprintf("xmax %u precedes relation freeze threshold %u:%u", - xmax, + psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes relation freeze threshold %u:%u", + xvac, EpochFromFullTransactionId(ctx->relfrozenfxid), XidFromFullTransactionId(ctx->relfrozenfxid))); return false; /* corrupt */ case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, - psprintf("xmax %u precedes oldest valid transaction ID %u:%u", - xmax, + 
psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple precedes oldest valid transaction ID %u:%u", + xvac, EpochFromFullTransactionId(ctx->oldest_fxid), XidFromFullTransactionId(ctx->oldest_fxid))); return false; /* corrupt */ case XID_BOUNDS_OK: - switch (status) - { - case XID_IN_PROGRESS: - case XID_IS_CURRENT_XID: - return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */ - case XID_COMMITTED: - case XID_ABORTED: - return false; /* HEAPTUPLE_RECENTLY_DEAD or - * HEAPTUPLE_DEAD */ - } + break; } - /* Ok, the tuple is live */ + switch (xvac_status) + { + case XID_IS_CURRENT_XID: + report_corruption(ctx, + psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple matches our current transaction ID", + xvac)); + return false; /* corrupt */ + case XID_IN_PROGRESS: + report_corruption(ctx, + psprintf("old-style VACUUM FULL transaction ID %u for moved in tuple appears to be in progress", + xvac)); + return false; /* corrupt */ + + case XID_COMMITTED: + break; + + case XID_ABORTED: + + /* + * The VACUUM FULL aborted, so this tuple is dead and + * could be vacuumed away at any time. It's ok to check + * the tuple because we have a buffer lock for the page, + * but not safe to check the toast. + */ + return true; /* checkable */ + } + } + else if (xmin_status == XID_IS_CURRENT_XID) + { + /* + * Don't check tuples from currently running transactions, not + * even our own. + */ + return false; /* checkable, but don't check */ + } + else if (xmin_status == XID_IN_PROGRESS) + { + /* Don't check tuples from currently running transactions */ + return false; /* uncheckable */ + } + else if (xmin_status != XID_COMMITTED) + { + /* + * Inserting transaction is not in progress, and not committed, so + * it either aborted or crashed. We cannot check. 
+ */ + return false; /* uncheckable */ } - else if (!(infomask & HEAP_XMAX_COMMITTED)) - return true; /* HEAPTUPLE_DELETE_IN_PROGRESS or - * HEAPTUPLE_LIVE */ - else - return false; /* HEAPTUPLE_RECENTLY_DEAD or HEAPTUPLE_DEAD */ } - return true; /* not dead */ + + /* + * Okay, the inserter committed, so it was good at some point. Now what + * about the deleting transaction? + */ + + if (tuphdr->t_infomask & HEAP_XMAX_IS_MULTI) + { + /* + * xmax is a multixact, so it should be within valid MXID range. We + * cannot safely look up the update xid if the multixact is out of + * bounds, and must stop checking this tuple. + */ + xmax = HeapTupleHeaderGetRawXmax(tuphdr); + switch (check_mxid_valid_in_rel(xmax, ctx)) + { + case XID_INVALID: + report_corruption(ctx, + pstrdup("multitransaction ID is invalid")); + return false; /* corrupt */ + case XID_PRECEDES_RELMIN: + report_corruption(ctx, + psprintf("multitransaction ID %u precedes relation minimum multitransaction ID threshold %u", + xmax, ctx->relminmxid)); + return false; /* corrupt */ + case XID_PRECEDES_CLUSTERMIN: + report_corruption(ctx, + psprintf("multitransaction ID %u precedes oldest valid multitransaction ID threshold %u", + xmax, ctx->oldest_mxact)); + return false; /* corrupt */ + case XID_IN_FUTURE: + report_corruption(ctx, + psprintf("multitransaction ID %u equals or exceeds next valid multitransaction ID %u", + xmax, + ctx->next_mxact)); + return false; /* corrupt */ + case XID_BOUNDS_OK: + break; + } + } + + if (tuphdr->t_infomask & HEAP_XMAX_INVALID) + { + /* + * This tuple is live. A concurrently running transaction could + * delete it before we get around to checking the toast, but any such + * running transaction is surely not less than our safe_xmin, so the + * toast cannot be vacuumed out from under us. 
+ */ + ctx->tuple_can_be_pruned = false; + return true; /* checkable */ + } + + if (HEAP_XMAX_IS_LOCKED_ONLY(tuphdr->t_infomask)) + { + /* + * "Deleting" xact really only locked it, so the tuple is live in any + * case. As above, a concurrently running transaction could delete + * it, but it cannot be vacuumed out from under us. + */ + ctx->tuple_can_be_pruned = false; + return true; /* checkable */ + } + + if (tuphdr->t_infomask & HEAP_XMAX_IS_MULTI) + { + /* + * We already checked above that this multixact is within limits for + * this table. Now check the update xid from this multixact. + */ + xmax = HeapTupleGetUpdateXid(tuphdr); + switch (get_xid_status(xmax, ctx, &xmax_status)) + { + /* not LOCKED_ONLY, so it has to have an xmax */ + case XID_INVALID: + report_corruption(ctx, + pstrdup("update xid is invalid")); + return false; /* corrupt */ + case XID_IN_FUTURE: + report_corruption(ctx, + psprintf("update xid %u equals or exceeds next valid transaction ID %u:%u", + xmax, + EpochFromFullTransactionId(ctx->next_fxid), + XidFromFullTransactionId(ctx->next_fxid))); + return false; /* corrupt */ + case XID_PRECEDES_RELMIN: + report_corruption(ctx, + psprintf("update xid %u precedes relation freeze threshold %u:%u", + xmax, + EpochFromFullTransactionId(ctx->relfrozenfxid), + XidFromFullTransactionId(ctx->relfrozenfxid))); + return false; /* corrupt */ + case XID_PRECEDES_CLUSTERMIN: + report_corruption(ctx, + psprintf("update xid %u precedes oldest valid transaction ID %u:%u", + xmax, + EpochFromFullTransactionId(ctx->oldest_fxid), + XidFromFullTransactionId(ctx->oldest_fxid))); + return false; /* corrupt */ + case XID_BOUNDS_OK: + break; + } + + switch (xmax_status) + { + case XID_IS_CURRENT_XID: + case XID_IN_PROGRESS: + + /* + * The delete is in progress, so it cannot be visible to our + * snapshot. + */ + ctx->tuple_can_be_pruned = false; + return true; /* checkable */ + case XID_COMMITTED: + + /* + * The delete committed. 
Whether the toast can be vacuumed + * away depends on how old the deleting transaction is. + */ + ctx->tuple_can_be_pruned = TransactionIdPrecedes(xmax, + ctx->safe_xmin); + return true; /* checkable */ + case XID_ABORTED: + + /* + * The delete aborted or crashed. The tuple is still live. + */ + ctx->tuple_can_be_pruned = false; + return true; /* checkable */ + } + } + + /* + * The tuple is deleted. Whether the toast can be vacuumed away depends + * on how old the deleting transaction is. + */ + xmax = HeapTupleHeaderGetRawXmax(tuphdr); + + switch (get_xid_status(xmax, ctx, &xmax_status)) + { + case XID_IN_FUTURE: + report_corruption(ctx, + psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u", + xmax, + EpochFromFullTransactionId(ctx->next_fxid), + XidFromFullTransactionId(ctx->next_fxid))); + return false; /* corrupt */ + case XID_PRECEDES_RELMIN: + report_corruption(ctx, + psprintf("xmax %u precedes relation freeze threshold %u:%u", + xmax, + EpochFromFullTransactionId(ctx->relfrozenfxid), + XidFromFullTransactionId(ctx->relfrozenfxid))); + return false; /* corrupt */ + case XID_PRECEDES_CLUSTERMIN: + report_corruption(ctx, + psprintf("xmax %u precedes oldest valid transaction ID %u:%u", + xmax, + EpochFromFullTransactionId(ctx->oldest_fxid), + XidFromFullTransactionId(ctx->oldest_fxid))); + return false; /* corrupt */ + case XID_BOUNDS_OK: + case XID_INVALID: + break; + } + + switch (xmax_status) + { + case XID_IS_CURRENT_XID: + case XID_IN_PROGRESS: + + /* + * The delete is in progress, so it cannot be visible to our + * snapshot. + */ + ctx->tuple_can_be_pruned = false; + return true; /* checkable */ + case XID_COMMITTED: + + /* + * The delete committed. Whether the toast can be vacuumed away + * depends on how old the deleting transaction is. + */ + ctx->tuple_can_be_pruned = TransactionIdPrecedes(xmax, + ctx->safe_xmin); + return true; /* checkable */ + case XID_ABORTED: + + /* + * The delete aborted or crashed. The tuple is still live. 
+ */ + ctx->tuple_can_be_pruned = false; + return true; /* checkable */ + } + + return false; /* not reached */ } + /* * Check the current toast tuple against the state tracked in ctx, recording * any corruption found in ctx->tupstore. -- 2.21.1 (Apple Git-122.3)