From b3e2d15a0add4a55907195a05f38f1ce22857ade Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 6 Nov 2023 11:07:35 +0200 Subject: [PATCH] Fix false reports in pg_visibility --- contrib/pg_visibility/pg_visibility.c | 23 +++++++--- src/backend/storage/ipc/procarray.c | 66 +++++++++++++++++++++++++++ src/include/storage/procarray.h | 1 + 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 2a4acfd1eee..78dac323e51 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -19,6 +19,7 @@ #include "funcapi.h" #include "miscadmin.h" #include "storage/bufmgr.h" +#include "storage/proc.h" #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/rel.h" @@ -562,8 +563,14 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) /* Only some relkinds have a visibility map */ check_relation_relkind(rel); + /* Mark ourselves as PROC_IN_VACUUM to exclude our influence on xmin's */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->statusFlags |= PROC_IN_VACUUM; + ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; + LWLockRelease(ProcArrayLock); + if (all_visible) - OldestXmin = GetOldestNonRemovableTransactionId(rel); + OldestXmin = GetStrictOldestNonRemovableTransactionId(); nblocks = RelationGetNumberOfBlocks(rel); @@ -670,12 +677,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) * From a concurrency point of view, it sort of sucks to * retake ProcArrayLock here while we're holding the buffer * exclusively locked, but it should be safe against - * deadlocks, because surely - * GetOldestNonRemovableTransactionId() should never take a - * buffer lock. And this shouldn't happen often, so it's worth - * being careful so as to avoid false positives. + * deadlocks, because surely get_strict_xid_horizon() should + * never take a buffer lock. And this shouldn't happen often, + * so it's worth being careful so as to avoid false positives. */ - RecomputedOldestXmin = GetOldestNonRemovableTransactionId(rel); + RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(); if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin)) record_corrupt_item(items, &tuple.t_self); @@ -715,6 +721,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) items->count = items->next; items->next = 0; + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyProc->statusFlags &= ~PROC_IN_VACUUM; + ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; + LWLockRelease(ProcArrayLock); + return items; } diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 80ab026bf56..e36a463cc66 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -237,6 +237,30 @@ typedef struct ComputeXidHorizonsResult */ TransactionId data_oldest_nonremovable; + /* + * The "strict" version of GetOldestNonRemovableTransactionId(). The + * pg_visiblity check can tolerale false positives (don't report some of the + * errors), but can't toletate false negatives (report false errors). + * This makes us fundamentally unhappy with ComputeXidHorizons(). Normally, + * horozons moves forwards, but there are cases when it could move backwards + * (see comment for ComputeXidHorizons()). + * + * This is why we have to implement our own function for xid horizon, which + * would be guaranteed to be newer or equal to any xid horizon computed before. + * We have to do the following to achieve this. + * + * 1. Ignore processes xmin's, because they take into account connection to + * other databases which were ignored before. + * 2. Ignore KnownAssignedXids, because they are not database-aware. While + * primary could compute its horizons database-aware. + * 3. Ignore walsender xmin, because it could go backward if some replication + * connections don't use replication slots. + * + * Surely these would significantly sacrifice accuracy. But we have to do so + * in order to avoid reporting false errors. + */ + TransactionId data_strict_oldest_nonremovable; + /* * Oldest xid for which deleted tuples need to be retained in this * session's temporary tables. @@ -252,6 +276,7 @@ typedef enum GlobalVisHorizonKind VISHORIZON_SHARED, VISHORIZON_CATALOG, VISHORIZON_DATA, + VISHORIZON_DATA_STRICT, VISHORIZON_TEMP, } GlobalVisHorizonKind; @@ -1743,6 +1768,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) h->oldest_considered_running = initial; h->shared_oldest_nonremovable = initial; h->data_oldest_nonremovable = initial; + h->data_strict_oldest_nonremovable = initial; /* * Only modifications made by this backend affect the horizon for @@ -1842,6 +1868,10 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) { h->data_oldest_nonremovable = TransactionIdOlder(h->data_oldest_nonremovable, xmin); + + if (TransactionIdIsValid(xid)) + h->data_strict_oldest_nonremovable = + TransactionIdOlder(h->data_strict_oldest_nonremovable, xid); } } @@ -1997,6 +2027,8 @@ GetOldestNonRemovableTransactionId(Relation rel) return horizons.catalog_oldest_nonremovable; case VISHORIZON_DATA: return horizons.data_oldest_nonremovable; + case VISHORIZON_DATA_STRICT: + return horizons.data_strict_oldest_nonremovable; case VISHORIZON_TEMP: return horizons.temp_oldest_nonremovable; } @@ -2005,6 +2037,38 @@ GetOldestNonRemovableTransactionId(Relation rel) return InvalidTransactionId; } +/* + * The "strict" version of GetOldestNonRemovableTransactionId(). The + * pg_visiblity check can tolerale false positives (don't report some of the + * errors), but can't toletate false negatives (report false errors). + * This makes us fundamentally unhappy with ComputeXidHorizons(). Normally, + * horozons moves forwards, but there are cases when it could move backwards + * (see comment for ComputeXidHorizons()). + * + * This is why we have to implement our own function for xid horizon, which + * would be guaranteed to be newer or equal to any xid horizon computed before. + * We have to do the following to achieve this. + * + * 1. Ignore processes xmin's, because they take into account connection to + * other databases which were ignored before. + * 2. Ignore KnownAssignedXids, because they are not database-aware. While + * primary could compute its horizons database-aware. + * 3. Ignore walsender xmin, because it could go backward if some replication + * connections don't use replication slots. + * + * Surely these would significantly sacrifice accuracy. But we have to do so + * in order to avoid reporting false errors. + */ +TransactionId +GetStrictOldestNonRemovableTransactionId(void) +{ + ComputeXidHorizonsResult horizons; + + ComputeXidHorizons(&horizons); + + return horizons.data_strict_oldest_nonremovable; +} + /* * Return the oldest transaction id any currently running backend might still * consider running. This should not be used for visibility / pruning @@ -4028,6 +4092,8 @@ GlobalVisTestFor(Relation rel) case VISHORIZON_TEMP: state = &GlobalVisTempRels; break; + default: + break; } Assert(FullTransactionIdIsValid(state->definitely_needed) && diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index d8cae3ce1c5..d44c2a98b2d 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -55,6 +55,7 @@ extern RunningTransactions GetRunningTransactionData(void); extern bool TransactionIdIsInProgress(TransactionId xid); extern bool TransactionIdIsActive(TransactionId xid); extern TransactionId GetOldestNonRemovableTransactionId(Relation rel); +extern TransactionId GetStrictOldestNonRemovableTransactionId(void); extern TransactionId GetOldestTransactionIdConsideredRunning(void); extern TransactionId GetOldestActiveTransactionId(void); extern TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly); -- 2.39.3 (Apple Git-145)