From b3b90046078fbddc8e4b2287a5d04b2cb5142cc6 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Fri, 11 Mar 2022 19:16:02 -0800 Subject: [PATCH v11 1/3] Loosen coupling between relfrozenxid and freezing. When VACUUM set relfrozenxid before now, it set it to whatever value was used to determine which tuples to freeze -- the FreezeLimit cutoff. This approach was very naive: the relfrozenxid invariant only requires that new relfrozenxid values be <= the oldest extant XID remaining in the table (at the point that the VACUUM operation ends), which in general might be much more recent than FreezeLimit. There is no fixed relationship between the amount of physical work performed by VACUUM to make it safe to advance relfrozenxid (freezing and pruning), and the actual number of XIDs that relfrozenxid can be advanced by (at least in principle) as a result. VACUUM might have to freeze all of the tuples from a hundred million heap pages just to enable relfrozenxid to be advanced by no more than one or two XIDs. On the other hand, VACUUM might end up doing little or no work, and yet still be capable of advancing relfrozenxid by hundreds of millions of XIDs as a result. VACUUM now sets relfrozenxid (and relminmxid) using the exact oldest extant XID (and oldest extant MultiXactId) from the table, including XIDs from the table's remaining/unfrozen MultiXacts. This requires that VACUUM carefully track the oldest unfrozen XID/MultiXactId as it goes. This optimization doesn't require any changes to the definition of relfrozenxid, nor does it require changes to the core design of freezing. Later work targeting PostgreSQL 16 will teach VACUUM to determine what to freeze based on page-level characteristics (not XID/XMID based cutoffs). But setting relfrozenxid/relminmxid to the exact oldest extant XID/MXID is independently useful work. For example, it is helpful with larger databases that consume many MultiXacts. 
If we assume that the largest tables don't ever need to allocate any MultiXacts, then aggressive VACUUMs targeting those tables will now advance relminmxid right up to OldestMxact. pg_class.relminmxid becomes a much more precise indicator of what's really going on in each table, making autovacuums to prevent wraparound (MultiXactId wraparound) occur less frequently. Final relfrozenxid values must still be >= FreezeLimit in an aggressive VACUUM -- FreezeLimit still acts as a lower bound on the final value that aggressive VACUUM can set relfrozenxid to. Since standard VACUUMs still make no guarantees about advancing relfrozenxid, they might as well set relfrozenxid to a value from well before FreezeLimit when the opportunity presents itself. In general standard VACUUMs may now set relfrozenxid to any value > the original relfrozenxid and <= OldestXmin. Credit for the general idea of using the oldest extant XID to set pg_class.relfrozenxid at the end of VACUUM goes to Andres Freund. Author: Peter Geoghegan Reviewed-By: Andres Freund Reviewed-By: Robert Haas Discussion: https://postgr.es/m/CAH2-WzkymFbz6D_vL+jmqSn_5q1wsFvFrE+37yLgL_Rkfd6Gzg@mail.gmail.com --- src/include/access/heapam.h | 7 +- src/include/access/heapam_xlog.h | 4 +- src/include/commands/vacuum.h | 1 + src/backend/access/heap/heapam.c | 244 ++++++++++++++---- src/backend/access/heap/vacuumlazy.c | 120 ++++++--- src/backend/commands/cluster.c | 5 +- src/backend/commands/vacuum.c | 42 +-- doc/src/sgml/maintenance.sgml | 30 ++- .../expected/vacuum-no-cleanup-lock.out | 188 ++++++++++++++ .../isolation/expected/vacuum-reltuples.out | 67 ----- src/test/isolation/isolation_schedule | 2 +- .../specs/vacuum-no-cleanup-lock.spec | 145 +++++++++++ .../isolation/specs/vacuum-reltuples.spec | 49 ---- 13 files changed, 675 insertions(+), 229 deletions(-) create mode 100644 src/test/isolation/expected/vacuum-no-cleanup-lock.out delete mode 100644 src/test/isolation/expected/vacuum-reltuples.out create mode 100644 
src/test/isolation/specs/vacuum-no-cleanup-lock.spec delete mode 100644 src/test/isolation/specs/vacuum-reltuples.spec diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index b46ab7d73..6ef3c02bb 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -167,8 +167,11 @@ extern void heap_inplace_update(Relation relation, HeapTuple tuple); extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi); -extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, - MultiXactId cutoff_multi); +extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, + TransactionId limit_xid, + MultiXactId limit_multi, + TransactionId *relfrozenxid_nofreeze_out, + MultiXactId *relminmxid_nofreeze_out); extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); extern void simple_heap_insert(Relation relation, HeapTuple tup); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 5c47fdcec..2d8a7f627 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -410,7 +410,9 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, - bool *totally_frozen); + bool *totally_frozen, + TransactionId *relfrozenxid_out, + MultiXactId *relminmxid_out); extern void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *xlrec_tp); extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index d64f6268f..ead88edda 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -291,6 +291,7 @@ extern bool vacuum_set_xid_limits(Relation rel, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, + MultiXactId *oldestMxact, 
TransactionId *freezeLimit, MultiXactId *multiXactCutoff); extern bool vacuum_xid_failsafe_check(TransactionId relfrozenxid, diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 3746336a0..5a3c18413 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6128,7 +6128,12 @@ heap_inplace_update(Relation relation, HeapTuple tuple) * NB -- this might have the side-effect of creating a new MultiXactId! * * "flags" is an output value; it's used to tell caller what to do on return. - * Possible flags are: + * + * "xmax_oldest_xid_out" is an output value; we must handle the details of + * tracking the oldest extant XID within Multixacts. This is part of how + * caller tracks relfrozenxid_out (the oldest extant XID) on behalf of VACUUM. + * + * Possible values that we can set in "flags": * FRM_NOOP * don't do anything -- keep existing Xmax * FRM_INVALIDATE_XMAX @@ -6140,12 +6145,21 @@ heap_inplace_update(Relation relation, HeapTuple tuple) * FRM_RETURN_IS_MULTI * The return value is a new MultiXactId to set as new Xmax. * (caller must obtain proper infomask bits using GetMultiXactIdHintBits) + * + * Final *xmax_oldest_xid_out value should be ignored completely unless + * "flags" contains either FRM_NOOP or FRM_RETURN_IS_MULTI. Final value is + * drawn from oldest extant XID that will remain in some MultiXact (old or + * new) after xmax is frozen (XIDs that won't remain after freezing are + * ignored, per the general convention). + * + * Note in particular that caller must deal with FRM_RETURN_IS_XID case + * itself, by considering returned Xid (not using *xmax_oldest_xid_out). 
*/ static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, MultiXactId cutoff_multi, - uint16 *flags) + uint16 *flags, TransactionId *xmax_oldest_xid_out) { TransactionId xid = InvalidTransactionId; int i; @@ -6157,6 +6171,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, bool has_lockers; TransactionId update_xid; bool update_committed; + TransactionId temp_xid_out; *flags = 0; @@ -6251,13 +6266,13 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, /* is there anything older than the cutoff? */ need_replace = false; + temp_xid_out = *xmax_oldest_xid_out; /* initialize temp_xid_out */ for (i = 0; i < nmembers; i++) { if (TransactionIdPrecedes(members[i].xid, cutoff_xid)) - { need_replace = true; - break; - } + if (TransactionIdPrecedes(members[i].xid, temp_xid_out)) + temp_xid_out = members[i].xid; } /* @@ -6266,6 +6281,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, */ if (!need_replace) { + *xmax_oldest_xid_out = temp_xid_out; *flags |= FRM_NOOP; pfree(members); return InvalidTransactionId; @@ -6275,6 +6291,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * If the multi needs to be updated, figure out which members do we need * to keep. */ + temp_xid_out = *xmax_oldest_xid_out; /* reset temp_xid_out */ nnewmembers = 0; newmembers = palloc(sizeof(MultiXactMember) * nmembers); has_lockers = false; @@ -6356,7 +6373,11 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * list.) 
*/ if (TransactionIdIsValid(update_xid)) + { newmembers[nnewmembers++] = members[i]; + if (TransactionIdPrecedes(members[i].xid, temp_xid_out)) + temp_xid_out = members[i].xid; + } } else { @@ -6366,6 +6387,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, { /* running locker cannot possibly be older than the cutoff */ Assert(!TransactionIdPrecedes(members[i].xid, cutoff_xid)); + Assert(!TransactionIdPrecedes(members[i].xid, *xmax_oldest_xid_out)); newmembers[nnewmembers++] = members[i]; has_lockers = true; } @@ -6403,6 +6425,13 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, */ xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers); *flags |= FRM_RETURN_IS_MULTI; + + /* + * Return oldest remaining XID in new multixact if it's older than + * caller's original xmax_oldest_xid_out (otherwise it's just the + * original xmax_oldest_xid_out value from caller) + */ + *xmax_oldest_xid_out = temp_xid_out; } pfree(newmembers); @@ -6421,6 +6450,11 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * will be totally frozen after these operations are performed and false if * more freezing will eventually be required. * + * Maintains *relfrozenxid_out and *relminmxid_out, which are the current + * target relfrozenxid and relminmxid for the relation. Caller should make + * temp copies of global tracking variables before starting to process a page, + * so that we can only scribble on copies. + * * Caller is responsible for setting the offset field, if appropriate. 
* * It is assumed that the caller has checked the tuple with @@ -6445,7 +6479,9 @@ bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, - xl_heap_freeze_tuple *frz, bool *totally_frozen) + xl_heap_freeze_tuple *frz, bool *totally_frozen, + TransactionId *relfrozenxid_out, + MultiXactId *relminmxid_out) { bool changed = false; bool xmax_already_frozen = false; @@ -6489,6 +6525,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, frz->t_infomask |= HEAP_XMIN_FROZEN; changed = true; } + else if (TransactionIdPrecedes(xid, *relfrozenxid_out)) + *relfrozenxid_out = xid; } /* @@ -6506,16 +6544,21 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, { TransactionId newxmax; uint16 flags; + TransactionId xmax_oldest_xid_out = *relfrozenxid_out; newxmax = FreezeMultiXactId(xid, tuple->t_infomask, relfrozenxid, relminmxid, - cutoff_xid, cutoff_multi, &flags); + cutoff_xid, cutoff_multi, + &flags, &xmax_oldest_xid_out); freeze_xmax = (flags & FRM_INVALIDATE_XMAX); if (flags & FRM_RETURN_IS_XID) { /* + * xmax will become an updater XID (an XID from the original + * MultiXact's XIDs that needs to be carried forward). + * * NB -- some of these transformations are only valid because we * know the return Xid is a tuple updater (i.e. not merely a * locker.) Also note that the only reason we don't explicitly @@ -6527,6 +6570,16 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, if (flags & FRM_MARK_COMMITTED) frz->t_infomask |= HEAP_XMAX_COMMITTED; changed = true; + Assert(freeze_xmax); + + /* + * Only consider newxmax Xid to track relfrozenxid_out here, since + * any other XIDs from the old MultiXact won't be left behind once + * xmax is actually frozen. 
+ */ + Assert(TransactionIdIsValid(newxmax)); + if (TransactionIdPrecedes(newxmax, *relfrozenxid_out)) + *relfrozenxid_out = newxmax; } else if (flags & FRM_RETURN_IS_MULTI) { @@ -6534,6 +6587,10 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, uint16 newbits2; /* + * xmax was an old MultiXactId which we have to replace with a new + * Multixact, that carries forward a subset of the XIDs from the + * original (those that we'll still need). + * * We can't use GetMultiXactIdHintBits directly on the new multi * here; that routine initializes the masks to all zeroes, which * would lose other bits we need. Doing it this way ensures all @@ -6548,6 +6605,37 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, frz->xmax = newxmax; changed = true; + Assert(!freeze_xmax); + + /* + * FreezeMultiXactId sets xmax_oldest_xid_out to any XID that it + * notices is older than initial relfrozenxid_out, unless the XID + * won't remain after freezing + */ + Assert(!MultiXactIdPrecedes(newxmax, *relminmxid_out)); + Assert(TransactionIdPrecedesOrEquals(xmax_oldest_xid_out, + *relfrozenxid_out)); + *relfrozenxid_out = xmax_oldest_xid_out; + } + else if (flags & FRM_NOOP) + { + /* + * xmax is a MultiXactId, and nothing about it changes for now. + * + * Might have to ratchet back relminmxid_out, relfrozenxid_out, or + * both together. FreezeMultiXactId sets xmax_oldest_xid_out to + * any XID that it notices is older than initial relfrozenxid_out, + * unless the XID won't remain after freezing (or in this case + * after _not_ freezing). 
+ */ + Assert(MultiXactIdIsValid(xid)); + Assert(!freeze_xmax); + + if (MultiXactIdPrecedes(xid, *relminmxid_out)) + *relminmxid_out = xid; + Assert(TransactionIdPrecedesOrEquals(xmax_oldest_xid_out, + *relfrozenxid_out)); + *relfrozenxid_out = xmax_oldest_xid_out; } } else if (TransactionIdIsNormal(xid)) @@ -6575,7 +6663,11 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, freeze_xmax = true; } else + { freeze_xmax = false; + if (TransactionIdPrecedes(xid, *relfrozenxid_out)) + *relfrozenxid_out = xid; + } } else if ((tuple->t_infomask & HEAP_XMAX_INVALID) || !TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) @@ -6699,11 +6791,14 @@ heap_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple frz; bool do_freeze; bool tuple_totally_frozen; + TransactionId relfrozenxid_out = cutoff_xid; + MultiXactId relminmxid_out = cutoff_multi; do_freeze = heap_prepare_freeze_tuple(tuple, relfrozenxid, relminmxid, cutoff_xid, cutoff_multi, - &frz, &tuple_totally_frozen); + &frz, &tuple_totally_frozen, + &relfrozenxid_out, &relminmxid_out); /* * Note that because this is not a WAL-logged operation, we don't need to @@ -7133,24 +7228,54 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac) * are older than the specified cutoff XID or MultiXactId. If so, return true. * + * See heap_prepare_freeze_tuple for information about the basic rules for the + * cutoffs used here. + * * It doesn't matter whether the tuple is alive or dead, we are checking * to see if a tuple needs to be removed or frozen to avoid wraparound. * + * The *relfrozenxid_nofreeze_out and *relminmxid_nofreeze_out arguments are + * input/output arguments that work just like heap_prepare_freeze_tuple's + * *relfrozenxid_out and *relminmxid_out input/output arguments. 
However, + * there is one important difference: we track the oldest extant XID and XMID + * while making a working assumption that no freezing will actually take + * place. On the other hand, heap_prepare_freeze_tuple assumes that freezing + * will take place (based on the specific instructions it also sets up for + * caller's tuple). + * + * Note, in particular, that we even assume that freezing won't go ahead for a + * tuple that we indicate "needs freezing" (by returning true). Not all + * callers will be okay with that. Caller should make temp copies of global + * tracking variables, and pass us those. That way caller can back out at the + * last moment when it must freeze the tuple using heap_prepare_freeze_tuple. + * * NB: Cannot rely on hint bits here, they might not be set after a crash or * on a standby. */ bool -heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, - MultiXactId cutoff_multi) +heap_tuple_needs_freeze(HeapTupleHeader tuple, + TransactionId limit_xid, MultiXactId limit_multi, + TransactionId *relfrozenxid_nofreeze_out, + MultiXactId *relminmxid_nofreeze_out) { TransactionId xid; - - xid = HeapTupleHeaderGetXmin(tuple); - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, cutoff_xid)) - return true; + bool needs_freeze = false; /* + * First deal with xmin. + */ + xid = HeapTupleHeaderGetXmin(tuple); + if (TransactionIdIsNormal(xid)) + { + if (TransactionIdPrecedes(xid, *relfrozenxid_nofreeze_out)) + *relfrozenxid_nofreeze_out = xid; + if (TransactionIdPrecedes(xid, limit_xid)) + needs_freeze = true; + } + + /* + * Now deal with xmax. + * * The considerations for multixacts are complicated; look at * heap_prepare_freeze_tuple for justifications. This routine had better * be in sync with that one! 
@@ -7158,57 +7283,80 @@ heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { MultiXactId multi; + MultiXactMember *members; + int nmembers; multi = HeapTupleHeaderGetRawXmax(tuple); if (!MultiXactIdIsValid(multi)) { - /* no xmax set, ignore */ - ; + /* no xmax set -- but xmin might still need freezing */ + return needs_freeze; } - else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask)) - return true; - else if (MultiXactIdPrecedes(multi, cutoff_multi)) - return true; - else + + /* + * Might have to ratchet back relminmxid_nofreeze_out, which we assume + * won't be frozen by caller (even when we return true) + */ + if (MultiXactIdPrecedes(multi, *relminmxid_nofreeze_out)) + *relminmxid_nofreeze_out = multi; + + if (HEAP_LOCKED_UPGRADED(tuple->t_infomask)) { - MultiXactMember *members; - int nmembers; - int i; - - /* need to check whether any member of the mxact is too old */ - - nmembers = GetMultiXactIdMembers(multi, &members, false, - HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); - - for (i = 0; i < nmembers; i++) - { - if (TransactionIdPrecedes(members[i].xid, cutoff_xid)) - { - pfree(members); - return true; - } - } - if (nmembers > 0) - pfree(members); + /* + * pg_upgrade'd MultiXact doesn't need to have its XID members + * affect caller's relfrozenxid_nofreeze_out (just freeze it) + */ + return true; } + else if (MultiXactIdPrecedes(multi, limit_multi)) + needs_freeze = true; + + /* + * Need to check whether any member of the mxact is too old to + * determine if MultiXact needs to be frozen now. We even access the + * members when we know that the MultiXactId isn't eligible for + * freezing now -- we must still maintain relfrozenxid_nofreeze_out. 
+ */ + nmembers = GetMultiXactIdMembers(multi, &members, false, + HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); + + for (int i = 0; i < nmembers; i++) + { + xid = members[i].xid; + + if (TransactionIdPrecedes(xid, limit_xid)) + needs_freeze = true; + if (TransactionIdPrecedes(xid, *relfrozenxid_nofreeze_out)) + *relfrozenxid_nofreeze_out = xid; + } + if (nmembers > 0) + pfree(members); } else { xid = HeapTupleHeaderGetRawXmax(tuple); - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, cutoff_xid)) - return true; + if (TransactionIdIsNormal(xid)) + { + if (TransactionIdPrecedes(xid, *relfrozenxid_nofreeze_out)) + *relfrozenxid_nofreeze_out = xid; + if (TransactionIdPrecedes(xid, limit_xid)) + needs_freeze = true; + } } if (tuple->t_infomask & HEAP_MOVED) { xid = HeapTupleHeaderGetXvac(tuple); - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, cutoff_xid)) - return true; + if (TransactionIdIsNormal(xid)) + { + if (TransactionIdPrecedes(xid, *relfrozenxid_nofreeze_out)) + *relfrozenxid_nofreeze_out = xid; + if (TransactionIdPrecedes(xid, limit_xid)) + needs_freeze = true; + } } - return false; + return needs_freeze; } /* diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 87ab7775a..ae280d4f9 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -144,7 +144,7 @@ typedef struct LVRelState Relation *indrels; int nindexes; - /* Aggressive VACUUM (scan all unfrozen pages)? */ + /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */ bool aggressive; /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */ bool skipwithvm; @@ -173,8 +173,9 @@ typedef struct LVRelState /* VACUUM operation's target cutoffs for freezing XIDs and MultiXactIds */ TransactionId FreezeLimit; MultiXactId MultiXactCutoff; - /* Are FreezeLimit/MultiXactCutoff still valid? 
*/ - bool freeze_cutoffs_valid; + /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */ + TransactionId NewRelfrozenXid; + MultiXactId NewRelminMxid; /* Error reporting state */ char *relnamespace; @@ -328,6 +329,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, PgStat_Counter startreadtime = 0; PgStat_Counter startwritetime = 0; TransactionId OldestXmin; + MultiXactId OldestMxact; TransactionId FreezeLimit; MultiXactId MultiXactCutoff; @@ -354,17 +356,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * used to determine which XIDs/MultiXactIds will be frozen. * * If this is an aggressive VACUUM, then we're strictly required to freeze - * any and all XIDs from before FreezeLimit, so that we will be able to - * safely advance relfrozenxid up to FreezeLimit below (we must be able to - * advance relminmxid up to MultiXactCutoff, too). + * any and all XIDs from before FreezeLimit in order to be able to advance + * relfrozenxid to a value >= FreezeLimit below. There is an analogous + * requirement around MultiXact freezing, relminmxid, and MultiXactCutoff. 
*/ aggressive = vacuum_set_xid_limits(rel, params->freeze_min_age, params->freeze_table_age, params->multixact_freeze_min_age, params->multixact_freeze_table_age, - &OldestXmin, &FreezeLimit, - &MultiXactCutoff); + &OldestXmin, &OldestMxact, + &FreezeLimit, &MultiXactCutoff); skipwithvm = true; if (params->options & VACOPT_DISABLE_PAGE_SKIPPING) @@ -511,10 +513,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->vistest = GlobalVisTestFor(rel); /* FreezeLimit controls XID freezing (always <= OldestXmin) */ vacrel->FreezeLimit = FreezeLimit; - /* MultiXactCutoff controls MXID freezing */ + /* MultiXactCutoff controls MXID freezing (always <= OldestMxact) */ vacrel->MultiXactCutoff = MultiXactCutoff; - /* Track if cutoffs became invalid (possible in !aggressive case only) */ - vacrel->freeze_cutoffs_valid = true; + /* Initialize state used to track oldest extant XID/XMID */ + vacrel->NewRelfrozenXid = OldestXmin; + vacrel->NewRelminMxid = OldestMxact; /* * Call lazy_scan_heap to perform all required heap pruning, index @@ -568,14 +571,13 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * Aggressive VACUUM must reliably advance relfrozenxid (and relminmxid). * We are able to advance relfrozenxid in a non-aggressive VACUUM too, * provided we didn't skip any all-visible (not all-frozen) pages using - * the visibility map, and assuming that we didn't fail to get a cleanup - * lock that made it unsafe with respect to FreezeLimit (or perhaps our - * MultiXactCutoff) established for VACUUM operation. + * the visibility map. A non-aggressive VACUUM might advance relfrozenxid + * to an XID that is either older or newer than FreezeLimit (same applies + * to relminmxid and MultiXactCutoff). 
*/ - if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages || - !vacrel->freeze_cutoffs_valid) + if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages) { - /* Cannot advance relfrozenxid/relminmxid */ + /* Skipped an all-visible page, so cannot advance relfrozenxid */ Assert(!aggressive); frozenxid_updated = minmulti_updated = false; vac_update_relstats(rel, new_rel_pages, new_live_tuples, @@ -587,9 +589,15 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, { Assert(vacrel->scanned_pages + vacrel->frozenskipped_pages == orig_rel_pages); + Assert(!aggressive || + TransactionIdPrecedesOrEquals(FreezeLimit, + vacrel->NewRelfrozenXid)); + Assert(!aggressive || + MultiXactIdPrecedesOrEquals(MultiXactCutoff, + vacrel->NewRelminMxid)); vac_update_relstats(rel, new_rel_pages, new_live_tuples, new_rel_allvisible, vacrel->nindexes > 0, - FreezeLimit, MultiXactCutoff, + vacrel->NewRelfrozenXid, vacrel->NewRelminMxid, &frozenxid_updated, &minmulti_updated, false); } @@ -694,17 +702,19 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, OldestXmin, diff); if (frozenxid_updated) { - diff = (int32) (FreezeLimit - vacrel->relfrozenxid); + diff = (int32) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid); + Assert(diff > 0); appendStringInfo(&buf, _("new relfrozenxid: %u, which is %d xids ahead of previous value\n"), - FreezeLimit, diff); + vacrel->NewRelfrozenXid, diff); } if (minmulti_updated) { - diff = (int32) (MultiXactCutoff - vacrel->relminmxid); + diff = (int32) (vacrel->NewRelminMxid - vacrel->relminmxid); + Assert(diff > 0); appendStringInfo(&buf, _("new relminmxid: %u, which is %d mxids ahead of previous value\n"), - MultiXactCutoff, diff); + vacrel->NewRelminMxid, diff); } if (orig_rel_pages > 0) { @@ -896,8 +906,8 @@ lazy_scan_heap(LVRelState *vacrel, int nworkers) * find them. But even when aggressive *is* set, it's still OK if we miss * a page whose all-frozen marking has just been cleared. 
Any new XIDs * just added to that page are necessarily >= vacrel->OldestXmin, and so - * they'll have no effect on the value to which we can safely set - * relfrozenxid. A similar argument applies for MXIDs and relminmxid. + * they cannot invalidate NewRelfrozenXid tracking. A similar argument + * applies for NewRelminMxid tracking and OldestMxact. */ next_unskippable_block = 0; if (vacrel->skipwithvm) @@ -1584,6 +1594,8 @@ lazy_scan_prune(LVRelState *vacrel, recently_dead_tuples; int nnewlpdead; int nfrozen; + TransactionId NewRelfrozenXid; + MultiXactId NewRelminMxid; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage]; @@ -1593,7 +1605,9 @@ lazy_scan_prune(LVRelState *vacrel, retry: - /* Initialize (or reset) page-level counters */ + /* Initialize (or reset) page-level state */ + NewRelfrozenXid = vacrel->NewRelfrozenXid; + NewRelminMxid = vacrel->NewRelminMxid; tuples_deleted = 0; lpdead_items = 0; live_tuples = 0; @@ -1801,7 +1815,8 @@ retry: vacrel->FreezeLimit, vacrel->MultiXactCutoff, &frozen[nfrozen], - &tuple_totally_frozen)) + &tuple_totally_frozen, + &NewRelfrozenXid, &NewRelminMxid)) { /* Will execute freeze below */ frozen[nfrozen++].offset = offnum; @@ -1815,13 +1830,16 @@ retry: prunestate->all_frozen = false; } + vacrel->offnum = InvalidOffsetNumber; + /* * We have now divided every item on the page into either an LP_DEAD item * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple * that remains and needs to be considered for freezing now (LP_UNUSED and * LP_REDIRECT items also remain, but are of no further interest to us). 
*/ - vacrel->offnum = InvalidOffsetNumber; + vacrel->NewRelfrozenXid = NewRelfrozenXid; + vacrel->NewRelminMxid = NewRelminMxid; /* * Consider the need to freeze any items with tuple storage from the page @@ -1972,6 +1990,8 @@ lazy_scan_noprune(LVRelState *vacrel, missed_dead_tuples; HeapTupleHeader tupleheader; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; + TransactionId NoFreezeNewRelfrozenXid = vacrel->NewRelfrozenXid; + MultiXactId NoFreezeNewRelminMxid = vacrel->NewRelminMxid; Assert(BufferGetBlockNumber(buf) == blkno); @@ -2017,20 +2037,40 @@ lazy_scan_noprune(LVRelState *vacrel, tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); if (heap_tuple_needs_freeze(tupleheader, vacrel->FreezeLimit, - vacrel->MultiXactCutoff)) + vacrel->MultiXactCutoff, + &NoFreezeNewRelfrozenXid, + &NoFreezeNewRelminMxid)) { if (vacrel->aggressive) { - /* Going to have to get cleanup lock for lazy_scan_prune */ + /* + * heap_tuple_needs_freeze determined that it isn't going to + * be possible for the ongoing aggressive VACUUM operation to + * advance relfrozenxid to a value >= FreezeLimit without + * freezing one or more tuples with older XIDs from this page. + * (Or perhaps the issue was that MultiXactCutoff could not be + * respected. Might have even been both cutoffs, together.) + * + * Tell caller that it must acquire a full cleanup lock. It's + * possible that caller will have to wait a while for one, but + * that can't be helped -- full processing by lazy_scan_prune + * is required to freeze the older XIDs (and/or freeze older + * MultiXactIds). + */ vacrel->offnum = InvalidOffsetNumber; return false; } - - /* - * Current non-aggressive VACUUM operation definitely won't be - * able to advance relfrozenxid or relminmxid - */ - vacrel->freeze_cutoffs_valid = false; + else + { + /* + * This is a non-aggressive VACUUM, which is under no strict + * obligation to advance relfrozenxid at all (much less to + * advance it to a value >= FreezeLimit). 
Non-aggressive + * VACUUM advances relfrozenxid/relminmxid on a best-effort + * basis. Accept an older final relfrozenxid/relminmxid value + * rather than waiting for a cleanup lock. + */ + } } ItemPointerSet(&(tuple.t_self), blkno, offnum); @@ -2079,6 +2119,16 @@ lazy_scan_noprune(LVRelState *vacrel, vacrel->offnum = InvalidOffsetNumber; + /* + * By here we know for sure that caller can tolerate having reduced + * processing for this particular page. Before we return to report + * success, update vacrel with details of how we processed the page. + * (lazy_scan_prune expects a clean slate, so we have to delay these steps + * until here.) + */ + vacrel->NewRelfrozenXid = NoFreezeNewRelfrozenXid; + vacrel->NewRelminMxid = NoFreezeNewRelminMxid; + /* * Now save details of the LP_DEAD items from the page in vacrel (though * only when VACUUM uses two-pass strategy) diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 02a7e94bf..a7e988298 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -767,6 +767,7 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY; TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY; TransactionId OldestXmin; + MultiXactId oldestMxact; TransactionId FreezeXid; MultiXactId MultiXactCutoff; bool use_sort; @@ -856,8 +857,8 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, * Since we're going to rewrite the whole table anyway, there's no reason * not to be aggressive about this. 
*/ - vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, - &OldestXmin, &FreezeXid, &MultiXactCutoff); + vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &oldestMxact, + &FreezeXid, &MultiXactCutoff); /* * FreezeXid will become the table's new relfrozenxid, and that mustn't go diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 50a4a612e..0ae3b4506 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -945,14 +945,22 @@ get_all_vacuum_rels(int options) * The output parameters are: * - oldestXmin is the Xid below which tuples deleted by any xact (that * committed) should be considered DEAD, not just RECENTLY_DEAD. - * - freezeLimit is the Xid below which all Xids are replaced by - * FrozenTransactionId during vacuum. - * - multiXactCutoff is the value below which all MultiXactIds are removed - * from Xmax. + * - oldestMxact is the Mxid below which MultiXacts are definitely not + * seen as visible by any running transaction. + * - freezeLimit is the Xid below which all Xids are definitely replaced by + * FrozenTransactionId during aggressive vacuums. + * - multiXactCutoff is the value below which all MultiXactIds are definitely + * removed from Xmax during aggressive vacuums. * * Return value indicates if vacuumlazy.c caller should make its VACUUM * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to - * FreezeLimit, and relminmxid up to multiXactCutoff. + * FreezeLimit (at a minimum), and relminmxid up to multiXactCutoff (at a + * minimum). + * + * oldestXmin and oldestMxact are the most recent values that can ever be + * passed to vac_update_relstats() as frozenxid and minmulti arguments by our + * vacuumlazy.c caller later on. These values should be passed when it turns + * out that VACUUM will leave no unfrozen XIDs/XMIDs behind in the table. 
*/ bool vacuum_set_xid_limits(Relation rel, @@ -961,6 +969,7 @@ vacuum_set_xid_limits(Relation rel, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, + MultiXactId *oldestMxact, TransactionId *freezeLimit, MultiXactId *multiXactCutoff) { @@ -969,7 +978,6 @@ vacuum_set_xid_limits(Relation rel, int effective_multixact_freeze_max_age; TransactionId limit; TransactionId safeLimit; - MultiXactId oldestMxact; MultiXactId mxactLimit; MultiXactId safeMxactLimit; int freezetable; @@ -1065,9 +1073,11 @@ vacuum_set_xid_limits(Relation rel, effective_multixact_freeze_max_age / 2); Assert(mxid_freezemin >= 0); + /* Remember for caller */ + *oldestMxact = GetOldestMultiXactId(); + /* compute the cutoff multi, being careful to generate a valid value */ - oldestMxact = GetOldestMultiXactId(); - mxactLimit = oldestMxact - mxid_freezemin; + mxactLimit = *oldestMxact - mxid_freezemin; if (mxactLimit < FirstMultiXactId) mxactLimit = FirstMultiXactId; @@ -1082,8 +1092,8 @@ vacuum_set_xid_limits(Relation rel, (errmsg("oldest multixact is far in the past"), errhint("Close open transactions with multixacts soon to avoid wraparound problems."))); /* Use the safe limit, unless an older mxact is still running */ - if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit)) - mxactLimit = oldestMxact; + if (MultiXactIdPrecedes(*oldestMxact, safeMxactLimit)) + mxactLimit = *oldestMxact; else mxactLimit = safeMxactLimit; } @@ -1390,14 +1400,10 @@ vac_update_relstats(Relation relation, * Update relfrozenxid, unless caller passed InvalidTransactionId * indicating it has no new data. * - * Ordinarily, we don't let relfrozenxid go backwards: if things are - * working correctly, the only way the new frozenxid could be older would - * be if a previous VACUUM was done with a tighter freeze_min_age, in - * which case we don't want to forget the work it already did. 
However, - if the stored relfrozenxid is "in the future", then it must be corrupt - and it seems best to overwrite it with the cutoff we used this time. - This should match vac_update_datfrozenxid() concerning what we consider - to be "in the future". + * Ordinarily, we don't let relfrozenxid go backwards. However, if the + * stored relfrozenxid is "in the future", then it must be corrupt, so + * just overwrite it. This should match vac_update_datfrozenxid() + * concerning what we consider to be "in the future". */ if (frozenxid_updated) *frozenxid_updated = false; diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 36f975b1e..6a02d0fa8 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -563,9 +563,11 @@ statistics in the system tables pg_class and pg_database. In particular, the relfrozenxid column of a table's - pg_class row contains the freeze cutoff XID that was used - by the last aggressive VACUUM for that table. All rows - inserted by transactions with XIDs older than this cutoff XID are + pg_class row contains the oldest + remaining XID at the end of the most recent VACUUM + that successfully advanced relfrozenxid + (typically the most recent aggressive VACUUM). All rows inserted + by transactions with XIDs older than this cutoff XID are guaranteed to have been frozen. Similarly, the datfrozenxid column of a database's pg_database row is a lower bound on the unfrozen XIDs @@ -588,6 +590,17 @@ SELECT datname, age(datfrozenxid) FROM pg_database; cutoff XID to the current transaction's XID. + + + VACUUM VERBOSE outputs information about + relfrozenxid and/or + relminmxid when either field was + advanced. The same details appear in the server log when autovacuum logging reports on vacuuming + by autovacuum.
+ + + VACUUM normally only scans pages that have been modified since the last vacuum, but relfrozenxid can only be @@ -602,7 +615,11 @@ SELECT datname, age(datfrozenxid) FROM pg_database; set age(relfrozenxid) to a value just a little more than the vacuum_freeze_min_age setting that was used (more by the number of transactions started since the - VACUUM started). If no relfrozenxid-advancing + VACUUM started). VACUUM + will set relfrozenxid to the oldest XID + that remains in the table, so it's possible that the final value + will be much more recent than strictly required. + If no relfrozenxid-advancing VACUUM is issued on the table until autovacuum_freeze_max_age is reached, an autovacuum will soon be forced for the table. @@ -689,8 +706,9 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. - Aggressive VACUUM scans, regardless of - what causes them, enable advancing the value for that table. + Aggressive VACUUM scans, regardless of what + causes them, are guaranteed to be able to + advance the table's relminmxid. Eventually, as all tables in all databases are scanned and their oldest multixact values are advanced, on-disk storage for older multixacts can be removed. 
diff --git a/src/test/isolation/expected/vacuum-no-cleanup-lock.out b/src/test/isolation/expected/vacuum-no-cleanup-lock.out new file mode 100644 index 000000000..9b77bb5b4 --- /dev/null +++ b/src/test/isolation/expected/vacuum-no-cleanup-lock.out @@ -0,0 +1,188 @@ +Parsed test spec with 4 sessions + +starting permutation: vacuumer_pg_class_stats dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 20 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 21 +(1 row) + + +starting permutation: vacuumer_pg_class_stats dml_insert pinholder_cursor vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 20 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step pinholder_cursor: + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; + +dummy +----- + 1 +(1 row) + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 21 +(1 row) + +step pinholder_commit: + COMMIT; + + +starting permutation: vacuumer_pg_class_stats pinholder_cursor dml_insert dml_delete dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples 
+--------+--------- + 1| 20 +(1 row) + +step pinholder_cursor: + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; + +dummy +----- + 1 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step dml_delete: + DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl); + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 21 +(1 row) + +step pinholder_commit: + COMMIT; + + +starting permutation: vacuumer_pg_class_stats dml_insert dml_delete pinholder_cursor dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 20 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step dml_delete: + DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl); + +step pinholder_cursor: + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; + +dummy +----- + 1 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 21 +(1 row) + +step pinholder_commit: + COMMIT; + + +starting permutation: dml_begin dml_other_begin dml_key_share dml_other_key_share vacuumer_nonaggressive_vacuum pinholder_cursor dml_commit dml_other_commit vacuumer_nonaggressive_vacuum pinholder_commit vacuumer_nonaggressive_vacuum +step dml_begin: BEGIN; +step dml_other_begin: BEGIN; +step dml_key_share: SELECT id FROM 
smalltbl WHERE id = 3 FOR KEY SHARE; +id +-- + 3 +(1 row) + +step dml_other_key_share: SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; +id +-- + 3 +(1 row) + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step pinholder_cursor: + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; + +dummy +----- + 1 +(1 row) + +step dml_commit: COMMIT; +step dml_other_commit: COMMIT; +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step pinholder_commit: + COMMIT; + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + diff --git a/src/test/isolation/expected/vacuum-reltuples.out b/src/test/isolation/expected/vacuum-reltuples.out deleted file mode 100644 index ce55376e7..000000000 --- a/src/test/isolation/expected/vacuum-reltuples.out +++ /dev/null @@ -1,67 +0,0 @@ -Parsed test spec with 2 sessions - -starting permutation: modify vac stats -step modify: - insert into smalltbl select max(id)+1 from smalltbl; - -step vac: - vacuum smalltbl; - -step stats: - select relpages, reltuples from pg_class - where oid='smalltbl'::regclass; - -relpages|reltuples ---------+--------- - 1| 21 -(1 row) - - -starting permutation: modify open fetch1 vac close stats -step modify: - insert into smalltbl select max(id)+1 from smalltbl; - -step open: - begin; - declare c1 cursor for select 1 as dummy from smalltbl; - -step fetch1: - fetch next from c1; - -dummy ------ - 1 -(1 row) - -step vac: - vacuum smalltbl; - -step close: - commit; - -step stats: - select relpages, reltuples from pg_class - where oid='smalltbl'::regclass; - -relpages|reltuples ---------+--------- - 1| 21 -(1 row) - - -starting permutation: modify vac stats -step modify: - insert into smalltbl select max(id)+1 from smalltbl; - -step vac: - vacuum smalltbl; - -step stats: - select relpages, reltuples from pg_class - where oid='smalltbl'::regclass; - -relpages|reltuples ---------+--------- - 1| 21 -(1 row) - diff --git a/src/test/isolation/isolation_schedule 
b/src/test/isolation/isolation_schedule index 0dae483e8..06436cf46 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -80,7 +80,7 @@ test: alter-table-4 test: create-trigger test: sequence-ddl test: async-notify -test: vacuum-reltuples +test: vacuum-no-cleanup-lock test: timeouts test: vacuum-concurrent-drop test: vacuum-conflict diff --git a/src/test/isolation/specs/vacuum-no-cleanup-lock.spec b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec new file mode 100644 index 000000000..991738247 --- /dev/null +++ b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec @@ -0,0 +1,145 @@ +# Test for vacuum's reduced processing of heap pages (used for any heap page +# where a cleanup lock isn't immediately available) +# +# Debugging tip: Change VACUUM to VACUUM VERBOSE to get feedback on what's +# really going on +setup +{ + CREATE TABLE smalltbl AS SELECT i AS id FROM generate_series(1,20) i; + ALTER TABLE smalltbl SET (autovacuum_enabled = off); +} +setup +{ + VACUUM ANALYZE smalltbl; +} + +teardown +{ + DROP TABLE smalltbl; +} + +# This session holds a pin on smalltbl's only heap page: +session pinholder +step pinholder_cursor +{ + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; +} +step pinholder_commit +{ + COMMIT; +} + +# This session inserts and deletes tuples, potentially affecting reltuples: +session dml +step dml_insert +{ + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; +} +step dml_delete +{ + DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl); +} +step dml_begin { BEGIN; } +step dml_key_share { SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; } +step dml_commit { COMMIT; } + +# Needed for Multixact test: +session dml_other +step dml_other_begin { BEGIN; } +step dml_other_key_share { SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; } +step dml_other_commit { COMMIT; } + +# This session runs non-aggressive VACUUM, but with maximally aggressive +# 
cutoffs for tuple freezing (e.g., FreezeLimit == OldestXmin): +session vacuumer +setup +{ + SET vacuum_freeze_min_age = 0; + SET vacuum_multixact_freeze_min_age = 0; +} +step vacuumer_nonaggressive_vacuum +{ + VACUUM smalltbl; +} +step vacuumer_pg_class_stats +{ + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; +} + +# Test VACUUM's reltuples counting mechanism. +# +# Final pg_class.reltuples should never be affected by VACUUM's inability to +# get a cleanup lock on any page, except to the extent that any cleanup lock +# contention changes the number of tuples that remain ("missed dead" tuples +# are counted in reltuples, much like "recently dead" tuples). + +# Easy case: +permutation + vacuumer_pg_class_stats # Start with 20 tuples + dml_insert + vacuumer_nonaggressive_vacuum + vacuumer_pg_class_stats # End with 21 tuples + +# Harder case -- count 21 tuples at the end (like last time), but with cleanup +# lock contention this time: +permutation + vacuumer_pg_class_stats # Start with 20 tuples + dml_insert + pinholder_cursor + vacuumer_nonaggressive_vacuum + vacuumer_pg_class_stats # End with 21 tuples + pinholder_commit # order doesn't matter + +# Same as "harder case", but vary the order, and delete an inserted row: +permutation + vacuumer_pg_class_stats # Start with 20 tuples + pinholder_cursor + dml_insert + dml_delete + dml_insert + vacuumer_nonaggressive_vacuum + # reltuples is 21 here again -- "recently dead" tuple won't be included in + # count here: + vacuumer_pg_class_stats + pinholder_commit # order doesn't matter + +# Same as "harder case", but initial insert and delete before cursor: +permutation + vacuumer_pg_class_stats # Start with 20 tuples + dml_insert + dml_delete + pinholder_cursor + dml_insert + vacuumer_nonaggressive_vacuum + # reltuples is 21 here again -- "missed dead" tuple ("recently dead" when + # concurrent activity held back VACUUM's OldestXmin) won't be included in + # count here: + vacuumer_pg_class_stats + 
pinholder_commit # order doesn't matter + +# Test VACUUM's mechanism for skipping MultiXact freezing. +# +# This provides test coverage for code paths that are only hit when we need to +# freeze, but inability to acquire a cleanup lock on a heap page makes +# freezing some XIDs/XMIDs < FreezeLimit/MultiXactCutoff impossible (without +# waiting for a cleanup lock, which non-aggressive VACUUM is unwilling to do). +permutation + dml_begin + dml_other_begin + dml_key_share + dml_other_key_share + # Will get cleanup lock, can't advance relminmxid yet: + # (though will usually advance relfrozenxid by ~2 XIDs) + vacuumer_nonaggressive_vacuum + pinholder_cursor + dml_commit + dml_other_commit + # Can't cleanup lock, so still can't advance relminmxid here: + # (relfrozenxid held back by XIDs in MultiXact too) + vacuumer_nonaggressive_vacuum + pinholder_commit + # Pin was dropped, so will advance relminmxid, at long last: + # (ditto for relfrozenxid advancement) + vacuumer_nonaggressive_vacuum diff --git a/src/test/isolation/specs/vacuum-reltuples.spec b/src/test/isolation/specs/vacuum-reltuples.spec deleted file mode 100644 index a2a461f2f..000000000 --- a/src/test/isolation/specs/vacuum-reltuples.spec +++ /dev/null @@ -1,49 +0,0 @@ -# Test for vacuum's handling of reltuples when pages are skipped due -# to page pins. We absolutely need to avoid setting reltuples=0 in -# such cases, since that interferes badly with planning. -# -# Expected result for all three permutation is 21 tuples, including -# the second permutation. VACUUM is able to count the concurrently -# inserted tuple in its final reltuples, even when a cleanup lock -# cannot be acquired on the affected heap page. 
- -setup { - create table smalltbl - as select i as id from generate_series(1,20) i; - alter table smalltbl set (autovacuum_enabled = off); -} -setup { - vacuum analyze smalltbl; -} - -teardown { - drop table smalltbl; -} - -session worker -step open { - begin; - declare c1 cursor for select 1 as dummy from smalltbl; -} -step fetch1 { - fetch next from c1; -} -step close { - commit; -} -step stats { - select relpages, reltuples from pg_class - where oid='smalltbl'::regclass; -} - -session vacuumer -step vac { - vacuum smalltbl; -} -step modify { - insert into smalltbl select max(id)+1 from smalltbl; -} - -permutation modify vac stats -permutation modify open fetch1 vac close stats -permutation modify vac stats -- 2.30.2