From 8b1bc24566a2e732177fbecae849570554a797e1 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sat, 13 Mar 2021 20:37:32 -0800 Subject: [PATCH v4 1/3] Refactor vacuumlazy.c. Break up lazy_scan_heap() into functions. Aside from being useful cleanup work in its own right, this is also preparation for an upcoming patch that removes the "tupgone" special case from vacuumlazy.c. --- src/backend/access/heap/vacuumlazy.c | 1384 +++++++++++++++---------- contrib/pg_visibility/pg_visibility.c | 8 +- contrib/pgstattuple/pgstatapprox.c | 8 +- 3 files changed, 832 insertions(+), 568 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 8341879d89..6382393516 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -294,8 +294,6 @@ typedef struct LVRelStats { char *relnamespace; char *relname; - /* useindex = true means two-pass strategy; false means one-pass */ - bool useindex; /* Overall statistics about rel */ BlockNumber old_rel_pages; /* previous value of pg_class.relpages */ BlockNumber rel_pages; /* total number of pages */ @@ -330,9 +328,47 @@ typedef struct LVSavedErrInfo VacErrPhase phase; } LVSavedErrInfo; +/* + * Counters maintained by lazy_scan_heap() (and scan_prune_page()): + */ +typedef struct LVTempCounters +{ + double num_tuples; /* total number of nonremovable tuples */ + double live_tuples; /* live tuples (reltuples estimate) */ + double tups_vacuumed; /* tuples cleaned up by current vacuum */ + double nkeep; /* dead-but-not-removable tuples */ + double nunused; /* # existing unused line pointers */ +} LVTempCounters; + +/* + * State output by scan_prune_page(): + */ +typedef struct LVPrunePageState +{ + bool hastup; /* Page is truncatable? */ + bool has_dead_items; /* includes existing LP_DEAD items */ + bool all_visible; /* Every item visible to all? */ + bool all_frozen; /* provided all_visible is also true */ +} LVPrunePageState; + +/* + * State set up and maintained in lazy_scan_heap() (also maintained in + * scan_prune_page()) that represents VM bit status. + * + * Used by scan_setvmbit_page() when we're done pruning. 
+ */ +typedef struct LVVisMapPageState +{ + bool all_visible_according_to_vm; + TransactionId visibility_cutoff_xid; +} LVVisMapPageState; + /* A few variables that don't seem worth passing around as parameters */ static int elevel = -1; +static TransactionId RelFrozenXid; +static MultiXactId RelMinMxid; + static TransactionId OldestXmin; static TransactionId FreezeLimit; static MultiXactId MultiXactCutoff; @@ -344,6 +380,10 @@ static BufferAccessStrategy vac_strategy; static void lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive); +static void two_pass_strategy(Relation onerel, LVRelStats *vacrelstats, + Relation *Irel, IndexBulkDeleteResult **indstats, + int nindexes, LVParallelState *lps, + VacOptTernaryValue index_cleanup); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats); @@ -363,7 +403,8 @@ static bool should_attempt_truncation(VacuumParams *params, static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats); -static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks); +static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks, + bool hasindex); static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr); static bool lazy_tid_reaped(ItemPointer itemptr, void *state); @@ -448,10 +489,6 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, Assert(params->index_cleanup != VACOPT_TERNARY_DEFAULT); Assert(params->truncate != VACOPT_TERNARY_DEFAULT); - /* not every AM requires these to be valid, but heap does */ - Assert(TransactionIdIsNormal(onerel->rd_rel->relfrozenxid)); - Assert(MultiXactIdIsValid(onerel->rd_rel->relminmxid)); - /* measure elapsed time iff autovacuum logging requires it */ if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0) { @@ -474,6 +511,13 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, vac_strategy = bstrategy; + RelFrozenXid = onerel->rd_rel->relfrozenxid; + RelMinMxid = onerel->rd_rel->relminmxid; + + /* not every AM requires these to be valid, but heap does */ + Assert(TransactionIdIsNormal(RelFrozenXid)); + Assert(MultiXactIdIsValid(RelMinMxid)); + vacuum_set_xid_limits(onerel, params->freeze_min_age, params->freeze_table_age, @@ -509,8 +553,6 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, /* Open all indexes of the relation */ vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); - vacrelstats->useindex = (nindexes > 0 && - params->index_cleanup == VACOPT_TERNARY_ENABLED); /* * Setup error traceback support for ereport(). The idea is to set up an @@ -740,6 +782,555 @@ vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats) (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid); } +/* + * Handle new page during lazy_scan_heap(). + * + * Caller must hold pin and buffer cleanup lock on buf. + * + * All-zeroes pages can be left over if either a backend extends the relation + * by a single page, but crashes before the newly initialized page has been + * written out, or when bulk-extending the relation (which creates a number of + * empty pages at the tail end of the relation, but enters them into the FSM). + * + * Note we do not enter the page into the visibilitymap. 
That has the downside
+ * that we repeatedly visit this page in subsequent vacuums, but otherwise
+ * we'll never discover the space on a promoted standby.  The harm of
+ * repeated checking ought to normally not be too bad - the space usually
+ * should be used at some point, otherwise there wouldn't be any regular
+ * vacuums.
+ *
+ * Make sure these pages are in the FSM, to ensure they can be reused.  Do
+ * that by testing if there's any space recorded for the page.  If not, enter
+ * it.  We do so after releasing the lock on the heap page, the FSM is
+ * approximate, after all.
+ */
+static void
+scan_new_page(Relation onerel, Buffer buf)
+{
+	BlockNumber blkno = BufferGetBlockNumber(buf);
+
+	if (GetRecordedFreeSpace(onerel, blkno) == 0)
+	{
+		Size		freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
+
+		UnlockReleaseBuffer(buf);
+		RecordPageWithFreeSpace(onerel, blkno, freespace);
+		return;
+	}
+
+	UnlockReleaseBuffer(buf);
+}
+
+/*
+ * Handle empty page during lazy_scan_heap().
+ *
+ * Caller must hold pin and buffer cleanup lock on buf, as well as a pin (but
+ * not a lock) on vmbuffer.
+ */
+static void
+scan_empty_page(Relation onerel, Buffer buf, Buffer vmbuffer,
+				LVRelStats *vacrelstats)
+{
+	Page		page = BufferGetPage(buf);
+	BlockNumber blkno = BufferGetBlockNumber(buf);
+	Size		freespace = PageGetHeapFreeSpace(page);
+
+	/*
+	 * Empty pages are always all-visible and all-frozen (note that the same
+	 * is currently not true for new pages, see scan_new_page()).
+	 */
+	if (!PageIsAllVisible(page))
+	{
+		START_CRIT_SECTION();
+
+		/* mark buffer dirty before writing a WAL record */
+		MarkBufferDirty(buf);
+
+		/*
+		 * It's possible that another backend has extended the heap,
+		 * initialized the page, and then failed to WAL-log the page due to an
+		 * ERROR.  Since heap extension is not WAL-logged, recovery might try
+		 * to replay our record setting the page all-visible and find that the
+		 * page isn't initialized, which will cause a PANIC.  To prevent that,
+		 * check whether the page has been previously WAL-logged, and if not,
+		 * do that now.
+		 */
+		if (RelationNeedsWAL(onerel) &&
+			PageGetLSN(page) == InvalidXLogRecPtr)
+			log_newpage_buffer(buf, true);
+
+		PageSetAllVisible(page);
+		visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
+						  vmbuffer, InvalidTransactionId,
+						  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
+		END_CRIT_SECTION();
+	}
+
+	UnlockReleaseBuffer(buf);
+	RecordPageWithFreeSpace(onerel, blkno, freespace);
+}
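To make the FSM interplay above concrete, here is a minimal standalone sketch (not part of the patch; the toy_* names and constants are hypothetical stand-ins for GetRecordedFreeSpace(), RecordPageWithFreeSpace(), BLCKSZ and SizeOfPageHeaderData):

    #include <stdio.h>

    #define TOY_BLCKSZ 8192          /* stand-in for BLCKSZ */
    #define TOY_PAGE_HEADER 24       /* stand-in for SizeOfPageHeaderData */
    #define TOY_NBLOCKS 16

    /* toy FSM: recorded free space per block; 0 means "nothing recorded" */
    static size_t toy_fsm[TOY_NBLOCKS];

    /* models scan_new_page(): enter an all-zeroes page into the FSM only if
     * it isn't there already */
    static void
    toy_scan_new_page(unsigned blkno)
    {
        if (toy_fsm[blkno] == 0)     /* models GetRecordedFreeSpace() == 0 */
            toy_fsm[blkno] = TOY_BLCKSZ - TOY_PAGE_HEADER;  /* models RecordPageWithFreeSpace() */
    }

    int
    main(void)
    {
        toy_fsm[3] = 4096;           /* block 3 already has recorded space */
        toy_scan_new_page(3);        /* no-op: space already recorded */
        toy_scan_new_page(7);        /* records 8168 bytes */
        printf("block 3: %zu, block 7: %zu\n", toy_fsm[3], toy_fsm[7]);
        return 0;
    }

Because the FSM is only approximate, re-recording already-known space would be wasted work, which is why the real code tests before entering.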
+
+/*
+ * scan_prune_page() -- lazy_scan_heap() pruning and freezing.
+ *
+ * Caller must hold pin and buffer cleanup lock on the buffer.
+ *
+ * There are still very rare cases where lazy_scan_heap() treats tuples that
+ * have storage left behind after pruning as DEAD.  That happens when
+ * heap_page_prune() could not prune tuples that were nevertheless deemed
+ * DEAD by its own HeapTupleSatisfiesVacuum() call.  This creates rare,
+ * hard-to-test cases.  It means that there is no very sharp distinction
+ * between DEAD tuples and tuples that are to be kept and be considered for
+ * freezing inside heap_prepare_freeze_tuple().  It also means that
+ * lazy_vacuum_page() has to be prepared to remove items with storage (tuples
+ * with tuple headers) that didn't get pruned, which creates a special case
+ * to handle recovery conflicts.
+ *
+ * An upcoming patch eliminates all of this complexity by simply restarting
+ * pruning in these very rare cases -- cases where a concurrent abort of an
+ * xact makes our HeapTupleSatisfiesVacuum() call disagree with what
+ * heap_page_prune() thought about the tuple only microseconds earlier.
+ *
+ * To prepare for that, the code is already structured to use a local
+ * per-page copy of the counters that caller accumulates.  We add our
+ * per-page counters to the per-VACUUM totals from caller last of all, to
+ * avoid double counting.
+ */
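The accumulate-locally-then-merge pattern the comment above describes can be sketched in standalone C (not part of the patch; toy_prune_page() is a hypothetical stand-in for a pruning attempt that may have to be retried):

    #include <stdio.h>

    /* models LVTempCounters */
    typedef struct ToyCounters
    {
        double num_tuples;
        double tups_vacuumed;
    } ToyCounters;

    /* stand-in for one pruning attempt; returns 0 on success, -1 on retry */
    static int
    toy_prune_page(ToyCounters *pc, int attempt)
    {
        pc->num_tuples = 0;          /* reset local counters on every attempt */
        pc->tups_vacuumed = 0;

        pc->num_tuples += 10;
        pc->tups_vacuumed += 2;

        return (attempt == 0) ? -1 : 0; /* pretend attempt 0 hits a concurrent abort */
    }

    int
    main(void)
    {
        ToyCounters totals = {0, 0};
        ToyCounters pc;
        int         attempt = 0;

        /* retry pruning until it succeeds */
        while (toy_prune_page(&pc, attempt) != 0)
            attempt++;

        /* merge into the per-VACUUM totals exactly once, after success */
        totals.num_tuples += pc.num_tuples;
        totals.tups_vacuumed += pc.tups_vacuumed;

        printf("%.0f tuples, %.0f vacuumed (counted once)\n",
               totals.num_tuples, totals.tups_vacuumed);
        return 0;
    }

Merging only after the final attempt is what makes a restart safe: counters from an abandoned attempt never reach the totals.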
+static void
+scan_prune_page(Relation onerel, Buffer buf,
+				LVRelStats *vacrelstats,
+				GlobalVisState *vistest, xl_heap_freeze_tuple *frozen,
+				LVTempCounters *c, LVPrunePageState *ps,
+				LVVisMapPageState *vms,
+				VacOptTernaryValue index_cleanup)
+{
+	BlockNumber blkno;
+	Page		page;
+	OffsetNumber offnum,
+				maxoff;
+	int			nfrozen,
+				ndead;
+	LVTempCounters pc;
+	OffsetNumber deaditems[MaxHeapTuplesPerPage];
+	bool		tupgone;
+
+	blkno = BufferGetBlockNumber(buf);
+	page = BufferGetPage(buf);
+
+	/* Initialize (or reset) page-level counters */
+	pc.num_tuples = 0;
+	pc.live_tuples = 0;
+	pc.tups_vacuumed = 0;
+	pc.nkeep = 0;
+	pc.nunused = 0;
+
+	/*
+	 * Prune all HOT-update chains in this page.
+	 *
+	 * We count tuples removed by the pruning step as removed by VACUUM
+	 * (existing LP_DEAD line pointers don't count).
+	 */
+	pc.tups_vacuumed = heap_page_prune(onerel, buf, vistest,
+									   InvalidTransactionId, 0, false,
+									   &vacrelstats->latestRemovedXid,
+									   &vacrelstats->offnum);
+
+	/*
+	 * Now scan the page to collect vacuumable items and check for tuples
+	 * requiring freezing.
+	 */
+	ps->hastup = false;
+	ps->has_dead_items = false;
+	ps->all_visible = true;
+	ps->all_frozen = true;
+	nfrozen = 0;
+	ndead = 0;
+	maxoff = PageGetMaxOffsetNumber(page);
+
+	/*
+	 * Note: If you change anything in the loop below, also look at
+	 * heap_page_is_all_visible to see if that needs to be changed.
+	 */
+	for (offnum = FirstOffsetNumber;
+		 offnum <= maxoff;
+		 offnum = OffsetNumberNext(offnum))
+	{
+		ItemId		itemid;
+		HeapTupleData tuple;
+
+		/*
+		 * Set the offset number so that we can display it along with any
+		 * error that occurred while processing this tuple.
+		 */
+		vacrelstats->offnum = offnum;
+		itemid = PageGetItemId(page, offnum);
+
+		/* Unused items require no processing, but we count 'em */
+		if (!ItemIdIsUsed(itemid))
+		{
+			pc.nunused += 1;
+			continue;
+		}
+
+		/* Redirect items mustn't be touched */
+		if (ItemIdIsRedirected(itemid))
+		{
+			ps->hastup = true;	/* this page won't be truncatable */
+			continue;
+		}
+
+		/*
+		 * LP_DEAD line pointers are to be vacuumed normally; but we don't
+		 * count them in tups_vacuumed, else we'd be double-counting (at least
+		 * in the common case where heap_page_prune() just freed up a non-HOT
+		 * tuple).
+		 *
+		 * Note also that the final tups_vacuumed value might be very low for
+		 * tables where opportunistic page pruning happens to occur very
+		 * frequently (via heap_page_prune_opt() calls that free up non-HOT
+		 * tuples).
+		 */
+		if (ItemIdIsDead(itemid))
+		{
+			deaditems[ndead++] = offnum;
+			ps->all_visible = false;
+			ps->has_dead_items = true;
+			continue;
+		}
+
+		Assert(ItemIdIsNormal(itemid));
+
+		ItemPointerSet(&(tuple.t_self), blkno, offnum);
+		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+		tuple.t_len = ItemIdGetLength(itemid);
+		tuple.t_tableOid = RelationGetRelid(onerel);
+
+		/* reset for this item -- an earlier item may have set it */
+		tupgone = false;
+
+		/*
+		 * The criteria for counting a tuple as live in this block need to
+		 * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
+		 * and ANALYZE may produce wildly different reltuples values, e.g.
+		 * when there are many recently-dead tuples.
+		 *
+		 * The logic here is a bit simpler than acquire_sample_rows(), as
+		 * VACUUM can't run inside a transaction block, which makes some cases
+		 * impossible (e.g. in-progress insert from the same transaction).
+		 */
+		switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
+		{
+			case HEAPTUPLE_DEAD:
+
+				/*
+				 * Ordinarily, DEAD tuples would have been removed by
+				 * heap_page_prune(), but it's possible that the tuple state
+				 * changed since heap_page_prune() looked.  In particular an
+				 * INSERT_IN_PROGRESS tuple could have changed to DEAD if the
+				 * inserter aborted.  So this cannot be considered an error
+				 * condition.
+				 *
+				 * If the tuple is HOT-updated then it must only be removed by
+				 * a prune operation; so we keep it just as if it were
+				 * RECENTLY_DEAD.  Also, if it's a heap-only tuple, we choose
+				 * to keep it, because it'll be a lot cheaper to get rid of it
+				 * in the next pruning pass than to treat it like an indexed
+				 * tuple.  Finally, if index cleanup is disabled, the second
+				 * heap pass will not execute, and the tuple will not get
+				 * removed, so we must treat it like any other dead tuple that
+				 * we choose to keep.
+				 *
+				 * If this were to happen for a tuple that actually needed to
+				 * be deleted, we'd be in trouble, because it'd possibly leave
+				 * a tuple below the relation's xmin horizon alive.
+				 * heap_prepare_freeze_tuple() is prepared to detect that case
+				 * and abort the transaction, preventing corruption.
+				 */
+				if (HeapTupleIsHotUpdated(&tuple) ||
+					HeapTupleIsHeapOnly(&tuple) ||
+					index_cleanup == VACOPT_TERNARY_DISABLED)
+					pc.nkeep += 1;
+				else
+					tupgone = true; /* we can delete the tuple */
+				ps->all_visible = false;
+				break;
+			case HEAPTUPLE_LIVE:
+
+				/*
+				 * Count it as live.  Not only is this natural, but it's also
+				 * what acquire_sample_rows() does.
+				 */
+				pc.live_tuples += 1;
+
+				/*
+				 * Is the tuple definitely visible to all transactions?
+				 *
+				 * NB: Like with per-tuple hint bits, we can't set the
+				 * PD_ALL_VISIBLE flag if the inserter committed
+				 * asynchronously.  See SetHintBits for more info.  Check that
+				 * the tuple is hinted xmin-committed because of that.
+				 */
+				if (ps->all_visible)
+				{
+					TransactionId xmin;
+
+					if (!HeapTupleHeaderXminCommitted(tuple.t_data))
+					{
+						ps->all_visible = false;
+						break;
+					}
+
+					/*
+					 * The inserter definitely committed.  But is it old enough
+					 * that everyone sees it as committed?
+					 */
+					xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+					if (!TransactionIdPrecedes(xmin, OldestXmin))
+					{
+						ps->all_visible = false;
+						break;
+					}
+
+					/* Track newest xmin on page. */
+					if (TransactionIdFollows(xmin, vms->visibility_cutoff_xid))
+						vms->visibility_cutoff_xid = xmin;
+				}
+				break;
+			case HEAPTUPLE_RECENTLY_DEAD:
+
+				/*
+				 * If tuple is recently deleted then we must not remove it
+				 * from relation.
+				 */
+				pc.nkeep += 1;
+				ps->all_visible = false;
+				break;
+			case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+				/*
+				 * This is an expected case during concurrent vacuum.
+				 *
+				 * We do not count these rows as live, because we expect the
+				 * inserting transaction to update the counters at commit, and
+				 * we assume that will happen only after we report our
+				 * results.  This assumption is a bit shaky, but it is what
+				 * acquire_sample_rows() does, so be consistent.
+				 */
+				ps->all_visible = false;
+				break;
+			case HEAPTUPLE_DELETE_IN_PROGRESS:
+				/* This is an expected case during concurrent vacuum */
+				ps->all_visible = false;
+
+				/*
+				 * Count such rows as live.  As above, we assume the deleting
+				 * transaction will commit and update the counters after we
+				 * report.
+				 */
+				pc.live_tuples += 1;
+				break;
+			default:
+				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+				break;
+		}
+
+		if (tupgone)
+		{
+			deaditems[ndead++] = offnum;
+			HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
+												   &vacrelstats->latestRemovedXid);
+			pc.tups_vacuumed += 1;
+			ps->has_dead_items = true;
+		}
+		else
+		{
+			bool		tuple_totally_frozen;
+
+			pc.num_tuples += 1;
+			ps->hastup = true;
+
+			/*
+			 * Each non-removable tuple must be checked to see if it needs
+			 * freezing
+			 */
+			if (heap_prepare_freeze_tuple(tuple.t_data,
+										  RelFrozenXid, RelMinMxid,
+										  FreezeLimit, MultiXactCutoff,
+										  &frozen[nfrozen],
+										  &tuple_totally_frozen))
+				frozen[nfrozen++].offset = offnum;
+
+			if (!tuple_totally_frozen)
+				ps->all_frozen = false;
+		}
+	}
+
+	/*
+	 * Success -- we're done pruning, and have determined which tuples are to
+	 * be recorded as dead in the local array.  We've also prepared the
+	 * details of which remaining tuples are to be frozen.
+	 *
+	 * First, clear the offset information now that we have processed all the
+	 * tuples on the page.
+	 */
+	vacrelstats->offnum = InvalidOffsetNumber;
+
+	/*
+	 * Next, add page-level counters to caller's counts
+	 */
+	c->num_tuples += pc.num_tuples;
+	c->live_tuples += pc.live_tuples;
+	c->tups_vacuumed += pc.tups_vacuumed;
+	c->nkeep += pc.nkeep;
+	c->nunused += pc.nunused;
+
+	/*
+	 * Now save the local dead items array to VACUUM's dead_tuples array.
+	 */
+	for (int i = 0; i < ndead; i++)
+	{
+		ItemPointerData itemptr;
+
+		ItemPointerSet(&itemptr, blkno, deaditems[i]);
+		lazy_record_dead_tuple(vacrelstats->dead_tuples, &itemptr);
+	}
+
+	/*
+	 * Finally, execute tuple freezing as planned.
+	 *
+	 * If we need to freeze any tuples we'll mark the buffer dirty, and write
+	 * a WAL record recording the changes.  We must log the changes to be
+	 * crash-safe against future truncation of CLOG.
+	 */
+	if (nfrozen > 0)
+	{
+		START_CRIT_SECTION();
+
+		MarkBufferDirty(buf);
+
+		/* execute collected freezes */
+		for (int i = 0; i < nfrozen; i++)
+		{
+			ItemId		itemid;
+			HeapTupleHeader htup;
+
+			itemid = PageGetItemId(page, frozen[i].offset);
+			htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+			heap_execute_freeze_tuple(htup, &frozen[i]);
+		}
+
+		/* Now WAL-log freezing if necessary */
+		if (RelationNeedsWAL(onerel))
+		{
+			XLogRecPtr	recptr;
+
+			recptr = log_heap_freeze(onerel, buf, FreezeLimit,
+									 frozen, nfrozen);
+			PageSetLSN(page, recptr);
+		}
+
+		END_CRIT_SECTION();
+	}
+}
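The all-visible/visibility-cutoff bookkeeping in the HEAPTUPLE_LIVE case above amounts to the following standalone sketch (not part of the patch; xid_precedes()/xid_follows() are toy stand-ins for TransactionIdPrecedes()/TransactionIdFollows(), ignoring the circular XID comparison the real functions perform):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool xid_precedes(uint32_t a, uint32_t b) { return a < b; }
    static bool xid_follows(uint32_t a, uint32_t b) { return a > b; }

    int
    main(void)
    {
        uint32_t    oldest_xmin = 1000;         /* models OldestXmin */
        uint32_t    xmins[] = {900, 950, 990};  /* committed xmins of live tuples */
        bool        all_visible = true;
        uint32_t    visibility_cutoff_xid = 0;  /* models InvalidTransactionId */

        for (int i = 0; i < 3; i++)
        {
            /* a tuple is visible to everyone only if xmin precedes OldestXmin */
            if (!xid_precedes(xmins[i], oldest_xmin))
            {
                all_visible = false;
                break;
            }
            /* remember the newest xmin: it becomes the VM snapshot cutoff */
            if (xid_follows(xmins[i], visibility_cutoff_xid))
                visibility_cutoff_xid = xmins[i];
        }

        printf("all_visible=%d cutoff=%u\n", all_visible, visibility_cutoff_xid);
        return 0;
    }

The cutoff XID is what later gets passed to visibilitymap_set(), so that recovery can resolve conflicts against standby snapshots.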
+
+/*
+ * Handle setting VM bit inside lazy_scan_heap(), after pruning and freezing.
+ */ +static void +scan_setvmbit_page(Relation onerel, Buffer buf, Buffer vmbuffer, + LVPrunePageState *ps, LVVisMapPageState *vms) +{ + Page page = BufferGetPage(buf); + BlockNumber blkno = BufferGetBlockNumber(buf); + + /* mark page all-visible, if appropriate */ + if (ps->all_visible && !vms->all_visible_according_to_vm) + { + uint8 flags = VISIBILITYMAP_ALL_VISIBLE; + + if (ps->all_frozen) + flags |= VISIBILITYMAP_ALL_FROZEN; + + /* + * It should never be the case that the visibility map page is set + * while the page-level bit is clear, but the reverse is allowed (if + * checksums are not enabled). Regardless, set both bits so that we + * get back in sync. + * + * NB: If the heap page is all-visible but the VM bit is not set, we + * don't need to dirty the heap page. However, if checksums are + * enabled, we do need to make sure that the heap page is dirtied + * before passing it to visibilitymap_set(), because it may be logged. + * Given that this situation should only happen in rare cases after a + * crash, it is not worth optimizing. + */ + PageSetAllVisible(page); + MarkBufferDirty(buf); + visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, + vmbuffer, vms->visibility_cutoff_xid, flags); + } + + /* + * The visibility map bit should never be set if the page-level bit is + * clear. However, it's possible that the bit got cleared after we + * checked it and before we took the buffer content lock, so we must + * recheck before jumping to the conclusion that something bad has + * happened. + */ + else if (vms->all_visible_according_to_vm && !PageIsAllVisible(page) && + VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) + { + elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", + RelationGetRelationName(onerel), blkno); + visibilitymap_clear(onerel, blkno, vmbuffer, + VISIBILITYMAP_VALID_BITS); + } + + /* + * It's possible for the value returned by + * GetOldestNonRemovableTransactionId() to move backwards, so it's not + * wrong for us to see tuples that appear to not be visible to everyone + * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value + * never moves backwards, but GetOldestNonRemovableTransactionId() is + * conservative and sometimes returns a value that's unnecessarily small, + * so if we see that contradiction it just means that the tuples that we + * think are not visible to everyone yet actually are, and the + * PD_ALL_VISIBLE flag is correct. + * + * There should never be dead tuples on a page with PD_ALL_VISIBLE set, + * however. + */ + else if (PageIsAllVisible(page) && ps->has_dead_items) + { + elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u", + RelationGetRelationName(onerel), blkno); + PageClearAllVisible(page); + MarkBufferDirty(buf); + visibilitymap_clear(onerel, blkno, vmbuffer, + VISIBILITYMAP_VALID_BITS); + } + + /* + * If the all-visible page is all-frozen but not marked as such yet, mark + * it as all-frozen. Note that all_frozen is only valid if all_visible is + * true, so we must check both. + */ + else if (vms->all_visible_according_to_vm && ps->all_visible && + ps->all_frozen && !VM_ALL_FROZEN(onerel, blkno, &vmbuffer)) + { + /* + * We can pass InvalidTransactionId as the cutoff XID here, because + * setting the all-frozen bit doesn't cause recovery conflicts. 
+ */ + visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, + vmbuffer, InvalidTransactionId, + VISIBILITYMAP_ALL_FROZEN); + } +} + /* * lazy_scan_heap() -- scan an open heap relation * @@ -748,9 +1339,9 @@ vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats) * page, and set commit status bits (see heap_page_prune). It also builds * lists of dead tuples and pages with free space, calculates statistics * on the number of live tuples in the heap, and marks pages as - * all-visible if appropriate. When done, or when we run low on space for - * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap - * to reclaim dead line pointers. + * all-visible if appropriate. When done, or when we run low on space + * for dead-tuple TIDs, invoke two_pass_strategy to vacuum indexes and + * mark dead line pointers for reuse via a second heap pass. * * If the table has at least two indexes, we execute both index vacuum * and index cleanup with parallel workers unless parallel vacuum is @@ -775,23 +1366,12 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, LVParallelState *lps = NULL; LVDeadTuples *dead_tuples; BlockNumber nblocks, - blkno; - HeapTupleData tuple; - TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid; - TransactionId relminmxid = onerel->rd_rel->relminmxid; - BlockNumber empty_pages, - vacuumed_pages, + blkno, + next_unskippable_block, next_fsm_block_to_vacuum; - double num_tuples, /* total number of nonremovable tuples */ - live_tuples, /* live tuples (reltuples estimate) */ - tups_vacuumed, /* tuples cleaned up by current vacuum */ - nkeep, /* dead-but-not-removable tuples */ - nunused; /* # existing unused line pointers */ IndexBulkDeleteResult **indstats; - int i; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; - BlockNumber next_unskippable_block; bool skipping_blocks; xl_heap_freeze_tuple *frozen; StringInfoData buf; @@ -802,6 +1382,11 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, }; int64 initprog_val[3]; GlobalVisState *vistest; + LVTempCounters c; + + /* Counters of # blocks in onerel: */ + BlockNumber empty_pages, + vacuumed_pages; pg_rusage_init(&ru0); @@ -817,18 +1402,24 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vacrelstats->relname))); empty_pages = vacuumed_pages = 0; - next_fsm_block_to_vacuum = (BlockNumber) 0; - num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0; + + /* Initialize counters */ + c.num_tuples = 0; + c.live_tuples = 0; + c.tups_vacuumed = 0; + c.nkeep = 0; + c.nunused = 0; indstats = (IndexBulkDeleteResult **) palloc0(nindexes * sizeof(IndexBulkDeleteResult *)); nblocks = RelationGetNumberOfBlocks(onerel); + next_unskippable_block = 0; + next_fsm_block_to_vacuum = 0; vacrelstats->rel_pages = nblocks; vacrelstats->scanned_pages = 0; vacrelstats->tupcount_pages = 0; vacrelstats->nonempty_pages = 0; - vacrelstats->latestRemovedXid = InvalidTransactionId; vistest = GlobalVisTestFor(onerel); @@ -837,7 +1428,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * be used for an index, so we invoke parallelism only if there are at * least two indexes on a table. */ - if (params->nworkers >= 0 && vacrelstats->useindex && nindexes > 1) + if (params->nworkers >= 0 && nindexes > 1) { /* * Since parallel workers cannot access data in temporary tables, we @@ -865,7 +1456,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * initialized. 
*/ if (!ParallelVacuumIsActive(lps)) - lazy_space_alloc(vacrelstats, nblocks); + lazy_space_alloc(vacrelstats, nblocks, nindexes > 0); dead_tuples = vacrelstats->dead_tuples; frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage); @@ -920,7 +1511,6 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * the last page. This is worth avoiding mainly because such a lock must * be replayed on any hot standby, where it can be disruptive. */ - next_unskippable_block = 0; if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0) { while (next_unskippable_block < nblocks) @@ -953,20 +1543,22 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, { Buffer buf; Page page; - OffsetNumber offnum, - maxoff; - bool tupgone, - hastup; - int prev_dead_count; - int nfrozen; + LVVisMapPageState vms; + LVPrunePageState ps; + bool savefreespace; Size freespace; - bool all_visible_according_to_vm = false; - bool all_visible; - bool all_frozen = true; /* provided all_visible is also true */ - bool has_dead_items; /* includes existing LP_DEAD items */ - TransactionId visibility_cutoff_xid = InvalidTransactionId; - /* see note above about forcing scanning of last page */ + /* Initialize vm state for block: */ + vms.all_visible_according_to_vm = false; + vms.visibility_cutoff_xid = InvalidTransactionId; + + /* Note: Can't touch ps until we reach scan_prune_page() */ + + /* + * Step 1 for block: Consider need to skip blocks. + * + * See note above about forcing scanning of last page. + */ #define FORCE_CHECK_PAGE() \ (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats)) @@ -1018,7 +1610,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * that it's not all-frozen, so it might still be all-visible. */ if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) - all_visible_according_to_vm = true; + vms.all_visible_according_to_vm = true; } else { @@ -1045,12 +1637,15 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vacrelstats->frozenskipped_pages++; continue; } - all_visible_according_to_vm = true; + vms.all_visible_according_to_vm = true; } vacuum_delay_point(); /* + * Step 2 for block: Consider if we definitely have enough space to + * process TIDs on page already. + * * If we are close to overrunning the available space for dead-tuple * TIDs, pause and do a cycle of vacuuming before we tackle this page. */ @@ -1069,23 +1664,16 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vmbuffer = InvalidBuffer; } - /* Work on all the indexes, then the heap */ - lazy_vacuum_all_indexes(onerel, Irel, indstats, - vacrelstats, lps, nindexes); - - /* Remove tuples from heap */ - lazy_vacuum_heap(onerel, vacrelstats); - - /* - * Forget the now-vacuumed tuples, and press on, but be careful - * not to reset latestRemovedXid since we want that value to be - * valid. - */ - dead_tuples->num_tuples = 0; + /* Remove the collected garbage tuples from table and indexes */ + two_pass_strategy(onerel, vacrelstats, Irel, indstats, nindexes, + lps, params->index_cleanup); /* * Vacuum the Free Space Map to make newly-freed space visible on * upper-level FSM pages. Note we have not yet processed blkno. + * Even if we skipped heap vacuum, FSM vacuuming could be + * worthwhile since we could have updated the freespace of empty + * pages. 
 */
 			FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
 			next_fsm_block_to_vacuum = blkno;
@@ -1096,22 +1684,29 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		}
 
 		/*
+		 * Step 3 for block: Set up visibility map page as needed.
+		 *
 		 * Pin the visibility map page in case we need to mark the page
 		 * all-visible.  In most cases this will be very cheap, because we'll
 		 * already have the correct page pinned anyway.  However, it's
 		 * possible that (a) next_unskippable_block is covered by a different
 		 * VM page than the current block or (b) we released our pin and did a
 		 * cycle of index vacuuming.
-		 *
 		 */
 		visibilitymap_pin(onerel, blkno, &vmbuffer);
 
 		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
 								 RBM_NORMAL, vac_strategy);
 
-		/* We need buffer cleanup lock so that we can prune HOT chains. */
+		/*
+		 * Step 4 for block: Acquire super-exclusive lock for pruning.
+		 *
+		 * We need buffer cleanup lock so that we can prune HOT chains.
+		 */
 		if (!ConditionalLockBufferForCleanup(buf))
 		{
+			bool		hastup;
+
 			/*
 			 * If we're not performing an aggressive scan to guard against XID
 			 * wraparound, and we don't want to forcibly check the page, then
@@ -1168,6 +1763,12 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			/* drop through to normal processing */
 		}
 
+		/*
+		 * Step 5 for block: Handle empty/new pages.
+		 *
+		 * By here we have a super-exclusive lock, and it's clear that this
+		 * page is one that we consider scanned.
+		 */
 		vacrelstats->scanned_pages++;
 		vacrelstats->tupcount_pages++;
 
@@ -1175,399 +1776,84 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 
 		page = BufferGetPage(buf);
 
 		if (PageIsNew(page))
 		{
-			/*
-			 * All-zeroes pages can be left over if either a backend extends
-			 * the relation by a single page, but crashes before the newly
-			 * initialized page has been written out, or when bulk-extending
-			 * the relation (which creates a number of empty pages at the tail
-			 * end of the relation, but enters them into the FSM).
-			 *
-			 * Note we do not enter the page into the visibilitymap. That has
-			 * the downside that we repeatedly visit this page in subsequent
-			 * vacuums, but otherwise we'll never not discover the space on a
-			 * promoted standby. The harm of repeated checking ought to
-			 * normally not be too bad - the space usually should be used at
-			 * some point, otherwise there wouldn't be any regular vacuums.
-			 *
-			 * Make sure these pages are in the FSM, to ensure they can be
-			 * reused. Do that by testing if there's any space recorded for
-			 * the page. If not, enter it. We do so after releasing the lock
-			 * on the heap page, the FSM is approximate, after all.
-			 */
-			UnlockReleaseBuffer(buf);
-			empty_pages++;
-
-			if (GetRecordedFreeSpace(onerel, blkno) == 0)
-			{
-				Size		freespace;
-
-				freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
-				RecordPageWithFreeSpace(onerel, blkno, freespace);
-			}
+			/* Count new pages in empty_pages, as before this refactoring */
+			empty_pages++;
+
+			/* Releases lock on buf for us: */
+			scan_new_page(onerel, buf);
 			continue;
 		}
-
-		if (PageIsEmpty(page))
+		else if (PageIsEmpty(page))
 		{
 			empty_pages++;
-			freespace = PageGetHeapFreeSpace(page);
-
-			/*
-			 * Empty pages are always all-visible and all-frozen (note that
-			 * the same is currently not true for new pages, see above).
-			 */
-			if (!PageIsAllVisible(page))
-			{
-				START_CRIT_SECTION();
-
-				/* mark buffer dirty before writing a WAL record */
-				MarkBufferDirty(buf);
-
-				/*
-				 * It's possible that another backend has extended the heap,
-				 * initialized the page, and then failed to WAL-log the page
-				 * due to an ERROR.
Since heap extension is not WAL-logged, - * recovery might try to replay our record setting the page - * all-visible and find that the page isn't initialized, which - * will cause a PANIC. To prevent that, check whether the - * page has been previously WAL-logged, and if not, do that - * now. - */ - if (RelationNeedsWAL(onerel) && - PageGetLSN(page) == InvalidXLogRecPtr) - log_newpage_buffer(buf, true); - - PageSetAllVisible(page); - visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, - vmbuffer, InvalidTransactionId, - VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN); - END_CRIT_SECTION(); - } - - UnlockReleaseBuffer(buf); - RecordPageWithFreeSpace(onerel, blkno, freespace); + /* Releases lock on buf for us (though keeps vmbuffer pin): */ + scan_empty_page(onerel, buf, vmbuffer, vacrelstats); continue; } /* - * Prune all HOT-update chains in this page. + * Step 6 for block: Do pruning. * - * We count tuples removed by the pruning step as removed by VACUUM - * (existing LP_DEAD line pointers don't count). + * Also accumulates details of remaining LP_DEAD line pointers on page + * in dead tuple list. This includes LP_DEAD line pointers that we + * ourselves just pruned, as well as existing LP_DEAD line pointers + * pruned earlier. + * + * Also handles tuple freezing -- considers freezing XIDs from all + * tuple headers left behind following pruning. */ - tups_vacuumed += heap_page_prune(onerel, buf, vistest, - InvalidTransactionId, 0, false, - &vacrelstats->latestRemovedXid, - &vacrelstats->offnum); + scan_prune_page(onerel, buf, vacrelstats, vistest, frozen, + &c, &ps, &vms, params->index_cleanup); /* - * Now scan the page to collect vacuumable items and check for tuples - * requiring freezing. + * Step 7 for block: Set up details for saving free space in FSM at + * end of loop. (Also performs extra single pass strategy steps in + * "nindexes == 0" case.) + * + * If we have any LP_DEAD items on this page (i.e. any new dead_tuples + * entries compared to just before scan_prune_page()) then the page + * will be visited again by lazy_vacuum_heap(), which will compute and + * record its post-compaction free space. If not, then we're done + * with this page, so remember its free space as-is. */ - all_visible = true; - has_dead_items = false; - nfrozen = 0; - hastup = false; - prev_dead_count = dead_tuples->num_tuples; - maxoff = PageGetMaxOffsetNumber(page); - - /* - * Note: If you change anything in the loop below, also look at - * heap_page_is_all_visible to see if that needs to be changed. - */ - for (offnum = FirstOffsetNumber; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) + savefreespace = false; + freespace = 0; + if (nindexes > 0 && ps.has_dead_items && + params->index_cleanup != VACOPT_TERNARY_DISABLED) { - ItemId itemid; - - /* - * Set the offset number so that we can display it along with any - * error that occurred while processing this tuple. 
- */ - vacrelstats->offnum = offnum; - itemid = PageGetItemId(page, offnum); - - /* Unused items require no processing, but we count 'em */ - if (!ItemIdIsUsed(itemid)) - { - nunused += 1; - continue; - } - - /* Redirect items mustn't be touched */ - if (ItemIdIsRedirected(itemid)) - { - hastup = true; /* this page won't be truncatable */ - continue; - } - - ItemPointerSet(&(tuple.t_self), blkno, offnum); - - /* - * LP_DEAD line pointers are to be vacuumed normally; but we don't - * count them in tups_vacuumed, else we'd be double-counting (at - * least in the common case where heap_page_prune() just freed up - * a non-HOT tuple). Note also that the final tups_vacuumed value - * might be very low for tables where opportunistic page pruning - * happens to occur very frequently (via heap_page_prune_opt() - * calls that free up non-HOT tuples). - */ - if (ItemIdIsDead(itemid)) - { - lazy_record_dead_tuple(dead_tuples, &(tuple.t_self)); - all_visible = false; - has_dead_items = true; - continue; - } - - Assert(ItemIdIsNormal(itemid)); - - tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); - tuple.t_len = ItemIdGetLength(itemid); - tuple.t_tableOid = RelationGetRelid(onerel); - - tupgone = false; - - /* - * The criteria for counting a tuple as live in this block need to - * match what analyze.c's acquire_sample_rows() does, otherwise - * VACUUM and ANALYZE may produce wildly different reltuples - * values, e.g. when there are many recently-dead tuples. - * - * The logic here is a bit simpler than acquire_sample_rows(), as - * VACUUM can't run inside a transaction block, which makes some - * cases impossible (e.g. in-progress insert from the same - * transaction). - */ - switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf)) - { - case HEAPTUPLE_DEAD: - - /* - * Ordinarily, DEAD tuples would have been removed by - * heap_page_prune(), but it's possible that the tuple - * state changed since heap_page_prune() looked. In - * particular an INSERT_IN_PROGRESS tuple could have - * changed to DEAD if the inserter aborted. So this - * cannot be considered an error condition. - * - * If the tuple is HOT-updated then it must only be - * removed by a prune operation; so we keep it just as if - * it were RECENTLY_DEAD. Also, if it's a heap-only - * tuple, we choose to keep it, because it'll be a lot - * cheaper to get rid of it in the next pruning pass than - * to treat it like an indexed tuple. Finally, if index - * cleanup is disabled, the second heap pass will not - * execute, and the tuple will not get removed, so we must - * treat it like any other dead tuple that we choose to - * keep. - * - * If this were to happen for a tuple that actually needed - * to be deleted, we'd be in trouble, because it'd - * possibly leave a tuple below the relation's xmin - * horizon alive. heap_prepare_freeze_tuple() is prepared - * to detect that case and abort the transaction, - * preventing corruption. - */ - if (HeapTupleIsHotUpdated(&tuple) || - HeapTupleIsHeapOnly(&tuple) || - params->index_cleanup == VACOPT_TERNARY_DISABLED) - nkeep += 1; - else - tupgone = true; /* we can delete the tuple */ - all_visible = false; - break; - case HEAPTUPLE_LIVE: - - /* - * Count it as live. Not only is this natural, but it's - * also what acquire_sample_rows() does. - */ - live_tuples += 1; - - /* - * Is the tuple definitely visible to all transactions? - * - * NB: Like with per-tuple hint bits, we can't set the - * PD_ALL_VISIBLE flag if the inserter committed - * asynchronously. See SetHintBits for more info. 
Check - * that the tuple is hinted xmin-committed because of - * that. - */ - if (all_visible) - { - TransactionId xmin; - - if (!HeapTupleHeaderXminCommitted(tuple.t_data)) - { - all_visible = false; - break; - } - - /* - * The inserter definitely committed. But is it old - * enough that everyone sees it as committed? - */ - xmin = HeapTupleHeaderGetXmin(tuple.t_data); - if (!TransactionIdPrecedes(xmin, OldestXmin)) - { - all_visible = false; - break; - } - - /* Track newest xmin on page. */ - if (TransactionIdFollows(xmin, visibility_cutoff_xid)) - visibility_cutoff_xid = xmin; - } - break; - case HEAPTUPLE_RECENTLY_DEAD: - - /* - * If tuple is recently deleted then we must not remove it - * from relation. - */ - nkeep += 1; - all_visible = false; - break; - case HEAPTUPLE_INSERT_IN_PROGRESS: - - /* - * This is an expected case during concurrent vacuum. - * - * We do not count these rows as live, because we expect - * the inserting transaction to update the counters at - * commit, and we assume that will happen only after we - * report our results. This assumption is a bit shaky, - * but it is what acquire_sample_rows() does, so be - * consistent. - */ - all_visible = false; - break; - case HEAPTUPLE_DELETE_IN_PROGRESS: - /* This is an expected case during concurrent vacuum */ - all_visible = false; - - /* - * Count such rows as live. As above, we assume the - * deleting transaction will commit and update the - * counters after we report. - */ - live_tuples += 1; - break; - default: - elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); - break; - } - - if (tupgone) - { - lazy_record_dead_tuple(dead_tuples, &(tuple.t_self)); - HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data, - &vacrelstats->latestRemovedXid); - tups_vacuumed += 1; - has_dead_items = true; - } - else - { - bool tuple_totally_frozen; - - num_tuples += 1; - hastup = true; - - /* - * Each non-removable tuple must be checked to see if it needs - * freezing. Note we already have exclusive buffer lock. - */ - if (heap_prepare_freeze_tuple(tuple.t_data, - relfrozenxid, relminmxid, - FreezeLimit, MultiXactCutoff, - &frozen[nfrozen], - &tuple_totally_frozen)) - frozen[nfrozen++].offset = offnum; - - if (!tuple_totally_frozen) - all_frozen = false; - } - } /* scan along page */ - - /* - * Clear the offset information once we have processed all the tuples - * on the page. - */ - vacrelstats->offnum = InvalidOffsetNumber; - - /* - * If we froze any tuples, mark the buffer dirty, and write a WAL - * record recording the changes. We must log the changes to be - * crash-safe against future truncation of CLOG. - */ - if (nfrozen > 0) + /* Wait until lazy_vacuum_heap() to save free space */ + } + else { - START_CRIT_SECTION(); - - MarkBufferDirty(buf); - - /* execute collected freezes */ - for (i = 0; i < nfrozen; i++) - { - ItemId itemid; - HeapTupleHeader htup; - - itemid = PageGetItemId(page, frozen[i].offset); - htup = (HeapTupleHeader) PageGetItem(page, itemid); - - heap_execute_freeze_tuple(htup, &frozen[i]); - } - - /* Now WAL-log freezing if necessary */ - if (RelationNeedsWAL(onerel)) - { - XLogRecPtr recptr; - - recptr = log_heap_freeze(onerel, buf, FreezeLimit, - frozen, nfrozen); - PageSetLSN(page, recptr); - } - - END_CRIT_SECTION(); + /* + * Will never reach lazy_vacuum_heap() (or will, but won't reach + * this specific page) + */ + savefreespace = true; + freespace = PageGetHeapFreeSpace(page); } - /* - * If there are no indexes we can vacuum the page right now instead of - * doing a second scan. 
Also we don't do that but forget dead tuples
-		 * when index cleanup is disabled.
-		 */
-		if (!vacrelstats->useindex && dead_tuples->num_tuples > 0)
+		if (nindexes == 0 && ps.has_dead_items)
 		{
-			if (nindexes == 0)
-			{
-				/* Remove tuples from heap if the table has no index */
-				lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
-				vacuumed_pages++;
-				has_dead_items = false;
-			}
-			else
-			{
-				/*
-				 * Here, we have indexes but index cleanup is disabled.
-				 * Instead of vacuuming the dead tuples on the heap, we just
-				 * forget them.
-				 *
-				 * Note that vacrelstats->dead_tuples could have tuples which
-				 * became dead after HOT-pruning but are not marked dead yet.
-				 * We do not process them because it's a very rare condition,
-				 * and the next vacuum will process them anyway.
-				 */
-				Assert(params->index_cleanup == VACOPT_TERNARY_DISABLED);
-			}
+			Assert(dead_tuples->num_tuples > 0);
 
 			/*
-			 * Forget the now-vacuumed tuples, and press on, but be careful
-			 * not to reset latestRemovedXid since we want that value to be
-			 * valid.
+			 * One-pass strategy (no indexes) case.
+			 *
+			 * Mark LP_DEAD item pointers LP_UNUSED now, since there won't be
+			 * a second pass in lazy_vacuum_heap().
 			 */
+			lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
+			vacuumed_pages++;
+
+			/* This won't have changed: */
+			Assert(savefreespace && freespace == PageGetHeapFreeSpace(page));
+
+			/*
+			 * Make sure scan_setvmbit_page() won't skip setting the VM bit
+			 * just because of now-vacuumed LP_DEAD items:
+			 */
+			ps.has_dead_items = false;
+
+			/* Forget the now-vacuumed tuples */
 			dead_tuples->num_tuples = 0;
 
 			/*
@@ -1584,109 +1870,27 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			}
 		}
 
-		freespace = PageGetHeapFreeSpace(page);
-
-		/* mark page all-visible, if appropriate */
-		if (all_visible && !all_visible_according_to_vm)
-		{
-			uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
-
-			if (all_frozen)
-				flags |= VISIBILITYMAP_ALL_FROZEN;
-
-			/*
-			 * It should never be the case that the visibility map page is set
-			 * while the page-level bit is clear, but the reverse is allowed
-			 * (if checksums are not enabled).  Regardless, set both bits so
-			 * that we get back in sync.
-			 *
-			 * NB: If the heap page is all-visible but the VM bit is not set,
-			 * we don't need to dirty the heap page.  However, if checksums
-			 * are enabled, we do need to make sure that the heap page is
-			 * dirtied before passing it to visibilitymap_set(), because it
-			 * may be logged.  Given that this situation should only happen in
-			 * rare cases after a crash, it is not worth optimizing.
-			 */
-			PageSetAllVisible(page);
-			MarkBufferDirty(buf);
-			visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
-							  vmbuffer, visibility_cutoff_xid, flags);
-		}
+		/* One-pass strategy had better have no dead tuples by now: */
+		Assert(nindexes > 0 || dead_tuples->num_tuples == 0);
 
 		/*
-		 * As of PostgreSQL 9.2, the visibility map bit should never be set if
-		 * the page-level bit is clear.  However, it's possible that the bit
-		 * got cleared after we checked it and before we took the buffer
-		 * content lock, so we must recheck before jumping to the conclusion
-		 * that something bad has happened.
+ * Step 8 for block: Handle setting visibility map bit as appropriate */ - else if (all_visible_according_to_vm && !PageIsAllVisible(page) - && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) - { - elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", - vacrelstats->relname, blkno); - visibilitymap_clear(onerel, blkno, vmbuffer, - VISIBILITYMAP_VALID_BITS); - } + scan_setvmbit_page(onerel, buf, vmbuffer, &ps, &vms); /* - * It's possible for the value returned by - * GetOldestNonRemovableTransactionId() to move backwards, so it's not - * wrong for us to see tuples that appear to not be visible to - * everyone yet, while PD_ALL_VISIBLE is already set. The real safe - * xmin value never moves backwards, but - * GetOldestNonRemovableTransactionId() is conservative and sometimes - * returns a value that's unnecessarily small, so if we see that - * contradiction it just means that the tuples that we think are not - * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag - * is correct. - * - * There should never be dead tuples on a page with PD_ALL_VISIBLE - * set, however. + * Step 9 for block: drop super-exclusive lock, finalize page by + * recording its free space in the FSM as appropriate */ - else if (PageIsAllVisible(page) && has_dead_items) - { - elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u", - vacrelstats->relname, blkno); - PageClearAllVisible(page); - MarkBufferDirty(buf); - visibilitymap_clear(onerel, blkno, vmbuffer, - VISIBILITYMAP_VALID_BITS); - } - - /* - * If the all-visible page is all-frozen but not marked as such yet, - * mark it as all-frozen. Note that all_frozen is only valid if - * all_visible is true, so we must check both. - */ - else if (all_visible_according_to_vm && all_visible && all_frozen && - !VM_ALL_FROZEN(onerel, blkno, &vmbuffer)) - { - /* - * We can pass InvalidTransactionId as the cutoff XID here, - * because setting the all-frozen bit doesn't cause recovery - * conflicts. - */ - visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, - vmbuffer, InvalidTransactionId, - VISIBILITYMAP_ALL_FROZEN); - } UnlockReleaseBuffer(buf); - /* Remember the location of the last page with nonremovable tuples */ - if (hastup) + if (ps.hastup) vacrelstats->nonempty_pages = blkno + 1; - - /* - * If we remembered any tuples for deletion, then the page will be - * visited again by lazy_vacuum_heap, which will compute and record - * its post-compaction free space. If not, then we're done with this - * page, so remember its free space as-is. (This path will always be - * taken if there are no indexes.) - */ - if (dead_tuples->num_tuples == prev_dead_count) + if (savefreespace) RecordPageWithFreeSpace(onerel, blkno, freespace); + + /* Finished all steps for block by here (at the latest) */ } /* report that everything is scanned and vacuumed */ @@ -1698,14 +1902,14 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, pfree(frozen); /* save stats for use later */ - vacrelstats->tuples_deleted = tups_vacuumed; - vacrelstats->new_dead_tuples = nkeep; + vacrelstats->tuples_deleted = c.tups_vacuumed; + vacrelstats->new_dead_tuples = c.nkeep; /* now we can compute the new value for pg_class.reltuples */ vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel, nblocks, vacrelstats->tupcount_pages, - live_tuples); + c.live_tuples); /* * Also compute the total number of surviving heap entries. 
In the @@ -1724,20 +1928,14 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, } /* If any tuples need to be deleted, perform final vacuum cycle */ - /* XXX put a threshold on min number of tuples here? */ + Assert(nindexes > 0 || dead_tuples->num_tuples == 0); if (dead_tuples->num_tuples > 0) - { - /* Work on all the indexes, and then the heap */ - lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats, - lps, nindexes); - - /* Remove tuples from heap */ - lazy_vacuum_heap(onerel, vacrelstats); - } + two_pass_strategy(onerel, vacrelstats, Irel, indstats, nindexes, + lps, params->index_cleanup); /* * Vacuum the remainder of the Free Space Map. We must do this whether or - * not there were indexes. + * not there were indexes, and whether or not we skipped index vacuuming. */ if (blkno > next_fsm_block_to_vacuum) FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno); @@ -1745,8 +1943,13 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* report all blocks vacuumed */ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); - /* Do post-vacuum cleanup */ - if (vacrelstats->useindex) + /* + * Do post-vacuum cleanup. + * + * Note that post-vacuum cleanup does not take place with + * INDEX_CLEANUP=OFF. + */ + if (nindexes > 0 && params->index_cleanup != VACOPT_TERNARY_DISABLED) lazy_cleanup_all_indexes(Irel, indstats, vacrelstats, lps, nindexes); /* @@ -1756,23 +1959,29 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, if (ParallelVacuumIsActive(lps)) end_parallel_vacuum(indstats, lps, nindexes); - /* Update index statistics */ - if (vacrelstats->useindex) + /* + * Update index statistics. + * + * Note that updating the statistics does not take place with + * INDEX_CLEANUP=OFF. + */ + if (nindexes > 0 && params->index_cleanup != VACOPT_TERNARY_DISABLED) update_index_statistics(Irel, indstats, nindexes); - /* If no indexes, make log report that lazy_vacuum_heap would've made */ - if (vacuumed_pages) + /* If no indexes, make log report that two_pass_strategy() would've made */ + Assert(nindexes == 0 || vacuumed_pages == 0); + if (nindexes == 0) ereport(elevel, (errmsg("\"%s\": removed %.0f row versions in %u pages", vacrelstats->relname, - tups_vacuumed, vacuumed_pages))); + vacrelstats->tuples_deleted, vacuumed_pages))); initStringInfo(&buf); appendStringInfo(&buf, _("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"), - nkeep, OldestXmin); + c.nkeep, OldestXmin); appendStringInfo(&buf, _("There were %.0f unused item identifiers.\n"), - nunused); + c.nunused); appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ", "Skipped %u pages due to buffer pins, ", vacrelstats->pinskipped_pages), @@ -1788,18 +1997,76 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0)); ereport(elevel, - (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages", + (errmsg("\"%s\": newly pruned %.0f items, found %.0f nonremovable items in %u out of %u pages", vacrelstats->relname, - tups_vacuumed, num_tuples, + c.tups_vacuumed, c.num_tuples, vacrelstats->scanned_pages, nblocks), errdetail_internal("%s", buf.data))); pfree(buf.data); } /* - * lazy_vacuum_all_indexes() -- vacuum all indexes of relation. + * Remove the collected garbage tuples from the table and its indexes. * - * We process the indexes serially unless we are doing parallel vacuum. 
+ * We may be required to skip index vacuuming by the INDEX_CLEANUP reloption.
+ */
+static void
+two_pass_strategy(Relation onerel, LVRelStats *vacrelstats,
+				  Relation *Irel, IndexBulkDeleteResult **indstats, int nindexes,
+				  LVParallelState *lps, VacOptTernaryValue index_cleanup)
+{
+	bool		skipping;
+
+	/* Should not end up here with no indexes */
+	Assert(nindexes > 0);
+	Assert(!IsParallelWorker());
+
+	/* Check whether or not to do index vacuum and heap vacuum */
+	if (index_cleanup == VACOPT_TERNARY_DISABLED)
+		skipping = true;
+	else
+		skipping = false;
+
+	if (!skipping)
+	{
+		/* Okay, we're going to do index vacuuming */
+		lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats, lps,
+								nindexes);
+
+		/* Remove tuples from heap */
+		lazy_vacuum_heap(onerel, vacrelstats);
+	}
+	else
+	{
+		/*
+		 * Skipped index vacuuming.  Make the log report that
+		 * lazy_vacuum_heap() would've made.
+		 *
+		 * Don't report tups_vacuumed here because it will be zero in the
+		 * common case where there are no newly pruned LP_DEAD items for this
+		 * VACUUM.  This is roughly consistent with lazy_vacuum_heap(), and
+		 * with the similar "nindexes == 0" ereport() at the end of
+		 * lazy_scan_heap().  Note, however, that dead_tuples->num_tuples is
+		 * the number of LP_DEAD items collected (which could have been left
+		 * behind by us or by an earlier pruning operation), not the number
+		 * of tuples removed by this VACUUM.
+		 */
+		ereport(elevel,
+				(errmsg("\"%s\": INDEX_CLEANUP off forced VACUUM to leave behind %d pruned items",
+						vacrelstats->relname,
+						vacrelstats->dead_tuples->num_tuples)));
+	}
+
+	/*
+	 * Forget the now-vacuumed tuples, and press on, but be careful not to
+	 * reset latestRemovedXid since we want that value to be valid.
+	 */
+	vacrelstats->dead_tuples->num_tuples = 0;
+}
+
+/*
+ * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
+ *
+ * Should only be called through two_pass_strategy()
+ */
 static void
 lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
@@ -1848,17 +2115,14 @@ lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
 			vacrelstats->num_index_scans);
 }
 
-
 /*
- * lazy_vacuum_heap() -- second pass over the heap
+ * lazy_vacuum_heap() -- second pass over the heap for the two-pass strategy
  *
  * This routine marks dead tuples as unused and compacts out free
  * space on their pages.  Pages not having dead tuples recorded from
  * lazy_scan_heap are not visited at all.
  *
- * Note: the reason for doing this as a second pass is we cannot remove
- * the tuples until we've removed their index entries, and we want to
- * process index entry removal in batches as large as possible.
+ * Should only be called through two_pass_strategy()
  */
 static void
 lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
@@ -2867,14 +3131,14 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
  * Return the maximum number of dead tuples we can record.
  */
 static long
-compute_max_dead_tuples(BlockNumber relblocks, bool useindex)
+compute_max_dead_tuples(BlockNumber relblocks, bool hasindex)
 {
 	long		maxtuples;
 	int			vac_work_mem = IsAutoVacuumWorkerProcess() &&
 	autovacuum_work_mem != -1 ?
 	autovacuum_work_mem : maintenance_work_mem;
 
-	if (useindex)
+	if (hasindex)
 	{
 		maxtuples = MAXDEADTUPLES(vac_work_mem * 1024L);
 		maxtuples = Min(maxtuples, INT_MAX);
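For reference, the memory-based cap that compute_max_dead_tuples() applies works out roughly as in this standalone sketch (not part of the patch; simplified to ignore the INT_MAX clamp and the MaxHeapTuplesPerPage floor that the real function also applies):

    #include <stdio.h>

    /*
     * The real MAXDEADTUPLES macro divides the usable memory by
     * sizeof(ItemPointerData) (6 bytes) after subtracting the array header;
     * 291 is MaxHeapTuplesPerPage for 8kB pages.
     */
    #define TOY_ITEMPOINTER_SIZE 6
    #define TOY_MAX_HEAP_TUPLES_PER_PAGE 291

    static long
    toy_compute_max_dead_tuples(unsigned long relblocks, int hasindex,
                                unsigned long work_mem_kb)
    {
        long        maxtuples;

        if (hasindex)
        {
            /* cap by memory, but no more TIDs than the table can hold */
            maxtuples = (long) (work_mem_kb * 1024 / TOY_ITEMPOINTER_SIZE);
            if (maxtuples > (long) relblocks * TOY_MAX_HEAP_TUPLES_PER_PAGE)
                maxtuples = (long) relblocks * TOY_MAX_HEAP_TUPLES_PER_PAGE;
        }
        else
        {
            /* one-pass strategy only ever holds one page's worth of TIDs */
            maxtuples = TOY_MAX_HEAP_TUPLES_PER_PAGE;
        }
        return maxtuples;
    }

    int
    main(void)
    {
        printf("with index: %ld TIDs\n", toy_compute_max_dead_tuples(1000, 1, 65536));
        printf("no index:   %ld TIDs\n", toy_compute_max_dead_tuples(1000, 0, 65536));
        return 0;
    }

This is why the hasindex flag matters: without indexes there is no batching benefit, so almost no dead-TID memory is needed.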
@@ -2899,12 +3163,12 @@
  * See the comments at the head of this file for rationale.
  */
 static void
-lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
+lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks, bool hasindex)
 {
 	LVDeadTuples *dead_tuples = NULL;
 	long		maxtuples;
 
-	maxtuples = compute_max_dead_tuples(relblocks, vacrelstats->useindex);
+	maxtuples = compute_max_dead_tuples(relblocks, hasindex);
 
 	dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
 	dead_tuples->num_tuples = 0;
@@ -3024,7 +3288,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 
 	/*
 	 * This is a stripped down version of the line pointer scan in
-	 * lazy_scan_heap(). So if you change anything here, also check that code.
+	 * scan_prune_page().  So if you change anything here, also check that code.
 	 */
 	maxoff = PageGetMaxOffsetNumber(page);
 	for (offnum = FirstOffsetNumber;
@@ -3070,7 +3334,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 		{
 			TransactionId xmin;
 
-			/* Check comments in lazy_scan_heap. */
+			/* Check comments in scan_prune_page(). */
 			if (!HeapTupleHeaderXminCommitted(tuple.t_data))
 			{
 				all_visible = false;
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index dd0c124e62..3ac8df7d07 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -756,10 +756,10 @@ tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
 		return false;			/* all-visible implies live */
 
 	/*
-	 * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
-	 * all-visible unless every tuple is hinted committed. However, those hint
-	 * bits could be lost after a crash, so we can't be certain that they'll
-	 * be set here. So just check the xmin.
+	 * Neither lazy_scan_heap/scan_prune_page nor heap_page_is_all_visible
+	 * will mark a page all-visible unless every tuple is hinted committed.
+	 * However, those hint bits could be lost after a crash, so we can't be
+	 * certain that they'll be set here.  So just check the xmin.
 	 */
 	xmin = HeapTupleHeaderGetXmin(tup->t_data);
 
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 1fe193bb25..34670c6264 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -58,8 +58,8 @@ typedef struct output_type
  * and approximate tuple_len on that basis. For the others, we count
  * the exact number of dead tuples etc.
  *
- * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
- * we do not try to avoid skipping single pages.
+ * This scan is loosely based on vacuumlazy.c:lazy_scan_heap/scan_prune_page,
+ * but we do not try to avoid skipping single pages.
  */
 static void
 statapprox_heap(Relation rel, output_type *stat)
@@ -126,8 +126,8 @@ statapprox_heap(Relation rel, output_type *stat)
 
 	/*
 	 * Look at each tuple on the page and decide whether it's live or
-	 * dead, then count it and its size. Unlike lazy_scan_heap, we can
-	 * afford to ignore problems and special cases.
+	 * dead, then count it and its size.  Unlike lazy_scan_heap and
+	 * scan_prune_page, we can afford to ignore problems and special cases.
 	 */
 	maxoff = PageGetMaxOffsetNumber(page);
 
-- 
2.27.0
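As a closing illustration of the two-pass strategy this patch centralizes in two_pass_strategy(): index vacuuming checks each index entry's heap TID against the dead-tuple array built in the first heap pass, via binary search over TIDs kept in heap order. A minimal standalone model (not PostgreSQL code; the Toy* names are hypothetical stand-ins for ItemPointerData and lazy_tid_reaped()):

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* toy TID: (block, offset), kept in the order VACUUM collects them */
    typedef struct ToyTid
    {
        unsigned    blkno;
        unsigned    offnum;
    } ToyTid;

    static int
    toy_tid_cmp(const void *a, const void *b)
    {
        const ToyTid *ta = (const ToyTid *) a;
        const ToyTid *tb = (const ToyTid *) b;

        if (ta->blkno != tb->blkno)
            return (ta->blkno < tb->blkno) ? -1 : 1;
        if (ta->offnum != tb->offnum)
            return (ta->offnum < tb->offnum) ? -1 : 1;
        return 0;
    }

    /* models lazy_tid_reaped(): is this index entry's heap TID dead? */
    static bool
    toy_tid_reaped(const ToyTid *dead, size_t ndead, ToyTid tid)
    {
        return bsearch(&tid, dead, ndead, sizeof(ToyTid), toy_tid_cmp) != NULL;
    }

    int
    main(void)
    {
        /* collected in block order during the first heap pass, so already sorted */
        ToyTid      dead[] = {{1, 3}, {1, 7}, {4, 2}, {9, 5}};
        ToyTid      probe = {4, 2};

        printf("index entry (4,2) reaped? %d\n",
               toy_tid_reaped(dead, 4, probe));
        return 0;
    }

Because the first pass visits blocks in order, the array is sorted for free, which is what makes the membership test cheap enough to run once per index entry.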