From fb7f8d958266125bd8df1c7e6b643eee4952d468 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan
Date: Fri, 23 Oct 2020 16:57:01 -0700
Subject: [PATCH v1] Avoid nbtree cleanup-only VACUUM stats inaccuracies.

---
 src/backend/access/nbtree/nbtree.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index c822b49a71..47017ad4c1 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -933,6 +933,13 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 	 * double-counting some index tuples, so disbelieve any total that exceeds
 	 * the underlying heap's count ... if we know that accurately.  Otherwise
 	 * this might just make matters worse.
+	 *
+	 * Posting list tuples are another source of inaccuracy.  Cleanup-only
+	 * btvacuumscan calls assume that the number of index tuples can be used
+	 * as num_index_tuples, even though num_index_tuples is supposed to
+	 * represent the number of TIDs in the index.  This naive approach can
+	 * underestimate the number of tuples in the index (by about a factor of 3
+	 * in the worst case).
 	 */
 	if (!info->estimated_count)
 	{
@@ -1394,11 +1401,18 @@ backtrack:
 	 * separate live tuples).  We don't delete when backtracking, though,
 	 * since that would require teaching _bt_pagedel() about backtracking
 	 * (doesn't seem worth adding more complexity to deal with that).
+	 *
+	 * We don't count the number of live TIDs during cleanup-only calls to
+	 * btvacuumscan (i.e. when callback is not set).  We count the number
+	 * of index tuples directly instead.  This avoids the expense of
+	 * directly examining all of the tuples on each page.
 	 */
 	if (minoff > maxoff)
 		attempt_pagedel = (blkno == scanblkno);
-	else
+	else if (callback)
 		stats->num_index_tuples += nhtidslive;
+	else
+		stats->num_index_tuples += maxoff - minoff + 1;
 
 	Assert(!attempt_pagedel || nhtidslive == 0);
 }
-- 
2.25.1