I wrote:
> Jeff Janes <jeff.janes@gmail.com> writes:
>> If a partially-active table develops a slug of stable all-visible,
>> non-empty pages at the end of it, then every autovacuum of that table
>> will skip the end pages on the forward scan, think they might be
>> truncatable, and take the access exclusive lock to do the truncation.
>> And then immediately fail when it sees the last page is not empty.
>> This can persist for an indefinite number of autovac rounds.
>> The simple solution is to always scan the last page of a table, so it
>> can be noticed that it is not empty and avoid the truncation attempt.
> This seems like a reasonable proposal, but I find your implementation
> unconvincing: there are two places in lazy_scan_heap() that pay attention
> to scan_all, and you touched only one of them.
After further investigation, there is another pre-existing bug: the
short-circuit path for pages not requiring freezing doesn't bother to
update vacrelstats->nonempty_pages, causing the later logic to think that
the page is potentially truncatable even if we fix the second check of
scan_all! So this is pretty broken, and I almost think we should treat it
as a back-patchable bug fix.
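To make the failure concrete: in the path where we can't get a cleanup
lock and only check whether the page needs freezing, the page's tuples
were never counted toward nonempty_pages. Condensed from the patch below,
the fixed path looks like this (hastup is the new out-parameter of
lazy_check_needs_freeze):

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    if (!lazy_check_needs_freeze(buf, &hastup))
    {
        /* Page needs no freezing: skip it, but keep the stats honest. */
        UnlockReleaseBuffer(buf);
        vacrelstats->scanned_pages++;
        vacrelstats->pinskipped_pages++;
        if (hastup)
            vacrelstats->nonempty_pages = blkno + 1;    /* not truncatable */
        continue;
    }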
In the attached proposed patch, I added another refinement, which is to
not bother with forcing the last page to be scanned if we already know
that we're not going to attempt truncation, because we already found a
nonempty page too close to the end of the rel. I'm not quite sure this
is worthwhile, but it takes very little added logic, and saving an I/O
is probably worth the trouble.
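Condensed from the patch, the per-page test in lazy_scan_heap() therefore
becomes:

    /*
     * Scan the last page even if it's all-visible, but only while
     * truncation is still a live possibility; if a nonempty page has
     * already been seen too close to the end, don't waste the I/O.
     */
    force_scan_page = scan_all ||
        (blkno == nblocks - 1 && should_attempt_truncation(vacrelstats));

with should_attempt_truncation() being the existing is-it-worth-truncating
test split out of lazy_vacuum_rel(), so that it can be evaluated on the fly
during the scan.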
regards, tom lane
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 2429889..2833909 100644
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
*************** static BufferAccessStrategy vac_strategy
*** 138,144 ****
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
Relation *Irel, int nindexes, bool scan_all);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
! static bool lazy_check_needs_freeze(Buffer buf);
static void lazy_vacuum_index(Relation indrel,
IndexBulkDeleteResult **stats,
LVRelStats *vacrelstats);
--- 138,144 ----
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
Relation *Irel, int nindexes, bool scan_all);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
! static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
static void lazy_vacuum_index(Relation indrel,
IndexBulkDeleteResult **stats,
LVRelStats *vacrelstats);
*************** static void lazy_cleanup_index(Relation
*** 147,152 ****
--- 147,153 ----
LVRelStats *vacrelstats);
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
+ static bool should_attempt_truncation(LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
LVRelStats *vacrelstats);
*************** lazy_vacuum_rel(Relation onerel, int opt
*** 175,181 ****
LVRelStats *vacrelstats;
Relation *Irel;
int nindexes;
- BlockNumber possibly_freeable;
PGRUsage ru0;
TimestampTz starttime = 0;
long secs;
--- 176,181 ----
*************** lazy_vacuum_rel(Relation onerel, int opt
*** 263,276 ****
/*
* Optionally truncate the relation.
- *
- * Don't even think about it unless we have a shot at releasing a goodly
- * number of pages. Otherwise, the time taken isn't worth it.
*/
! possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
! if (possibly_freeable > 0 &&
! (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
! possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
lazy_truncate_heap(onerel, vacrelstats);
/* Vacuum the Free Space Map */
--- 263,270 ----
/*
* Optionally truncate the relation.
*/
! if (should_attempt_truncation(vacrelstats))
lazy_truncate_heap(onerel, vacrelstats);
/* Vacuum the Free Space Map */
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 498,503 ****
--- 492,504 ----
* started skipping blocks, we may as well skip everything up to the next
* not-all-visible block.
*
+ * We don't skip the last page, unless we've already determined that it's
+ * not worth trying to truncate the table. This avoids having every
+ * vacuum take access exclusive lock on the table to attempt a truncation
+ * that just fails immediately (if there are tuples in the last page).
+ * This is worth avoiding mainly because such a lock must be replayed on
+ * any hot standby, where it can be disruptive.
+ *
* Note: if scan_all is true, we won't actually skip any pages; but we
* maintain next_not_all_visible_block anyway, so as to set up the
* all_visible_according_to_vm flag correctly for each page.
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 530,536 ****
Page page;
OffsetNumber offnum,
maxoff;
! bool tupgone,
hastup;
int prev_dead_count;
int nfrozen;
--- 531,538 ----
Page page;
OffsetNumber offnum,
maxoff;
! bool force_scan_page,
! tupgone,
hastup;
int prev_dead_count;
int nfrozen;
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 540,545 ****
--- 542,551 ----
bool has_dead_tuples;
TransactionId visibility_cutoff_xid = InvalidTransactionId;
+ /* see note above about forcing scanning of last page */
+ force_scan_page = scan_all ||
+ (blkno == nblocks - 1 && should_attempt_truncation(vacrelstats));
+
if (blkno == next_not_all_visible_block)
{
/* Time to advance next_not_all_visible_block */
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 567,573 ****
else
{
/* Current block is all-visible */
! if (skipping_all_visible_blocks && !scan_all)
continue;
all_visible_according_to_vm = true;
}
--- 573,579 ----
else
{
/* Current block is all-visible */
! if (skipping_all_visible_blocks && !force_scan_page)
continue;
all_visible_according_to_vm = true;
}
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 630,640 ****
if (!ConditionalLockBufferForCleanup(buf))
{
/*
! * If we're not scanning the whole relation to guard against XID
! * wraparound, it's OK to skip vacuuming a page. The next vacuum
! * will clean it up.
*/
! if (!scan_all)
{
ReleaseBuffer(buf);
vacrelstats->pinskipped_pages++;
--- 636,645 ----
if (!ConditionalLockBufferForCleanup(buf))
{
/*
! * If we're not forcibly scanning the page, it's OK to skip it.
! * The next vacuum will clean it up.
*/
! if (!force_scan_page)
{
ReleaseBuffer(buf);
vacrelstats->pinskipped_pages++;
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 642,651 ****
}
/*
! * If this is a wraparound checking vacuum, then we read the page
! * with share lock to see if any xids need to be frozen. If the
! * page doesn't need attention we just skip and continue. If it
! * does, we wait for cleanup lock.
*
* We could defer the lock request further by remembering the page
* and coming back to it later, or we could even register
--- 647,656 ----
}
/*
! * Read the page with share lock to see if any xids need to be
! * frozen. If the page doesn't need attention we just skip it,
! * after updating our scan statistics. If it does, we wait for
! * cleanup lock.
*
* We could defer the lock request further by remembering the page
* and coming back to it later, or we could even register
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 653,663 ****
* is received first. For now, this seems good enough.
*/
LockBuffer(buf, BUFFER_LOCK_SHARE);
! if (!lazy_check_needs_freeze(buf))
{
UnlockReleaseBuffer(buf);
vacrelstats->scanned_pages++;
vacrelstats->pinskipped_pages++;
continue;
}
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
--- 658,670 ----
* is received first. For now, this seems good enough.
*/
LockBuffer(buf, BUFFER_LOCK_SHARE);
! if (!lazy_check_needs_freeze(buf, &hastup))
{
UnlockReleaseBuffer(buf);
vacrelstats->scanned_pages++;
vacrelstats->pinskipped_pages++;
+ if (hastup)
+ vacrelstats->nonempty_pages = blkno + 1;
continue;
}
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
*************** lazy_vacuum_page(Relation onerel, BlockN
*** 1304,1325 ****
* need to be cleaned to avoid wraparound
*
* Returns true if the page needs to be vacuumed using cleanup lock.
*/
static bool
! lazy_check_needs_freeze(Buffer buf)
{
! Page page;
OffsetNumber offnum,
maxoff;
HeapTupleHeader tupleheader;
! page = BufferGetPage(buf);
! if (PageIsNew(page) || PageIsEmpty(page))
! {
! /* PageIsNew probably shouldn't happen... */
return false;
- }
maxoff = PageGetMaxOffsetNumber(page);
for (offnum = FirstOffsetNumber;
--- 1311,1335 ----
* need to be cleaned to avoid wraparound
*
* Returns true if the page needs to be vacuumed using cleanup lock.
+ * Also returns a flag indicating whether page contains any tuples at all.
*/
static bool
! lazy_check_needs_freeze(Buffer buf, bool *hastup)
{
! Page page = BufferGetPage(buf);
OffsetNumber offnum,
maxoff;
HeapTupleHeader tupleheader;
! *hastup = false;
! /* If we hit an uninitialized page, we want to force vacuuming it. */
! if (PageIsNew(page))
! return true;
!
! /* Quick out for ordinary empty page. */
! if (PageIsEmpty(page))
return false;
maxoff = PageGetMaxOffsetNumber(page);
for (offnum = FirstOffsetNumber;
*************** lazy_check_needs_freeze(Buffer buf)
*** 1333,1338 ****
--- 1343,1350 ----
if (!ItemIdIsNormal(itemid))
continue;
+ *hastup = true;
+
tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
*************** lazy_cleanup_index(Relation indrel,
*** 1433,1438 ****
--- 1445,1474 ----
}
/*
+ * should_attempt_truncation - should we attempt to truncate the heap?
+ *
+ * Don't even think about it unless we have a shot at releasing a goodly
+ * number of pages. Otherwise, the time taken isn't worth it.
+ *
+ * This is split out so that we can test whether truncation is going to be
+ * called for before we actually do it. If you change the logic here, be
+ * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
+ */
+ static bool
+ should_attempt_truncation(LVRelStats *vacrelstats)
+ {
+ BlockNumber possibly_freeable;
+
+ possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
+ if (possibly_freeable > 0 &&
+ (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
+ possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION))
+ return true;
+ else
+ return false;
+ }
+
+ /*
* lazy_truncate_heap - try to truncate off any empty pages at the end
*/
static void