There's been a thread in pgsql-performance about ANALYZE reporting an
overly large number of dead rows because it counts anything that's not
visible per SnapshotNow rules as "dead"; in particular,
insert-in-progress tuples are reported as "dead". This seems like a bad
idea because it may prompt autovacuum to launch a useless vacuum.
The attached patch revises ANALYZE to distinguish tuple states the same
way VACUUM does, and to not count INSERT_IN_PROGRESS nor
DELETE_IN_PROGRESS tuples as either "live" or "dead".
Comments? In particular, should we report a count of in-doubt tuples in
the eventual LOG message, and does anyone want to argue for still
counting DELETE_IN_PROGRESS as "dead"?
regards, tom lane
*** src/backend/commands/analyze.c.orig Thu Nov 15 17:38:16 2007
--- src/backend/commands/analyze.c Fri Nov 16 17:45:10 2007
***************
*** 32,37 ****
--- 32,38 ----
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/proc.h"
+ #include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
***************
*** 835,840 ****
--- 836,842 ----
double deadrows = 0; /* # dead rows seen */
double rowstoskip = -1; /* -1 means not set yet */
BlockNumber totalblocks;
+ TransactionId OldestXmin;
BlockSamplerData bs;
double rstate;
***************
*** 842,847 ****
--- 844,852 ----
totalblocks = RelationGetNumberOfBlocks(onerel);
+ /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
+ OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
+
/* Prepare for sampling block numbers */
BlockSampler_Init(&bs, totalblocks, targrows);
/* Prepare for sampling rows */
***************
*** 862,917 ****
* We must maintain a pin on the target page's buffer to ensure that
* the maxoffset value stays good (else concurrent VACUUM might delete
* tuples out from under us). Hence, pin the page until we are done
! * looking at it. We don't maintain a lock on the page, so tuples
! * could get added to it, but we ignore such tuples.
*/
targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
targpage = BufferGetPage(targbuffer);
maxoffset = PageGetMaxOffsetNumber(targpage);
- LockBuffer(targbuffer, BUFFER_LOCK_UNLOCK);
/* Inner loop over all tuples on the selected page */
for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
{
HeapTupleData targtuple;
ItemPointerSet(&targtuple.t_self, targblock, targoffset);
! /* We use heap_release_fetch to avoid useless bufmgr traffic */
! if (heap_release_fetch(onerel, SnapshotNow,
! &targtuple, &targbuffer,
! true, NULL))
{
/*
* The first targrows live rows are simply copied into the
* reservoir. Then we start replacing tuples in the sample
! * until we reach the end of the relation. This algorithm is
! * from Jeff Vitter's paper (see full citation below). It
! * works by repeatedly computing the number of tuples to skip
! * before selecting a tuple, which replaces a randomly chosen
! * element of the reservoir (current set of tuples). At all
! * times the reservoir is a true random sample of the tuples
! * we've passed over so far, so when we fall off the end of
! * the relation we're done.
*/
if (numrows < targrows)
rows[numrows++] = heap_copytuple(&targtuple);
else
{
/*
! * t in Vitter's paper is the number of records already
! * processed. If we need to compute a new S value, we
! * must use the not-yet-incremented value of liverows as
! * t.
*/
if (rowstoskip < 0)
! rowstoskip = get_next_S(liverows, targrows, &rstate);
!
if (rowstoskip <= 0)
{
/*
! * Found a suitable tuple, so save it, replacing one
! * old tuple at random
*/
int k = (int) (targrows * random_fract());
--- 867,935 ----
* We must maintain a pin on the target page's buffer to ensure that
* the maxoffset value stays good (else concurrent VACUUM might delete
* tuples out from under us). Hence, pin the page until we are done
! * looking at it. We also choose to hold sharelock on the buffer
! * throughout --- we could release and re-acquire sharelock for
! * each tuple, but since we aren't doing much work per tuple, the
! * extra lock traffic is probably better avoided.
*/
targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
targpage = BufferGetPage(targbuffer);
maxoffset = PageGetMaxOffsetNumber(targpage);
/* Inner loop over all tuples on the selected page */
for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
{
+ ItemId itemid;
HeapTupleData targtuple;
+ itemid = PageGetItemId(targpage, targoffset);
+
+ /* Ignore unused, redirect, and dead tuple slots */
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
ItemPointerSet(&targtuple.t_self, targblock, targoffset);
!
! targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
! targtuple.t_len = ItemIdGetLength(itemid);
!
! switch (HeapTupleSatisfiesVacuum(targtuple.t_data,
! OldestXmin,
! targbuffer))
{
+ case HEAPTUPLE_LIVE:
+
/*
* The first targrows live rows are simply copied into the
* reservoir. Then we start replacing tuples in the sample
! * until we reach the end of the relation. This algorithm
! * is from Jeff Vitter's paper (see full citation
! * below). It works by repeatedly computing the number of
! * tuples to skip before selecting a tuple, which replaces
! * a randomly chosen element of the reservoir (current set
! * of tuples). At all times the reservoir is a true
! * random sample of the tuples we've passed over so far,
! * so when we fall off the end of the relation we're done.
*/
if (numrows < targrows)
rows[numrows++] = heap_copytuple(&targtuple);
else
{
/*
! * t in Vitter's paper is the number of records
! * already processed. If we need to compute a new S
! * value, we must use the not-yet-incremented value of
! * liverows as t.
*/
if (rowstoskip < 0)
! rowstoskip = get_next_S(liverows, targrows,
! &rstate);
if (rowstoskip <= 0)
{
/*
! * Found a suitable tuple, so save it, replacing
! * one old tuple at random
*/
int k = (int) (targrows * random_fract());
***************
*** 924,939 ****
}
liverows += 1;
! }
! else
! {
! /* Count dead rows, but not empty slots */
! if (targtuple.t_data != NULL)
deadrows += 1;
}
}
! /* Now release the pin on the page */
ReleaseBuffer(targbuffer);
}
--- 942,968 ----
}
liverows += 1;
! break;
!
! case HEAPTUPLE_DEAD:
! case HEAPTUPLE_RECENTLY_DEAD:
! /* Count dead and recently-dead rows */
deadrows += 1;
+ break;
+
+ case HEAPTUPLE_INSERT_IN_PROGRESS:
+ case HEAPTUPLE_DELETE_IN_PROGRESS:
+ /* We do not currently count in-doubt rows */
+ break;
+
+ default:
+ elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+ break;
}
}
! /* Now release the lock and pin on the page */
! LockBuffer(targbuffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(targbuffer);
}