I believe the attached patch incorporates all the ideas discussed in the
pghackers thread about using the current actual table size for planner
estimation purposes. The patch is not complete yet because (a) I
haven't touched the documentation, and (b) some of the regression tests
fail because of changes in plans; those expected-output files will need
adjustment.
Any comments, objections?
regards, tom lane
*** src/backend/catalog/heap.c.orig Tue Aug 31 13:10:36 2004
--- src/backend/catalog/heap.c Mon Nov 29 18:45:34 2004
***************
*** 607,643 ****
*/
new_rel_reltup = new_rel_desc->rd_rel;
- /*
- * Here we insert bogus estimates of the size of the new relation. In
- * reality, of course, the new relation has 0 tuples and pages, and if
- * we were tracking these statistics accurately then we'd set the
- * fields that way. But at present the stats will be updated only by
- * VACUUM or CREATE INDEX, and the user might insert a lot of tuples
- * before he gets around to doing either of those. So, instead of
- * saying the relation is empty, we insert guesstimates. The point is
- * to keep the optimizer from making really stupid choices on
- * never-yet-vacuumed tables; so the estimates need only be large
- * enough to discourage the optimizer from using nested-loop plans.
- * With this hack, nested-loop plans will be preferred only after the
- * table has been proven to be small by VACUUM or CREATE INDEX.
- * Maintaining the stats on-the-fly would solve the problem more
- * cleanly, but the overhead of that would likely cost more than it'd
- * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it
- * finds the relation has 0 rows and pages. See index.c.)
- */
switch (relkind)
{
case RELKIND_RELATION:
case RELKIND_INDEX:
case RELKIND_TOASTVALUE:
! new_rel_reltup->relpages = 10; /* bogus estimates */
! new_rel_reltup->reltuples = 1000;
break;
case RELKIND_SEQUENCE:
new_rel_reltup->relpages = 1;
new_rel_reltup->reltuples = 1;
break;
! default: /* views, etc */
new_rel_reltup->relpages = 0;
new_rel_reltup->reltuples = 0;
break;
--- 607,628 ----
*/
new_rel_reltup = new_rel_desc->rd_rel;
switch (relkind)
{
case RELKIND_RELATION:
case RELKIND_INDEX:
case RELKIND_TOASTVALUE:
! /* The relation is real, but as yet empty */
! new_rel_reltup->relpages = 0;
! new_rel_reltup->reltuples = 0;
break;
case RELKIND_SEQUENCE:
+ /* Sequences always have a known size */
new_rel_reltup->relpages = 1;
new_rel_reltup->reltuples = 1;
break;
! default:
! /* Views, etc, have no disk storage */
new_rel_reltup->relpages = 0;
new_rel_reltup->reltuples = 0;
break;
*** src/backend/catalog/index.c.orig Fri Oct 15 18:39:53 2004
--- src/backend/catalog/index.c Mon Nov 29 18:46:51 2004
***************
*** 1170,1176 ****
*
* Update pg_class' relpages and reltuples statistics for the given relation
* (which can be either a table or an index). Note that this is not used
! * in the context of VACUUM.
* ----------------
*/
void
--- 1170,1176 ----
*
* Update pg_class' relpages and reltuples statistics for the given relation
* (which can be either a table or an index). Note that this is not used
! * in the context of VACUUM, only CREATE INDEX.
* ----------------
*/
void
***************
*** 1209,1215 ****
* Find the tuple to update in pg_class. Normally we make a copy of
* the tuple using the syscache, modify it, and apply heap_update. But
* in bootstrap mode we can't use heap_update, so we cheat and
! * overwrite the tuple in-place.
*
* We also must cheat if reindexing pg_class itself, because the target
* index may presently not be part of the set of indexes that
--- 1209,1216 ----
* Find the tuple to update in pg_class. Normally we make a copy of
* the tuple using the syscache, modify it, and apply heap_update. But
* in bootstrap mode we can't use heap_update, so we cheat and
! * overwrite the tuple in-place. (Note: as of PG 8.0 this isn't called
! * during bootstrap, but leave the code here for possible future use.)
*
* We also must cheat if reindexing pg_class itself, because the target
* index may presently not be part of the set of indexes that
***************
*** 1248,1281 ****
/*
* Figure values to insert.
*
! * If we found zero tuples in the scan, do NOT believe it; instead put a
! * bogus estimate into the statistics fields. Otherwise, the common
! * pattern "CREATE TABLE; CREATE INDEX; insert data" leaves the table
! * with zero size statistics until a VACUUM is done. The optimizer
! * will generate very bad plans if the stats claim the table is empty
! * when it is actually sizable. See also CREATE TABLE in heap.c.
! *
! * Note: this path is also taken during bootstrap, because bootstrap.c
! * passes reltuples = 0 after loading a table. We have to estimate
! * some number for reltuples based on the actual number of pages.
*/
relpages = RelationGetNumberOfBlocks(whichRel);
! if (reltuples == 0)
{
! if (relpages == 0)
! {
! /* Bogus defaults for a virgin table, same as heap.c */
! reltuples = 1000;
! relpages = 10;
! }
! else if (whichRel->rd_rel->relkind == RELKIND_INDEX && relpages <= 2)
! {
! /* Empty index, leave bogus defaults in place */
! reltuples = 1000;
! }
! else
! reltuples = ((double) relpages) * NTUPLES_PER_PAGE(whichRel->rd_rel->relnatts);
}
/*
--- 1249,1275 ----
/*
* Figure values to insert.
*
! * If the relation has been truncated to zero tuples/pages, do not
! * simply insert 0/0 into pg_class; instead attempt to preserve the
! * previous tuple density information. plancat.c will interpret
! * nonzero reltuples with zero relpages as a direct density value,
! * so we can truthfully zero out relpages while preserving the old
! * density value. This gives a better result when a table is truncated
! * and refilled than just falling back to a default density estimate.
*/
relpages = RelationGetNumberOfBlocks(whichRel);
! if (reltuples == 0 &&
! (relpages == 0 ||
! (whichRel->rd_rel->relkind == RELKIND_INDEX && relpages <= 2)))
{
! /* extract old density estimate */
! reltuples = rd_rel->reltuples;
! if (rd_rel->relpages != 0)
! reltuples /= ((BlockNumber) rd_rel->relpages);
! /* re-scale in case it's an index */
! if (relpages > 1)
! reltuples *= relpages;
}
/*
*** src/backend/commands/vacuum.c.orig Fri Oct 15 18:39:56 2004
--- src/backend/commands/vacuum.c Mon Nov 29 18:47:11 2004
***************
*** 646,652 ****
/* overwrite the existing statistics in the tuple */
pgcform = (Form_pg_class) GETSTRUCT(&rtup);
pgcform->relpages = (int32) num_pages;
! pgcform->reltuples = num_tuples;
pgcform->relhasindex = hasindex;
/*
--- 646,652 ----
/* overwrite the existing statistics in the tuple */
pgcform = (Form_pg_class) GETSTRUCT(&rtup);
pgcform->relpages = (int32) num_pages;
! pgcform->reltuples = (float4) num_tuples;
pgcform->relhasindex = hasindex;
/*
*** src/backend/commands/vacuumlazy.c.orig Mon Oct 25 11:42:02 2004
--- src/backend/commands/vacuumlazy.c Mon Nov 29 18:47:12 2004
***************
*** 37,42 ****
--- 37,44 ----
*/
#include "postgres.h"
+ #include <math.h>
+
#include "access/genam.h"
#include "access/heapam.h"
#include "access/xlog.h"
***************
*** 67,72 ****
--- 69,75 ----
/* Overall statistics about rel */
BlockNumber rel_pages;
double rel_tuples;
+ double tuples_deleted;
BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
Size threshold; /* minimum interesting free space */
/* List of TIDs of tuples we intend to delete */
***************
*** 94,105 ****
Relation *Irel, int nindexes);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_scan_index(Relation indrel, LVRelStats *vacrelstats);
! static void lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats);
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
LVRelStats *vacrelstats);
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
ItemPointer itemptr);
--- 97,112 ----
Relation *Irel, int nindexes);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_scan_index(Relation indrel, LVRelStats *vacrelstats);
! static void lazy_vacuum_index(Relation indrel, double *index_tups_vacuumed,
! LVRelStats *vacrelstats);
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
LVRelStats *vacrelstats);
+ static void lazy_update_relstats(Relation rel, BlockNumber num_pages,
+ double num_tuples, double tuples_removed,
+ bool hasindex);
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
ItemPointer itemptr);
***************
*** 169,176 ****
lazy_update_fsm(onerel, vacrelstats);
/* Update statistics in pg_class */
! vac_update_relstats(RelationGetRelid(onerel), vacrelstats->rel_pages,
! vacrelstats->rel_tuples, hasindex);
}
--- 176,184 ----
lazy_update_fsm(onerel, vacrelstats);
/* Update statistics in pg_class */
! lazy_update_relstats(onerel, vacrelstats->rel_pages,
! vacrelstats->rel_tuples, vacrelstats->tuples_deleted,
! hasindex);
}
***************
*** 195,200 ****
--- 203,210 ----
tups_vacuumed,
nkeep,
nunused;
+ double *index_tups_vacuumed;
+ bool did_vacuum_index = false;
int i;
VacRUsage ru0;
***************
*** 209,214 ****
--- 219,233 ----
empty_pages = 0;
num_tuples = tups_vacuumed = nkeep = nunused = 0;
+ /*
+ * Because index vacuuming is done in multiple passes, we have to
+ * keep track of the total number of rows removed from each index.
+ * index_tups_vacuumed[i] is the number removed so far from the i'th
+ * index. (For partial indexes this could well be different from
+ * tups_vacuumed.)
+ */
+ index_tups_vacuumed = (double *) palloc0(nindexes * sizeof(double));
+
nblocks = RelationGetNumberOfBlocks(onerel);
vacrelstats->rel_pages = nblocks;
vacrelstats->nonempty_pages = 0;
***************
*** 238,244 ****
{
/* Remove index entries */
for (i = 0; i < nindexes; i++)
! lazy_vacuum_index(Irel[i], vacrelstats);
/* Remove tuples from heap */
lazy_vacuum_heap(onerel, vacrelstats);
/* Forget the now-vacuumed tuples, and press on */
--- 257,265 ----
{
/* Remove index entries */
for (i = 0; i < nindexes; i++)
! lazy_vacuum_index(Irel[i], &index_tups_vacuumed[i],
! vacrelstats);
! did_vacuum_index = true;
/* Remove tuples from heap */
lazy_vacuum_heap(onerel, vacrelstats);
/* Forget the now-vacuumed tuples, and press on */
***************
*** 400,405 ****
--- 421,427 ----
/* save stats for use later */
vacrelstats->rel_tuples = num_tuples;
+ vacrelstats->tuples_deleted = tups_vacuumed;
/* If any tuples need to be deleted, perform final vacuum cycle */
/* XXX put a threshold on min number of tuples here? */
***************
*** 407,417 ****
{
/* Remove index entries */
for (i = 0; i < nindexes; i++)
! lazy_vacuum_index(Irel[i], vacrelstats);
/* Remove tuples from heap */
lazy_vacuum_heap(onerel, vacrelstats);
}
! else
{
/* Must do post-vacuum cleanup and statistics update anyway */
for (i = 0; i < nindexes; i++)
--- 429,440 ----
{
/* Remove index entries */
for (i = 0; i < nindexes; i++)
! lazy_vacuum_index(Irel[i], &index_tups_vacuumed[i],
! vacrelstats);
/* Remove tuples from heap */
lazy_vacuum_heap(onerel, vacrelstats);
}
! else if (!did_vacuum_index)
{
/* Must do post-vacuum cleanup and statistics update anyway */
for (i = 0; i < nindexes; i++)
***************
*** 588,596 ****
return;
/* now update statistics in pg_class */
! vac_update_relstats(RelationGetRelid(indrel),
! stats->num_pages, stats->num_index_tuples,
! false);
ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
--- 611,619 ----
return;
/* now update statistics in pg_class */
! lazy_update_relstats(indrel, stats->num_pages,
! stats->num_index_tuples, stats->tuples_removed,
! false);
ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
***************
*** 611,621 ****
* Delete all the index entries pointing to tuples listed in
* vacrelstats->dead_tuples.
*
* Finally, we arrange to update the index relation's statistics in
* pg_class.
*/
static void
! lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats)
{
IndexBulkDeleteResult *stats;
IndexVacuumCleanupInfo vcinfo;
--- 634,648 ----
* Delete all the index entries pointing to tuples listed in
* vacrelstats->dead_tuples.
*
+ * Increment *index_tups_vacuumed by the number of index entries
+ * removed.
+ *
* Finally, we arrange to update the index relation's statistics in
* pg_class.
*/
static void
! lazy_vacuum_index(Relation indrel, double *index_tups_vacuumed,
! LVRelStats *vacrelstats)
{
IndexBulkDeleteResult *stats;
IndexVacuumCleanupInfo vcinfo;
***************
*** 652,661 ****
if (!stats)
return;
/* now update statistics in pg_class */
! vac_update_relstats(RelationGetRelid(indrel),
! stats->num_pages, stats->num_index_tuples,
! false);
ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
--- 679,691 ----
if (!stats)
return;
+ /* accumulate total removed over multiple index-cleaning cycles */
+ *index_tups_vacuumed += stats->tuples_removed;
+
/* now update statistics in pg_class */
! lazy_update_relstats(indrel, stats->num_pages,
! stats->num_index_tuples, *index_tups_vacuumed,
! false);
ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
***************
*** 884,889 ****
--- 914,942 ----
* known-nonempty page.
*/
return vacrelstats->nonempty_pages;
+ }
+
+ /*
+ * lazy_update_relstats - update pg_class statistics for a table or index
+ *
+ * We always want to set relpages to an accurate value. However, for lazy
+ * VACUUM it seems best to set reltuples to the average of the number of
+ * rows before vacuuming and the number after vacuuming, rather than just
+ * using the number after vacuuming. This will result in the best average
+ * performance in a steady-state situation where VACUUMs are performed
+ * regularly on a table of roughly constant size, assuming that the physical
+ * number of pages in the table stays about the same throughout. (Note that
+ * we do not apply the same logic to VACUUM FULL, because it repacks the table
+ * and thereby boosts the tuple density.)
+ */
+ static void
+ lazy_update_relstats(Relation rel, BlockNumber num_pages,
+ double num_tuples, double tuples_removed,
+ bool hasindex)
+ {
+ num_tuples = ceil(num_tuples + tuples_removed * 0.5);
+ vac_update_relstats(RelationGetRelid(rel), num_pages, num_tuples,
+ hasindex);
}
/*
*** src/backend/optimizer/util/plancat.c.orig Fri Oct 1 13:11:50 2004
--- src/backend/optimizer/util/plancat.c Mon Nov 29 18:47:54 2004
***************
*** 38,43 ****
--- 38,47 ----
#include "miscadmin.h"
+ static void estimate_rel_size(Relation rel,
+ BlockNumber *pages, double *tuples);
+
+
/*
* get_relation_info -
* Retrieves catalog information for a given relation.
***************
*** 121,128 ****
}
info->relam = indexRelation->rd_rel->relam;
! info->pages = indexRelation->rd_rel->relpages;
! info->tuples = indexRelation->rd_rel->reltuples;
info->amcostestimate = index_cost_estimator(indexRelation);
/*
--- 125,131 ----
}
info->relam = indexRelation->rd_rel->relam;
! estimate_rel_size(indexRelation, &info->pages, &info->tuples);
info->amcostestimate = index_cost_estimator(indexRelation);
/*
***************
*** 170,180 ****
rel->indexlist = indexinfos;
! rel->pages = relation->rd_rel->relpages;
! rel->tuples = relation->rd_rel->reltuples;
/* XXX keep the lock here? */
heap_close(relation, AccessShareLock);
}
/*
--- 173,245 ----
rel->indexlist = indexinfos;
! estimate_rel_size(relation, &rel->pages, &rel->tuples);
/* XXX keep the lock here? */
heap_close(relation, AccessShareLock);
+ }
+
+ /*
+ * estimate_rel_size - estimate # pages and # tuples in a table or index
+ */
+ static void
+ estimate_rel_size(Relation rel,
+ BlockNumber *pages, double *tuples)
+ {
+ #if 1
+ BlockNumber relpages;
+ double reltuples;
+ double density;
+
+ switch (rel->rd_rel->relkind)
+ {
+ case RELKIND_RELATION:
+ case RELKIND_INDEX:
+ case RELKIND_TOASTVALUE:
+ /* it has storage, ok to call the smgr */
+ *pages = RelationGetNumberOfBlocks(rel);
+ /* coerce values in pg_class to more desirable types */
+ relpages = (BlockNumber) rel->rd_rel->relpages;
+ reltuples = (double) rel->rd_rel->reltuples;
+ /* estimate number of tuples from previous tuple density */
+ if (relpages > 0)
+ density = reltuples / (double) relpages;
+ else if (reltuples > 0)
+ {
+ /*
+ * Assume that the value in reltuples is meant as a direct
+ * density estimate.
+ */
+ density = reltuples;
+ }
+ else
+ {
+ /*
+ * When we have no data because the relation was truncated,
+ * use a default that matches the pre-8.0 default settings
+ * (reltuples = 1000, relpages = 10). An alternative is
+ * to estimate an average tuple width on the basis of column
+ * datatypes, but it's unclear that that's worth the trouble.
+ */
+ density = 100;
+ }
+ *tuples = rint(density * (double) *pages);
+ break;
+ case RELKIND_SEQUENCE:
+ /* Sequences always have a known size */
+ *pages = 1;
+ *tuples = 1;
+ break;
+ default:
+ /* else it has no disk storage; probably shouldn't get here? */
+ *pages = 0;
+ *tuples = 0;
+ break;
+ }
+ #else /* pre-8.0 behavior */
+ *pages = (BlockNumber) rel->rd_rel->relpages;
+ *tuples = (double) rel->rd_rel->reltuples;
+ #endif
}
/*
*** src/include/nodes/relation.h.orig Fri Nov 26 16:08:35 2004
--- src/include/nodes/relation.h Mon Nov 29 16:41:52 2004
***************
*** 255,261 ****
Oid indexoid; /* OID of the index relation */
/* statistics from pg_class */
! long pages; /* number of disk pages in index */
double tuples; /* number of index tuples in index */
/* index descriptor information */
--- 255,261 ----
Oid indexoid; /* OID of the index relation */
/* statistics from pg_class */
! BlockNumber pages; /* number of disk pages in index */
double tuples; /* number of index tuples in index */
/* index descriptor information */