From 1c7daec661f2b6e7ac388e4b74efd119945d6421 Mon Sep 17 00:00:00 2001 From: Pengzhou Tang Date: Wed, 20 Nov 2019 06:43:33 -0500 Subject: [PATCH 2/3] Planner can estimate the pages based on the columns selected The planner used to assume that all pages must be scanned even when a query needs only one or two columns. This is right for heap tables; however, with a column store like zedstore, we can reduce the estimated number of pages to those holding only the selected columns, which lowers the IO cost and, in some cases, the number of parallel workers. To do this, this commit adds a new field `stadiskfrac` to the catalog `pg_statistic`; it records the fraction of the table's physical size that a column uses. The planner calculates a page selectivity from the target list and the base restriction info, then uses it to scale the rel->pages obtained from estimate_rel_size(). --- src/backend/commands/analyze.c | 48 +++++++++++++++++++ src/backend/optimizer/path/allpaths.c | 87 +++++++++++++++++++++++++++++++++-- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_statistic.h | 3 ++ src/include/commands/vacuum.h | 6 +++ 5 files changed, 142 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index a97297b..f8fce9c 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -87,6 +87,9 @@ static void do_analyze_rel(Relation onerel, VacuumParams *params, List *va_cols, AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, bool inh, bool in_outer_xact, int elevel); +static void compute_disk_stats(VacAttrStats **stats, int natts, + TupleDesc desc, HeapTuple *rows, + int numrows); static void compute_index_stats(Relation onerel, double totalrows, AnlIndexData *indexdata, int nindexes, HeapTuple *rows, int numrows, @@ -560,6 +563,15 @@ do_analyze_rel(Relation onerel, VacuumParams *params, MemoryContextResetAndDeleteChildren(col_context); } + if (!va_cols && 
sample_context->k_slots[SAMPLE_KIND_DISKSIZE])
+	{
+		TupleDesc	tupdesc =
+			sample_context->k_slots[SAMPLE_KIND_DISKSIZE]->tts_tupleDescriptor;
+		HeapTuple  *rows = sample_context->k_rows[SAMPLE_KIND_DISKSIZE];
+
+		compute_disk_stats(vacattrstats, attr_cnt, tupdesc, rows, numrows);
+	}
+
 	if (hasindex)
 		compute_index_stats(onerel, totalrows,
 							indexdata, nindexes,
@@ -705,6 +717,61 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	anl_context = NULL;
 }
 
+/*
+ * compute_disk_stats -- compute each column's share of the sampled disk size
+ *
+ * stats, natts: the per-column VacAttrStats entries to fill in.
+ * desc: tuple descriptor used to read attributes from the sample rows.
+ * rows, numrows: the sample rows; attribute j + 1 of each row is read as
+ * the float8 size recorded for stats[j].
+ *
+ * Every non-null size is added to stats[j]->disksize and to a grand total;
+ * stats[j]->stadiskfrac is then set to disksize / total.
+ *
+ * NOTE(review): this assumes stats[j] describes attribute j + 1 of desc.
+ * Confirm the mapping still holds if the stats array can skip columns.
+ */
+static void
+compute_disk_stats(VacAttrStats **stats, int natts,
+				   TupleDesc desc, HeapTuple *rows,
+				   int numrows)
+{
+	int			i,
+				j;
+	float8		total = 0;
+
+	for (i = 0; i < numrows; i++)
+	{
+		HeapTuple	tup = rows[i];
+
+		for (j = 0; j < natts; j++)
+		{
+			bool		isNull;
+			Datum		dat = heap_getattr(tup, j + 1, desc, &isNull);
+
+			if (!isNull)
+			{
+				float8		attr_size = DatumGetFloat8(dat);
+
+				stats[j]->disksize += attr_size;
+				total += attr_size;
+			}
+		}
+	}
+
+	/*
+	 * If nothing was sampled (no rows, or only NULL or zero sizes), total is
+	 * zero and dividing by it would put a non-finite value into
+	 * stadiskfrac.  Record a fraction of 0 for every column instead.
+	 */
+	for (j = 0; j < natts; j++)
+	{
+		VacAttrStats *vac = stats[j];
+
+		vac->stadiskfrac = (total > 0) ? vac->disksize / total : 0;
+	}
+}
+
 /*
  * Compute statistics about indexes of a relation
  */
@@ -1425,6 +1492,7 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
 		values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
 		values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
 		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
+		values[Anum_pg_statistic_stadiskfrac - 1] = Float4GetDatum(stats->stadiskfrac);
 		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
 		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
 		i = Anum_pg_statistic_stakind1 - 1;
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index db3a68a..5df1466 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -23,6 +23,7 @@
 #include "catalog/pg_class.h"
 #include
"catalog/pg_operator.h" #include "catalog/pg_proc.h" +#include "catalog/pg_statistic.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -47,6 +48,7 @@ #include "partitioning/partbounds.h" #include "partitioning/partprune.h" #include "rewrite/rewriteManip.h" +#include "utils/syscache.h" #include "utils/lsyscache.h" @@ -79,7 +81,11 @@ static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, - RangeTblEntry *rte); + Index rti, RangeTblEntry *rte); +static void set_plain_rel_page_estimates(PlannerInfo *root, + RelOptInfo *rel, + Index rti, + RangeTblEntry *rte); static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel); static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); @@ -409,7 +415,7 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, else { /* Plain relation */ - set_plain_rel_size(root, rel, rte); + set_plain_rel_size(root, rel, rti, rte); } break; case RTE_SUBQUERY: @@ -571,7 +577,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, * Set size estimates for a plain relation (no subquery, no inheritance) */ static void -set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { /* * Test any partial indexes of rel for applicability. 
We must do this
@@ -581,6 +587,142 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 
 	/* Mark rel with estimated output rows, width, etc */
 	set_baserel_size_estimates(root, rel);
+
+	/* Estimate the pages based on the selected columns */
+	set_plain_rel_page_estimates(root, rel, rti, rte);
+}
+
+/*
+ * add_referenced_attnos
+ *	  Add to *cols the user columns of relation "rti" that "expr" references.
+ *
+ * Returns false, and stops collecting, when expr holds a whole-row
+ * reference (varattno 0) to the relation: such a reference needs every
+ * column, so no per-column page fraction applies.  Vars with a negative
+ * varattno are skipped.
+ */
+static bool
+add_referenced_attnos(Node *expr, Index rti, Bitmapset **cols)
+{
+	List	   *vars;
+	ListCell   *lc;
+	bool		per_column = true;
+
+	vars = pull_var_clause(expr,
+						   PVC_RECURSE_AGGREGATES |
+						   PVC_RECURSE_WINDOWFUNCS |
+						   PVC_RECURSE_PLACEHOLDERS);
+
+	foreach(lc, vars)
+	{
+		Var		   *var = (Var *) lfirst(lc);
+
+		if (var->varno != rti)
+			continue;
+
+		if (var->varattno == 0)
+		{
+			per_column = false;
+			break;
+		}
+
+		if (var->varattno > 0)
+			*cols = bms_add_member(*cols, var->varattno);
+	}
+
+	list_free(vars);
+
+	return per_column;
+}
+
+/*
+ * set_plain_rel_page_estimates
+ *	  Scale rel->pages down to the share of the table that the query reads.
+ *
+ * The columns referenced by rel->reltarget and rel->baserestrictinfo are
+ * collected, their pg_statistic.stadiskfrac values are summed, and rel->pages
+ * is multiplied by that sum (never going below one page).
+ *
+ * rel->pages is left untouched when the sum would not be trustworthy: a
+ * whole-row reference is present, a referenced column has no pg_statistic
+ * entry, or the sum is not positive.  Silently counting such columns as zero
+ * would underestimate the pages to scan.
+ */
+static void
+set_plain_rel_page_estimates(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte)
+{
+	Bitmapset  *cols = NULL;
+	Selectivity sel = 0;
+	bool		usable = true;
+	double		pages;
+	ListCell   *lc;
+	int			attno;
+
+	Assert(rel->rtekind == RTE_RELATION);
+
+	/* Columns needed by the target list */
+	foreach(lc, rel->reltarget->exprs)
+	{
+		if (!add_referenced_attnos((Node *) lfirst(lc), rti, &cols))
+		{
+			usable = false;
+			break;
+		}
+	}
+
+	/* Columns needed by the restriction clauses */
+	if (usable)
+	{
+		foreach(lc, rel->baserestrictinfo)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+			if (!add_referenced_attnos((Node *) rinfo->clause, rti, &cols))
+			{
+				usable = false;
+				break;
+			}
+		}
+	}
+
+	/* Sum the disk fractions of the referenced columns */
+	attno = -1;
+	while (usable && (attno = bms_next_member(cols, attno)) >= 0)
+	{
+		HeapTuple	tp;
+
+		tp = SearchSysCache3(STATRELATTINH,
+							 ObjectIdGetDatum(rte->relid),
+							 Int16GetDatum(attno),
+							 BoolGetDatum(rte->inh));
+
+		if (!HeapTupleIsValid(tp))
+		{
+			/* No statistics for this column, so its share is unknown */
+			usable = false;
+			break;
+		}
+
+		sel += ((Form_pg_statistic) GETSTRUCT(tp))->stadiskfrac;
+		ReleaseSysCache(tp);
+	}
+
+	bms_free(cols);
+
+	/* "!(sel > 0)" also rejects a NaN sum */
+	if (!usable || !(sel > 0))
+		return;
+
+	/* Accumulated float4 fractions may add up to slightly more than 1 */
+	if (sel > 1.0)
+		sel = 1.0;
+
+	pages = rel->pages * sel;
+
+	if (pages <= 1.0)
+		rel->pages = 1;
+	else
+		rel->pages = rint(pages);
 }
 
 /*
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 1f6de76..1c14c6b 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@
-53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201910251 +#define CATALOG_VERSION_NO 201912041 #endif diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h index 207be54..66029f6 100644 --- a/src/include/catalog/pg_statistic.h +++ b/src/include/catalog/pg_statistic.h @@ -36,6 +36,9 @@ CATALOG(pg_statistic,2619,StatisticRelationId) /* the fraction of the column's entries that are NULL: */ float4 stanullfrac; + /* the fraction of the column's disksize of all columns */ + float4 stadiskfrac; + /* * stawidth is the average width in bytes of non-null entries. For * fixed-width datatypes this is of course the same as the typlen, but for diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 128f7ae..077a3c1 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -114,6 +114,12 @@ typedef struct VacAttrStats Datum *stavalues[STATISTIC_NUM_SLOTS]; /* + * These fields are to be filled in compute_disk_stats + */ + float4 stadiskfrac; /* fraction of the physical size */ + float8 disksize; /* value of the physical size */ + + /* * These fields describe the stavalues[n] element types. They will be * initialized to match attrtypid, but a custom typanalyze function might * want to store an array of something other than the analyzed column's -- 1.8.3.1