diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml new file mode 100644 index fffb79f..2fc561f --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -6570,7 +6570,8 @@ SCRAM-SHA-256$<iteration An array containing codes for the enabled statistic kinds; valid values are: d for n-distinct statistics, - f for functional dependency statistics + f for functional dependency statistics, and + m for most common values (MCV) list statistics @@ -6593,6 +6594,16 @@ SCRAM-SHA-256$<iteration + + stxmcv + pg_mcv_list + + + MCV (most-common values) list statistics, serialized as + pg_mcv_list type. + + + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml new file mode 100644 index edc9be9..d68b789 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -20917,4 +20917,81 @@ CREATE EVENT TRIGGER test_table_rewrite_ + + Statistics Information Functions + + + function + statistics + + + + To inspect statistics defined using CREATE STATISTICS + command, PostgreSQL provides multiple functions. + + + + Inspecting MCV lists + + + pg_mcv_list_items + pg_mcv_list + + + + pg_mcv_list_items returns a list of all items + stored in a multi-column MCV list, and returns the + following columns: + + + + + + Name + Type + Description + + + + + + index + int + index of the item in the MCV list + + + values + text[] + values stored in the MCV item + + + nulls + boolean[] + flags identifying NULL values + + + frequency + double precision + frequency of this MCV item + + + + + + + + The pg_mcv_list_items function can be used like this: + + +SELECT m.* FROM pg_statistic_ext, + pg_mcv_list_items(stxmcv) m WHERE stxname = 'stts'; + + + Values of the pg_mcv_list can be obtained only from the + pg_statistic.stxmcv column. 
+ + + + + diff --git a/doc/src/sgml/planstats.sgml b/doc/src/sgml/planstats.sgml new file mode 100644 index ef643ad..de8ef16 --- a/doc/src/sgml/planstats.sgml +++ b/doc/src/sgml/planstats.sgml @@ -455,7 +455,7 @@ rows = (outer_cardinality * inner_cardin multivariate - + Functional Dependencies @@ -540,7 +540,7 @@ EXPLAIN (ANALYZE, TIMING OFF) SELECT * F - + Multivariate N-Distinct Counts @@ -585,6 +585,116 @@ EXPLAIN (ANALYZE, TIMING OFF) SELECT COU + + + MCV lists + + + As explained in , functional + dependencies are a very cheap and efficient type of statistics, but their + main limitation is their global nature (only tracking dependencies at + the column level, not between individual column values). + + + + This section introduces a multivariate variant of MCV + (most-common values) lists, a straight-forward extension of the per-column + statistics described in . These + statistics address the limitation by storing individual values, but they + are naturally more expensive, both in terms of storage and planning time. + + + + Let's look at the query from + again, but this time with a MCV list created on the + same set of columns (be sure to drop the functional dependencies, to + make sure the planner uses the newly created statistics). + + +DROP STATISTICS stts; +CREATE STATISTICS stts2 (mcv) ON a, b FROM t; +ANALYZE t; +EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM t WHERE a = 1 AND b = 1; + QUERY PLAN +------------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..195.00 rows=100 width=8) (actual rows=100 loops=1) + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 9900 + + + The estimate is as accurate as with the functional dependencies, mostly + thanks to the table being fairly small and having a simple distribution + with a low number of distinct values. Before looking at the second query, + which was not handled by functional dependencies particularly well, + let's inspect the MCV list a bit. 
+ + + + Inspecting the MCV list is possible using the + pg_mcv_list_items set-returning function. + +SELECT m.* FROM pg_statistic_ext, + pg_mcv_list_items(stxmcv) m WHERE stxname = 'stts2'; + index | values | nulls | frequency +-------+---------+-------+----------- + 0 | {0,0} | {f,f} | 0.01 + 1 | {1,1} | {f,f} | 0.01 + 2 | {2,2} | {f,f} | 0.01 +... + 49 | {49,49} | {f,f} | 0.01 + 50 | {50,0} | {f,f} | 0.01 +... + 97 | {97,47} | {f,f} | 0.01 + 98 | {98,48} | {f,f} | 0.01 + 99 | {99,49} | {f,f} | 0.01 +(100 rows) + + + This confirms there are 100 distinct combinations in the two columns, + and all of them are about equally likely (1% frequency for each one). + Had there been any null values in either of the columns, this would be + identified in the nulls column. + + + + When estimating the selectivity, the planner applies all the conditions + on items in the MCV list, and then sums the frequencies + of the matching ones. See mcv_clauselist_selectivity + in src/backend/statistics/mcv.c for details. + + + + Compared to functional dependencies, MCV lists have two + major advantages. Firstly, the list stores actual values, making it possible + to decide which combinations are compatible. + + +EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM t WHERE a = 1 AND b = 10; + QUERY PLAN +--------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..195.00 rows=1 width=8) (actual rows=0 loops=1) + Filter: ((a = 1) AND (b = 10)) + Rows Removed by Filter: 10000 + + + Secondly, MCV lists handle a wider range of clause types, + not just equality clauses like functional dependencies. 
See for example the + example range query, presented earlier: + + +EXPLAIN (ANALYZE, TIMING OFF) SELECT * FROM t WHERE a <= 49 AND b > 49; + QUERY PLAN +--------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..195.00 rows=1 width=8) (actual rows=0 loops=1) + Filter: ((a <= 49) AND (b > 49)) + Rows Removed by Filter: 10000 + + + + + + diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml new file mode 100644 index 539f5bd..fcbfa56 --- a/doc/src/sgml/ref/create_statistics.sgml +++ b/doc/src/sgml/ref/create_statistics.sgml @@ -83,7 +83,8 @@ CREATE STATISTICS [ IF NOT EXISTS ] ndistinct, which enables n-distinct statistics, and dependencies, which enables functional - dependency statistics. + dependency statistics, and mcv which enables + most-common values lists. If this clause is omitted, all supported statistics kinds are included in the statistics object. For more information, see @@ -164,6 +165,31 @@ EXPLAIN ANALYZE SELECT * FROM t1 WHERE ( conditions are redundant and does not underestimate the row count. 
+ + Create table t2 with two perfectly correlated columns + (containing identical data), and a MCV list on those columns: + + +CREATE TABLE t2 ( + a int, + b int +); + +INSERT INTO t2 SELECT mod(i,100), mod(i,100) + FROM generate_series(1,1000000) s(i); + +CREATE STATISTICS s2 WITH (mcv) ON (a, b) FROM t2; + +ANALYZE t2; + +-- valid combination (found in MCV) +EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 1); + +-- invalid combination (not found in MCV) +EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 2); + + + diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c new file mode 100644 index 3e148f0..92456dc --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -1769,12 +1769,6 @@ static void compute_scalar_stats(VacAttr double totalrows); static int compare_scalars(const void *a, const void *b, void *arg); static int compare_mcvs(const void *a, const void *b); -static int analyze_mcv_list(int *mcv_counts, - int num_mcv, - double stadistinct, - double stanullfrac, - int samplerows, - double totalrows); /* @@ -2869,7 +2863,7 @@ compare_mcvs(const void *a, const void * * number that are significantly more common than the values not in the list, * and which are therefore deemed worth storing in the table's MCV list. 
*/ -static int +int analyze_mcv_list(int *mcv_counts, int num_mcv, double stadistinct, diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c new file mode 100644 index 3bb0d24..903d815 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -70,11 +70,12 @@ CreateStatistics(CreateStatsStmt *stmt) Oid relid; ObjectAddress parentobject, myself; - Datum types[2]; /* one for each possible type of statistic */ + Datum types[3]; /* one for each possible type of statistic */ int ntypes; ArrayType *stxkind; bool build_ndistinct; bool build_dependencies; + bool build_mcv; bool requested_type = false; int i; ListCell *cell; @@ -269,6 +270,7 @@ CreateStatistics(CreateStatsStmt *stmt) */ build_ndistinct = false; build_dependencies = false; + build_mcv = false; foreach(cell, stmt->stat_types) { char *type = strVal((Value *) lfirst(cell)); @@ -283,6 +285,11 @@ CreateStatistics(CreateStatsStmt *stmt) build_dependencies = true; requested_type = true; } + else if (strcmp(type, "mcv") == 0) + { + build_mcv = true; + requested_type = true; + } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -294,6 +301,7 @@ CreateStatistics(CreateStatsStmt *stmt) { build_ndistinct = true; build_dependencies = true; + build_mcv = true; } /* construct the char array of enabled statistic types */ @@ -302,6 +310,8 @@ CreateStatistics(CreateStatsStmt *stmt) types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT); if (build_dependencies) types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES); + if (build_mcv) + types[ntypes++] = CharGetDatum(STATS_EXT_MCV); Assert(ntypes > 0 && ntypes <= lengthof(types)); stxkind = construct_array(types, ntypes, CHAROID, 1, true, 'c'); @@ -320,6 +330,7 @@ CreateStatistics(CreateStatsStmt *stmt) /* no statistics built yet */ nulls[Anum_pg_statistic_ext_stxndistinct - 1] = true; nulls[Anum_pg_statistic_ext_stxdependencies - 1] = true; + nulls[Anum_pg_statistic_ext_stxmcv - 1] = true; /* insert it into pg_statistic_ext */ 
statrel = heap_open(StatisticExtRelationId, RowExclusiveLock); @@ -415,23 +426,97 @@ RemoveStatisticsById(Oid statsOid) * null until the next ANALYZE. (Note that the type change hasn't actually * happened yet, so one option that's *not* on the table is to recompute * immediately.) + * + * For both ndistinct and functional-dependencies stats, the on-disk + * representation is independent of the source column data types, and it is + * plausible to assume that the old statistic values will still be good for + * the new column contents. (Obviously, if the ALTER COLUMN TYPE has a USING + * expression that substantially alters the semantic meaning of the column + * values, this assumption could fail. But that seems like a corner case + * that doesn't justify zapping the stats in common cases.) + * + * For MCV lists that's not the case, as those statistics store the datums + * internally. In this case we simply reset the statistics value to NULL. */ void UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum, Oid oldColumnType, Oid newColumnType) { + Form_pg_statistic_ext staForm; + HeapTuple stup, + oldtup; + int i; + + /* Do we need to reset anything? */ + bool attribute_referenced; + bool reset_stats = false; + + Relation rel; + + Datum values[Natts_pg_statistic_ext]; + bool nulls[Natts_pg_statistic_ext]; + bool replaces[Natts_pg_statistic_ext]; + + oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid)); + if (!oldtup) + elog(ERROR, "cache lookup failed for statistics object %u", statsOid); + staForm = (Form_pg_statistic_ext) GETSTRUCT(oldtup); + /* - * Currently, we don't actually need to do anything here. For both - * ndistinct and functional-dependencies stats, the on-disk representation - * is independent of the source column data types, and it is plausible to - * assume that the old statistic values will still be good for the new - * column contents. 
(Obviously, if the ALTER COLUMN TYPE has a USING - * expression that substantially alters the semantic meaning of the column - * values, this assumption could fail. But that seems like a corner case - * that doesn't justify zapping the stats in common cases.) - * - * Future types of extended stats will likely require us to work harder. + * If the modified attribute is not referenced by this statistic, we + * can simply leave the statistics alone. + */ + attribute_referenced = false; + for (i = 0; i < staForm->stxkeys.dim1; i++) + if (attnum == staForm->stxkeys.values[i]) + attribute_referenced = true; + + /* + * We can also leave the record as it is if there are no statistics + * including the datum values, like for example MCV lists. */ + if (statext_is_kind_built(oldtup, STATS_EXT_MCV)) + reset_stats = true; + + /* + * If we can leave the statistics as it is, just do minimal cleanup + * and we're done. + */ + if (!attribute_referenced && reset_stats) + { + ReleaseSysCache(oldtup); + return; + } + + /* + * OK, we need to reset some statistics. So let's build the new tuple, + * replacing the affected statistics types with NULL. 
+ */ + memset(nulls, 0, Natts_pg_statistic_ext * sizeof(bool)); + memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool)); + memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum)); + + if (statext_is_kind_built(oldtup, STATS_EXT_MCV)) + { + replaces[Anum_pg_statistic_ext_stxmcv - 1] = true; + nulls[Anum_pg_statistic_ext_stxmcv - 1] = true; + } + + rel = heap_open(StatisticExtRelationId, RowExclusiveLock); + + /* replace the old tuple */ + stup = heap_modify_tuple(oldtup, + RelationGetDescr(rel), + values, + nulls, + replaces); + + ReleaseSysCache(oldtup); + CatalogTupleUpdate(rel, &stup->t_self, stup); + + heap_freetuple(stup); + + heap_close(rel, RowExclusiveLock); } /* diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c new file mode 100644 index f471794..fd09ad3 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -105,9 +105,6 @@ clauselist_selectivity(PlannerInfo *root Selectivity s1 = 1.0; RelOptInfo *rel; Bitmapset *estimatedclauses = NULL; - RangeQueryClause *rqlist = NULL; - ListCell *l; - int listidx; /* * If there's exactly one clause, just go directly to @@ -125,6 +122,25 @@ clauselist_selectivity(PlannerInfo *root if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL) { /* + * Estimate selectivity on any clauses applicable by stats tracking + * actual values first, then apply functional dependencies on the + * remaining clauses. The reasoning for this particular order is that + * the more complex stats can track more complex correlations between + * the attributes, and may be considered more reliable. + * + * For example MCV list can give us an exact selectivity for values in + * two columns, while functional dependencies can only provide + * information about overall strength of the dependency. + * + * 'estimatedclauses' is a bitmap of 0-based list positions of clauses + * used that way, so that we can ignore them later (not to estimate + * them twice). 
+ */ + s1 *= statext_clauselist_selectivity(root, clauses, varRelid, + jointype, sjinfo, rel, + &estimatedclauses); + + /* * Perform selectivity estimations on any clauses found applicable by * dependencies_clauselist_selectivity. 'estimatedclauses' will be * filled with the 0-based list positions of clauses used that way, so @@ -133,17 +149,72 @@ clauselist_selectivity(PlannerInfo *root s1 *= dependencies_clauselist_selectivity(root, clauses, varRelid, jointype, sjinfo, rel, &estimatedclauses); - - /* - * This would be the place to apply any other types of extended - * statistics selectivity estimations for remaining clauses. - */ } /* * Apply normal selectivity estimates for remaining clauses. We'll be * careful to skip any clauses which were already estimated above. - * + */ + return s1 * clauselist_selectivity_simple(root, clauses, varRelid, + jointype, sjinfo, + estimatedclauses); +} + +/* + * clauselist_selectivity_simple - + * Compute the selectivity of an implicitly-ANDed list of boolean + * expression clauses. The list can be empty, in which case 1.0 + * must be returned. List elements may be either RestrictInfos + * or bare expression clauses --- the former is preferred since + * it allows caching of results. + * + * See clause_selectivity() for the meaning of the additional parameters. + * + * Our basic approach is to take the product of the selectivities of the + * subclauses. However, that's only right if the subclauses have independent + * probabilities, and in reality they are often NOT independent. So, + * we want to be smarter where we can. + * + * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses + * are recognized as possible range query components if they are restriction + * opclauses whose operators have scalarltsel or a related function as their + * restriction selectivity estimator. We pair up clauses of this form that + * refer to the same variable. 
An unpairable clause of this kind is simply + * multiplied into the selectivity product in the normal way. But when we + * find a pair, we know that the selectivities represent the relative + * positions of the low and high bounds within the column's range, so instead + * of figuring the selectivity as hisel * losel, we can figure it as hisel + + * losel - 1. (To visualize this, see that hisel is the fraction of the range + * below the high bound, while losel is the fraction above the low bound; so + * hisel can be interpreted directly as a 0..1 value but we need to convert + * losel to 1-losel before interpreting it as a value. Then the available + * range is 1-losel to hisel. However, this calculation double-excludes + * nulls, so really we need hisel + losel + null_frac - 1.) + * + * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation + * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation + * yields an impossible (negative) result. + * + * A free side-effect is that we can recognize redundant inequalities such + * as "x < 4 AND x < 5"; only the tighter constraint will be counted. + * + * Of course this is all very dependent on the behavior of the inequality + * selectivity functions; perhaps some day we can generalize the approach. + */ +Selectivity +clauselist_selectivity_simple(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + Bitmapset *estimatedclauses) +{ + Selectivity s1 = 1.0; + RangeQueryClause *rqlist = NULL; + ListCell *l; + int listidx; + + /* * Anything that doesn't look like a potential rangequery clause gets * multiplied into s1 and forgotten. Anything that does gets inserted into * an rqlist entry. 
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c new file mode 100644 index 8369e3a..0112450 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1363,6 +1363,18 @@ get_relation_statistics(RelOptInfo *rel, stainfos = lcons(info, stainfos); } + if (statext_is_kind_built(htup, STATS_EXT_MCV)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->rel = rel; + info->kind = STATS_EXT_MCV; + info->keys = bms_copy(keys); + + stainfos = lcons(info, stainfos); + } + ReleaseSysCache(htup); bms_free(keys); } diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c new file mode 100644 index 656b1b5..2987712 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1681,6 +1681,8 @@ generateClonedExtStatsStmt(RangeVar *hea stat_types = lappend(stat_types, makeString("ndistinct")); else if (enabled[i] == STATS_EXT_DEPENDENCIES) stat_types = lappend(stat_types, makeString("dependencies")); + else if (enabled[i] == STATS_EXT_MCV) + stat_types = lappend(stat_types, makeString("mcv")); else elog(ERROR, "unrecognized statistics kind %c", enabled[i]); } diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile new file mode 100644 index 3404e45..d281526 --- a/src/backend/statistics/Makefile +++ b/src/backend/statistics/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/statistics top_builddir = ../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = extended_stats.o dependencies.o mvdistinct.o +OBJS = extended_stats.o dependencies.o mcv.o mvdistinct.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/statistics/README b/src/backend/statistics/README new file mode 100644 index a8f00a5..8f153a9 --- a/src/backend/statistics/README +++ b/src/backend/statistics/README @@ -18,6 +18,8 @@ There are currently two kinds of extende (b) soft functional dependencies (README.dependencies) + (c) MCV lists (README.mcv) + Compatible clause types ----------------------- @@ -26,6 +28,8 @@ Each type of statistics may be used to e (a) functional dependencies - equality clauses (AND), possibly IS NULL + (b) MCV lists - equality and inequality clauses (AND, OR, NOT), IS NULL + Currently, only OpExprs in the form Var op Const, or Const op Var are supported, however it's feasible to expand the code later to also estimate the selectivities on clauses such as Var op Var. diff --git a/src/backend/statistics/README.mcv b/src/backend/statistics/README.mcv new file mode 100644 index ...2910eca --- a/src/backend/statistics/README.mcv +++ b/src/backend/statistics/README.mcv @@ -0,0 +1,140 @@ +MCV lists +========= + +Multivariate MCV (most-common values) lists are a straightforward extension of +regular MCV list, tracking most frequent combinations of values for a group of +attributes. + +This works particularly well for columns with a small number of distinct values, +as the list may include all the combinations and approximate the distribution +very accurately. + +For columns with large number of distinct values (e.g. those with continuous +domains), the list will only track the most frequent combinations. If the +distribution is mostly uniform (all combinations about equally frequent), the +MCV list will be empty. + +Estimates of some clauses (e.g. equality) based on MCV lists are more accurate +than when using histograms. 
+ +Also, MCV lists don't necessarily require sorting of the values (the fact that +we use sorting when building them is implementation detail), but even more +importantly the ordering is not built into the approximation (while histograms +are built on ordering). So MCV lists work well even for attributes where the +ordering of the data type is disconnected from the meaning of the data. For +example we know how to sort strings, but it's unlikely to make much sense for +city names (or other label-like attributes). + + +Selectivity estimation +---------------------- + +The estimation, implemented in clauselist_mv_selectivity_mcvlist(), is quite +simple in principle - we need to identify MCV items matching all the clauses +and sum frequencies of all those items. + +Currently MCV lists support estimation of the following clause types: + + (a) equality clauses WHERE (a = 1) AND (b = 2) + (b) inequality clauses WHERE (a < 1) AND (b >= 2) + (c) NULL clauses WHERE (a IS NULL) AND (b IS NOT NULL) + (d) OR clauses WHERE (a < 1) OR (b >= 2) + +It's possible to add support for additional clauses, for example: + + (e) multi-var clauses WHERE (a > b) + +and possibly others. These are tasks for the future, not yet implemented. + + +Estimating equality clauses +--------------------------- + +When computing selectivity estimate for equality clauses + + (a = 1) AND (b = 2) + +we can do this estimate pretty exactly assuming that two conditions are met: + + (1) there's an equality condition on all attributes of the statistic + + (2) we find a matching item in the MCV list + +In this case we know the MCV item represents all tuples matching the clauses, +and the selectivity estimate is complete (i.e. we don't need to perform +estimation using the histogram). This is what we call 'full match'. + +When only (1) holds, but there's no matching MCV item, we don't know whether +there are no such rows or just are not very frequent. 
We can however use the +frequency of the least frequent MCV item as an upper bound for the selectivity. + +For a combination of equality conditions (not full-match case) we can clamp the +selectivity by the minimum of selectivities for each condition. For example if +we know the number of distinct values for each column, we can use 1/ndistinct +as a per-column estimate. Or rather 1/ndistinct + selectivity derived from the +MCV list. + +We should also probably only use the 'residual ndistinct' by excluding the items +included in the MCV list (and also residual frequency): + + f = (1.0 - sum(MCV frequencies)) / (ndistinct - ndistinct(MCV list)) + +but it's worth pointing out the ndistinct values are multi-variate for the +columns referenced by the equality conditions. + +Note: Only the "full match" limit is currently implemented. + + +Hashed MCV (not yet implemented) +-------------------------------- + +Regular MCV lists have to include actual values for each item, so if those items +are large the list may be quite large. This is especially true for multi-variate +MCV lists, although the current implementation partially mitigates this by +de-duplicating the values before storing them on disk. + +It's possible to only store hashes (32-bit values) instead of the actual values, +significantly reducing the space requirements. Obviously, this would only make +the MCV lists useful for estimating equality conditions (assuming the 32-bit +hashes make the collisions rare enough). + +This might also complicate matching the columns to available stats. + + +TODO Consider implementing hashed MCV list, storing just 32-bit hashes instead + of the actual values. This type of MCV list will be useful only for + estimating equality clauses, and will reduce space requirements for large + varlena types (in such cases we usually only want equality anyway). 
+ +TODO Currently there's no logic to consider building only a MCV list (and not + building the histogram at all), except for doing this decision manually in + ADD STATISTICS. + + +Inspecting the MCV list +----------------------- + +Inspecting the regular (per-attribute) MCV lists is trivial, as it's enough +to select the columns from pg_stats. The data is encoded as anyarrays, and +all the items have the same data type, so anyarray provides a simple way to +get a text representation. + +With multivariate MCV lists the columns may use different data types, making +it impossible to use anyarrays. It might be possible to produce similar +array-like representation, but that would complicate further processing and +analysis of the MCV list. + +So instead the MCV lists are stored in a custom data type (pg_mcv_list), +which however makes it more difficult to inspect the contents. To make that +easier, there's a SRF returning detailed information about the MCV lists. + + SELECT * FROM pg_mcv_list_items(stxmcv); + +It accepts one parameter - a pg_mcv_list value (which can only be obtained +from pg_statistic_ext catalog, to defend against malicious input), and +returns these columns: + + - item index (0, ..., (nitems-1)) + - values (string array) + - nulls only (boolean array) + - frequency (double precision) diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c new file mode 100644 index 3b4a09b..af63dc9 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -201,14 +201,11 @@ static double dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs) { - int i, - j; - int nvalues = numrows * k; + int i; MultiSortSupport mss; SortItem *items; - Datum *values; - bool *isnull; int *attnums; + int *attnums_dep; /* counters valid within a group */ int group_size = 0; @@ -223,26 +220,16 @@ dependency_degree(int numrows, HeapTuple /* sort info for all 
attributes columns */ mss = multi_sort_init(k); - /* data for the sort */ - items = (SortItem *) palloc(numrows * sizeof(SortItem)); - values = (Datum *) palloc(sizeof(Datum) * nvalues); - isnull = (bool *) palloc(sizeof(bool) * nvalues); - - /* fix the pointers to values/isnull */ - for (i = 0; i < numrows; i++) - { - items[i].values = &values[i * k]; - items[i].isnull = &isnull[i * k]; - } - /* - * Transform the bms into an array, to make accessing i-th member easier. + * Transform the bms into an array, to make accessing i-th member easier, + * and then construct a filtered version with only attnums referenced + * by the dependency we validate. */ - attnums = (int *) palloc(sizeof(int) * bms_num_members(attrs)); - i = 0; - j = -1; - while ((j = bms_next_member(attrs, j)) >= 0) - attnums[i++] = j; + attnums = build_attnums(attrs); + + attnums_dep = (int *)palloc(k * sizeof(int)); + for (i = 0; i < k; i++) + attnums_dep[i] = attnums[dependency[i]]; /* * Verify the dependency (a,b,...)->z, using a rather simple algorithm: @@ -254,7 +241,7 @@ dependency_degree(int numrows, HeapTuple * (c) for each group count different values in the last column */ - /* prepare the sort function for the first dimension, and SortItem array */ + /* prepare the sort function for the dimensions */ for (i = 0; i < k; i++) { VacAttrStats *colstat = stats[dependency[i]]; @@ -267,19 +254,16 @@ dependency_degree(int numrows, HeapTuple /* prepare the sort function for this dimension */ multi_sort_add_dimension(mss, i, type->lt_opr); - - /* accumulate all the data for both columns into an array and sort it */ - for (j = 0; j < numrows; j++) - { - items[j].values[i] = - heap_getattr(rows[j], attnums[dependency[i]], - stats[i]->tupDesc, &items[j].isnull[i]); - } } - /* sort the items so that we can detect the groups */ - qsort_arg((void *) items, numrows, sizeof(SortItem), - multi_sort_compare, mss); + /* + * build an array of SortItem(s) sorted using the multi-sort support + * + * XXX This 
relies on all stats entries pointing to the same tuple + * descriptor. Not sure if that might not be the case. + */ + items = build_sorted_items(numrows, rows, stats[0]->tupDesc, + mss, k, attnums_dep); /* * Walk through the sorted array, split it into rows according to the @@ -322,9 +306,9 @@ dependency_degree(int numrows, HeapTuple } pfree(items); - pfree(values); - pfree(isnull); pfree(mss); + pfree(attnums); + pfree(attnums_dep); /* Compute the 'degree of validity' as (supporting/total). */ return (n_supporting_rows * 1.0 / numrows); @@ -351,7 +335,6 @@ statext_dependencies_build(int numrows, VacAttrStats **stats) { int i, - j, k; int numattrs; int *attnums; @@ -364,11 +347,7 @@ statext_dependencies_build(int numrows, /* * Transform the bms into an array, to make accessing i-th member easier. */ - attnums = palloc(sizeof(int) * bms_num_members(attrs)); - i = 0; - j = -1; - while ((j = bms_next_member(attrs, j)) >= 0) - attnums[i++] = j; + attnums = build_attnums(attrs); Assert(numattrs >= 2); @@ -915,9 +894,9 @@ find_strongest_dependency(StatisticExtIn * using functional dependency statistics, or 1.0 if no useful functional * dependency statistic exists. * - * 'estimatedclauses' is an output argument that gets a bit set corresponding - * to the (zero-based) list index of each clause that is included in the - * estimated selectivity. + * 'estimatedclauses' is an input/output argument that gets a bit set + * corresponding to the (zero-based) list index of each clause that is included + * in the estimated selectivity. * * Given equality clauses on attributes (a,b) we find the strongest dependency * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected @@ -952,9 +931,6 @@ dependencies_clauselist_selectivity(Plan AttrNumber *list_attnums; int listidx; - /* initialize output argument */ - *estimatedclauses = NULL; - /* check if there's any stats that might be useful for us. 
*/ if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES)) return 1.0; @@ -969,6 +945,9 @@ dependencies_clauselist_selectivity(Plan * the attnums for each clause in a list which we'll reference later so we * don't need to repeat the same work again. We'll also keep track of all * attnums seen. + * + * We also skip clauses that we already estimated using different types of + * statistics (we treat them as incompatible). */ listidx = 0; foreach(l, clauses) @@ -976,7 +955,8 @@ dependencies_clauselist_selectivity(Plan Node *clause = (Node *) lfirst(l); AttrNumber attnum; - if (dependency_is_compatible_clause(clause, rel->relid, &attnum)) + if ((dependency_is_compatible_clause(clause, rel->relid, &attnum)) && + (!bms_is_member(listidx, *estimatedclauses))) { list_attnums[listidx] = attnum; clauses_attnums = bms_add_member(clauses_attnums, attnum); @@ -1046,8 +1026,7 @@ dependencies_clauselist_selectivity(Plan /* * Skip incompatible clauses, and ones we've already estimated on. */ - if (list_attnums[listidx] == InvalidAttrNumber || - bms_is_member(listidx, *estimatedclauses)) + if (list_attnums[listidx] == InvalidAttrNumber) continue; /* diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c new file mode 100644 index 2df5f7d..5f71a7d --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -16,6 +16,8 @@ */ #include "postgres.h" +#include + #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" @@ -23,6 +25,8 @@ #include "catalog/pg_collation.h" #include "catalog/pg_statistic_ext.h" #include "nodes/relation.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" #include "postmaster/autovacuum.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" @@ -31,6 +35,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/selfuncs.h" #include "utils/syscache.h" @@ -53,7 +58,7 @@ 
static VacAttrStats **lookup_var_attr_st int nvacatts, VacAttrStats **vacatts); static void statext_store(Relation pg_stext, Oid relid, MVNDistinct *ndistinct, MVDependencies *dependencies, - VacAttrStats **stats); + MCVList * mcvlist, VacAttrStats **stats); /* @@ -87,6 +92,7 @@ BuildRelationExtStatistics(Relation oner StatExtEntry *stat = (StatExtEntry *) lfirst(lc); MVNDistinct *ndistinct = NULL; MVDependencies *dependencies = NULL; + MCVList *mcv = NULL; VacAttrStats **stats; ListCell *lc2; @@ -124,10 +130,13 @@ BuildRelationExtStatistics(Relation oner else if (t == STATS_EXT_DEPENDENCIES) dependencies = statext_dependencies_build(numrows, rows, stat->columns, stats); + else if (t == STATS_EXT_MCV) + mcv = statext_mcv_build(numrows, rows, stat->columns, stats, + totalrows); } /* store the statistics in the catalog */ - statext_store(pg_stext, stat->statOid, ndistinct, dependencies, stats); + statext_store(pg_stext, stat->statOid, ndistinct, dependencies, mcv, stats); } heap_close(pg_stext, RowExclusiveLock); @@ -155,6 +164,10 @@ statext_is_kind_built(HeapTuple htup, ch attnum = Anum_pg_statistic_ext_stxdependencies; break; + case STATS_EXT_MCV: + attnum = Anum_pg_statistic_ext_stxmcv; + break; + default: elog(ERROR, "unexpected statistics type requested: %d", type); } @@ -219,7 +232,8 @@ fetch_statentries_for_relation(Relation for (i = 0; i < ARR_DIMS(arr)[0]; i++) { Assert((enabled[i] == STATS_EXT_NDISTINCT) || - (enabled[i] == STATS_EXT_DEPENDENCIES)); + (enabled[i] == STATS_EXT_DEPENDENCIES) || + (enabled[i] == STATS_EXT_MCV)); entry->types = lappend_int(entry->types, (int) enabled[i]); } @@ -294,7 +308,7 @@ lookup_var_attr_stats(Relation rel, Bitm static void statext_store(Relation pg_stext, Oid statOid, MVNDistinct *ndistinct, MVDependencies *dependencies, - VacAttrStats **stats) + MCVList * mcv, VacAttrStats **stats) { HeapTuple stup, oldtup; @@ -325,9 +339,18 @@ statext_store(Relation pg_stext, Oid sta values[Anum_pg_statistic_ext_stxdependencies - 1] = 
PointerGetDatum(data); } + if (mcv != NULL) + { + bytea *data = statext_mcv_serialize(mcv, stats); + + nulls[Anum_pg_statistic_ext_stxmcv - 1] = (data == NULL); + values[Anum_pg_statistic_ext_stxmcv - 1] = PointerGetDatum(data); + } + /* always replace the value (either by bytea or NULL) */ replaces[Anum_pg_statistic_ext_stxndistinct - 1] = true; replaces[Anum_pg_statistic_ext_stxdependencies - 1] = true; + replaces[Anum_pg_statistic_ext_stxmcv - 1] = true; /* there should already be a pg_statistic_ext tuple */ oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); @@ -434,6 +457,137 @@ multi_sort_compare_dims(int start, int e return 0; } +int +compare_scalars_simple(const void *a, const void *b, void *arg) +{ + return compare_datums_simple(*(Datum *) a, + *(Datum *) b, + (SortSupport) arg); +} + +int +compare_datums_simple(Datum a, Datum b, SortSupport ssup) +{ + return ApplySortComparator(a, false, b, false, ssup); +} + +/* simple counterpart to qsort_arg */ +void * +bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, + int (*compar) (const void *, const void *, void *), + void *arg) +{ + size_t l, + u, + idx; + const void *p; + int comparison; + + l = 0; + u = nmemb; + while (l < u) + { + idx = (l + u) / 2; + p = (void *) (((const char *) base) + (idx * size)); + comparison = (*compar) (key, p, arg); + + if (comparison < 0) + u = idx; + else if (comparison > 0) + l = idx + 1; + else + return (void *) p; + } + + return NULL; +} + +int * +build_attnums(Bitmapset *attrs) +{ + int i, + j; + int numattrs = bms_num_members(attrs); + int *attnums; + + /* build attnums from the bitmapset */ + attnums = (int *) palloc(sizeof(int) * numattrs); + i = 0; + j = -1; + while ((j = bms_next_member(attrs, j)) >= 0) + attnums[i++] = j; + + return attnums; +} + +/* build_sorted_items + * build sorted array of SortItem with values from rows + * + * XXX All the memory is allocated in a single chunk, so that the caller + * can simply pfree the return 
value to release all of it. + */ +SortItem * +build_sorted_items(int numrows, HeapTuple *rows, TupleDesc tdesc, + MultiSortSupport mss, int numattrs, int *attnums) +{ + int i, + j, + len; + int nvalues = numrows * numattrs; + + /* + * We won't allocate the arrays for each item independently, but in one + * large chunk and then just set the pointers. This allows the caller to + * simply pfree the return value to release all the memory. + */ + SortItem *items; + Datum *values; + bool *isnull; + char *ptr; + + /* Compute the total amount of memory we need (both items and values). */ + len = numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool)); + + /* Allocate the memory and split it into the pieces. */ + ptr = palloc0(len); + + /* items to sort */ + items = (SortItem *) ptr; + ptr += numrows * sizeof(SortItem); + + /* values and null flags */ + values = (Datum *) ptr; + ptr += nvalues * sizeof(Datum); + + isnull = (bool *) ptr; + ptr += nvalues * sizeof(bool); + + /* make sure we consumed the whole buffer exactly */ + Assert((ptr - (char *) items) == len); + + /* fix the pointers to Datum and bool arrays */ + for (i = 0; i < numrows; i++) + { + items[i].values = &values[i * numattrs]; + items[i].isnull = &isnull[i * numattrs]; + + /* load the values/null flags from sample rows */ + for (j = 0; j < numattrs; j++) + { + items[i].values[j] = heap_getattr(rows[i], + attnums[j], /* attnum */ + tdesc, + &items[i].isnull[j]); /* isnull */ + } + } + + /* do the sort, using the multi-sort */ + qsort_arg((void *) items, numrows, sizeof(SortItem), + multi_sort_compare, mss); + + return items; +} + /* * has_stats_of_kind * Check whether the list contains statistic of a given kind @@ -464,7 +618,7 @@ has_stats_of_kind(List *stats, char requ * object referencing the most of the requested attributes, breaking ties * in favor of objects with fewer keys overall.
* - * XXX if multiple statistics objects tie on both criteria, then which object + * XXX If multiple statistics objects tie on both criteria, then which object * is chosen depends on the order that they appear in the stats list. Perhaps * further tiebreakers are needed. */ @@ -514,3 +668,335 @@ choose_best_statistics(List *stats, Bitm return best_match; } + +int +bms_member_index(Bitmapset *keys, AttrNumber varattno) +{ + int i, + j; + + i = -1; + j = 0; + while (((i = bms_next_member(keys, i)) >= 0) && (i < varattno)) + j += 1; + + return j; +} + +/* The Duj1 estimator (already used in analyze.c). */ +double +estimate_ndistinct(double totalrows, int numrows, int d, int f1) +{ + double numer, + denom, + ndistinct; + + numer = (double) numrows * (double) d; + + denom = (double) (numrows - f1) + + (double) f1 * (double) numrows / totalrows; + + ndistinct = numer / denom; + + /* Clamp to sane range in case of roundoff error */ + if (ndistinct < (double) d) + ndistinct = (double) d; + + if (ndistinct > totalrows) + ndistinct = totalrows; + + return floor(ndistinct + 0.5); +} + +/* + * statext_is_compatible_clause_internal + * Does the heavy lifting of actually inspecting the clauses for + * statext_is_compatible_clause. + */ +static bool +statext_is_compatible_clause_internal(Node *clause, Index relid, Bitmapset **attnums) +{ + /* We only support plain Vars for now */ + if (IsA(clause, Var)) + { + Var *var = (Var *) clause; + + /* Ensure var is from the correct relation */ + if (var->varno != relid) + return false; + + /* we also better ensure the Var is from the current level */ + if (var->varlevelsup > 0) + return false; + + /* Also skip system attributes (we don't allow stats on those). 
*/ + if (!AttrNumberIsForUserDefinedAttr(var->varattno)) + return false; + + *attnums = bms_add_member(*attnums, var->varattno); + + return true; + } + + /* Var = Const */ + if (is_opclause(clause)) + { + OpExpr *expr = (OpExpr *) clause; + Var *var; + bool varonleft = true; + bool ok; + + /* Only expressions with two arguments are considered compatible. */ + if (list_length(expr->args) != 2) + return false; + + /* see if it actually has the right shape (one Var, one Const) */ + ok = (NumRelids((Node *) expr) == 1) && + (is_pseudo_constant_clause(lsecond(expr->args)) || + (varonleft = false, + is_pseudo_constant_clause(linitial(expr->args)))); + + /* unsupported structure (two variables or so) */ + if (!ok) + return false; + + /* + * If it's not one of the supported operators ("=", "<", ">", etc.), + * just ignore the clause, as it's not compatible with MCV lists. + * + * This uses the function for estimating selectivity, not the operator + * directly (a bit awkward, but well ...). + */ + if ((get_oprrest(expr->opno) != F_EQSEL) && + (get_oprrest(expr->opno) != F_NEQSEL) && + (get_oprrest(expr->opno) != F_SCALARLTSEL) && + (get_oprrest(expr->opno) != F_SCALARLESEL) && + (get_oprrest(expr->opno) != F_SCALARGTSEL) && + (get_oprrest(expr->opno) != F_SCALARGESEL)) + return false; + + var = (varonleft) ? linitial(expr->args) : lsecond(expr->args); + + return statext_is_compatible_clause_internal((Node *) var, relid, attnums); + } + + /* NOT/AND/OR clause */ + if (or_clause(clause) || + and_clause(clause) || + not_clause(clause)) + { + /* + * AND/OR/NOT-clauses are supported if all sub-clauses are supported + * + * Perhaps we could improve this by handling mixed cases, when some of + * the clauses are supported and some are not. Selectivity for the + * supported subclauses would be computed using extended statistics, + * and the remaining clauses would be estimated using the traditional + * algorithm (product of selectivities).
+ * + * It however seems overly complex, and in a way we already do that + * because if we reject the whole clause as unsupported here, it will + * be eventually passed to clauselist_selectivity() which does exactly + * this (split into supported/unsupported clauses etc). + */ + BoolExpr *expr = (BoolExpr *) clause; + ListCell *lc; + Bitmapset *clause_attnums = NULL; + + foreach(lc, expr->args) + { + /* + * Had we found incompatible clause in the arguments, treat the + * whole clause as incompatible. + */ + if (!statext_is_compatible_clause_internal((Node *) lfirst(lc), + relid, &clause_attnums)) + return false; + } + + /* + * Otherwise the clause is compatible, and we need to merge the + * attnums into the main bitmapset. + */ + *attnums = bms_join(*attnums, clause_attnums); + + return true; + } + + /* Var IS NULL */ + if (IsA(clause, NullTest)) + { + NullTest *nt = (NullTest *) clause; + + /* + * Only simple (Var IS NULL) expressions supported for now. Maybe we + * could use examine_variable to fix this? + */ + if (!IsA(nt->arg, Var)) + return false; + + return statext_is_compatible_clause_internal((Node *) (nt->arg), relid, attnums); + } + + return false; +} + +/* + * statext_is_compatible_clause + * Determines if the clause is compatible with MCV lists. + * + * Only OpExprs with two arguments using an equality operator are supported. + * When returning True attnum is set to the attribute number of the Var within + * the supported clause. + * + * Currently we only support Var = Const, or Const = Var. It may be possible + * to expand on this later. + */ +static bool +statext_is_compatible_clause(Node *clause, Index relid, Bitmapset **attnums) +{ + RestrictInfo *rinfo = (RestrictInfo *) clause; + + if (!IsA(rinfo, RestrictInfo)) + return false; + + /* Pseudoconstants are not really interesting here. 
*/ + if (rinfo->pseudoconstant) + return false; + + /* clauses referencing multiple varnos are incompatible */ + if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) + return false; + + return statext_is_compatible_clause_internal((Node *) rinfo->clause, + relid, attnums); +} + +/* + * statext_clauselist_selectivity + * Estimate clauses using the best multi-column statistics. + * + * Selects the best multi-column statistic on a table (measured by a number + * of attributes extracted from the clauses and covered by the statistic). + * + * XXX If we were to use multiple statistics, this is where it would happen. + */ +Selectivity +statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, + JoinType jointype, SpecialJoinInfo *sjinfo, + RelOptInfo *rel, Bitmapset **estimatedclauses) +{ + ListCell *l; + Bitmapset *clauses_attnums = NULL; + Bitmapset **list_attnums; + int listidx; + StatisticExtInfo *stat; + List *stat_clauses; + Selectivity simple_sel, + mcv_sel, + mcv_basesel, + mcv_totalsel, + other_sel, + sel; + + /* we're interested in MCV lists */ + int types = STATS_EXT_MCV; + + /* check if there's any stats that might be useful for us. */ + if (!has_stats_of_kind(rel->statlist, types)) + return (Selectivity) 1.0; + + list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) * + list_length(clauses)); + + /* + * Pre-process the clauses list to extract the attnums seen in each item. + * We need to determine if there's any clauses which will be useful for + * dependency selectivity estimations. Along the way we'll record all of + * the attnums for each clause in a list which we'll reference later so we + * don't need to repeat the same work again. We'll also keep track of all + * attnums seen. + * + * We also skip clauses that we already estimated using different types of + * statistics (we treat them as incompatible). 
+ * + * XXX Currently, the estimated clauses are always empty because the extra + * statistics are applied before functional dependencies. Once we decide + * to apply multiple statistics, this may change. + */ + listidx = 0; + foreach(l, clauses) + { + Node *clause = (Node *) lfirst(l); + Bitmapset *attnums = NULL; + + if ((statext_is_compatible_clause(clause, rel->relid, &attnums)) && + (!bms_is_member(listidx, *estimatedclauses))) + { + list_attnums[listidx] = attnums; + clauses_attnums = bms_add_members(clauses_attnums, attnums); + } + else + list_attnums[listidx] = NULL; + + listidx++; + } + + /* We need at least two attributes for MCV lists. */ + if (bms_num_members(clauses_attnums) < 2) + return 1.0; + + /* find the best suited statistics object for these attnums */ + stat = choose_best_statistics(rel->statlist, clauses_attnums, types); + + /* if no matching stats could be found then we've nothing to do */ + if (!stat) + return (Selectivity) 1.0; + + /* We only understand MCV lists for now. */ + Assert(stat->kind == STATS_EXT_MCV); + + /* now filter the clauses to be estimated using the selected MCV */ + stat_clauses = NIL; + + listidx = 0; + foreach(l, clauses) + { + /* + * If the clause is compatible with the selected statistics, mark it + * as estimated and add it to the list to estimate. + */ + if ((list_attnums[listidx] != NULL) && + (bms_is_subset(list_attnums[listidx], stat->keys))) + { + stat_clauses = lappend(stat_clauses, (Node *) lfirst(l)); + *estimatedclauses = bms_add_member(*estimatedclauses, listidx); + } + + listidx++; + } + + /* + * Use the MCV stats to improve upon the simple selectivity of these + * clauses. 
+ */ + simple_sel = clauselist_selectivity_simple(root, stat_clauses, varRelid, + jointype, sjinfo, NULL); + + mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses, varRelid, + jointype, sjinfo, rel, + &mcv_basesel, &mcv_totalsel); + + /* Estimated selectivity of values not covered by MCV matches */ + other_sel = simple_sel - mcv_basesel; + CLAMP_PROBABILITY(other_sel); + + if (other_sel > 1.0 - mcv_totalsel) + other_sel = 1.0 - mcv_totalsel; + + /* Overall selectivity */ + sel = mcv_sel + other_sel; + CLAMP_PROBABILITY(sel); + + return sel; +} diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c new file mode 100644 index ...1136a95 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -0,0 +1,1644 @@ +/*------------------------------------------------------------------------- + * + * mcv.c + * POSTGRES multivariate MCV lists + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/mcv.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_statistic_ext.h" +#include "fmgr.h" +#include "funcapi.h" +#include "optimizer/clauses.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" +#include "utils/builtins.h" +#include "utils/bytea.h" +#include "utils/fmgroids.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + +#include + +/* + * Computes size of a serialized MCV item, depending on the number of + * dimensions (columns) the statistic is defined on. The datum values are + * stored in a separate array (deduplicated, to minimize the size), and + * so the serialized items only store uint16 indexes into that array. 
+ * + * Each serialized item store (in this order): + * + * - indexes to values (ndim * sizeof(uint16)) + * - null flags (ndim * sizeof(bool)) + * - frequency (sizeof(double)) + * - base_frequency (sizeof(double)) + * + * So in total each MCV item requires this many bytes: + * + * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double) + */ +#define ITEM_SIZE(ndims) \ + (ndims * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)) + +/* + * Macros for convenient access to parts of a serialized MCV item. + */ +#define ITEM_INDEXES(item) ((uint16*)item) +#define ITEM_NULLS(item,ndims) ((bool*)(ITEM_INDEXES(item) + ndims)) +#define ITEM_FREQUENCY(item,ndims) ((double*)(ITEM_NULLS(item,ndims) + ndims)) +#define ITEM_BASE_FREQUENCY(item,ndims) ((double*)(ITEM_FREQUENCY(item,ndims) + 1)) + + +static MultiSortSupport build_mss(VacAttrStats **stats, Bitmapset *attrs); + +static SortItem *build_distinct_groups(int numrows, SortItem *items, + MultiSortSupport mss, int *ndistinct); + +static int count_distinct_groups(int numrows, SortItem *items, + MultiSortSupport mss); + +/* + * Builds MCV list from the set of sampled rows. 
+ * + * The algorithm is quite simple: + * + * (1) sort the data (default collation, '<' for the data type) + * + * (2) count distinct groups, decide how many to keep + * + * (3) build the MCV list using the threshold determined in (2) + * + * (4) remove rows represented by the MCV from the sample + * + */ +MCVList * +statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs, + VacAttrStats **stats, double totalrows) +{ + int i, + j, + k; + int numattrs = bms_num_members(attrs); + int ndistinct; + int nitems; + double stadistinct; + int *mcv_counts; + int f1; + + int *attnums = build_attnums(attrs); + + MCVList *mcvlist = NULL; + + /* comparator for all the columns */ + MultiSortSupport mss = build_mss(stats, attrs); + + /* sort the rows */ + SortItem *items = build_sorted_items(numrows, rows, stats[0]->tupDesc, + mss, numattrs, attnums); + + /* transform the sorted rows into groups (sorted by frequency) */ + SortItem *groups = build_distinct_groups(numrows, items, mss, &ndistinct); + + /* + * Maximum number of MCV items to store, based on the attribute with the + * largest stats target + */ + nitems = stats[0]->attr->attstattarget; + for (i = 1; i < numattrs; i++) + { + if (stats[i]->attr->attstattarget > nitems) + nitems = stats[i]->attr->attstattarget; + } + if (nitems > ndistinct) + nitems = ndistinct; + + /* + * Decide how many items to keep in the MCV list. We simply use the same + * algorithm as for per-column MCV lists, to keep it consistent. + * + * One difference is that we do not have a multi-column stanullfrac, and + * we simply treat it as a special item in the MCV list (if it makes it). + * We could compute and store it, of course, but we may have statistics + * on more than two columns, so we'd probably want to store this for + * various combinations of columns - for K columns that'd be 2^K values. + * So we instead store those as items of the multi-column MCV list (if + * common enough).
+ * + * XXX Conceptually this is similar to the NULL-buckets of histograms. + */ + mcv_counts = (int *) palloc(sizeof(int) * nitems); + f1 = 0; + + for (i = 0; i < nitems; i++) + { + mcv_counts[i] = groups[i].count; + + /* count values that occur exactly once for the ndistinct estimate */ + if (groups[i].count == 1) + f1 += 1; + } + + stadistinct = estimate_ndistinct(totalrows, numrows, ndistinct, f1); + + nitems = analyze_mcv_list(mcv_counts, nitems, stadistinct, + 0.0, /* stanullfrac */ + numrows, totalrows); + + /* + * At this point we know the number of items for the MCV list. There might + * be none (for uniform distribution with many groups), and in that case + * there will be no MCV list. Otherwise construct the MCV list. + */ + if (nitems > 0) + { + /* + * Allocate the MCV list structure, set the global parameters. + */ + mcvlist = (MCVList *) palloc0(sizeof(MCVList)); + + mcvlist->magic = STATS_MCV_MAGIC; + mcvlist->type = STATS_MCV_TYPE_BASIC; + mcvlist->ndimensions = numattrs; + mcvlist->nitems = nitems; + + /* store info about data type OIDs */ + i = 0; + j = -1; + while ((j = bms_next_member(attrs, j)) >= 0) + { + VacAttrStats *colstat = stats[i]; + + mcvlist->types[i] = colstat->attrtypid; + i++; + } + + /* + * Preallocate Datum/isnull arrays (not as a single chunk, as we will + * pass the result outside and thus it needs to be easy to pfree(). + * + * XXX On second thought, we're the only ones dealing with MCV lists, + * so we might allocate everything as a single chunk to reduce palloc + * overhead (chunk headers, etc.) without significant risk. Not sure + * it's worth it, though, as we're not re-building stats very often. 
+ */ + mcvlist->items = (MCVItem * *) palloc0(sizeof(MCVItem *) * nitems); + + for (i = 0; i < nitems; i++) + { + mcvlist->items[i] = (MCVItem *) palloc(sizeof(MCVItem)); + mcvlist->items[i]->values = (Datum *) palloc(sizeof(Datum) * numattrs); + mcvlist->items[i]->isnull = (bool *) palloc(sizeof(bool) * numattrs); + } + + /* Copy the first chunk of groups into the result. */ + for (i = 0; i < nitems; i++) + { + /* just pointer to the proper place in the list */ + MCVItem *item = mcvlist->items[i]; + + /* copy values from the _previous_ group (last item of) */ + memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs); + memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs); + + /* groups should be sorted by frequency in descending order */ + Assert((i == 0) || (groups[i - 1].count >= groups[i].count)); + + /* group frequency */ + item->frequency = (double) groups[i].count / numrows; + + /* base frequency, if the attributes were independent */ + item->base_frequency = 1.0; + for (j = 0; j < numattrs; j++) + { + int count = 0; + + for (k = 0; k < ndistinct; k++) + { + if (multi_sort_compare_dim(j, &groups[i], &groups[k], mss) == 0) + count += groups[k].count; + } + + item->base_frequency *= (double) count / numrows; + } + } + } + + pfree(items); + pfree(groups); + pfree(mcv_counts); + + return mcvlist; +} + +/* + * build_mss + * build MultiSortSupport for the attributes passed in attrs + */ +static MultiSortSupport +build_mss(VacAttrStats **stats, Bitmapset *attrs) +{ + int i, + j; + int numattrs = bms_num_members(attrs); + + /* Sort by multiple columns (using array of SortSupport) */ + MultiSortSupport mss = multi_sort_init(numattrs); + + /* prepare the sort functions for all the attributes */ + i = 0; + j = -1; + while ((j = bms_next_member(attrs, j)) >= 0) + { + VacAttrStats *colstat = stats[i]; + TypeCacheEntry *type; + + type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) /* shouldn't happen */ + 
elog(ERROR, "cache lookup failed for ordering operator for type %u", + colstat->attrtypid); + + multi_sort_add_dimension(mss, i, type->lt_opr); + i++; + } + + return mss; +} + +/* + * count_distinct_groups + * count distinct combinations of SortItems in the array + * + * The array is assumed to be sorted according to the MultiSortSupport. + */ +static int +count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss) +{ + int i; + int ndistinct; + + ndistinct = 1; + for (i = 1; i < numrows; i++) + { + /* make sure the array really is sorted */ + Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0); + + if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0) + ndistinct += 1; + } + + return ndistinct; +} + +/* + * compare_sort_item_count + * comparator for sorting items by count (frequencies) in descending order + */ +static int +compare_sort_item_count(const void *a, const void *b) +{ + SortItem *ia = (SortItem *) a; + SortItem *ib = (SortItem *) b; + + if (ia->count == ib->count) + return 0; + else if (ia->count > ib->count) + return -1; + + return 1; +} + +/* + * build_distinct_groups + * build array of SortItems for distinct groups and counts matching items + * + * The input array is assumed to be sorted + */ +static SortItem * +build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, + int *ndistinct) +{ + int i, + j; + int ngroups = count_distinct_groups(numrows, items, mss); + + SortItem *groups = (SortItem *) palloc0(ngroups * sizeof(SortItem)); + + j = 0; + groups[0] = items[0]; + groups[0].count = 1; + + for (i = 1; i < numrows; i++) + { + /* Assume sorted in ascending order. */ + Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0); + + /* New distinct group detected. */ + if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0) + groups[++j] = items[i]; + + groups[j].count++; + } + + /* Sort the distinct groups by frequency (in descending order). 
*/ + pg_qsort((void *) groups, ngroups, sizeof(SortItem), + compare_sort_item_count); + + *ndistinct = ngroups; + return groups; +} + + +/* + * statext_mcv_load + * Load the MCV list for the indicated pg_statistic_ext tuple + */ +MCVList * +statext_mcv_load(Oid mvoid) +{ + bool isnull = false; + Datum mcvlist; + HeapTuple htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid)); + + if (!HeapTupleIsValid(htup)) + elog(ERROR, "cache lookup failed for statistics object %u", mvoid); + + mcvlist = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_stxmcv, &isnull); + + ReleaseSysCache(htup); + + if (isnull) + return NULL; + + return statext_mcv_deserialize(DatumGetByteaP(mcvlist)); +} + + +/* + * Serialize MCV list into a bytea value. + * + * The basic algorithm is simple: + * + * (1) perform deduplication (for each attribute separately) + * (a) collect all (non-NULL) attribute values from all MCV items + * (b) sort the data (using 'lt' from VacAttrStats) + * (c) remove duplicate values from the array + * + * (2) serialize the arrays into a bytea value + * + * (3) process all MCV list items + * (a) replace values with indexes into the arrays + * + * Each attribute has to be processed separately, as we may be mixing different + * datatypes, with different sort operators, etc. + * + * We use uint16 values for the indexes in step (3), as we currently don't allow + * more than 8k MCV items anyway, although that's a mostly arbitrary limit. We might + * increase this to 65k and still fit into uint16. Furthermore, this limit is on + * the number of distinct values per column, and we usually have few of those + * (and various combinations of them for the MCV list). So uint16 seems fine. + * + * We don't really expect the serialization to save as much space as for + * histograms, as we are not doing any bucket splits (which is the source + * of high redundancy in histograms).
+ * + * TODO: Consider packing boolean flags (NULL) for each item into a single char + * (or a longer type) instead of using an array of bool items. + */ +bytea * +statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) +{ + int i; + int dim; + int ndims = mcvlist->ndimensions; + int itemsize = ITEM_SIZE(ndims); + + SortSupport ssup; + DimensionInfo *info; + + Size total_length; + + /* allocate the item just once */ + char *item = palloc0(itemsize); + + /* serialized items (indexes into arrays, etc.) */ + bytea *output; + char *data = NULL; + + /* values per dimension (and number of non-NULL values) */ + Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims); + int *counts = (int *) palloc0(sizeof(int) * ndims); + + /* + * We'll include some rudimentary information about the attributes (type + * length, etc.), so that we don't have to look them up while + * deserializing the MCV list. + * + * XXX Maybe this is not a great idea? Or maybe we should actually copy + * more fields, e.g. typeid, which would allow us to display the MCV list + * using only the serialized representation (currently we have to fetch + * this info from the relation). + */ + info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims); + + /* sort support data for all attributes included in the MCV list */ + ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims); + + /* collect and deduplicate values for each dimension (attribute) */ + for (dim = 0; dim < ndims; dim++) + { + int ndistinct; + TypeCacheEntry *typentry; + + /* + * Lookup the LT operator (can't get it from stats extra_data, as we + * don't know how to interpret that - scalar vs. array etc.). 
+ */ + typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR); + + /* copy important info about the data type (length, by-value) */ + info[dim].typlen = stats[dim]->attrtype->typlen; + info[dim].typbyval = stats[dim]->attrtype->typbyval; + + /* allocate space for values in the attribute and collect them */ + values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems); + + for (i = 0; i < mcvlist->nitems; i++) + { + /* skip NULL values - we don't need to deduplicate those */ + if (mcvlist->items[i]->isnull[dim]) + continue; + + values[dim][counts[dim]] = mcvlist->items[i]->values[dim]; + counts[dim] += 1; + } + + /* if there are just NULL values in this dimension, we're done */ + if (counts[dim] == 0) + continue; + + /* sort and deduplicate the data */ + ssup[dim].ssup_cxt = CurrentMemoryContext; + ssup[dim].ssup_collation = DEFAULT_COLLATION_OID; + ssup[dim].ssup_nulls_first = false; + + PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]); + + qsort_arg(values[dim], counts[dim], sizeof(Datum), + compare_scalars_simple, &ssup[dim]); + + /* + * Walk through the array and eliminate duplicate values, but keep the + * ordering (so that we can do bsearch later). We know there's at + * least one item as (counts[dim] != 0), so we can skip the first + * element. + */ + ndistinct = 1; /* number of distinct values */ + for (i = 1; i < counts[dim]; i++) + { + /* expect sorted array */ + Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0); + + /* if the value is the same as the previous one, we can skip it */ + if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim])) + continue; + + values[dim][ndistinct] = values[dim][i]; + ndistinct += 1; + } + + /* we must not exceed UINT16_MAX, as we use uint16 indexes */ + Assert(ndistinct <= UINT16_MAX); + + /* + * Store additional info about the attribute - number of deduplicated + * values, and also size of the serialized data. 
For fixed-length data + * types this is trivial to compute, for varwidth types we need to + * actually walk the array and sum the sizes. + */ + info[dim].nvalues = ndistinct; + + if (info[dim].typlen > 0) /* fixed-length data types */ + info[dim].nbytes = info[dim].nvalues * info[dim].typlen; + else if (info[dim].typlen == -1) /* varlena */ + { + info[dim].nbytes = 0; + for (i = 0; i < info[dim].nvalues; i++) + info[dim].nbytes += VARSIZE_ANY(values[dim][i]); + } + else if (info[dim].typlen == -2) /* cstring */ + { + info[dim].nbytes = 0; + for (i = 0; i < info[dim].nvalues; i++) + info[dim].nbytes += strlen(DatumGetPointer(values[dim][i])); + } + + /* we know (count>0) so there must be some data */ + Assert(info[dim].nbytes > 0); + } + + /* + * Now we can finally compute how much space we'll actually need for the + * whole serialized MCV list, as it contains these fields: + * + * - length (4B) for varlena - magic (4B) - type (4B) - ndimensions (4B) - + * nitems (4B) - info (ndim * sizeof(DimensionInfo) - arrays of values for + * each dimension - serialized items (nitems * itemsize) + * + * So the 'header' size is 20B + ndim * sizeof(DimensionInfo) and then we + * will place all the data (values + indexes). We'll however use offsetof + * and sizeof to compute sizes of the structs. + */ + total_length = (sizeof(int32) + offsetof(MCVList, items) + + (ndims * sizeof(DimensionInfo)) + + mcvlist->nitems * itemsize); + + /* add space for the arrays of deduplicated values */ + for (i = 0; i < ndims; i++) + total_length += info[i].nbytes; + + /* + * Enforce arbitrary limit of 1MB on the size of the serialized MCV list. + * This is meant as a protection against someone building MCV list on long + * values (e.g. text documents). + * + * XXX Should we enforce arbitrary limits like this one? Maybe it's not + * even necessary, as long values are usually unique and so won't make it + * into the MCV list in the first place. In the end, we have a 1GB limit + * on bytea values. 
+ */
+ if (total_length > (1024 * 1024))
+ elog(ERROR, "serialized MCV list exceeds 1MB (%ld)", total_length);
+
+ /* allocate space for the serialized MCV list, set header fields */
+ output = (bytea *) palloc0(total_length);
+ SET_VARSIZE(output, total_length);
+
+ /* 'data' points to the current position in the output buffer */
+ data = VARDATA(output);
+
+ /* MCV list header (number of items, ...) */
+ memcpy(data, mcvlist, offsetof(MCVList, items));
+ data += offsetof(MCVList, items);
+
+ /* information about the attributes */
+ memcpy(data, info, sizeof(DimensionInfo) * ndims);
+ data += sizeof(DimensionInfo) * ndims;
+
+ /* Copy the deduplicated values for all attributes to the output. */
+ for (dim = 0; dim < ndims; dim++)
+ {
+#ifdef USE_ASSERT_CHECKING
+ /* remember the starting point for Asserts later */
+ char *tmp = data;
+#endif
+ for (i = 0; i < info[dim].nvalues; i++)
+ {
+ Datum v = values[dim][i];
+
+ if (info[dim].typbyval) /* passed by value */
+ {
+ memcpy(data, &v, info[dim].typlen);
+ data += info[dim].typlen;
+ }
+ else if (info[dim].typlen > 0) /* passed by reference */
+ {
+ memcpy(data, DatumGetPointer(v), info[dim].typlen);
+ data += info[dim].typlen;
+ }
+ else if (info[dim].typlen == -1) /* varlena */
+ {
+ memcpy(data, DatumGetPointer(v), VARSIZE_ANY(v));
+ data += VARSIZE_ANY(v);
+ }
+ else if (info[dim].typlen == -2) /* cstring */
+ {
+ memcpy(data, DatumGetPointer(v), strlen(DatumGetPointer(v)) + 1);
+ data += strlen(DatumGetPointer(v)) + 1; /* terminator */
+ }
+
+ /* no underflows or overflows */
+ Assert((data > tmp) && ((data - tmp) <= info[dim].nbytes));
+ }
+
+ /*
+ * check we got exactly the amount of data we expected for this
+ * dimension
+ */
+ Assert((data - tmp) == info[dim].nbytes);
+ }
+
+ /* Serialize the items, with uint16 indexes instead of the values. 
*/ + for (i = 0; i < mcvlist->nitems; i++) + { + MCVItem *mcvitem = mcvlist->items[i]; + + /* don't write beyond the allocated space */ + Assert(data <= (char *) output + total_length - itemsize); + + /* reset the item (we only allocate it once and reuse it) */ + memset(item, 0, itemsize); + + for (dim = 0; dim < ndims; dim++) + { + Datum *v = NULL; + + /* do the lookup only for non-NULL values */ + if (mcvlist->items[i]->isnull[dim]) + continue; + + v = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim], + info[dim].nvalues, sizeof(Datum), + compare_scalars_simple, &ssup[dim]); + + Assert(v != NULL); /* serialization or deduplication error */ + + /* compute index within the array */ + ITEM_INDEXES(item)[dim] = (v - values[dim]); + + /* check the index is within expected bounds */ + Assert(ITEM_INDEXES(item)[dim] >= 0); + Assert(ITEM_INDEXES(item)[dim] < info[dim].nvalues); + } + + /* copy NULL and frequency flags into the item */ + memcpy(ITEM_NULLS(item, ndims), mcvitem->isnull, sizeof(bool) * ndims); + memcpy(ITEM_FREQUENCY(item, ndims), &mcvitem->frequency, sizeof(double)); + memcpy(ITEM_BASE_FREQUENCY(item, ndims), &mcvitem->base_frequency, sizeof(double)); + + /* copy the serialized item into the array */ + memcpy(data, item, itemsize); + + data += itemsize; + } + + /* at this point we expect to match the total_length exactly */ + Assert((data - (char *) output) == total_length); + + pfree(item); + pfree(values); + pfree(counts); + + return output; +} + +/* + * Reads serialized MCV list into MCVList structure. + * + * Unlike with histograms, we deserialize the MCV list fully (i.e. we don't + * keep the deduplicated arrays and pointers into them), as we don't expect + * there to be a lot of duplicate values. But perhaps that's not true and we + * should keep the MCV in serialized form too. + * + * XXX See how much memory we could save by keeping the deduplicated version + * (both for typical and corner cases with few distinct values but many items). 
+ */
+MCVList *
+statext_mcv_deserialize(bytea *data)
+{
+ int dim,
+ i;
+ Size expected_size;
+ MCVList *mcvlist;
+ char *tmp;
+
+ int ndims,
+ nitems,
+ itemsize;
+ DimensionInfo *info = NULL;
+ Datum **values = NULL;
+
+ /* local allocation buffer (used only for deserialization) */
+ int bufflen;
+ char *buff;
+ char *ptr;
+
+ /* buffer used for the result */
+ int rbufflen;
+ char *rbuff;
+ char *rptr;
+
+ if (data == NULL)
+ return NULL;
+
+ /*
+ * We can't possibly deserialize a MCV list if there's not even a complete
+ * header.
+ */
+ if (VARSIZE_ANY_EXHDR(data) < offsetof(MCVList, items))
+ elog(ERROR, "invalid MCV size %ld (expected at least %ld)",
+ VARSIZE_ANY_EXHDR(data), offsetof(MCVList, items));
+
+ /* read the MCV list header */
+ mcvlist = (MCVList *) palloc0(sizeof(MCVList));
+
+ /* initialize pointer to the data part (skip the varlena header) */
+ tmp = VARDATA_ANY(data);
+
+ /* get the header and perform further sanity checks */
+ memcpy(mcvlist, tmp, offsetof(MCVList, items));
+ tmp += offsetof(MCVList, items);
+
+ if (mcvlist->magic != STATS_MCV_MAGIC)
+ elog(ERROR, "invalid MCV magic %d (expected %d)",
+ mcvlist->magic, STATS_MCV_MAGIC);
+
+ if (mcvlist->type != STATS_MCV_TYPE_BASIC)
+ elog(ERROR, "invalid MCV type %d (expected %d)",
+ mcvlist->type, STATS_MCV_TYPE_BASIC);
+
+ if (mcvlist->ndimensions == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid zero-length dimension array in MCVList")));
+ else if (mcvlist->ndimensions > STATS_MAX_DIMENSIONS)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid length (%d) dimension array in MCVList",
+ mcvlist->ndimensions)));
+
+ if (mcvlist->nitems == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid zero-length item array in MCVList")));
+ else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid length (%d) item array in MCVList",
+ mcvlist->nitems)));
+
+ nitems 
= mcvlist->nitems; + ndims = mcvlist->ndimensions; + itemsize = ITEM_SIZE(ndims); + + /* + * Check amount of data including DimensionInfo for all dimensions and + * also the serialized items (including uint16 indexes). Also, walk + * through the dimension information and add it to the sum. + */ + expected_size = offsetof(MCVList, items) + + ndims * sizeof(DimensionInfo) + + (nitems * itemsize); + + /* + * Check that we have at least the dimension and info records, along with + * the items. We don't know the size of the serialized values yet. We need + * to do this check first, before accessing the dimension info. + */ + if (VARSIZE_ANY_EXHDR(data) < expected_size) + elog(ERROR, "invalid MCV size %ld (expected %ld)", + VARSIZE_ANY_EXHDR(data), expected_size); + + /* Now it's safe to access the dimension info. */ + info = (DimensionInfo *) (tmp); + tmp += ndims * sizeof(DimensionInfo); + + /* account for the value arrays */ + for (dim = 0; dim < ndims; dim++) + { + /* + * XXX I wonder if we can/should rely on asserts here. Maybe those + * checks should be done every time? + */ + Assert(info[dim].nvalues >= 0); + Assert(info[dim].nbytes >= 0); + + expected_size += info[dim].nbytes; + } + + /* + * Now we know the total expected MCV size, including all the pieces + * (header, dimension info. items and deduplicated data). So do the final + * check on size. + */ + if (VARSIZE_ANY_EXHDR(data) != expected_size) + elog(ERROR, "invalid MCV size %ld (expected %ld)", + VARSIZE_ANY_EXHDR(data), expected_size); + + /* + * Allocate one large chunk of memory for the intermediate data, needed + * only for deserializing the MCV list (and allocate densely to minimize + * the palloc overhead). + * + * Let's see how much space we'll actually need, and also include space + * for the array with pointers. + * + * We need an array of Datum pointers values for each dimension, so that + * we can easily translate the uint16 indexes. 
We also need a top-level + * array of pointers to those per-dimension arrays. + * + * For byval types with size matching sizeof(Datum) we can reuse the + * serialized array directly. + */ + bufflen = sizeof(Datum **) * ndims; /* space for top-level pointers */ + + for (dim = 0; dim < ndims; dim++) + { + /* for full-size byval types, we reuse the serialized value */ + if (!(info[dim].typbyval && info[dim].typlen == sizeof(Datum))) + bufflen += (sizeof(Datum) * info[dim].nvalues); + } + + buff = palloc0(bufflen); + ptr = buff; + + values = (Datum **) buff; + ptr += (sizeof(Datum *) * ndims); + + /* + * XXX This uses pointers to the original data array (the types not passed + * by value), so when someone frees the memory, e.g. by doing something + * like this: + * + * bytea * data = ... fetch the data from catalog ... + * + * MCVList mcvlist = deserialize_mcv_list(data); + * + * pfree(data); + * + * then 'mcvlist' references the freed memory. Should copy the pieces. + */ + for (dim = 0; dim < ndims; dim++) + { +#ifdef USE_ASSERT_CHECKING + /* remember where data for this dimension starts */ + char *start = tmp; +#endif + if (info[dim].typbyval) + { + /* passed by value / size matches Datum - just reuse the array */ + if (info[dim].typlen == sizeof(Datum)) + { + values[dim] = (Datum *) tmp; + tmp += info[dim].nbytes; + + /* no overflow of input array */ + Assert(tmp <= start + info[dim].nbytes); + } + else + { + values[dim] = (Datum *) ptr; + ptr += (sizeof(Datum) * info[dim].nvalues); + + for (i = 0; i < info[dim].nvalues; i++) + { + /* just point into the array */ + memcpy(&values[dim][i], tmp, info[dim].typlen); + tmp += info[dim].typlen; + + /* no overflow of input array */ + Assert(tmp <= start + info[dim].nbytes); + } + } + } + else + { + /* all the other types need a chunk of the buffer */ + values[dim] = (Datum *) ptr; + ptr += (sizeof(Datum) * info[dim].nvalues); + + /* passed by reference, but fixed length (name, tid, ...) 
*/ + if (info[dim].typlen > 0) + { + for (i = 0; i < info[dim].nvalues; i++) + { + /* just point into the array */ + values[dim][i] = PointerGetDatum(tmp); + tmp += info[dim].typlen; + + /* no overflow of input array */ + Assert(tmp <= start + info[dim].nbytes); + } + } + else if (info[dim].typlen == -1) + { + /* varlena */ + for (i = 0; i < info[dim].nvalues; i++) + { + /* just point into the array */ + values[dim][i] = PointerGetDatum(tmp); + tmp += VARSIZE_ANY(tmp); + + /* no overflow of input array */ + Assert(tmp <= start + info[dim].nbytes); + } + } + else if (info[dim].typlen == -2) + { + /* cstring */ + for (i = 0; i < info[dim].nvalues; i++) + { + /* just point into the array */ + values[dim][i] = PointerGetDatum(tmp); + tmp += (strlen(tmp) + 1); /* don't forget the \0 */ + + /* no overflow of input array */ + Assert(tmp <= start + info[dim].nbytes); + } + } + } + + /* check we consumed the serialized data for this dimension exactly */ + Assert((tmp - start) == info[dim].nbytes); + } + + /* we should have exhausted the buffer exactly */ + Assert((ptr - buff) == bufflen); + + /* allocate space for all the MCV items in a single piece */ + rbufflen = (sizeof(MCVItem *) + sizeof(MCVItem) + + sizeof(Datum) * ndims + sizeof(bool) * ndims) * nitems; + + rbuff = palloc0(rbufflen); + rptr = rbuff; + + mcvlist->items = (MCVItem * *) rbuff; + rptr += (sizeof(MCVItem *) * nitems); + + /* deserialize the MCV items and translate the indexes to Datums */ + for (i = 0; i < nitems; i++) + { + uint16 *indexes = NULL; + MCVItem *item = (MCVItem *) rptr; + + rptr += (sizeof(MCVItem)); + + item->values = (Datum *) rptr; + rptr += (sizeof(Datum) * ndims); + + item->isnull = (bool *) rptr; + rptr += (sizeof(bool) * ndims); + + /* just point to the right place */ + indexes = ITEM_INDEXES(tmp); + + memcpy(item->isnull, ITEM_NULLS(tmp, ndims), sizeof(bool) * ndims); + memcpy(&item->frequency, ITEM_FREQUENCY(tmp, ndims), sizeof(double)); + memcpy(&item->base_frequency, 
ITEM_BASE_FREQUENCY(tmp, ndims), sizeof(double));
+
+ /* translate the values */
+ for (dim = 0; dim < ndims; dim++)
+ if (!item->isnull[dim])
+ item->values[dim] = values[dim][indexes[dim]];
+
+ mcvlist->items[i] = item;
+
+ tmp += ITEM_SIZE(ndims);
+
+ /* check we're not overflowing the input */
+ Assert(tmp <= (char *) data + VARSIZE_ANY(data));
+ }
+
+ /* check that we processed all the data */
+ Assert(tmp == (char *) data + VARSIZE_ANY(data));
+
+ /* release the temporary buffer */
+ pfree(buff);
+
+ return mcvlist;
+}
+
+/*
+ * SRF with details about items of an MCV list:
+ *
+ * - item ID (0...nitems)
+ * - values (string array)
+ * - nulls only (boolean array)
+ * - frequency (double precision)
+ * - base_frequency (double precision)
+ *
+ * The input is the serialized MCV list (pg_mcv_list value), and there are
+ * no rows returned if the MCV list is NULL.
+ */
+PG_FUNCTION_INFO_V1(pg_stats_ext_mcvlist_items);
+
+Datum
+pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ int call_cntr;
+ int max_calls;
+ TupleDesc tupdesc;
+ AttInMetadata *attinmeta;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+ MCVList *mcvlist;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /* switch to memory context appropriate for multiple function calls */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
+
+ funcctx->user_fctx = mcvlist;
+
+ /* total number of tuples to be returned */
+ funcctx->max_calls = 0;
+ if (funcctx->user_fctx != NULL)
+ funcctx->max_calls = mcvlist->nitems;
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("function returning record called in context "
+ "that cannot accept type 
record"))); + + /* build metadata needed later to produce tuples from raw C-strings */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + funcctx->attinmeta = attinmeta; + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + call_cntr = funcctx->call_cntr; + max_calls = funcctx->max_calls; + attinmeta = funcctx->attinmeta; + + if (call_cntr < max_calls) /* do when there is more left to send */ + { + char **values; + HeapTuple tuple; + Datum result; + + char *buff = palloc0(1024); + char *format; + + int i; + + Oid *outfuncs; + FmgrInfo *fmgrinfo; + + MCVList *mcvlist; + MCVItem *item; + + mcvlist = (MCVList *) funcctx->user_fctx; + + Assert(call_cntr < mcvlist->nitems); + + item = mcvlist->items[call_cntr]; + + /* + * Prepare a values array for building the returned tuple. This should + * be an array of C strings which will be processed later by the type + * input functions. + */ + values = (char **) palloc(5 * sizeof(char *)); + + values[0] = (char *) palloc(64 * sizeof(char)); + + /* arrays */ + values[1] = (char *) palloc0(1024 * sizeof(char)); + values[2] = (char *) palloc0(1024 * sizeof(char)); + + /* frequency */ + values[3] = (char *) palloc(64 * sizeof(char)); + + /* base frequency */ + values[4] = (char *) palloc(64 * sizeof(char)); + + outfuncs = (Oid *) palloc0(sizeof(Oid) * mcvlist->ndimensions); + fmgrinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * mcvlist->ndimensions); + + for (i = 0; i < mcvlist->ndimensions; i++) + { + bool isvarlena; + + getTypeOutputInfo(mcvlist->types[i], &outfuncs[i], &isvarlena); + + fmgr_info(outfuncs[i], &fmgrinfo[i]); + } + + snprintf(values[0], 64, "%d", call_cntr); /* item ID */ + + for (i = 0; i < mcvlist->ndimensions; i++) + { + Datum val, + valout; + + format = "%s, %s"; + if (i == 0) + format = "{%s%s"; + else if (i == mcvlist->ndimensions - 1) + format = "%s, %s}"; + + if (item->isnull[i]) + valout = CStringGetDatum("NULL"); + else + { + val 
= item->values[i]; + valout = FunctionCall1(&fmgrinfo[i], val); + } + + snprintf(buff, 1024, format, values[1], DatumGetPointer(valout)); + strncpy(values[1], buff, 1023); + buff[0] = '\0'; + + snprintf(buff, 1024, format, values[2], item->isnull[i] ? "t" : "f"); + strncpy(values[2], buff, 1023); + buff[0] = '\0'; + } + + snprintf(values[3], 64, "%f", item->frequency); /* frequency */ + snprintf(values[4], 64, "%f", item->base_frequency); /* base frequency */ + + /* build a tuple */ + tuple = BuildTupleFromCStrings(attinmeta, values); + + /* make the tuple into a datum */ + result = HeapTupleGetDatum(tuple); + + /* clean up (this is not really necessary) */ + pfree(values[0]); + pfree(values[1]); + pfree(values[2]); + pfree(values[3]); + pfree(values[4]); + + pfree(values); + + SRF_RETURN_NEXT(funcctx, result); + } + else /* do when there is no more left */ + { + SRF_RETURN_DONE(funcctx); + } +} + +/* + * pg_mcv_list_in - input routine for type pg_mcv_list. + * + * pg_mcv_list is real enough to be a table column, but it has no operations + * of its own, and disallows input too + */ +Datum +pg_mcv_list_in(PG_FUNCTION_ARGS) +{ + /* + * pg_mcv_list stores the data in binary form and parsing text input is + * not needed, so disallow this. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_mcv_list"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + + +/* + * pg_mcv_list_out - output routine for type PG_MCV_LIST. + * + * MCV lists are serialized into a bytea value, so we simply call byteaout() + * to serialize the value into text. But it'd be nice to serialize that into + * a meaningful representation (e.g. for inspection by people). + * + * XXX This should probably return something meaningful, similar to what + * pg_dependencies_out does. Not sure how to deal with the deduplicated + * values, though - do we want to expand that or not? 
+ */ +Datum +pg_mcv_list_out(PG_FUNCTION_ARGS) +{ + return byteaout(fcinfo); +} + +/* + * pg_mcv_list_recv - binary input routine for type pg_mcv_list. + */ +Datum +pg_mcv_list_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_mcv_list"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_mcv_list_send - binary output routine for type pg_mcv_list. + * + * MCV lists are serialized in a bytea value (although the type is named + * differently), so let's just send that. + */ +Datum +pg_mcv_list_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + +/* + * mcv_update_match_bitmap + * Evaluate clauses using the MCV list, and update the match bitmap. + * + * A match bitmap keeps match/mismatch status for each MCV item, and we + * update it based on additional clauses. We also use it to skip items + * that can't possibly match (e.g. item marked as "mismatch" can't change + * to "match" when evaluating AND clause list). + * + * The function also returns a flag indicating whether there was an + * equality condition for all attributes, the minimum frequency in the MCV + * list, and a total MCV frequency (sum of frequencies for all items). + * + * XXX Currently the match bitmap uses a char for each MCV item, which is + * somewhat wasteful as we could do with just a single bit, thus reducing + * the size to ~1/8. It would also allow us to combine bitmaps simply using + * & and |, which should be faster than min/max. The bitmaps are fairly + * small, though (as we cap the MCV list size to 8k items). + */ +static void +mcv_update_match_bitmap(PlannerInfo *root, List *clauses, + Bitmapset *keys, MCVList * mcvlist, char *matches, + bool is_or) +{ + int i; + ListCell *l; + + /* The bitmap may be partially built. 
*/ + Assert(clauses != NIL); + Assert(list_length(clauses) >= 1); + Assert(mcvlist != NULL); + Assert(mcvlist->nitems > 0); + Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS); + + /* + * Loop through the list of clauses, and for each of them evaluate all the + * MCV items not yet eliminated by the preceding clauses. + */ + foreach(l, clauses) + { + Node *clause = (Node *) lfirst(l); + + /* if it's a RestrictInfo, then extract the clause */ + if (IsA(clause, RestrictInfo)) + clause = (Node *) ((RestrictInfo *) clause)->clause; + + /* + * Handle the various types of clauses - OpClause, NullTest and + * AND/OR/NOT + */ + if (is_opclause(clause)) + { + OpExpr *expr = (OpExpr *) clause; + bool varonleft = true; + bool ok; + FmgrInfo opproc; + + /* get procedure computing operator selectivity */ + RegProcedure oprrest = get_oprrest(expr->opno); + + fmgr_info(get_opcode(expr->opno), &opproc); + + ok = (NumRelids(clause) == 1) && + (is_pseudo_constant_clause(lsecond(expr->args)) || + (varonleft = false, + is_pseudo_constant_clause(linitial(expr->args)))); + + if (ok) + { + + FmgrInfo gtproc; + Var *var = (varonleft) ? linitial(expr->args) : lsecond(expr->args); + Const *cst = (varonleft) ? lsecond(expr->args) : linitial(expr->args); + bool isgt = (!varonleft); + + TypeCacheEntry *typecache + = lookup_type_cache(var->vartype, TYPECACHE_GT_OPR); + + /* match the attribute to a dimension of the statistic */ + int idx = bms_member_index(keys, var->varattno); + + fmgr_info(get_opcode(typecache->gt_opr), >proc); + + /* + * Walk through the MCV items and evaluate the current clause. + * We can skip items that were already ruled out, and + * terminate if there are no remaining MCV items that might + * possibly match. + */ + for (i = 0; i < mcvlist->nitems; i++) + { + bool mismatch = false; + MCVItem *item = mcvlist->items[i]; + + /* + * For AND-lists, we can also mark NULL items as 'no + * match' (and then skip them). For OR-lists this is not + * possible. 
+ */ + if ((!is_or) && item->isnull[idx]) + matches[i] = STATS_MATCH_NONE; + + /* skip MCV items that were already ruled out */ + if ((!is_or) && (matches[i] == STATS_MATCH_NONE)) + continue; + else if (is_or && (matches[i] == STATS_MATCH_FULL)) + continue; + + switch (oprrest) + { + case F_EQSEL: + case F_NEQSEL: + + /* + * We don't care about isgt in equality, because + * it does not matter whether it's (var op const) + * or (const op var). + */ + mismatch = !DatumGetBool(FunctionCall2Coll(&opproc, + DEFAULT_COLLATION_OID, + cst->constvalue, + item->values[idx])); + + break; + + case F_SCALARLTSEL: /* column < constant */ + case F_SCALARLESEL: /* column <= constant */ + case F_SCALARGTSEL: /* column > constant */ + case F_SCALARGESEL: /* column >= constant */ + + /* + * First check whether the constant is below the + * lower boundary (in that case we can skip the + * bucket, because there's no overlap). + */ + if (isgt) + mismatch = !DatumGetBool(FunctionCall2Coll(&opproc, + DEFAULT_COLLATION_OID, + cst->constvalue, + item->values[idx])); + else + mismatch = !DatumGetBool(FunctionCall2Coll(&opproc, + DEFAULT_COLLATION_OID, + item->values[idx], + cst->constvalue)); + + break; + } + + /* + * XXX The conditions on matches[i] are not needed, as we + * skip MCV items that can't become true/false, depending + * on the current flag. See beginning of the loop over MCV + * items. + */ + + if ((is_or) && (!mismatch)) + { + /* OR - was MATCH_NONE, but will be MATCH_FULL */ + matches[i] = STATS_MATCH_FULL; + continue; + } + else if ((!is_or) && mismatch) + { + /* AND - was MATC_FULL, but will be MATCH_NONE */ + matches[i] = STATS_MATCH_NONE; + continue; + } + + } + } + } + else if (IsA(clause, NullTest)) + { + NullTest *expr = (NullTest *) clause; + Var *var = (Var *) (expr->arg); + + /* match the attribute to a dimension of the statistic */ + int idx = bms_member_index(keys, var->varattno); + + /* + * Walk through the MCV items and evaluate the current clause. 
We + * can skip items that were already ruled out, and terminate if + * there are no remaining MCV items that might possibly match. + */ + for (i = 0; i < mcvlist->nitems; i++) + { + char match = STATS_MATCH_NONE; /* assume mismatch */ + MCVItem *item = mcvlist->items[i]; + + /* if the clause mismatches the MCV item, set it as MATCH_NONE */ + switch (expr->nulltesttype) + { + case IS_NULL: + match = (item->isnull[idx]) ? STATS_MATCH_FULL : match; + break; + + case IS_NOT_NULL: + match = (!item->isnull[idx]) ? STATS_MATCH_FULL : match; + break; + } + + /* now, update the match bitmap, depending on OR/AND type */ + if (is_or) + matches[i] = Max(matches[i], match); + else + matches[i] = Min(matches[i], match); + } + } + else if (or_clause(clause) || and_clause(clause)) + { + /* AND/OR clause, with all subclauses being compatible */ + + int i; + BoolExpr *bool_clause = ((BoolExpr *) clause); + List *bool_clauses = bool_clause->args; + + /* match/mismatch bitmap for each MCV item */ + char *bool_matches = NULL; + + Assert(bool_clauses != NIL); + Assert(list_length(bool_clauses) >= 2); + + /* by default none of the MCV items matches the clauses */ + bool_matches = palloc0(sizeof(char) * mcvlist->nitems); + + if (or_clause(clause)) + { + /* OR clauses assume nothing matches, initially */ + memset(bool_matches, STATS_MATCH_NONE, sizeof(char) * mcvlist->nitems); + } + else + { + /* AND clauses assume everything matches, initially */ + memset(bool_matches, STATS_MATCH_FULL, sizeof(char) * mcvlist->nitems); + } + + /* build the match bitmap for the OR-clauses */ + mcv_update_match_bitmap(root, bool_clauses, keys, + mcvlist, bool_matches, + or_clause(clause)); + + /* + * Merge the bitmap produced by mcv_update_match_bitmap into the + * current one. We need to consider if we're evaluating AND or OR + * condition when merging the results. + */ + for (i = 0; i < mcvlist->nitems; i++) + { + /* Is this OR or AND clause? 
*/ + if (is_or) + matches[i] = Max(matches[i], bool_matches[i]); + else + matches[i] = Min(matches[i], bool_matches[i]); + } + + pfree(bool_matches); + } + else if (not_clause(clause)) + { + /* NOT clause, with all subclauses compatible */ + + int i; + BoolExpr *not_clause = ((BoolExpr *) clause); + List *not_args = not_clause->args; + + /* match/mismatch bitmap for each MCV item */ + char *not_matches = NULL; + + Assert(not_args != NIL); + Assert(list_length(not_args) == 1); + + /* by default none of the MCV items matches the clauses */ + not_matches = palloc0(sizeof(char) * mcvlist->nitems); + + /* NOT clauses assume nothing matches, initially */ + memset(not_matches, STATS_MATCH_FULL, sizeof(char) * mcvlist->nitems); + + /* build the match bitmap for the NOT-clause */ + mcv_update_match_bitmap(root, not_args, keys, + mcvlist, not_matches, false); + + /* + * Merge the bitmap produced by mcv_update_match_bitmap into the + * current one. + */ + for (i = 0; i < mcvlist->nitems; i++) + { + /* + * When handling a NOT clause, we need to invert the result + * before merging it into the global result. + */ + if (not_matches[i] == STATS_MATCH_NONE) + not_matches[i] = STATS_MATCH_FULL; + else + not_matches[i] = STATS_MATCH_NONE; + + /* Is this OR or AND clause? */ + if (is_or) + matches[i] = Max(matches[i], not_matches[i]); + else + matches[i] = Min(matches[i], not_matches[i]); + } + + pfree(not_matches); + } + else if (IsA(clause, Var)) + { + /* Var (has to be a boolean Var, possibly from below NOT) */ + + Var *var = (Var *) (clause); + + /* match the attribute to a dimension of the statistic */ + int idx = bms_member_index(keys, var->varattno); + + Assert(var->vartype == BOOLOID); + + /* + * Walk through the MCV items and evaluate the current clause. We + * can skip items that were already ruled out, and terminate if + * there are no remaining MCV items that might possibly match. 
+ */ + for (i = 0; i < mcvlist->nitems; i++) + { + MCVItem *item = mcvlist->items[i]; + bool match = STATS_MATCH_NONE; + + /* if the item is NULL, it's a mismatch */ + if (!item->isnull[idx] && DatumGetBool(item->values[idx])) + match = STATS_MATCH_FULL; + + /* now, update the match bitmap, depending on OR/AND type */ + if (is_or) + matches[i] = Max(matches[i], match); + else + matches[i] = Min(matches[i], match); + } + } + else + { + elog(ERROR, "unknown clause type: %d", clause->type); + } + } +} + + +/* + * mcv_clauselist_selectivity + * Return the selectivity estimate of clauses using MCV list. + * + * It also produces two interesting selectivities - total selectivity of + * all the MCV items combined, and selectivity of the least frequent item + * in the list. + */ +Selectivity +mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, + List *clauses, int varRelid, + JoinType jointype, SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Selectivity *basesel, Selectivity *totalsel) +{ + int i; + MCVList *mcv; + Selectivity s = 0.0; + + /* match/mismatch bitmap for each MCV item */ + char *matches = NULL; + + /* load the MCV list stored in the statistics object */ + mcv = statext_mcv_load(stat->statOid); + + /* by default all the MCV items match the clauses fully */ + matches = palloc0(sizeof(char) * mcv->nitems); + memset(matches, STATS_MATCH_FULL, sizeof(char) * mcv->nitems); + + mcv_update_match_bitmap(root, clauses, stat->keys, mcv, + matches, false); + + /* sum frequencies for all the matching MCV items */ + *basesel = 0.0; + *totalsel = 0.0; + for (i = 0; i < mcv->nitems; i++) + { + *totalsel += mcv->items[i]->frequency; + + if (matches[i] != STATS_MATCH_NONE) + { + *basesel += mcv->items[i]->base_frequency; + s += mcv->items[i]->frequency; + } + } + + return s; +} diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c new file mode 100644 index 593c219..fb74ed3 --- a/src/backend/statistics/mvdistinct.c +++ 
b/src/backend/statistics/mvdistinct.c @@ -23,8 +23,6 @@ */ #include "postgres.h" -#include - #include "access/htup_details.h" #include "catalog/pg_statistic_ext.h" #include "utils/fmgrprotos.h" @@ -39,7 +37,6 @@ static double ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, VacAttrStats **stats, int k, int *combination); -static double estimate_ndistinct(double totalrows, int numrows, int d, int f1); static int n_choose_k(int n, int k); static int num_combinations(int n); @@ -508,31 +505,6 @@ ndistinct_for_combination(double totalro return estimate_ndistinct(totalrows, numrows, d, f1); } -/* The Duj1 estimator (already used in analyze.c). */ -static double -estimate_ndistinct(double totalrows, int numrows, int d, int f1) -{ - double numer, - denom, - ndistinct; - - numer = (double) numrows * (double) d; - - denom = (double) (numrows - f1) + - (double) f1 * (double) numrows / totalrows; - - ndistinct = numer / denom; - - /* Clamp to sane range in case of roundoff error */ - if (ndistinct < (double) d) - ndistinct = (double) d; - - if (ndistinct > totalrows) - ndistinct = totalrows; - - return floor(ndistinct + 0.5); -} - /* * n_choose_k * computes binomial coefficients using an algorithm that is both diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c new file mode 100644 index 03e9a28..c941c33 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1504,6 +1504,7 @@ pg_get_statisticsobj_worker(Oid statexti bool isnull; bool ndistinct_enabled; bool dependencies_enabled; + bool mcv_enabled; int i; statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid)); @@ -1539,6 +1540,7 @@ pg_get_statisticsobj_worker(Oid statexti ndistinct_enabled = false; dependencies_enabled = false; + mcv_enabled = false; for (i = 0; i < ARR_DIMS(arr)[0]; i++) { @@ -1546,6 +1548,8 @@ pg_get_statisticsobj_worker(Oid statexti ndistinct_enabled = true; if (enabled[i] == STATS_EXT_DEPENDENCIES) 
dependencies_enabled = true; + if (enabled[i] == STATS_EXT_MCV) + mcv_enabled = true; } /* @@ -1555,13 +1559,27 @@ pg_get_statisticsobj_worker(Oid statexti * statistics types on a newer postgres version, if the statistics had all * options enabled on the original version. */ - if (!ndistinct_enabled || !dependencies_enabled) + if (!ndistinct_enabled || !dependencies_enabled || !mcv_enabled) { + bool gotone = false; + appendStringInfoString(&buf, " ("); + if (ndistinct_enabled) + { appendStringInfoString(&buf, "ndistinct"); - else if (dependencies_enabled) - appendStringInfoString(&buf, "dependencies"); + gotone = true; + } + + if (dependencies_enabled) + { + appendStringInfo(&buf, "%sdependencies", gotone ? ", " : ""); + gotone = true; + } + + if (mcv_enabled) + appendStringInfo(&buf, "%smcv", gotone ? ", " : ""); + appendStringInfoChar(&buf, ')'); } diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c new file mode 100644 index f1c78ff..fdfc0d6 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3735,6 +3735,171 @@ estimate_num_groups(PlannerInfo *root, L } /* + * estimate_num_groups_simple + * Estimate number of groups in a relation. + * + * A simplified version of estimate_num_groups, assuming all expressions + * are only plain Vars from a single relation, and that no filtering is + * happenning. + */ +double +estimate_num_groups_simple(PlannerInfo *root, List *vars) +{ + List *varinfos = NIL; + double numdistinct; + ListCell *l; + + RelOptInfo *rel; + double reldistinct = 1; + double relmaxndistinct = reldistinct; + int relvarcount = 0; + + + /* + * If no grouping columns, there's exactly one group. (This can't happen + * for normal cases with GROUP BY or DISTINCT, but it is possible for + * corner cases with set operations.) + */ + if (vars == NIL) + return 1.0; + + /* + * We expect only variables from a single relation. 
+ */ + Assert(NumRelids((Node *) vars) == 1); + + /* + * Find the unique Vars used, treating an expression as a Var if we can + * find stats for it. For each one, record the statistical estimate of + * number of distinct values (total in its table). + */ + numdistinct = 1.0; + + foreach(l, vars) + { + Var *var = (Var *) lfirst(l); + VariableStatData vardata; + + Assert(IsA(var, Var)); + + /* + * If examine_variable is able to deduce anything about the GROUP BY + * expression, treat it as a single variable even if it's really more + * complicated. + */ + examine_variable(root, (Node *) var, 0, &vardata); + if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique) + { + varinfos = add_unique_group_var(root, varinfos, + (Node *) var, &vardata); + ReleaseVariableStats(vardata); + continue; + } + ReleaseVariableStats(vardata); + } + + Assert(varinfos); + + /* + * Get the numdistinct estimate for the Vars of this rel. + * + * We + * iteratively search for multivariate n-distinct with maximum number + * of vars; assuming that each var group is independent of the others, + * we multiply them together. Any remaining relvarinfos after no more + * multivariate matches are found are assumed independent too, so + * their individual ndistinct estimates are multiplied also. + * + * While iterating, count how many separate numdistinct values we + * apply. We apply a fudge factor below, but only if we multiplied + * more than one such values. 
+ */ + while (varinfos) + { + double mvndistinct; + + rel = ((GroupVarInfo *) linitial(varinfos))->rel; + + if (estimate_multivariate_ndistinct(root, rel, &varinfos, + &mvndistinct)) + { + reldistinct *= mvndistinct; + if (relmaxndistinct < mvndistinct) + relmaxndistinct = mvndistinct; + relvarcount++; + } + else + { + foreach(l, varinfos) + { + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(l); + + reldistinct *= varinfo->ndistinct; + if (relmaxndistinct < varinfo->ndistinct) + relmaxndistinct = varinfo->ndistinct; + relvarcount++; + } + + /* we're done with this relation */ + varinfos = NIL; + } + } + + /* + * Sanity check --- don't divide by zero if empty relation. + */ + Assert(IS_SIMPLE_REL(rel)); + if (rel->tuples > 0) + { + /* + * Clamp to size of rel, or size of rel / 10 if multiple Vars. The + * fudge factor is because the Vars are probably correlated but we + * don't know by how much. We should never clamp to less than the + * largest ndistinct value for any of the Vars, though, since + * there will surely be at least that many groups. + */ + double clamp = rel->tuples; + + if (relvarcount > 1) + { + clamp *= 0.1; + if (clamp < relmaxndistinct) + { + clamp = relmaxndistinct; + /* for sanity in case some ndistinct is too large: */ + if (clamp > rel->tuples) + clamp = rel->tuples; + } + } + if (reldistinct > clamp) + reldistinct = clamp; + + /* + * We're assuming we are returning all rows. + */ + reldistinct = clamp_row_est(reldistinct); + + /* + * Update estimate of total distinct groups. + */ + numdistinct *= reldistinct; + + /* Guard against out-of-range answers */ + if (numdistinct > rel->tuples) + numdistinct = rel->tuples; + } + + if (numdistinct < 1.0) + numdistinct = 1.0; + + /* Round off */ + numdistinct = ceil(numdistinct); + + return numdistinct; + +} + +/* * Estimate hash bucket statistics when the specified expression is used * as a hash key for the given number of buckets. 
* diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c new file mode 100644 index 80d8338..9207b94 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2517,7 +2517,8 @@ describeOneTableDetails(const char *sche " JOIN pg_catalog.pg_attribute a ON (stxrelid = a.attrelid AND\n" " a.attnum = s.attnum AND NOT attisdropped)) AS columns,\n" " 'd' = any(stxkind) AS ndist_enabled,\n" - " 'f' = any(stxkind) AS deps_enabled\n" + " 'f' = any(stxkind) AS deps_enabled,\n" + " 'm' = any(stxkind) AS mcv_enabled\n" "FROM pg_catalog.pg_statistic_ext stat " "WHERE stxrelid = '%s'\n" "ORDER BY 1;", @@ -2554,6 +2555,12 @@ describeOneTableDetails(const char *sche if (strcmp(PQgetvalue(result, i, 6), "t") == 0) { appendPQExpBuffer(&buf, "%sdependencies", gotone ? ", " : ""); + gotone = true; + } + + if (strcmp(PQgetvalue(result, i, 7), "t") == 0) + { + appendPQExpBuffer(&buf, "%smcv", gotone ? ", " : ""); } appendPQExpBuffer(&buf, ") ON %s FROM %s", diff --git a/src/include/catalog/pg_cast.dat b/src/include/catalog/pg_cast.dat new file mode 100644 index cf00752..dff3a9a --- a/src/include/catalog/pg_cast.dat +++ b/src/include/catalog/pg_cast.dat @@ -324,6 +324,12 @@ { castsource => 'pg_dependencies', casttarget => 'text', castfunc => '0', castcontext => 'i', castmethod => 'i' }, +# pg_mcv_list can be coerced to, but not from, bytea and text +{ castsource => 'pg_mcv_list', casttarget => 'bytea', castfunc => '0', + castcontext => 'i', castmethod => 'b' }, +{ castsource => 'pg_mcv_list', casttarget => 'text', castfunc => '0', + castcontext => 'i', castmethod => 'i' }, + # Datetime category { castsource => 'abstime', casttarget => 'date', castfunc => 'date(abstime)', castcontext => 'a', castmethod => 'f' }, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat new file mode 100644 index a146510..3cfcafc --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5073,6 +5073,30 @@ proname => 'pg_dependencies_send', provolatile 
=> 's', prorettype => 'bytea', proargtypes => 'pg_dependencies', prosrc => 'pg_dependencies_send' }, +{ oid => '4002', descr => 'I/O', + proname => 'pg_mcv_list_in', prorettype => 'pg_mcv_list', + proargtypes => 'cstring', prosrc => 'pg_mcv_list_in' }, +{ oid => '4003', descr => 'I/O', + proname => 'pg_mcv_list_out', prorettype => 'cstring', + proargtypes => 'pg_mcv_list', prosrc => 'pg_mcv_list_out' }, +{ oid => '4004', descr => 'I/O', + proname => 'pg_mcv_list_recv', provolatile => 's', + prorettype => 'pg_mcv_list', proargtypes => 'internal', + prosrc => 'pg_mcv_list_recv' }, +{ oid => '4005', descr => 'I/O', + proname => 'pg_mcv_list_send', provolatile => 's', prorettype => 'bytea', + proargtypes => 'pg_mcv_list', prosrc => 'pg_mcv_list_send' }, + +{ oid => '3424', + descr => 'details about MCV list items', + proname => 'pg_mcv_list_items', prorows => '1000', proisstrict => 'f', + proretset => 't', provolatile => 's', prorettype => 'record', + proargtypes => 'pg_mcv_list', + proallargtypes => '{pg_mcv_list,int4,text,_bool,float8,float8}', + proargmodes => '{i,o,o,o,o,o}', + proargnames => '{mcv_list,index,values,nulls,frequency,base_frequency}', + prosrc => 'pg_stats_ext_mcvlist_items' }, + { oid => '1928', descr => 'statistics: number of scans done for table/index', proname => 'pg_stat_get_numscans', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h new file mode 100644 index 443798a..7ddbee6 --- a/src/include/catalog/pg_statistic_ext.h +++ b/src/include/catalog/pg_statistic_ext.h @@ -47,6 +47,7 @@ CATALOG(pg_statistic_ext,3381,StatisticE * to build */ pg_ndistinct stxndistinct; /* ndistinct coefficients (serialized) */ pg_dependencies stxdependencies; /* dependencies (serialized) */ + pg_mcv_list stxmcv; /* MCV (serialized) */ #endif } FormData_pg_statistic_ext; @@ -62,6 +63,7 @@ typedef FormData_pg_statistic_ext *Form_ #define 
STATS_EXT_NDISTINCT 'd' #define STATS_EXT_DEPENDENCIES 'f' +#define STATS_EXT_MCV 'm' #endif /* EXPOSE_TO_CLIENT_CODE */ diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat new file mode 100644 index 48e01cd..0ff25e8 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -165,6 +165,13 @@ typoutput => 'pg_dependencies_out', typreceive => 'pg_dependencies_recv', typsend => 'pg_dependencies_send', typalign => 'i', typstorage => 'x', typcollation => '100' }, +{ oid => '4001', oid_symbol => 'PGMCVLISTOID', + descr => 'multivariate MCV list', + typname => 'pg_mcv_list', typlen => '-1', typbyval => 'f', + typcategory => 'S', typinput => 'pg_mcv_list_in', + typoutput => 'pg_mcv_list_out', typreceive => 'pg_mcv_list_recv', + typsend => 'pg_mcv_list_send', typalign => 'i', typstorage => 'x', + typcollation => '100' }, { oid => '32', oid_symbol => 'PGDDLCOMMANDOID', descr => 'internal type for passing CollectedCommand', typname => 'pg_ddl_command', typlen => 'SIZEOF_POINTER', typbyval => 't', diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h new file mode 100644 index 85d472f..f327e22 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -195,6 +195,12 @@ extern void analyze_rel(Oid relid, Range VacuumParams *params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy); extern bool std_typanalyze(VacAttrStats *stats); +extern int analyze_mcv_list(int *mcv_counts, + int num_mcv, + double stadistinct, + double stanullfrac, + int samplerows, + double totalrows); /* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */ extern double anl_random_fract(void); diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h new file mode 100644 index 77ca7ff..e6cded0 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -215,6 +215,12 @@ extern Selectivity clause_selectivity(Pl int varRelid, JoinType jointype, 
SpecialJoinInfo *sjinfo); +extern Selectivity clauselist_selectivity_simple(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + Bitmapset *estimatedclauses); extern void cost_gather_merge(GatherMergePath *path, PlannerInfo *root, RelOptInfo *rel, ParamPathInfo *param_info, Cost input_startup_cost, Cost input_total_cost, diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h new file mode 100644 index b3ca0c1..11159b5 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -31,6 +31,15 @@ typedef struct int tupno; /* position index for tuple it came from */ } ScalarItem; +/* (de)serialization info */ +typedef struct DimensionInfo +{ + int nvalues; /* number of deduplicated values */ + int nbytes; /* number of bytes (serialized) */ + int typlen; /* pg_type.typlen */ + bool typbyval; /* pg_type.typbyval */ +} DimensionInfo; + /* multi-sort */ typedef struct MultiSortSupportData { @@ -44,6 +53,7 @@ typedef struct SortItem { Datum *values; bool *isnull; + int count; } SortItem; extern MVNDistinct *statext_ndistinct_build(double totalrows, @@ -57,6 +67,12 @@ extern MVDependencies *statext_dependenc extern bytea *statext_dependencies_serialize(MVDependencies *dependencies); extern MVDependencies *statext_dependencies_deserialize(bytea *data); +extern MCVList * statext_mcv_build(int numrows, HeapTuple *rows, + Bitmapset *attrs, VacAttrStats **stats, + double totalrows); +extern bytea *statext_mcv_serialize(MCVList * mcv, VacAttrStats **stats); +extern MCVList * statext_mcv_deserialize(bytea *data); + extern MultiSortSupport multi_sort_init(int ndims); extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper); @@ -65,5 +81,32 @@ extern int multi_sort_compare_dim(int di const SortItem *b, MultiSortSupport mss); extern int multi_sort_compare_dims(int start, int end, const SortItem *a, const 
SortItem *b, MultiSortSupport mss); +extern int compare_scalars_simple(const void *a, const void *b, void *arg); +extern int compare_datums_simple(Datum a, Datum b, SortSupport ssup); + +extern void *bsearch_arg(const void *key, const void *base, + size_t nmemb, size_t size, + int (*compar) (const void *, const void *, void *), + void *arg); + +extern int *build_attnums(Bitmapset *attrs); + +extern SortItem *build_sorted_items(int numrows, HeapTuple *rows, + TupleDesc tdesc, MultiSortSupport mss, + int numattrs, int *attnums); + +extern int bms_member_index(Bitmapset *keys, AttrNumber varattno); + +extern double estimate_ndistinct(double totalrows, int numrows, int d, int f1); + +extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, + StatisticExtInfo *stat, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Selectivity *basesel, + Selectivity *totalsel); #endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h new file mode 100644 index 8009fee..e69d6a0 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -16,6 +16,14 @@ #include "commands/vacuum.h" #include "nodes/relation.h" +/* + * Degree of how much MCV item matches a clause. + * This is then considered when computing the selectivity. 
+ */ +#define STATS_MATCH_NONE 0 /* no match at all */ +#define STATS_MATCH_PARTIAL 1 /* partial match */ +#define STATS_MATCH_FULL 2 /* full match */ + #define STATS_MAX_DIMENSIONS 8 /* max number of attributes */ /* Multivariate distinct coefficients */ @@ -78,8 +86,42 @@ typedef struct MVDependencies /* size of the struct excluding the deps array */ #define SizeOfDependencies (offsetof(MVDependencies, ndeps) + sizeof(uint32)) +/* used to flag stats serialized to bytea */ +#define STATS_MCV_MAGIC 0xE1A651C2 /* marks serialized + * bytea */ +#define STATS_MCV_TYPE_BASIC 1 /* basic MCV list type */ + +/* max items in MCV list (mostly arbitrary number) */ +#define STATS_MCVLIST_MAX_ITEMS 8192 + +/* + * Multivariate MCV (most-common value) lists + * + * A straight-forward extension of MCV items - i.e. a list (array) of + * combinations of attribute values, together with a frequency and null flags. + */ +typedef struct MCVItem +{ + double frequency; /* frequency of this combination */ + double base_frequency; /* frequency if independent */ + bool *isnull; /* flags of NULL values (up to 32 columns) */ + Datum *values; /* variable-length (ndimensions) */ +} MCVItem; + +/* multivariate MCV list - essentially an array of MCV items */ +typedef struct MCVList +{ + uint32 magic; /* magic constant marker */ + uint32 type; /* type of MCV list (BASIC) */ + uint32 nitems; /* number of MCV items in the array */ + AttrNumber ndimensions; /* number of dimensions */ + Oid types[STATS_MAX_DIMENSIONS]; /* OIDs of data types */ + MCVItem **items; /* array of MCV items */ +} MCVList; + extern MVNDistinct *statext_ndistinct_load(Oid mvoid); extern MVDependencies *statext_dependencies_load(Oid mvoid); +extern MCVList * statext_mcv_load(Oid mvoid); extern void BuildRelationExtStatistics(Relation onerel, double totalrows, int numrows, HeapTuple *rows, @@ -92,6 +134,13 @@ extern Selectivity dependencies_clauseli SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses); +extern 
Selectivity statext_clauselist_selectivity(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Bitmapset **estimatedclauses); extern bool has_stats_of_kind(List *stats, char requiredkind); extern StatisticExtInfo *choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind); diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h new file mode 100644 index 95e4428..4e9aaca --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -209,6 +209,8 @@ extern void mergejoinscansel(PlannerInfo extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset); +extern double estimate_num_groups_simple(PlannerInfo *root, List *vars); + extern void estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, diff --git a/src/test/regress/expected/create_table_like.out b/src/test/regress/expected/create_table_like.out new file mode 100644 index 8d4543b..0f97355 --- a/src/test/regress/expected/create_table_like.out +++ b/src/test/regress/expected/create_table_like.out @@ -243,7 +243,7 @@ Indexes: Check constraints: "ctlt1_a_check" CHECK (length(a) > 2) Statistics objects: - "public"."ctlt_all_a_b_stat" (ndistinct, dependencies) ON a, b FROM ctlt_all + "public"."ctlt_all_a_b_stat" (ndistinct, dependencies, mcv) ON a, b FROM ctlt_all SELECT c.relname, objsubid, description FROM pg_description, pg_index i, pg_class c WHERE classoid = 'pg_class'::regclass AND objoid = i.indexrelid AND c.oid = i.indexrelid AND i.indrelid = 'ctlt_all'::regclass ORDER BY c.relname, objsubid; relname | objsubid | description diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out new file mode 100644 index 3c6d853..d13f092 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -902,11 +902,12 @@ WHERE c.castmethod = 'b' AND 
pg_node_tree | text | 0 | i pg_ndistinct | bytea | 0 | i pg_dependencies | bytea | 0 | i + pg_mcv_list | bytea | 0 | i cidr | inet | 0 | i xml | text | 0 | a xml | character varying | 0 | a xml | character | 0 | a -(9 rows) +(10 rows) -- **************** pg_conversion **************** -- Look for illegal values in pg_conversion fields. diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out new file mode 100644 index 054a381..5d05962 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -58,7 +58,7 @@ ALTER TABLE ab1 DROP COLUMN a; b | integer | | | c | integer | | | Statistics objects: - "public"."ab1_b_c_stats" (ndistinct, dependencies) ON b, c FROM ab1 + "public"."ab1_b_c_stats" (ndistinct, dependencies, mcv) ON b, c FROM ab1 -- Ensure statistics are dropped when table is SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; @@ -206,7 +206,7 @@ SELECT stxkind, stxndistinct FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; stxkind | stxndistinct ---------+--------------------------------------------------------- - {d,f} | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301} + {d,f,m} | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301} (1 row) -- Hash Aggregate, thanks to estimates improved by the statistic @@ -272,7 +272,7 @@ SELECT stxkind, stxndistinct FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; stxkind | stxndistinct ---------+------------------------------------------------------------- - {d,f} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000} + {d,f,m} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000} (1 row) -- plans using Group Aggregate, thanks to using correct esimates @@ -509,3 +509,316 @@ EXPLAIN (COSTS OFF) (5 rows) RESET random_page_cost; +-- MCV lists +CREATE TABLE mcv_lists ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b TEXT, + filler3 DATE, + c INT, + d TEXT +); +SET random_page_cost = 1.2; 
+CREATE INDEX mcv_lists_ab_idx ON mcv_lists (a, b); +CREATE INDEX mcv_lists_abc_idx ON mcv_lists (a, b, c); +-- random data (no MCV list) +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT mod(i,37), mod(i,41), mod(i,43), mod(i,47) FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists; +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a = 1) AND (b = '1'::text)) + -> Bitmap Index Scan on mcv_lists_abc_idx + Index Cond: ((a = 1) AND (b = '1'::text)) +(4 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1' AND c = 1; + QUERY PLAN +--------------------------------------------------------- + Index Scan using mcv_lists_abc_idx on mcv_lists + Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1)) +(2 rows) + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +ANALYZE mcv_lists; +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a = 1) AND (b = '1'::text)) + -> Bitmap Index Scan on mcv_lists_abc_idx + Index Cond: ((a = 1) AND (b = '1'::text)) +(4 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1' AND c = 1; + QUERY PLAN +--------------------------------------------------------- + Index Scan using mcv_lists_abc_idx on mcv_lists + Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1)) +(2 rows) + +-- 100 distinct combinations, all in the MCV list +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists; +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + QUERY PLAN +------------------------------------------------- + Index Scan using mcv_lists_abc_idx on 
mcv_lists + Index Cond: ((a = 1) AND (b = '1'::text)) +(2 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a < 1 AND b < '1'; + QUERY PLAN +------------------------------------------------- + Index Scan using mcv_lists_abc_idx on mcv_lists + Index Cond: ((a < 1) AND (b < '1'::text)) +(2 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a <= 0 AND b <= '0'; + QUERY PLAN +------------------------------------------------- + Index Scan using mcv_lists_abc_idx on mcv_lists + Index Cond: ((a <= 0) AND (b <= '0'::text)) +(2 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1' AND c = 1; + QUERY PLAN +--------------------------------------------------------- + Index Scan using mcv_lists_abc_idx on mcv_lists + Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1)) +(2 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a < 5 AND b < '1' AND c < 5; + QUERY PLAN +--------------------------------------------------------- + Index Scan using mcv_lists_abc_idx on mcv_lists + Index Cond: ((a < 5) AND (b < '1'::text) AND (c < 5)) +(2 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a <= 4 AND b <= '0' AND c <= 4; + QUERY PLAN +------------------------------------------------------------ + Index Scan using mcv_lists_abc_idx on mcv_lists + Index Cond: ((a <= 4) AND (b <= '0'::text) AND (c <= 4)) +(2 rows) + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +ANALYZE mcv_lists; +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a = 1) AND (b = '1'::text)) + -> Bitmap Index Scan on mcv_lists_abc_idx + Index Cond: ((a = 1) AND (b = '1'::text)) +(4 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a < 1 AND b < '1'; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a < 1) 
AND (b < '1'::text)) + -> Bitmap Index Scan on mcv_lists_abc_idx + Index Cond: ((a < 1) AND (b < '1'::text)) +(4 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a <= 0 AND b <= '0'; + QUERY PLAN +----------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a <= 0) AND (b <= '0'::text)) + -> Bitmap Index Scan on mcv_lists_abc_idx + Index Cond: ((a <= 0) AND (b <= '0'::text)) +(4 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1' AND c = 1; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a = 1) AND (b = '1'::text)) + Filter: (c = 1) + -> Bitmap Index Scan on mcv_lists_ab_idx + Index Cond: ((a = 1) AND (b = '1'::text)) +(5 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a < 5 AND b < '1' AND c < 5; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a < 5) AND (b < '1'::text)) + Filter: (c < 5) + -> Bitmap Index Scan on mcv_lists_ab_idx + Index Cond: ((a < 5) AND (b < '1'::text)) +(5 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a <= 4 AND b <= '0' AND c <= 4; + QUERY PLAN +----------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a <= 4) AND (b <= '0'::text)) + Filter: (c <= 4) + -> Bitmap Index Scan on mcv_lists_ab_idx + Index Cond: ((a <= 4) AND (b <= '0'::text)) +(5 rows) + +-- check change of column type resets the MCV statistics +ALTER TABLE mcv_lists ALTER COLUMN c TYPE numeric; +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + QUERY PLAN +------------------------------------------------- + Index Scan using mcv_lists_abc_idx on mcv_lists + Index Cond: ((a = 1) AND (b = '1'::text)) +(2 rows) + +ANALYZE mcv_lists; +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + QUERY PLAN +--------------------------------------------------- + 
Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a = 1) AND (b = '1'::text)) + -> Bitmap Index Scan on mcv_lists_abc_idx + Index Cond: ((a = 1) AND (b = '1'::text)) +(4 rows) + +-- 100 distinct combinations with NULL values, all in the MCV list +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT + (CASE WHEN mod(i,100) = 1 THEN NULL ELSE mod(i,100) END), + (CASE WHEN mod(i,50) = 1 THEN NULL ELSE mod(i,50) END), + (CASE WHEN mod(i,25) = 1 THEN NULL ELSE mod(i,25) END), + i + FROM generate_series(1,5000) s(i); +ANALYZE mcv_lists; +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL; + QUERY PLAN +------------------------------------------------- + Index Scan using mcv_lists_abc_idx on mcv_lists + Index Cond: ((a IS NULL) AND (b IS NULL)) +(2 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL; + QUERY PLAN +------------------------------------------------ + Index Scan using mcv_lists_ab_idx on mcv_lists + Index Cond: ((a IS NULL) AND (b IS NULL)) + Filter: (c IS NULL) +(3 rows) + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +ANALYZE mcv_lists; +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a IS NULL) AND (b IS NULL)) + -> Bitmap Index Scan on mcv_lists_abc_idx + Index Cond: ((a IS NULL) AND (b IS NULL)) +(4 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mcv_lists + Recheck Cond: ((a IS NULL) AND (b IS NULL)) + Filter: (c IS NULL) + -> Bitmap Index Scan on mcv_lists_ab_idx + Index Cond: ((a IS NULL) AND (b IS NULL)) +(5 rows) + +RESET random_page_cost; +-- mcv with arrays +CREATE TABLE mcv_lists_arrays ( + a TEXT[], + b 
NUMERIC[], + c INT[] +); +INSERT INTO mcv_lists_arrays (a, b, c) + SELECT + ARRAY[md5((i/100)::text), md5((i/100-1)::text), md5((i/100+1)::text)], + ARRAY[(i/100-1)::numeric/1000, (i/100)::numeric/1000, (i/100+1)::numeric/1000], + ARRAY[(i/100-1), i/100, (i/100+1)] + FROM generate_series(1,5000) s(i); +CREATE STATISTICS mcv_lists_arrays_stats (mcv) ON a, b, c + FROM mcv_lists_arrays; +ANALYZE mcv_lists_arrays; +-- mcv with bool +CREATE TABLE mcv_lists_bool ( + a BOOL, + b BOOL, + c BOOL +); +INSERT INTO mcv_lists_bool (a, b, c) + SELECT + (mod(i,2) = 0), (mod(i,4) = 0), (mod(i,8) = 0) + FROM generate_series(1,10000) s(i); +CREATE INDEX mcv_lists_bool_ab_idx ON mcv_lists_bool (a, b); +CREATE INDEX mcv_lists_bool_abc_idx ON mcv_lists_bool (a, b, c); +CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c + FROM mcv_lists_bool; +ANALYZE mcv_lists_bool; +EXPLAIN (COSTS OFF) SELECT * FROM mcv_lists_bool WHERE a AND b AND c; + QUERY PLAN +---------------------------------------------------------------- + Bitmap Heap Scan on mcv_lists_bool + Filter: (a AND b AND c) + -> Bitmap Index Scan on mcv_lists_bool_abc_idx + Index Cond: ((a = true) AND (b = true) AND (c = true)) +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c; + QUERY PLAN +---------------------------------------------------------- + Index Scan using mcv_lists_bool_ab_idx on mcv_lists_bool + Index Cond: ((a = false) AND (b = true)) + Filter: ((NOT a) AND b AND c) +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c; + QUERY PLAN +---------------------------------------------------------------- + Index Only Scan using mcv_lists_bool_abc_idx on mcv_lists_bool + Index Cond: ((a = false) AND (b = false) AND (c = true)) + Filter: ((NOT a) AND (NOT b) AND c) +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c; + QUERY PLAN +---------------------------------------------------------- + Index Scan using 
mcv_lists_bool_ab_idx on mcv_lists_bool + Index Cond: ((a = false) AND (b = true)) + Filter: ((NOT a) AND b AND (NOT c)) +(3 rows) + diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out new file mode 100644 index b1419d4..a56d6c5 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -72,8 +72,9 @@ WHERE p1.typtype not in ('c','d','p') AN 194 | pg_node_tree 3361 | pg_ndistinct 3402 | pg_dependencies + 4001 | pg_mcv_list 210 | smgr -(4 rows) +(5 rows) -- Make sure typarray points to a varlena array type of our own base SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype, diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql new file mode 100644 index 46acaad..ad1f103 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -282,3 +282,184 @@ EXPLAIN (COSTS OFF) SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; RESET random_page_cost; + +-- MCV lists +CREATE TABLE mcv_lists ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b TEXT, + filler3 DATE, + c INT, + d TEXT +); + +SET random_page_cost = 1.2; + +CREATE INDEX mcv_lists_ab_idx ON mcv_lists (a, b); +CREATE INDEX mcv_lists_abc_idx ON mcv_lists (a, b, c); + +-- random data (no MCV list) +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT mod(i,37), mod(i,41), mod(i,43), mod(i,47) FROM generate_series(1,5000) s(i); + +ANALYZE mcv_lists; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1' AND c = 1; + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; + +ANALYZE mcv_lists; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1' AND c = 1; + +-- 100 distinct combinations, all in the MCV list +TRUNCATE mcv_lists; +DROP 
STATISTICS mcv_lists_stats; + +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); + +ANALYZE mcv_lists; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a < 1 AND b < '1'; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a <= 0 AND b <= '0'; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1' AND c = 1; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a < 5 AND b < '1' AND c < 5; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a <= 4 AND b <= '0' AND c <= 4; + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; + +ANALYZE mcv_lists; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a < 1 AND b < '1'; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a <= 0 AND b <= '0'; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1' AND c = 1; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a < 5 AND b < '1' AND c < 5; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a <= 4 AND b <= '0' AND c <= 4; + +-- check change of column type resets the MCV statistics +ALTER TABLE mcv_lists ALTER COLUMN c TYPE numeric; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + +ANALYZE mcv_lists; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a = 1 AND b = '1'; + +-- 100 distinct combinations with NULL values, all in the MCV list +TRUNCATE mcv_lists; +DROP STATISTICS mcv_lists_stats; + +INSERT INTO mcv_lists (a, b, c, filler1) + SELECT + (CASE WHEN mod(i,100) = 1 THEN NULL ELSE mod(i,100) END), + (CASE WHEN mod(i,50) = 1 THEN NULL ELSE mod(i,50) END), + (CASE WHEN mod(i,25) = 1 THEN NULL ELSE mod(i,25) END), + i + FROM generate_series(1,5000) s(i); + +ANALYZE mcv_lists; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE 
a IS NULL AND b IS NULL; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL; + +-- create statistics +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; + +ANALYZE mcv_lists; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL; + +EXPLAIN (COSTS OFF) + SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL; + +RESET random_page_cost; + +-- mcv with arrays +CREATE TABLE mcv_lists_arrays ( + a TEXT[], + b NUMERIC[], + c INT[] +); + +INSERT INTO mcv_lists_arrays (a, b, c) + SELECT + ARRAY[md5((i/100)::text), md5((i/100-1)::text), md5((i/100+1)::text)], + ARRAY[(i/100-1)::numeric/1000, (i/100)::numeric/1000, (i/100+1)::numeric/1000], + ARRAY[(i/100-1), i/100, (i/100+1)] + FROM generate_series(1,5000) s(i); + +CREATE STATISTICS mcv_lists_arrays_stats (mcv) ON a, b, c + FROM mcv_lists_arrays; + +ANALYZE mcv_lists_arrays; + +-- mcv with bool +CREATE TABLE mcv_lists_bool ( + a BOOL, + b BOOL, + c BOOL +); + +INSERT INTO mcv_lists_bool (a, b, c) + SELECT + (mod(i,2) = 0), (mod(i,4) = 0), (mod(i,8) = 0) + FROM generate_series(1,10000) s(i); + +CREATE INDEX mcv_lists_bool_ab_idx ON mcv_lists_bool (a, b); + +CREATE INDEX mcv_lists_bool_abc_idx ON mcv_lists_bool (a, b, c); + +CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c + FROM mcv_lists_bool; + +ANALYZE mcv_lists_bool; + +EXPLAIN (COSTS OFF) SELECT * FROM mcv_lists_bool WHERE a AND b AND c; + +EXPLAIN (COSTS OFF) SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c; + +EXPLAIN (COSTS OFF) SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c; + +EXPLAIN (COSTS OFF) SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c;