From c0567d9927055e5de6fe5bef008931e7e1de9c42 Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Thu, 15 Feb 2024 03:36:30 -0500 Subject: [PATCH v5 2/2] Create pg_import_ext_stats(). This is the extended statistics equivalent of pg_import_rel_stats(). The most likely application of this function is to quickly apply stats to a newly upgraded database faster than could be accomplished by vacuumdb --analyze-in-stages. The exported values stored in the parameter extended_stats are compared against the existing structure in pg_statistic_ext and are transformed into pg_statistic_ext_data rows, transactionally replacing any pre-existing rows for that object. The statistics applied are not locked in any way, and will be overwritten by the next analyze, either explicit or via autovacuum. This function also allows for tweaking of table statistics in-place, allowing the user to simulate correlations, skew histograms, etc, to see what those changes will evoke from the query planner. --- src/include/catalog/pg_proc.dat | 5 + .../statistics/extended_stats_internal.h | 7 + src/backend/statistics/dependencies.c | 161 +++ src/backend/statistics/extended_stats.c | 986 ++++++++++++++++-- src/backend/statistics/mcv.c | 192 ++++ src/backend/statistics/mvdistinct.c | 160 +++ .../regress/expected/stats_export_import.out | 265 ++++- src/test/regress/sql/stats_export_import.sql | 245 ++++- doc/src/sgml/func.sgml | 28 +- 9 files changed, 1976 insertions(+), 73 deletions(-) diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 0e48c08566..701ed3a2c9 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -6125,6 +6125,11 @@ { oid => '9161', descr => 'adjust time to local time zone', proname => 'timezone', provolatile => 's', prorettype => 'timetz', proargtypes => 'timetz', prosrc => 'timetz_at_local' }, +{ oid => '9162', + descr => 'statistics: import to extended stats object', + proname => 'pg_import_ext_stats', provolatile => 
'v', proisstrict => 't', + proparallel => 'u', prorettype => 'bool', proargtypes => 'oid jsonb bool bool', + prosrc => 'pg_import_ext_stats' }, { oid => '2039', descr => 'hash', proname => 'timestamp_hash', prorettype => 'int4', proargtypes => 'timestamp', prosrc => 'timestamp_hash' }, diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 8eed9b338d..e325a76e63 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -70,15 +70,22 @@ typedef struct StatsBuildData extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *data); +extern MVNDistinct *statext_ndistinct_import(Oid relid, Datum ndistinct, + bool ndististinct_null, Datum attributes, + bool attributes_null); extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct); extern MVNDistinct *statext_ndistinct_deserialize(bytea *data); extern MVDependencies *statext_dependencies_build(StatsBuildData *data); +extern MVDependencies *statext_dependencies_import(Oid relid, + Datum dependencies, bool dependencies_null, + Datum attributes, bool attributes_null); extern bytea *statext_dependencies_serialize(MVDependencies *dependencies); extern MVDependencies *statext_dependencies_deserialize(bytea *data); extern MCVList *statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget); +extern MCVList *statext_mcv_import(Datum mcv, bool mcv_null, VacAttrStats **stats); extern bytea *statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats); extern MCVList *statext_mcv_deserialize(bytea *data); diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 4752b99ed5..e482eca557 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -18,6 +18,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include 
"executor/spi.h" #include "lib/stringinfo.h" #include "nodes/nodeFuncs.h" #include "nodes/nodes.h" @@ -27,6 +28,7 @@ #include "parser/parsetree.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" +#include "utils/builtins.h" #include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/fmgrprotos.h" @@ -1829,3 +1831,162 @@ dependencies_clauselist_selectivity(PlannerInfo *root, return s1; } + +/* + * statext_dependencies_import + * + * The dependencies serialization is a string that looks like + * {"2 => 3": 0.258241, "1 => 2": 0.0, ...} + * + * The integers represent attnums in the exported table, and these + * may not line up with the attnums in the destination table so we + * match them by name. + * + * This structure can be coerced into JSON, but we must use JSON + * over JSONB because JSON preserves key order and JSONB does not. + * + * + */ +MVDependencies * +statext_dependencies_import(Oid relid, + Datum dependencies, bool dependencies_null, + Datum attributes, bool attributes_null) +{ + MVDependencies *result = NULL; + +#define DEPS_NARGS 3 + + Oid argtypes[DEPS_NARGS] = { OIDOID, TEXTOID, JSONBOID }; + Datum args[DEPS_NARGS] = { relid, dependencies, attributes }; + char argnulls[DEPS_NARGS] = { ' ', + dependencies_null ? 'n' : ' ', + attributes_null ? 
'n' : ' ' }; + + const char *sql = + "SELECT " + " dep.depord, " + " da.depattrord, " + " da.exp_attnum, " + " ea.attname AS exp_attname, " + " CASE " + " WHEN da.exp_attnum < 0 THEN da.exp_attnum " + " ELSE pga.attnum " + " END AS attnum, " + " dep.degree::float8 AS degree, " + " COUNT(*) OVER (PARTITION BY dep.depord) AS num_attrs, " + " MAX(dep.depord) OVER () AS num_deps " + "FROM json_each_text($2::json) " + " WITH ORDINALITY AS dep(attrs, degree, depord) " + "CROSS JOIN LATERAL unnest( string_to_array( " + " replace(dep.attrs, ' => ', ', '), ', ')::int2[]) " + " WITH ORDINALITY AS da(exp_attnum, depattrord) " + "LEFT JOIN LATERAL jsonb_to_recordset($3) AS ea(attnum int2, attname text) " + " ON ea.attnum = da.exp_attnum AND da.exp_attnum > 0 " + "LEFT JOIN pg_attribute AS pga " + " ON pga.attrelid = $1 AND pga.attname = ea.attname " + "ORDER BY dep.depord, da.depattrord "; + + enum { + DEPS_DEPORD = 0, + DEPS_DEPATTRORD, + DEPS_EXP_ATTNUM, + DEPS_EXP_ATTNAME, + DEPS_ATTNUM, + DEPS_DEGREE, + DEPS_NUM_ATTRS, + DEPS_NUM_DEPS, + NUM_DEPS_COLS + }; + + SPITupleTable *tuptable; + int ret; + int ndeps; + int j = 0; + + ret = SPI_execute_with_args(sql, DEPS_NARGS, argtypes, args, argnulls, true, 0); + + if (ret != SPI_OK_SELECT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("statistic export JSON is not in proper format"))); + + tuptable = SPI_tuptable; + if (tuptable->numvals == 0) + ndeps = 0; + else + { + bool isnull; + Datum d = SPI_getbinval(tuptable->vals[0], tuptable->tupdesc, + DEPS_NUM_DEPS+1, &isnull); + ndeps = DatumGetInt32(d); + + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Indeterminate number of dependencies"))); + } + + if (ndeps == 0) + result = (MVDependencies *) palloc0(sizeof(MVDependencies)); + else + result = (MVDependencies *) palloc0(offsetof(MVDependencies, deps) + + (ndeps * sizeof(MVDependency *))); + + result->magic = STATS_DEPS_MAGIC; + result->type = STATS_DEPS_TYPE_BASIC; + 
result->ndeps = ndeps; + + for (j = 0; j < tuptable->numvals; j++) + { + Datum datums[NUM_DEPS_COLS]; + bool nulls[NUM_DEPS_COLS]; + int natts; + int d; + int a; + + heap_deform_tuple(tuptable->vals[j], tuptable->tupdesc, datums, nulls); + + Assert(!nulls[DEPS_DEPORD]); + d = DatumGetInt32(datums[DEPS_DEPORD]) - 1; + Assert(!nulls[DEPS_DEPATTRORD]); + a = DatumGetInt32(datums[DEPS_DEPATTRORD]) - 1; + + if (a == 0) + { + /* New MVDependnecy */ + Assert(!nulls[DEPS_NUM_ATTRS]); + natts = DatumGetInt32(datums[DEPS_NUM_ATTRS]); + + result->deps[d] = palloc0(offsetof(MVDependency, attributes) + + (natts * sizeof(AttrNumber))); + + result->deps[d]->nattributes = natts; + Assert(!nulls[DEPS_DEGREE]); + result->deps[d]->degree = DatumGetFloat8(datums[DEPS_DEGREE]); + } + + if (!nulls[DEPS_ATTNUM]) + result->deps[d]->attributes[a] = DatumGetInt16(datums[DEPS_ATTNUM]); + else if (nulls[DEPS_EXP_ATTNUM]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Dependency exported attnum cannot be null"))); + else if (nulls[DEPS_ATTNUM]) + { + AttrNumber exp_attnum = DatumGetInt16(datums[DEPS_EXP_ATTNUM]); + + if (nulls[DEPS_EXP_ATTNAME]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Dependency has no exported name for attnum %d", + exp_attnum))); + + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Dependency tried to match attnum %d by name (%s) but found no match", + exp_attnum, TextDatumGetCString(datums[DEPS_EXP_ATTNAME])))); + } + } + + return result; +} diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index c5461514d8..841c3d8f55 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -19,12 +19,14 @@ #include "access/detoast.h" #include "access/genam.h" #include "access/htup_details.h" +#include "access/relation.h" #include "access/table.h" #include "catalog/indexing.h" #include "catalog/pg_collation.h" #include 
"catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" #include "executor/executor.h" +#include "executor/spi.h" #include "commands/defrem.h" #include "commands/progress.h" #include "miscadmin.h" @@ -32,10 +34,12 @@ #include "optimizer/clauses.h" #include "optimizer/optimizer.h" #include "parser/parsetree.h" +#include "parser/parse_oper.h" #include "pgstat.h" #include "postmaster/autovacuum.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" +#include "statistics/statistics_internal.h" #include "utils/acl.h" #include "utils/array.h" #include "utils/attoptcache.h" @@ -418,6 +422,83 @@ statext_is_kind_built(HeapTuple htup, char type) return !heap_attisnull(htup, attnum, NULL); } +/* + * Create a single StatExtEntry from a fetched heap tuple + */ +static StatExtEntry * +statext_create_entry(HeapTuple htup) +{ + StatExtEntry *entry; + Datum datum; + bool isnull; + int i; + ArrayType *arr; + char *enabled; + Form_pg_statistic_ext staForm; + List *exprs = NIL; + + entry = palloc0(sizeof(StatExtEntry)); + staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); + entry->statOid = staForm->oid; + entry->schema = get_namespace_name(staForm->stxnamespace); + entry->name = pstrdup(NameStr(staForm->stxname)); + entry->stattarget = staForm->stxstattarget; + for (i = 0; i < staForm->stxkeys.dim1; i++) + { + entry->columns = bms_add_member(entry->columns, + staForm->stxkeys.values[i]); + } + + /* decode the stxkind char array into a list of chars */ + datum = SysCacheGetAttrNotNull(STATEXTOID, htup, + Anum_pg_statistic_ext_stxkind); + arr = DatumGetArrayTypeP(datum); + if (ARR_NDIM(arr) != 1 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != CHAROID) + elog(ERROR, "stxkind is not a 1-D char array"); + enabled = (char *) ARR_DATA_PTR(arr); + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + Assert((enabled[i] == STATS_EXT_NDISTINCT) || + (enabled[i] == STATS_EXT_DEPENDENCIES) || + (enabled[i] == STATS_EXT_MCV) || + (enabled[i] == 
STATS_EXT_EXPRESSIONS)); + entry->types = lappend_int(entry->types, (int) enabled[i]); + } + + /* decode expression (if any) */ + datum = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_stxexprs, &isnull); + + if (!isnull) + { + char *exprsString; + + exprsString = TextDatumGetCString(datum); + exprs = (List *) stringToNode(exprsString); + + pfree(exprsString); + + /* + * Run the expressions through eval_const_expressions. This is not + * just an optimization, but is necessary, because the planner + * will be comparing them to similarly-processed qual clauses, and + * may fail to detect valid matches without this. We must not use + * canonicalize_qual, however, since these aren't qual + * expressions. + */ + exprs = (List *) eval_const_expressions(NULL, (Node *) exprs); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) exprs); + } + + entry->exprs = exprs; + + return entry; +} + /* * Return a list (of StatExtEntry) of statistics objects for the given relation. */ @@ -443,74 +524,7 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid) while (HeapTupleIsValid(htup = systable_getnext(scan))) { - StatExtEntry *entry; - Datum datum; - bool isnull; - int i; - ArrayType *arr; - char *enabled; - Form_pg_statistic_ext staForm; - List *exprs = NIL; - - entry = palloc0(sizeof(StatExtEntry)); - staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); - entry->statOid = staForm->oid; - entry->schema = get_namespace_name(staForm->stxnamespace); - entry->name = pstrdup(NameStr(staForm->stxname)); - entry->stattarget = staForm->stxstattarget; - for (i = 0; i < staForm->stxkeys.dim1; i++) - { - entry->columns = bms_add_member(entry->columns, - staForm->stxkeys.values[i]); - } - - /* decode the stxkind char array into a list of chars */ - datum = SysCacheGetAttrNotNull(STATEXTOID, htup, - Anum_pg_statistic_ext_stxkind); - arr = DatumGetArrayTypeP(datum); - if (ARR_NDIM(arr) != 1 || - ARR_HASNULL(arr) || - ARR_ELEMTYPE(arr) != CHAROID) - elog(ERROR, 
"stxkind is not a 1-D char array"); - enabled = (char *) ARR_DATA_PTR(arr); - for (i = 0; i < ARR_DIMS(arr)[0]; i++) - { - Assert((enabled[i] == STATS_EXT_NDISTINCT) || - (enabled[i] == STATS_EXT_DEPENDENCIES) || - (enabled[i] == STATS_EXT_MCV) || - (enabled[i] == STATS_EXT_EXPRESSIONS)); - entry->types = lappend_int(entry->types, (int) enabled[i]); - } - - /* decode expression (if any) */ - datum = SysCacheGetAttr(STATEXTOID, htup, - Anum_pg_statistic_ext_stxexprs, &isnull); - - if (!isnull) - { - char *exprsString; - - exprsString = TextDatumGetCString(datum); - exprs = (List *) stringToNode(exprsString); - - pfree(exprsString); - - /* - * Run the expressions through eval_const_expressions. This is not - * just an optimization, but is necessary, because the planner - * will be comparing them to similarly-processed qual clauses, and - * may fail to detect valid matches without this. We must not use - * canonicalize_qual, however, since these aren't qual - * expressions. - */ - exprs = (List *) eval_const_expressions(NULL, (Node *) exprs); - - /* May as well fix opfuncids too */ - fix_opfuncids((Node *) exprs); - } - - entry->exprs = exprs; - + StatExtEntry *entry = statext_create_entry(htup); result = lappend(result, entry); } @@ -2636,3 +2650,839 @@ make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows, return result; } + +/* + * examine_rel_attribute -- pre-analysis of a single column + * + * Determine whether the column is analyzable; if so, create and initialize + * a VacAttrStats struct for it. If not, return NULL. + * + * If index_expr isn't NULL, then we're trying to import an expression index, + * and index_expr is the expression tree representing the column's data. 
+ */ +static VacAttrStats * +examine_rel_attribute(Relation onerel, int attnum, Node *index_expr) +{ + Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1); + HeapTuple typtuple; + VacAttrStats *stats; + int i; + bool ok; + + /* Never analyze dropped columns */ + if (attr->attisdropped) + return NULL; + + /* + * Create the VacAttrStats struct. + */ + stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); + stats->attstattarget = 1; /* Any nonzero value */ + + /* + * When analyzing an expression index, believe the expression tree's type + * not the column datatype --- the latter might be the opckeytype storage + * type of the opclass, which is not interesting for our purposes. (Note: + * if we did anything with non-expression index columns, we'd need to + * figure out where to get the correct type info from, but for now that's + * not a problem.) It's not clear whether anyone will care about the + * typmod, but we store that too just in case. + */ + if (index_expr) + { + stats->attrtypid = exprType(index_expr); + stats->attrtypmod = exprTypmod(index_expr); + + /* + * If a collation has been specified for the index column, use that in + * preference to anything else; but if not, fall back to whatever we + * can get from the expression. 
+ */ + if (OidIsValid(onerel->rd_indcollation[attnum - 1])) + stats->attrcollid = onerel->rd_indcollation[attnum - 1]; + else + stats->attrcollid = exprCollation(index_expr); + } + else + { + stats->attrtypid = attr->atttypid; + stats->attrtypmod = attr->atttypmod; + stats->attrcollid = attr->attcollation; + } + + typtuple = SearchSysCacheCopy1(TYPEOID, + ObjectIdGetDatum(stats->attrtypid)); + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup failed for type %u", stats->attrtypid); + stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple); + stats->anl_context = NULL; + stats->tupattnum = attnum; + + /* + * The fields describing the stats->stavalues[n] element types default to + * the type of the data being analyzed, but the type-specific typanalyze + * function can change them if it wants to store something else. + */ + for (i = 0; i < STATISTIC_NUM_SLOTS; i++) + { + stats->statypid[i] = stats->attrtypid; + stats->statyplen[i] = stats->attrtype->typlen; + stats->statypbyval[i] = stats->attrtype->typbyval; + stats->statypalign[i] = stats->attrtype->typalign; + } + + /* + * Call the type-specific typanalyze function. If none is specified, use + * std_typanalyze(). + */ + if (OidIsValid(stats->attrtype->typanalyze)) + ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze, + PointerGetDatum(stats))); + else + ok = std_typanalyze(stats); + + if (!ok || stats->compute_stats == NULL || stats->minrows <= 0) + { + heap_freetuple(typtuple); + pfree(stats); + return NULL; + } + + return stats; +} + + +static Datum +import_expressions(Datum stxdexpr, bool stxdexpr_null, + Datum operators, bool operators_null, + VacAttrStats **expr_stats, int nexprs) +{ + +#define EXPR_NARGS 2 + + Oid argtypes[EXPR_NARGS] = { JSONBOID, JSONBOID }; + Datum args[EXPR_NARGS] = { stxdexpr, operators }; + char argnulls[EXPR_NARGS] = { + stxdexpr_null ? 'n' : ' ', + operators_null ? 
'n' : ' ' }; + + const char *sql = + "WITH exported_operators AS ( " + " SELECT eo.* " + " FROM jsonb_to_recordset($2) " + " AS eo(oid oid, oprname text) " + ") " + "SELECT s.*, " + " eo1.oprname AS eoprname1, " + " eo2.oprname AS eoprname2, " + " eo3.oprname AS eoprname3, " + " eo4.oprname AS eoprname4, " + " eo5.oprname AS eoprname5 " + "FROM jsonb_to_recordset($1) " + " AS s(staattnum integer, " + " stainherit boolean, " + " stanullfrac float4, " + " stawidth integer, " + " stadistinct float4, " + " stakind1 int2, " + " stakind2 int2, " + " stakind3 int2, " + " stakind4 int2, " + " stakind5 int2, " + " staop1 oid, " + " staop2 oid, " + " staop3 oid, " + " staop4 oid, " + " staop5 oid, " + " stacoll1 oid, " + " stacoll2 oid, " + " stacoll3 oid, " + " stacoll4 oid, " + " stacoll5 oid, " + " stanumbers1 float4[], " + " stanumbers2 float4[], " + " stanumbers3 float4[], " + " stanumbers4 float4[], " + " stanumbers5 float4[], " + " stavalues1 text, " + " stavalues2 text, " + " stavalues3 text, " + " stavalues4 text, " + " stavalues5 text) " + "LEFT JOIN exported_operators AS eo1 ON eo1.oid = s.staop1 " + "LEFT JOIN exported_operators AS eo2 ON eo2.oid = s.staop2 " + "LEFT JOIN exported_operators AS eo3 ON eo3.oid = s.staop3 " + "LEFT JOIN exported_operators AS eo4 ON eo4.oid = s.staop4 " + "LEFT JOIN exported_operators AS eo5 ON eo5.oid = s.staop5 "; + + enum + { + EXPR_ATTNUM = 0, + EXPR_STAINHERIT, + EXPR_STANULLFRAC, + EXPR_STAWIDTH, + EXPR_STADISTINCT, + EXPR_STAKIND1, + EXPR_STAKIND2, + EXPR_STAKIND3, + EXPR_STAKIND4, + EXPR_STAKIND5, + EXPR_STAOP1, + EXPR_STAOP2, + EXPR_STAOP3, + EXPR_STAOP4, + EXPR_STAOP5, + EXPR_STACOLL1, + EXPR_STACOLL2, + EXPR_STACOLL3, + EXPR_STACOLL4, + EXPR_STACOLL5, + EXPR_STANUMBERS1, + EXPR_STANUMBERS2, + EXPR_STANUMBERS3, + EXPR_STANUMBERS4, + EXPR_STANUMBERS5, + EXPR_STAVALUES1, + EXPR_STAVALUES2, + EXPR_STAVALUES3, + EXPR_STAVALUES4, + EXPR_STAVALUES5, + EXPR_EOPRNAME1, + EXPR_EOPRNAME2, + EXPR_EOPRNAME3, + EXPR_EOPRNAME4, + 
EXPR_EOPRNAME5, + NUM_EXPR_COLS + }; + + SPITupleTable *tuptable; + int ret; + int e; + + ArrayBuildState *astate = NULL; + + Relation pgsd; + HeapTuple pgstup; + Oid pgstypoid; + FmgrInfo finfo; + + pgsd = table_open(StatisticRelationId, RowExclusiveLock); + pgstypoid = get_rel_type_id(StatisticRelationId); + fmgr_info(F_ARRAY_IN, &finfo); + + if (!OidIsValid(pgstypoid)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" does not have a composite type", + "pg_statistic"))); + + ret = SPI_execute_with_args(sql, EXPR_NARGS, argtypes, args, argnulls, true, 0); + + if (ret != SPI_OK_SELECT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("statistic export JSON is not in proper format"))); + + tuptable = SPI_tuptable; + + if (nexprs != tuptable->numvals) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("statistic export expected %d stxdexpr rows but found %lu", + nexprs, tuptable->numvals))); + + if (nexprs == 0) + astate = accumArrayResult(astate, + (Datum) 0, + true, + pgstypoid, + CurrentMemoryContext); + + for (e = 0; e < nexprs; e++) + { + Datum values[Natts_pg_statistic] = { 0 }; + bool nulls[Natts_pg_statistic] = { false }; + + Datum rs_datums[NUM_EXPR_COLS]; + bool rs_nulls[NUM_EXPR_COLS]; + + VacAttrStats *stats = expr_stats[e]; + + Oid basetypoid; + Oid ltopr; + Oid baseltopr; + Oid eqopr; + Oid baseeqopr; + int k; + + /* + * If if the stat is an array, then we want the base element + * type. This mimics the calculation in get_attrinfo(). 
+ */ + get_sort_group_operators(stats->attrtypid, + false, false, false, + <opr, &eqopr, NULL, + NULL); + basetypoid = get_base_element_type(stats->attrtypid); + if (basetypoid == InvalidOid) + basetypoid = stats->attrtypid; + get_sort_group_operators(basetypoid, + false, false, false, + &baseltopr, &baseeqopr, NULL, + NULL); + + heap_deform_tuple(tuptable->vals[e], tuptable->tupdesc, + rs_datums, rs_nulls); + + /* These values are not derived from either vac stats or exported stats */ + values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(InvalidOid); + values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(InvalidAttrNumber); + values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(false); + + if (rs_nulls[EXPR_STANULLFRAC]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Imported stxdepr row cannot have NULL %s", "stanullfrac"))); + + if (rs_nulls[EXPR_STAWIDTH]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Imported stxdepr row cannot have NULL %s", "stawidth"))); + + if (rs_nulls[EXPR_STADISTINCT]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Imported stxdepr row cannot have NULL %s", "stadistinct"))); + + values[Anum_pg_statistic_stanullfrac - 1] = rs_datums[EXPR_STANULLFRAC]; + values[Anum_pg_statistic_stawidth - 1] = rs_datums[EXPR_STAWIDTH]; + values[Anum_pg_statistic_stadistinct - 1] = rs_datums[EXPR_STADISTINCT]; + + for (k = 0; k < STATISTIC_NUM_SLOTS; k++) + { + int16 kind = 0; + Oid op = InvalidOid; + + if (!rs_nulls[EXPR_STAKIND1 + k]) + { + kind = Int16GetDatum(rs_datums[EXPR_STAKIND1 + k]); + + if (!rs_nulls[EXPR_EOPRNAME1 + k]) + { + char *s = TextDatumGetCString(rs_datums[EXPR_EOPRNAME1 + k]); + + if (strcmp(s, "=") == 0) + { + /* + * MCELEM stat arrays are of the same type as the + * array base element type and are eqopr + */ + if ((kind == STATISTIC_KIND_MCELEM) || + (kind == STATISTIC_KIND_DECHIST)) + op = baseeqopr; + else + op = eqopr; + } + else if 
(strcmp(s, "<") == 0) + op = ltopr; + else + op = InvalidOid; + + pfree(s); + } + } + + values[Anum_pg_statistic_stakind1 - 1 + k] = kind; + values[Anum_pg_statistic_staop1 - 1 + k] = op; + + /* rely on vacattrstat */ + values[Anum_pg_statistic_stacoll1 - 1 + k] = + ObjectIdGetDatum(stats->stacoll[k]); + + values[Anum_pg_statistic_stanumbers1 - 1 + k] = + rs_datums[EXPR_STANUMBERS1 + k]; + nulls[Anum_pg_statistic_stanumbers1 - 1 + k] = + rs_nulls[EXPR_STANUMBERS1 + k]; + + nulls[Anum_pg_statistic_stavalues1 - 1 + k] = + rs_nulls[EXPR_STAVALUES1 + k]; + if (rs_nulls[EXPR_STAVALUES1 + k]) + values[Anum_pg_statistic_stavalues1 - 1 + k] = (Datum) 0; + else + { + char *s = TextDatumGetCString(rs_datums[EXPR_STAVALUES1 + k]); + + values[Anum_pg_statistic_stavalues1 - 1 + k] = + FunctionCall3(&finfo, CStringGetDatum(s), + ObjectIdGetDatum(basetypoid), + Int32GetDatum(stats->attrtypmod)); + + pfree(s); + } + } + + pgstup = heap_form_tuple(RelationGetDescr(pgsd), values, nulls); + + astate = accumArrayResult(astate, + heap_copy_tuple_as_datum(pgstup, RelationGetDescr(pgsd)), + false, + pgstypoid, + CurrentMemoryContext); + } + + table_close(pgsd, RowExclusiveLock); + + return makeArrayResult(astate, CurrentMemoryContext); +} + +/* + * Import statistics for a given extended statistics object. 
+ *
+ * The statistics json format is:
+ *
+ * {
+ *   "server_version_num": number, -- SHOW server_version_num on source system
+ *   "stxoid": number, -- pg_statistic_ext.oid
+ *   "stxname": string, -- pg_statistic_ext.stxname
+ *   "stxnspname": string, -- schema name for the statistics object
+ *   "relname": string, -- pg_class.relname of the exported relation
+ *   "nspname": string, -- schema name for the exported relation
+ *   -- stxkeys cast to text to aid array_in()
+ *   "stxkeys": string, -- pg_statistic_ext.stxkeys::text
+ *   -- stxndistinct and stxdependencies only on v10-v11
+ *   "stxndistinct": string, -- pg_statistic_ext.stxndistinct::text
+ *   "stxdependencies": string, -- pg_statistic_ext.stxdependencies::text
+ *   -- data is on v12+
+ *   "data": [
+ *     {
+ *       -- stxdinherit is on v15+
+ *       "stxdinherit": bool, -- pg_statistic_ext_data.stxdinherit
+ *       -- stxdndistinct and stxddependencies are on v12+
+ *       "stxdndistinct": text, -- pg_statistic_ext_data.stxdndistinct::text
+ *       "stxddependencies": text, -- pg_statistic_ext_data.stxddependencies::text
+ *       -- stxdmcv is on v12+
+ *       "stxdmcv": [
+ *         {
+ *           "index": number,
+ *           "nulls": [bool],
+ *           "values": [text],
+ *           "frequency": number,
+ *           "base_frequency": number
+ *         }
+ *       ],
+ *       -- stxdexpr is on v14+
+ *       "stxdexpr": [
+ *         {
+ *           "staattnum": number, -- pg_statistic.staattnum
+ *           "stainherit": bool, -- pg_statistic.stainherit
+ *           "stanullfrac": number, -- pg_statistic.stanullfrac
+ *           "stawidth": number, -- pg_statistic.stawidth
+ *           "stadistinct": number, -- pg_statistic.stadistinct
+ *           "stakind1": number, -- pg_statistic.stakind1
+ *           "stakind2": number, -- pg_statistic.stakind2
+ *           "stakind3": number, -- pg_statistic.stakind3
+ *           "stakind4": number, -- pg_statistic.stakind4
+ *           "stakind5": number, -- pg_statistic.stakind5
+ *           "staop1": number, -- pg_statistic.staop1
+ *           "staop2": number, -- pg_statistic.staop2
+ *           "staop3": number, -- pg_statistic.staop3
+ *           "staop4": number, -- pg_statistic.staop4
+ *           "staop5": number, -- pg_statistic.staop5
+ *           "stacoll1": number, -- pg_statistic.stacoll1
+ *           "stacoll2": number, -- pg_statistic.stacoll2
+ *           "stacoll3": number, -- pg_statistic.stacoll3
+ *           "stacoll4": number, -- pg_statistic.stacoll4
+ *           "stacoll5": number, -- pg_statistic.stacoll5
+ *           -- stanumbersN are cast to string to aid array_in()
+ *           "stanumbers1": string, -- pg_statistic.stanumbers1::text
+ *           "stanumbers2": string, -- pg_statistic.stanumbers2::text
+ *           "stanumbers3": string, -- pg_statistic.stanumbers3::text
+ *           "stanumbers4": string, -- pg_statistic.stanumbers4::text
+ *           "stanumbers5": string, -- pg_statistic.stanumbers5::text
+ *           -- stavaluesN are cast to string to aid array_in()
+ *           "stavalues1": string, -- pg_statistic.stavalues1::text
+ *           "stavalues2": string, -- pg_statistic.stavalues2::text
+ *           "stavalues3": string, -- pg_statistic.stavalues3::text
+ *           "stavalues4": string, -- pg_statistic.stavalues4::text
+ *           "stavalues5": string -- pg_statistic.stavalues5::text
+ *         }
+ *       ]
+ *     }
+ *   ],
+ *   "types": [
+ *     -- export of all pg_type referenced in this json doc
+ *     {
+ *       "oid": number, -- pg_type.oid
+ *       "typname": string, -- pg_type.typname
+ *       "nspname": string -- schema name for the pg_type
+ *     }
+ *   ],
+ *   "collations": [
+ *     -- export of all pg_collation referenced in this json doc
+ *     {
+ *       "oid": number, -- pg_collation.oid
+ *       "collname": string, -- pg_collation.collname
+ *       "nspname": string -- schema name for the pg_collation
+ *     }
+ *   ],
+ *   "operators": [
+ *     -- export of all pg_operator referenced in this json doc
+ *     {
+ *       "oid": number, -- pg_operator.oid
+ *       "oprname": string, -- pg_operator.oprname
+ *       "nspname": string -- schema name for the pg_operator
+ *     }
+ *   ],
+ *   "attributes": [
+ *     -- export of all pg_attribute for the exported relation
+ *     {
+ *       "attnum": number, -- pg_attribute.attnum
+ *       "attname": string, -- pg_attribute.attname
+ *       "atttypid": number, -- pg_attribute.atttypid
+ *       "attcollation": number -- pg_attribute.attcollation
+ *     }
+ *   ]
+ * }
+ *
+ *
Each server verion exports a subset of this format. The exported format + * can and will change with each new version, and this function will have + * to account for those variations. + * + * Statistics imported from version 15 and higher can potentially have two + * result rows, one with stxdinherit = false and one for stxdinherit = true + * + */ +Datum +pg_import_ext_stats(PG_FUNCTION_ARGS) +{ + const char *bq_sql = + "SELECT current_setting('server_version_num') AS current_version, eb.* " + "FROM jsonb_to_record($1) AS eb( " + " server_version_num integer, " + " stxoid Oid, " + " reloid Oid, " + " stxname text, " + " stxnspname text, " + " relname text, " + " nspname text, " + " stxkeys text, " + " stxkind text, " + " stxndistinct text, " + " stxdependencies text, " + " data jsonb, " + " attributes jsonb, " + " collations jsonb, " + " operators jsonb, " + " types jsonb) "; + + enum { + BQ_CURRENT_VERSION_NUM = 0, + BQ_SERVER_VERSION_NUM, + BQ_STXOID, + BQ_RELOID, + BQ_STXNAME, + BQ_STXNSPNAME, + BQ_RELNAME, + BQ_NSPNAME, + BQ_STXKEYS, + BQ_STXKIND, + BQ_STXNDISTINCT, + BQ_STXDEPENDENCIES, + BQ_DATA, + BQ_ATTRIBUTES, + BQ_COLLATIONS, + BQ_OPERATORS, + BQ_TYPES, + NUM_BQ_COLS + }; + + /* All versions of the STXD query have the same column signature */ + enum { + STXD_INHERIT = 0, + STXD_NDISTINCT, + STXD_DEPENDENCIES, + STXD_MCV, + STXD_EXPR, + NUM_STXD_COLS + }; + +#define BQ_NARGS 1 + + Oid stxid; + bool validate; + bool require_match_oids; + + Oid bq_argtypes[BQ_NARGS] = { JSONBOID }; + Datum bq_args[BQ_NARGS]; + + int ret; + + SPITupleTable *tuptable; + + Relation rel; + TupleDesc tupdesc; + int natts; + + HeapTuple etup; + Relation sd; + + Form_pg_statistic_ext stxform; + + StatExtEntry *stxentry; + VacAttrStats **relstats; /* all relations attributes */ + VacAttrStats **extstats; /* entries relevenat to the extstat */ + VacAttrStats **expr_stats; /* expressions in the extstat */ + int nexprs; + int ncols; + + Datum bq_datums[NUM_BQ_COLS]; + bool 
bq_nulls[NUM_BQ_COLS]; + + int i; + int32 server_version_num; + int32 current_version_num; + + if (PG_ARGISNULL(0)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extended statistics oid cannot be NULL"))); + stxid = PG_GETARG_OID(0); + + if (PG_ARGISNULL(1)) + PG_RETURN_BOOL(false); + bq_args[0] = PG_GETARG_DATUM(1); + + if (PG_ARGISNULL(2)) + validate = false; + else + validate = PG_GETARG_BOOL(2); + + if (PG_ARGISNULL(3)) + require_match_oids = false; + else + require_match_oids = PG_GETARG_BOOL(3); + + sd = table_open(StatisticRelationId, RowExclusiveLock); + etup = SearchSysCacheCopy1(STATEXTOID, ObjectIdGetDatum(stxid)); + if (!HeapTupleIsValid(etup)) + elog(ERROR, "pg_statistic_ext entry for oid %u vanished during statistics import", + stxid); + + stxform = (Form_pg_statistic_ext) GETSTRUCT(etup); + + rel = relation_open(stxform->stxrelid, ShareUpdateExclusiveLock); + + tupdesc = RelationGetDescr(rel); + natts = tupdesc->natts; + + relstats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *)); + for (i = 0; i < natts; i++) + relstats[i] = examine_rel_attribute(rel, i+1, NULL); + + stxentry = statext_create_entry(etup); + extstats = lookup_var_attr_stats(rel, stxentry->columns, stxentry->exprs, + natts, relstats); + + /* only the stats that were derived from pg_statistic_ext */ + ncols = bms_num_members(stxentry->columns); + expr_stats = &extstats[ncols]; + nexprs = list_length(stxentry->exprs); + + /* + * Connect to SPI manager + */ + if ((ret = SPI_connect()) < 0) + elog(ERROR, "SPI connect failure - returned %d", ret); + + /* + * Fetch the base level of the stats json. The results found there will + * determine how the nested data will be handled. 
+ */ + ret = SPI_execute_with_args(bq_sql, BQ_NARGS, bq_argtypes, bq_args, + NULL, true, 1); + + /* + * Only allow one qualifying tuple + */ + if (ret != SPI_OK_SELECT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("statistic export JSON is not in proper format"))); + + if (SPI_processed != 1) + ereport(ERROR, + (errcode(ERRCODE_CARDINALITY_VIOLATION), + errmsg("pg_statistic_ext export JSON should return exactly one base object"))); + + tuptable = SPI_tuptable; + heap_deform_tuple(tuptable->vals[0], tuptable->tupdesc, bq_datums, bq_nulls); + + /* + * Check for valid combination of exported server_version_num to the local + * server_version_num. We won't be reusing these values in a query so use + * scratch datum/null vars. + */ + if (bq_nulls[BQ_CURRENT_VERSION_NUM]) + ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_ITEM_CANNOT_BE_CAST_TO_TARGET_TYPE), + errmsg("current_version_num cannot be null"))); + + if (bq_nulls[BQ_SERVER_VERSION_NUM]) + ereport(ERROR, + (errcode(ERRCODE_SQL_JSON_ITEM_CANNOT_BE_CAST_TO_TARGET_TYPE), + errmsg("server_version_num cannot be null"))); + + current_version_num = DatumGetInt32(bq_datums[BQ_CURRENT_VERSION_NUM]); + server_version_num = DatumGetInt32(bq_datums[BQ_SERVER_VERSION_NUM]); + + if (server_version_num <= 100000) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("Cannot import statistics from servers below version 10.0"))); + + if (server_version_num > current_version_num) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("Cannot import statistics from server version %d to %d", + server_version_num, current_version_num))); + + if (validate) + { + validate_exported_types(bq_datums[BQ_TYPES], bq_nulls[BQ_TYPES]); + validate_exported_collations(bq_datums[BQ_COLLATIONS], bq_nulls[BQ_COLLATIONS]); + validate_exported_operators(bq_datums[BQ_OPERATORS], bq_nulls[BQ_OPERATORS]); + validate_exported_attributes(bq_datums[BQ_ATTRIBUTES], bq_nulls[BQ_ATTRIBUTES]); + } + 
+ if (server_version_num >= 120000) + { + /* pg_statistic_ext_data export for modern versions */ + +#define STXD_NARGS 1 + + Oid stxd_argtypes[STXD_NARGS] = { JSONBOID }; + Datum stxd_args[STXD_NARGS] = { bq_datums[BQ_DATA] }; + char stxd_nulls[STXD_NARGS] = { bq_nulls[BQ_DATA] ? 'n' : ' ' }; + + const char *stxd_sql = + "SELECT d.* " + "FROM jsonb_to_recordset($1) AS d ( " + " stxdinherit bool, " + " stxdndistinct text, " + " stxddependencies text, " + " stxdmcv jsonb, " + " stxdexpr jsonb) " + "ORDER BY d.stxdinherit "; + + /* Versions 12+ cannot have ndistinct or dependencies on the base query */ + if (!bq_nulls[BQ_STXNDISTINCT]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Key stxndistinct not allowed on exports of servers v12 and later"), + errhint("Use stxdndistinct instead"))); + + if(!bq_nulls[BQ_STXDEPENDENCIES]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Key stxdependencies not allowed on exports of servers v12 and later"), + errhint("Use stxddependencies instead"))); + + ret = SPI_execute_with_args(stxd_sql, STXD_NARGS, stxd_argtypes, stxd_args, + stxd_nulls, true, 0); +#undef STXD_NARGS + } + else + { +#define STXD_NARGS 2 + Oid stxd_argtypes[STXD_NARGS] = { + TEXTOID, + TEXTOID }; + Datum stxd_args[STXD_NARGS] = { + bq_datums[BQ_STXNDISTINCT], + bq_datums[BQ_STXDEPENDENCIES] }; + char stxd_nulls[STXD_NARGS] = { + bq_nulls[BQ_STXNDISTINCT] ? 'n' : ' ', + bq_nulls[BQ_DATA] ? 
'n' : ' ' }; + + /* pg_statistic_ext_data export for versions prior to the table existing */ + const char *stxd_sql = + "SELECT " + " NULL::boolean AS stxdinherit, " + " $1 AS stxdndistinct, " + " $2 AS stxddependencies, " + " NULL::jsonb AS stxdmcv, " + " NULL::jsonb AS stxdexpr "; + + ret = SPI_execute_with_args(stxd_sql, STXD_NARGS, stxd_argtypes, stxd_args, + stxd_nulls, true, 2); + +#undef STXD_NARGS + } + + if (ret != SPI_OK_SELECT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("statistic export JSON is not in proper format"))); + + /* overwrite previous tuptable */ + tuptable = SPI_tuptable; + + for (i = 0; i < tuptable->numvals; i++) + { + Datum stxd_datums[NUM_BQ_COLS]; + bool stxd_nulls[NUM_BQ_COLS]; + bool inh; + MCVList *mcvlist; + MVDependencies *dependencies; + MVNDistinct *ndistinct; + Datum exprs; + + heap_deform_tuple(tuptable->vals[i], tuptable->tupdesc, stxd_datums, + stxd_nulls); + + if ((!stxd_nulls[STXD_MCV]) && (server_version_num < 120000)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Key stxdmv not allowed on exports of servers berfore v12"))); + + if ((!stxd_nulls[STXD_EXPR]) && (server_version_num < 140000)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Key stxdexpr not allowed on exports of servers berfore v14"))); + + if ((!stxd_nulls[STXD_INHERIT]) && (server_version_num < 150000)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Extended statistics from servers prior to v15 cannot contain inherited stats"))); + + /* Versions prior to v15 never have stxdinhert set */ + if (stxd_nulls[STXD_INHERIT]) + inh = false; + else + inh = DatumGetBool(stxd_datums[STXD_INHERIT]); + + ndistinct = statext_ndistinct_import(stxform->stxrelid, + stxd_datums[STXD_NDISTINCT], stxd_nulls[STXD_NDISTINCT], + bq_datums[BQ_ATTRIBUTES], bq_nulls[BQ_ATTRIBUTES]); + + dependencies = statext_dependencies_import(stxform->stxrelid, + stxd_datums[STXD_DEPENDENCIES], 
+ stxd_nulls[STXD_DEPENDENCIES], + bq_datums[BQ_ATTRIBUTES], bq_nulls[BQ_ATTRIBUTES]); + + mcvlist = statext_mcv_import(stxd_datums[STXD_MCV], stxd_nulls[STXD_MCV], + extstats); + + exprs = import_expressions(stxd_datums[STXD_EXPR], stxd_nulls[STXD_EXPR], + bq_datums[BQ_OPERATORS], bq_nulls[BQ_OPERATORS], + expr_stats, nexprs); + + statext_store(stxentry->statOid, inh, ndistinct, dependencies, mcvlist, exprs, extstats); + } + + relation_close(rel, NoLock); + table_close(sd, RowExclusiveLock); + SPI_finish(); + + PG_RETURN_BOOL(true); +} diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 6255cd1f4f..3bafde83d6 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -20,6 +20,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "executor/spi.h" #include "fmgr.h" #include "funcapi.h" #include "nodes/nodeFuncs.h" @@ -2177,3 +2178,194 @@ mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, return s; } + +/* + * statext_mcv_import + * + * The mcv serialization is the json equivalent of the + * pg_mcv_list_items() result set: + * [ + * { + * "index": number, + * "values": [string], + * "nulls": [bool], + * "frequency": number, + * "base_frequency": number + * } + * ] + * + * The values are text strings that must be converted into datums of the type + * appropriate for their corresponding dimension. This means that we must + * cast individual datums rather than trying to use array_in(). 
+ * + */ +MCVList * +statext_mcv_import(Datum mcv, bool mcv_null, VacAttrStats **extstats) +{ + const char *sql = + "SELECT m.*, array_length(m.nulls,1) AS ndims " + "FROM jsonb_to_recordset($1) AS m(index integer, values text[], " + " nulls boolean[], frequency float8, base_frequency float8) " + "ORDER BY m.index "; + + enum { + MCVS_INDEX = 0, + MCVS_VALUES, + MCVS_NULLS, + MCVS_FREQUENCY, + MCVS_BASE_FREQUENCY, + MCVS_NDIMS, + NUM_MCVS_COLS + }; + +#define MCVS_NARGS 1 + + Oid argtypes[MCVS_NARGS] = { JSONBOID }; + Datum args[MCVS_NARGS] = { mcv }; + char argnulls[MCVS_NARGS] = { mcv_null ? 'n' : ' ' }; + int nitems = 0; + int ndims = 0; + int ret; + int i; + + MCVList *mcvlist; + SPITupleTable *tuptable; + Oid ioparams[STATS_MAX_DIMENSIONS]; + FmgrInfo finfos[STATS_MAX_DIMENSIONS]; + + ret = SPI_execute_with_args(sql, MCVS_NARGS, argtypes, args, argnulls, true, 0); + + if (ret != SPI_OK_SELECT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("statistic export JSON is not in proper format"))); + + tuptable = SPI_tuptable; + if (tuptable->numvals > 0) + { + /* ndims will be same for all rows, so just check first one */ + bool isnull; + Datum d = SPI_getbinval(tuptable->vals[0], tuptable->tupdesc, + MCVS_NDIMS+1, &isnull); + + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Indeterminate number of mcv dimensions"))); + + ndims = DatumGetInt32(d); + nitems = tuptable->numvals; + } + + mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) + + (sizeof(MCVItem) * nitems)); + + mcvlist->magic = STATS_MCV_MAGIC; + mcvlist->type = STATS_MCV_TYPE_BASIC; + mcvlist->nitems = nitems; + mcvlist->ndimensions = ndims; + + /* We will need these input functions $nitems times. 
*/ + for (i = 0; i < ndims; i++) + { + Oid typid = extstats[i]->attrtypid; + Oid infunc; + + mcvlist->types[i] = typid; + getTypeInputInfo(typid, &infunc, &ioparams[i]); + fmgr_info(infunc, &finfos[i]); + } + + for (i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + Datum datums[NUM_MCVS_COLS]; + bool nulls[NUM_MCVS_COLS]; + ArrayType *arr; + Datum *elems; + bool *elnulls; + int nelems; + + int d; + + heap_deform_tuple(tuptable->vals[i], tuptable->tupdesc, datums, nulls); + + if (nulls[MCVS_VALUES]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extended statistic mcv export cannot have NULL %s", + "values"))); + if (nulls[MCVS_NULLS]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extended statistic mcv export cannot have NULL %s", + "nulls"))); + if (nulls[MCVS_FREQUENCY]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extended statistic mcv export cannot have NULL %s", + "frequency"))); + if (nulls[MCVS_BASE_FREQUENCY]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extended statistic mcv export cannot have NULL %s", + "base_frequency"))); + + item->frequency = DatumGetFloat8(datums[MCVS_FREQUENCY]); + item->base_frequency = DatumGetFloat8(datums[MCVS_BASE_FREQUENCY]); + item->values = (Datum *) palloc(sizeof(Datum) * ndims); + item->isnull = (bool *) palloc(sizeof(bool) * ndims); + + arr = DatumGetArrayTypeP(datums[MCVS_NULLS]); + deconstruct_array(arr, BOOLOID, 1, true, 'c', &elems, &elnulls, &nelems); + + if (nelems != ndims) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extended statistic mcv %s array expected %d elements but %d found", + "nulls", ndims, nelems))); + + for (d = 0; d < ndims; d++) + { + if (elnulls[d]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extended statistic mcv %s array cannot contain NULL values", + "nulls"))); + item->isnull[d] = DatumGetBool(elems[d]); + } + + arr 
= DatumGetArrayTypeP(datums[MCVS_VALUES]); + deconstruct_array_builtin(arr, TEXTOID, &elems, &elnulls, &nelems); + + if (nelems != ndims) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extended statistic mcv %s array expected %d elements but %d found", + "values", ndims, nelems))); + + for (d = 0; d < ndims; d++) + { + /* if the element is a known NULL, nothing to decode */ + if (item->isnull[d]) + item->values[d] = (Datum) 0; + else + { + char *s; + + if (elnulls[d]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("extended statistic mcv nulls array in conflict with values array"))); + + s = TextDatumGetCString(elems[d]); + + item->values[d] = InputFunctionCall(&finfos[d], s, ioparams[d], + extstats[d]->attrtypmod); + pfree(s); + } + } + } + + return mcvlist; +} diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index ee1134cc37..d84eee47ee 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -28,9 +28,11 @@ #include "access/htup_details.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "executor/spi.h" #include "lib/stringinfo.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" +#include "utils/builtins.h" #include "utils/fmgrprotos.h" #include "utils/lsyscache.h" #include "utils/syscache.h" @@ -698,3 +700,161 @@ generate_combinations(CombinationGenerator *state) pfree(current); } + +/* + * statext_dependencies_import + * + * The ndinstinct serialization is a string that looks like + * {"2, 3": 1521, "3, -1": 4} + * + * This structure can be coerced into JSON, but we must use JSON + * over JSONB because JSON preserves key order and JSONB does not. + * + * The key side integers represent attnums in the exported table, and these + * may not line up with the attnums in the destination table so we match + * them by name. 
+ * + * Negative integers represent expressions columns that have no + * corresponding match in the exported attributes. We leave those + * attnums as-is. Positive integers are looked up in the exported + * attributes and the attname there is then compared to pg_attribute + * names in the underlying table, and that tuples attnum is used instead. + */ +MVNDistinct * +statext_ndistinct_import(Oid relid, Datum ndistinct, bool ndistinct_null, + Datum attributes, bool attributes_null) +{ + MVNDistinct *result; + int nitems; + +#define NDIST_NARGS 3 + + Oid argtypes[NDIST_NARGS] = { OIDOID, TEXTOID, JSONBOID }; + Datum args[NDIST_NARGS] = { relid, ndistinct , attributes }; + char argnulls[NDIST_NARGS] = { ' ', + ndistinct_null ? 'n' : ' ', + attributes_null ? 'n' : ' ' }; + + const char *sql = + "SELECT " + " i.itemord, " + " a.attrord, " + " a.exp_attnum, " + " ea.attname AS exp_attname, " + " CASE " + " WHEN a.exp_attnum < 0 THEN a.exp_attnum " + " ELSE pga.attnum " + " END AS attnum, " + " i.ndistinct::float8 AS ndistinct, " + " COUNT(*) OVER (PARTITION BY i.itemord) AS num_attrs, " + " MAX(i.itemord) OVER () AS num_items " + "FROM json_each_text($2::json) " + " WITH ORDINALITY AS i(attrlist, ndistinct, itemord) " + "CROSS JOIN LATERAL unnest(string_to_array(i.attrlist, ', ')::int2[]) " + " WITH ORDINALITY AS a(exp_attnum, attrord) " + "LEFT JOIN LATERAL jsonb_to_recordset($3) AS ea(attnum int2, attname text) " + " ON ea.attnum = a.exp_attnum AND a.exp_attnum > 0 " + "LEFT JOIN pg_attribute AS pga " + " ON pga.attrelid = $1 AND pga.attname = ea.attname " + "ORDER BY i.itemord, a.attrord "; + + enum { + NDIST_ITEMORD = 0, + NDIST_ATTRORD, + NDIST_EXP_ATTNUM, + NDIST_EXP_ATTNAME, + NDIST_ATTNUM, + NDIST_NDISTINCT, + NDIST_NUM_ATTRS, + NDIST_NUM_ITEMS, + NUM_NDIST_COLS + }; + + SPITupleTable *tuptable; + int ret; + int j; + + ret = SPI_execute_with_args(sql, NDIST_NARGS, argtypes, args, argnulls, true, 0); + + if (ret != SPI_OK_SELECT) + ereport(ERROR, + 
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("statistic export JSON is not in proper format"))); + + tuptable = SPI_tuptable; + if (tuptable->numvals == 0) + nitems = 0; + else + { + bool isnull; + Datum d = SPI_getbinval(tuptable->vals[0], tuptable->tupdesc, + NDIST_NUM_ITEMS+1, &isnull); + nitems = DatumGetInt32(d); + + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Indeterminate number of dependencies"))); + } + + result = palloc(offsetof(MVNDistinct, items) + + (nitems * sizeof(MVNDistinctItem))); + result->magic = STATS_NDISTINCT_MAGIC; + result->type = STATS_NDISTINCT_TYPE_BASIC; + result->nitems = nitems; + + for (j = 0; j < tuptable->numvals; j++) + { + Datum datums[NUM_NDIST_COLS]; + bool nulls[NUM_NDIST_COLS]; + int i; + int a; + int natts; + + MVNDistinctItem *item; + + heap_deform_tuple(tuptable->vals[j], tuptable->tupdesc, datums, nulls); + + Assert(!nulls[NDIST_ITEMORD]); + i = DatumGetInt32(datums[NDIST_ITEMORD]) - 1; + item = &result->items[i]; + Assert(!nulls[NDIST_ATTRORD]); + a = DatumGetInt32(datums[NDIST_ATTRORD]) - 1; + + if (a == 0) + { + /* New item */ + Assert(!nulls[NDIST_NUM_ATTRS]); + natts = DatumGetInt32(datums[NDIST_NUM_ATTRS]); + item->nattributes = natts; + item->attributes = palloc(sizeof(AttrNumber) * natts); + Assert(!nulls[NDIST_NDISTINCT]); + item->ndistinct = DatumGetFloat8(datums[NDIST_NDISTINCT]); + } + + if (!nulls[NDIST_ATTNUM]) + item->attributes[a] = + DatumGetInt16(datums[NDIST_ATTNUM]); + else if (nulls[NDIST_EXP_ATTNUM]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ndistinct exported attnum cannot be null"))); + else + { + AttrNumber exp_attnum = DatumGetInt16(datums[NDIST_EXP_ATTNUM]); + + if (nulls[NDIST_EXP_ATTNAME]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ndistinct has no exported name for attnum %d", + exp_attnum))); + + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Dependency tried to 
match attnum %d by name (%s) but found no match", + exp_attnum, TextDatumGetCString(datums[NDIST_EXP_ATTNAME])))); + } + } + + return result; +} diff --git a/src/test/regress/expected/stats_export_import.out b/src/test/regress/expected/stats_export_import.out index 5ab51c5aa0..9d17947583 100644 --- a/src/test/regress/expected/stats_export_import.out +++ b/src/test/regress/expected/stats_export_import.out @@ -22,6 +22,7 @@ SELECT 3, 'tre', (3, 3.3, 'TRE', '2003-03-03', NULL)::stats_export_import.comple UNION ALL SELECT 4, 'four', NULL, NULL; CREATE INDEX is_odd ON stats_export_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_export_import.evens_test ON name, ((comp).a % 2 = 0) FROM stats_export_import.test; -- Generate statistics on table with data ANALYZE stats_export_import.test; -- Capture pg_statistic values for table and index @@ -44,6 +45,25 @@ FROM stats_export_import.pg_statistic_capture; 5 (1 row) +-- Capture pg_statistic values for table and index +CREATE TABLE stats_export_import.pg_statistic_ext_data_capture +AS +SELECT d.stxdinherit, + d.stxdndistinct::text AS stxdndistinct, + d.stxddependencies::text AS stxddependencies, + d.stxdmcv::text AS stxdmcv, + d.stxdexpr::text AS stxdexpr +FROM pg_statistic_ext AS e +JOIN pg_statistic_ext_data AS d ON d.stxoid = e.oid +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND e.stxname = 'evens_test'; +SELECT COUNT(*) +FROM stats_export_import.pg_statistic_ext_data_capture; + count +------- + 1 +(1 row) + -- Export stats SELECT jsonb_build_object( @@ -323,6 +343,173 @@ WHERE :'debug'::boolean; ------------------ (0 rows) +SELECT + jsonb_build_object( + 'server_version_num', current_setting('server_version_num'), + 'stxoid', e.oid, + 'reloid', r.oid, + 'stxname', e.stxname, + 'stxnspname', en.nspname, + 'relname', r.relname, + 'nspname', n.nspname, + 'stxkeys', e.stxkeys::text, + 'stxkind', e.stxkind::text, + 'data', + ( + SELECT + array_agg(r ORDER by r.stxdinherit) + FROM ( + SELECT + 
sd.stxdinherit, + sd.stxdndistinct::text AS stxdndistinct, + sd.stxddependencies::text AS stxddependencies, + ( + SELECT + array_agg(mcvl) + FROM pg_mcv_list_items(sd.stxdmcv) AS mcvl + WHERE sd.stxdmcv IS NOT NULL + ) AS stxdmcv, + ( + SELECT + array_agg(r ORDER BY r.stainherit, r.staattnum) + FROM ( + SELECT + s.staattnum, + s.stainherit, + s.stanullfrac, + s.stawidth, + s.stadistinct, + s.stakind1, + s.stakind2, + s.stakind3, + s.stakind4, + s.stakind5, + s.staop1, + s.staop2, + s.staop3, + s.staop4, + s.staop5, + s.stacoll1, + s.stacoll2, + s.stacoll3, + s.stacoll4, + s.stacoll5, + s.stanumbers1::text AS stanumbers1, + s.stanumbers2::text AS stanumbers2, + s.stanumbers3::text AS stanumbers3, + s.stanumbers4::text AS stanumbers4, + s.stanumbers5::text AS stanumbers5, + s.stavalues1::text AS stavalues1, + s.stavalues2::text AS stavalues2, + s.stavalues3::text AS stavalues3, + s.stavalues4::text AS stavalues4, + s.stavalues5::text AS stavalues5 + FROM unnest(sd.stxdexpr) AS s + WHERE sd.stxdexpr IS NOT NULL + ) AS r + ) AS stxdexpr + FROM pg_statistic_ext_data AS sd + WHERE sd.stxoid = e.oid + ) r + ), + 'types', + ( + SELECT + array_agg(r) + FROM ( + SELECT + a.atttypid AS oid, + t.typname, + n.nspname + FROM pg_attribute AS a + JOIN pg_type AS t ON t.oid = a.atttypid + JOIN pg_namespace AS n ON n.oid = t.typnamespace + WHERE a.attrelid = r.oid + AND NOT a.attisdropped + AND a.attnum > 0 + ) AS r + ), + 'collations', + ( + SELECT + array_agg(r) + FROM ( + SELECT + e.oid, + c.collname, + n.nspname + FROM ( + SELECT a.attcollation AS oid + FROM pg_attribute AS a + WHERE a.attrelid = r.oid + AND NOT a.attisdropped + AND a.attnum > 0 + UNION + SELECT u.collid + FROM pg_statistic_ext_data AS sd + CROSS JOIN LATERAL unnest(sd.stxdexpr) AS s + CROSS JOIN LATERAL unnest(ARRAY[ + s.stacoll1, s.stacoll2, + s.stacoll3, s.stacoll4, + s.stacoll5]) AS u(collid) + WHERE sd.stxoid = e.oid + AND sd.stxdexpr IS NOT NULL + ) AS e + JOIN pg_collation AS c ON c.oid = e.oid + JOIN 
pg_namespace AS n ON n.oid = c.collnamespace + ) AS r + ), + 'operators', + ( + SELECT + array_agg(r) + FROM ( + SELECT DISTINCT + o.oid, + o.oprname, + n.nspname + FROM pg_statistic_ext_data AS sd + CROSS JOIN LATERAL unnest(sd.stxdexpr) AS s + CROSS JOIN LATERAL unnest(ARRAY[ + s.staop1, s.staop2, + s.staop3, s.staop4, + s.staop5]) AS u(opid) + JOIN pg_operator AS o ON o.oid = u.oid + JOIN pg_namespace AS n ON n.oid = o.oprnamespace + WHERE sd.stxoid = e.oid + AND sd.stxdexpr IS NOT NULL + ) AS r + ), + 'attributes', + ( + SELECT + array_agg(r ORDER BY r.attnum) + FROM ( + SELECT + a.attnum, + a.attname, + a.atttypid, + a.attcollation + FROM pg_attribute AS a + WHERE a.attrelid = r.oid + AND NOT a.attisdropped + AND a.attnum > 0 + ) AS r + ) + ) AS ext_stats_json +FROM pg_class r +JOIN pg_statistic_ext AS e ON e.stxrelid = r.oid +JOIN pg_namespace AS en ON en.oid = e.stxnamespace +JOIN pg_namespace AS n ON n.oid = r.relnamespace +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND e.stxname = 'evens_test' +\gset +SELECT jsonb_pretty(:'ext_stats_json'::jsonb) AS ext_stats_json +WHERE :'debug'::boolean; + ext_stats_json +---------------- +(0 rows) + SELECT relname, reltuples FROM pg_class WHERE oid IN ('stats_export_import.test'::regclass, @@ -334,12 +521,14 @@ ORDER BY relname; test | 4 (2 rows) --- Move table and index out of the way +-- Move table and index and extended stats out of the way ALTER TABLE stats_export_import.test RENAME TO test_orig; ALTER INDEX stats_export_import.is_odd RENAME TO is_odd_orig; --- Create empty copy tables +ALTER STATISTICS stats_export_import.evens_test RENAME TO evens_test_orig; +-- Create empty copy tables and objects CREATE TABLE stats_export_import.test(LIKE stats_export_import.test_orig); CREATE INDEX is_odd ON stats_export_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_export_import.evens_test ON name, ((comp).a % 2 = 0) FROM stats_export_import.test; -- Verify no stats for these new tables SELECT 
COUNT(*) FROM pg_statistic @@ -475,6 +664,19 @@ SELECT pg_import_rel_stats( t (1 row) +SELECT pg_import_ext_stats( + e.oid, + :'ext_stats_json'::jsonb, + true, + true) +FROM pg_statistic_ext AS e +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND e.stxname = 'evens_test'; + pg_import_ext_stats +--------------------- + t +(1 row) + -- This should return 0 rows SELECT staattnum, stainherit, stanullfrac, stawidth, stadistinct, stakind1, stakind2, stakind3, stakind4, stakind5, @@ -528,3 +730,62 @@ ORDER BY relname; test | 4 (2 rows) +-- This should return 0 rows +SELECT d.stxdinherit, + d.stxdndistinct::text AS stxdndistinct, + d.stxddependencies::text AS stxddependencies, + d.stxdmcv::text AS stxdmcv, + d.stxdexpr::text AS stxdexpr +FROM pg_statistic_ext AS e +JOIN pg_statistic_ext_data AS d ON d.stxoid = e.oid +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND e.stxname = 'evens_test' +EXCEPT +SELECT * +FROM stats_export_import.pg_statistic_ext_data_capture; + stxdinherit | stxdndistinct | stxddependencies | stxdmcv | stxdexpr +-------------+---------------+------------------+---------+---------- +(0 rows) + +-- This should return 0 rows +SELECT * +FROM stats_export_import.pg_statistic_ext_data_capture +EXCEPT +SELECT d.stxdinherit, + d.stxdndistinct::text AS stxdndistinct, + d.stxddependencies::text AS stxddependencies, + d.stxdmcv::text AS stxdmcv, + d.stxdexpr::text AS stxdexpr +FROM pg_statistic_ext AS e +JOIN pg_statistic_ext_data AS d ON d.stxoid = e.oid +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND e.stxname = 'evens_test'; + stxdinherit | stxdndistinct | stxddependencies | stxdmcv | stxdexpr +-------------+---------------+------------------+---------+---------- +(0 rows) + +SELECT staattnum, stainherit, stanullfrac, stawidth, stadistinct, + stakind1, stakind2, stakind3, stakind4, stakind5, + staop1, staop2, staop3, staop4, staop5, stacoll1, stacoll2, stacoll3, stacoll4, stacoll5, + stanumbers1, stanumbers2, stanumbers3, 
stanumbers4, stanumbers5, + sv1, sv2, sv3, sv4, sv5 +FROM stats_export_import.pg_statistic_capture +WHERE :'debug'::boolean; + staattnum | stainherit | stanullfrac | stawidth | stadistinct | stakind1 | stakind2 | stakind3 | stakind4 | stakind5 | staop1 | staop2 | staop3 | staop4 | staop5 | stacoll1 | stacoll2 | stacoll3 | stacoll4 | stacoll5 | stanumbers1 | stanumbers2 | stanumbers3 | stanumbers4 | stanumbers5 | sv1 | sv2 | sv3 | sv4 | sv5 +-----------+------------+-------------+----------+-------------+----------+----------+----------+----------+----------+--------+--------+--------+--------+--------+----------+----------+----------+----------+----------+-------------+-------------+-------------+-------------+-------------+-----+-----+-----+-----+----- +(0 rows) + +SELECT staattnum, stainherit, stanullfrac, stawidth, stadistinct, + stakind1, stakind2, stakind3, stakind4, stakind5, + staop1, staop2, staop3, staop4, staop5, stacoll1, stacoll2, stacoll3, stacoll4, stacoll5, + stanumbers1, stanumbers2, stanumbers3, stanumbers4, stanumbers5, + stavalues1::text AS sv1, stavalues2::text AS sv2, stavalues3::text AS sv3, + stavalues4::text AS sv4, stavalues5::text AS sv5 +FROM pg_statistic +WHERE starelid IN ('stats_export_import.test'::regclass, + 'stats_export_import.is_odd'::regclass) +AND :'debug'::boolean; + staattnum | stainherit | stanullfrac | stawidth | stadistinct | stakind1 | stakind2 | stakind3 | stakind4 | stakind5 | staop1 | staop2 | staop3 | staop4 | staop5 | stacoll1 | stacoll2 | stacoll3 | stacoll4 | stacoll5 | stanumbers1 | stanumbers2 | stanumbers3 | stanumbers4 | stanumbers5 | sv1 | sv2 | sv3 | sv4 | sv5 +-----------+------------+-------------+----------+-------------+----------+----------+----------+----------+----------+--------+--------+--------+--------+--------+----------+----------+----------+----------+----------+-------------+-------------+-------------+-------------+-------------+-----+-----+-----+-----+----- +(0 rows) + diff --git 
a/src/test/regress/sql/stats_export_import.sql b/src/test/regress/sql/stats_export_import.sql index 9a80eebeec..cbe94b9273 100644 --- a/src/test/regress/sql/stats_export_import.sql +++ b/src/test/regress/sql/stats_export_import.sql @@ -26,6 +26,7 @@ UNION ALL SELECT 4, 'four', NULL, NULL; CREATE INDEX is_odd ON stats_export_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_export_import.evens_test ON name, ((comp).a % 2 = 0) FROM stats_export_import.test; -- Generate statistics on table with data ANALYZE stats_export_import.test; @@ -47,6 +48,22 @@ WHERE starelid IN ('stats_export_import.test'::regclass, SELECT COUNT(*) FROM stats_export_import.pg_statistic_capture; +-- Capture pg_statistic values for table and index +CREATE TABLE stats_export_import.pg_statistic_ext_data_capture +AS +SELECT d.stxdinherit, + d.stxdndistinct::text AS stxdndistinct, + d.stxddependencies::text AS stxddependencies, + d.stxdmcv::text AS stxdmcv, + d.stxdexpr::text AS stxdexpr +FROM pg_statistic_ext AS e +JOIN pg_statistic_ext_data AS d ON d.stxoid = e.oid +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND e.stxname = 'evens_test'; + +SELECT COUNT(*) +FROM stats_export_import.pg_statistic_ext_data_capture; + -- Export stats SELECT jsonb_build_object( @@ -322,19 +339,186 @@ WHERE r.oid = 'stats_export_import.is_odd'::regclass SELECT jsonb_pretty(:'index_stats_json'::jsonb) AS index_stats_json WHERE :'debug'::boolean; +SELECT + jsonb_build_object( + 'server_version_num', current_setting('server_version_num'), + 'stxoid', e.oid, + 'reloid', r.oid, + 'stxname', e.stxname, + 'stxnspname', en.nspname, + 'relname', r.relname, + 'nspname', n.nspname, + 'stxkeys', e.stxkeys::text, + 'stxkind', e.stxkind::text, + 'data', + ( + SELECT + array_agg(r ORDER by r.stxdinherit) + FROM ( + SELECT + sd.stxdinherit, + sd.stxdndistinct::text AS stxdndistinct, + sd.stxddependencies::text AS stxddependencies, + ( + SELECT + array_agg(mcvl) + FROM pg_mcv_list_items(sd.stxdmcv) AS mcvl + WHERE 
sd.stxdmcv IS NOT NULL + ) AS stxdmcv, + ( + SELECT + array_agg(r ORDER BY r.stainherit, r.staattnum) + FROM ( + SELECT + s.staattnum, + s.stainherit, + s.stanullfrac, + s.stawidth, + s.stadistinct, + s.stakind1, + s.stakind2, + s.stakind3, + s.stakind4, + s.stakind5, + s.staop1, + s.staop2, + s.staop3, + s.staop4, + s.staop5, + s.stacoll1, + s.stacoll2, + s.stacoll3, + s.stacoll4, + s.stacoll5, + s.stanumbers1::text AS stanumbers1, + s.stanumbers2::text AS stanumbers2, + s.stanumbers3::text AS stanumbers3, + s.stanumbers4::text AS stanumbers4, + s.stanumbers5::text AS stanumbers5, + s.stavalues1::text AS stavalues1, + s.stavalues2::text AS stavalues2, + s.stavalues3::text AS stavalues3, + s.stavalues4::text AS stavalues4, + s.stavalues5::text AS stavalues5 + FROM unnest(sd.stxdexpr) AS s + WHERE sd.stxdexpr IS NOT NULL + ) AS r + ) AS stxdexpr + FROM pg_statistic_ext_data AS sd + WHERE sd.stxoid = e.oid + ) r + ), + 'types', + ( + SELECT + array_agg(r) + FROM ( + SELECT + a.atttypid AS oid, + t.typname, + n.nspname + FROM pg_attribute AS a + JOIN pg_type AS t ON t.oid = a.atttypid + JOIN pg_namespace AS n ON n.oid = t.typnamespace + WHERE a.attrelid = r.oid + AND NOT a.attisdropped + AND a.attnum > 0 + ) AS r + ), + 'collations', + ( + SELECT + array_agg(r) + FROM ( + SELECT + e.oid, + c.collname, + n.nspname + FROM ( + SELECT a.attcollation AS oid + FROM pg_attribute AS a + WHERE a.attrelid = r.oid + AND NOT a.attisdropped + AND a.attnum > 0 + UNION + SELECT u.collid + FROM pg_statistic_ext_data AS sd + CROSS JOIN LATERAL unnest(sd.stxdexpr) AS s + CROSS JOIN LATERAL unnest(ARRAY[ + s.stacoll1, s.stacoll2, + s.stacoll3, s.stacoll4, + s.stacoll5]) AS u(collid) + WHERE sd.stxoid = e.oid + AND sd.stxdexpr IS NOT NULL + ) AS e + JOIN pg_collation AS c ON c.oid = e.oid + JOIN pg_namespace AS n ON n.oid = c.collnamespace + ) AS r + ), + 'operators', + ( + SELECT + array_agg(r) + FROM ( + SELECT DISTINCT + o.oid, + o.oprname, + n.nspname + FROM pg_statistic_ext_data AS 
sd + CROSS JOIN LATERAL unnest(sd.stxdexpr) AS s + CROSS JOIN LATERAL unnest(ARRAY[ + s.staop1, s.staop2, + s.staop3, s.staop4, + s.staop5]) AS u(opid) + JOIN pg_operator AS o ON o.oid = u.oid + JOIN pg_namespace AS n ON n.oid = o.oprnamespace + WHERE sd.stxoid = e.oid + AND sd.stxdexpr IS NOT NULL + ) AS r + ), + 'attributes', + ( + SELECT + array_agg(r ORDER BY r.attnum) + FROM ( + SELECT + a.attnum, + a.attname, + a.atttypid, + a.attcollation + FROM pg_attribute AS a + WHERE a.attrelid = r.oid + AND NOT a.attisdropped + AND a.attnum > 0 + ) AS r + ) + ) AS ext_stats_json +FROM pg_class r +JOIN pg_statistic_ext AS e ON e.stxrelid = r.oid +JOIN pg_namespace AS en ON en.oid = e.stxnamespace +JOIN pg_namespace AS n ON n.oid = r.relnamespace +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND e.stxname = 'evens_test' +\gset + +SELECT jsonb_pretty(:'ext_stats_json'::jsonb) AS ext_stats_json +WHERE :'debug'::boolean; + SELECT relname, reltuples FROM pg_class WHERE oid IN ('stats_export_import.test'::regclass, 'stats_export_import.is_odd'::regclass) ORDER BY relname; --- Move table and index out of the way +-- Move table and index and extended stats out of the way ALTER TABLE stats_export_import.test RENAME TO test_orig; ALTER INDEX stats_export_import.is_odd RENAME TO is_odd_orig; +ALTER STATISTICS stats_export_import.evens_test RENAME TO evens_test_orig; --- Create empty copy tables +-- Create empty copy tables and objects CREATE TABLE stats_export_import.test(LIKE stats_export_import.test_orig); CREATE INDEX is_odd ON stats_export_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_export_import.evens_test ON name, ((comp).a % 2 = 0) FROM stats_export_import.test; -- Verify no stats for these new tables SELECT COUNT(*) @@ -456,6 +640,15 @@ SELECT pg_import_rel_stats( true, true); +SELECT pg_import_ext_stats( + e.oid, + :'ext_stats_json'::jsonb, + true, + true) +FROM pg_statistic_ext AS e +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND 
e.stxname = 'evens_test'; + -- This should return 0 rows SELECT staattnum, stainherit, stanullfrac, stawidth, stadistinct, stakind1, stakind2, stakind3, stakind4, stakind5, @@ -497,3 +690,51 @@ FROM pg_class WHERE oid IN ('stats_export_import.test'::regclass, 'stats_export_import.is_odd'::regclass) ORDER BY relname; + +-- This should return 0 rows +SELECT d.stxdinherit, + d.stxdndistinct::text AS stxdndistinct, + d.stxddependencies::text AS stxddependencies, + d.stxdmcv::text AS stxdmcv, + d.stxdexpr::text AS stxdexpr +FROM pg_statistic_ext AS e +JOIN pg_statistic_ext_data AS d ON d.stxoid = e.oid +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND e.stxname = 'evens_test' +EXCEPT +SELECT * +FROM stats_export_import.pg_statistic_ext_data_capture; + +-- This should return 0 rows +SELECT * +FROM stats_export_import.pg_statistic_ext_data_capture +EXCEPT +SELECT d.stxdinherit, + d.stxdndistinct::text AS stxdndistinct, + d.stxddependencies::text AS stxddependencies, + d.stxdmcv::text AS stxdmcv, + d.stxdexpr::text AS stxdexpr +FROM pg_statistic_ext AS e +JOIN pg_statistic_ext_data AS d ON d.stxoid = e.oid +WHERE e.stxrelid = 'stats_export_import.test'::regclass +AND e.stxname = 'evens_test'; + + +SELECT staattnum, stainherit, stanullfrac, stawidth, stadistinct, + stakind1, stakind2, stakind3, stakind4, stakind5, + staop1, staop2, staop3, staop4, staop5, stacoll1, stacoll2, stacoll3, stacoll4, stacoll5, + stanumbers1, stanumbers2, stanumbers3, stanumbers4, stanumbers5, + sv1, sv2, sv3, sv4, sv5 +FROM stats_export_import.pg_statistic_capture +WHERE :'debug'::boolean; + +SELECT staattnum, stainherit, stanullfrac, stawidth, stadistinct, + stakind1, stakind2, stakind3, stakind4, stakind5, + staop1, staop2, staop3, staop4, staop5, stacoll1, stacoll2, stacoll3, stacoll4, stacoll5, + stanumbers1, stanumbers2, stanumbers3, stanumbers4, stanumbers5, + stavalues1::text AS sv1, stavalues2::text AS sv2, stavalues3::text AS sv3, + stavalues4::text AS sv4, stavalues5::text 
AS sv5 +FROM pg_statistic +WHERE starelid IN ('stats_export_import.test'::regclass, + 'stats_export_import.is_odd'::regclass) +AND :'debug'::boolean; diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 2be0a30d4d..6f41c7a292 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -28787,12 +28787,38 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset If require_match_oids is set to true, - then the import will fail if the imported oids for pt_type, + then the import will fail if the imported oids for pg_type, pg_collation, and pg_operator do not match the values specified in relation_json, as would be expected in a binary upgrade. These assumptions would not be true when restoring from a dump. + + + + pg_import_ext_stats + + pg_import_ext_stats ( extended statistics object oid, extended_stats jsonb, validate boolean, require_match_oids boolean ) + boolean + + + The pg_statistic_ext_data rows for the + extended statistics object matching + oid are transactionally + replaced with the values found in extended_stats. + The purpose of this function is to apply statistics values in an upgrade + situation that are "good enough" for system operation until they are + replaced by the next auto-analyze. This function could be used by + pg_upgrade and pg_restore to + convey the statistics from the old system version into the new one. + + + If validate is set to true, + then the function will perform a series of data consistency checks on + the data in extended_stats before attempting to + import statistics. Any inconsistencies found will raise an error. + + -- 2.43.0