From 1ca442a2021e5aa4a06f40bea1827c87215b3a02 Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Wed, 5 Nov 2025 01:13:04 -0500 Subject: [PATCH v22 3/3] Include Extended Statistics in pg_dump. Incorporate the new pg_restore_extended_stats() function into pg_dump. This detects the existence of extended statistics statistics (i.e. pg_statistic_ext_data rows). This handles many of the changes that have happened to extended statistic statistics over the various versions, including: * Format change for pg_ndistinct and pg_dependencies in current development version. Earlier versions have the format translated via the pg_dump SQL statement. * Inherited extended statistics were introduced in v15. * Expressions were introduced to extended statistics in v14. * MCV extended statistics were introduced in v13. * pg_statistic_ext_data and pg_stats_ext introduced in v12, prior to that ndstinct and depdendencies data (the only kind of stats that existed were directly on pg_statistic_ext. * Extended Statistics were introduced in v10, so there is no support for prior versions necessary. --- src/bin/pg_dump/pg_backup.h | 1 + src/bin/pg_dump/pg_backup_archiver.c | 3 +- src/bin/pg_dump/pg_dump.c | 254 +++++++++++++++++++++++++++ src/bin/pg_dump/t/002_pg_dump.pl | 28 +++ 4 files changed, 285 insertions(+), 1 deletion(-) diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index d9041dad720..2f8d9799c30 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -68,6 +68,7 @@ enum _dumpPreparedQueries PREPQUERY_DUMPCOMPOSITETYPE, PREPQUERY_DUMPDOMAIN, PREPQUERY_DUMPENUMTYPE, + PREPQUERY_DUMPEXTSTATSOBJSTATS, PREPQUERY_DUMPFUNC, PREPQUERY_DUMPOPR, PREPQUERY_DUMPRANGETYPE, diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 4a63f7392ae..1d42b0343eb 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -3007,7 +3007,8 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) strcmp(te->desc, "SEARCHPATH") == 0) return REQ_SPECIAL; - if (strcmp(te->desc, "STATISTICS DATA") == 0) + if ((strcmp(te->desc, "STATISTICS DATA") == 0) || + (strcmp(te->desc, "EXTENDED STATISTICS DATA") == 0)) { if (!ropt->dumpStatistics) return 0; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 24ad201af2f..b5ecf463db6 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -324,6 +324,7 @@ static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, const IndxInfo *indxinfo); static void dumpIndexAttach(Archive *fout, const IndexAttachInfo *attachinfo); static void dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo); +static void dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, const ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo); static void dumpTSParser(Archive *fout, const TSParserInfo *prsinfo); @@ -8272,6 +8273,9 @@ getExtendedStatistics(Archive *fout) /* Decide whether we want to dump it */ selectDumpableStatisticsObject(&(statsextinfo[i]), fout); + + if (fout->dopt->dumpStatistics) + statsextinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS; } PQclear(res); @@ -11726,6 +11730,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) break; case DO_STATSEXT: dumpStatisticsExt(fout, (const StatsExtInfo *) dobj); + dumpStatisticsExtStats(fout, (const StatsExtInfo *) dobj); break; case DO_REFRESH_MATVIEW: refreshMatViewData(fout, (const TableDataInfo *) dobj); @@ -18528,6 +18533,255 @@ dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo) free(qstatsextname); } +/* + * dumpStatisticsExtStats + * write out to fout the stats for an extended statistics object + */ +static void +dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo) +{ + DumpOptions *dopt = fout->dopt; + PQExpBuffer query; + PGresult *res; + int nstats; + + /* Do nothing if not dumping statistics */ + if (!dopt->dumpStatistics) + return; + + if (!fout->is_prepared[PREPQUERY_DUMPEXTSTATSOBJSTATS]) + { + PQExpBuffer pq = createPQExpBuffer(); + + /* + * Set up query for constraint-specific details. + * + * 19+: query pg_stats_ext and pg_stats_ext_exprs as-is 15-18: query + * pg_stats_ext translating the ndistinct and dependencies, 14: + * inherited is always NULL 12-13: no pg_stats_ext_exprs 10-11: no + * pg_stats_ext, join pg_statistic_ext and pg_namespace + */ + + appendPQExpBufferStr(pq, + "PREPARE getExtStatsStats(pg_catalog.name, pg_catalog.name) AS\n" + "SELECT "); + + /* + * Versions 15+ have inherited stats. + * + * Create this column in all version because we need to order by it + * later. + */ + if (fout->remoteVersion >= 150000) + appendPQExpBufferStr(pq, "e.inherited, "); + else + appendPQExpBufferStr(pq, "false AS inherited, "); + + /* + * The ndistinnct and depdendencies formats changed in v19, so + * everything before that needs to be translated. + * + * The ndistinct translation converts this: + * + * {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11} + * + * to this: + * + * [ {"attributes": [3,4], "ndistinct": 11}, {"attributes": [3,6], + * "ndistinct": 11}, {"attributes": [4,6], "ndistinct": 11}, + * {"attributes": [3,4,6], "ndistinct": 11} ] + * + * and the dependencies translation converts this: + * + * {"3 => 4": 1.000000, "3 => 6": 1.000000, "4 => 6": 1.000000, "3, 4 + * => 6": 1.000000, "3, 6 => 4": 1.000000} + * + * to this: + * + * [ {"attributes": [3], "dependency": 4, "degree": 1.000000}, + * {"attributes": [3], "dependency": 6, "degree": 1.000000}, + * {"attributes": [4], "dependency": 6, "degree": 1.000000}, + * {"attributes": [3,4], "dependency": 6, "degree": 1.000000}, + * {"attributes": [3,6], "dependency": 4, "degree": 1.000000} ] + */ + if (fout->remoteVersion >= 190000) + appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies, "); + else + appendPQExpBufferStr(pq, + "( " + "SELECT json_agg( " + " json_build_object( " + " 'attributes', " + " string_to_array(kv.key, ', ')::integer[], " + " 'ndistinct', " + " kv.value::bigint )) " + "FROM json_each_text(e.n_distinct::text::json) AS kv" + ") AS n_distinct, " + "( " + "SELECT json_agg( " + " json_build_object( " + " 'attributes', " + " string_to_array( " + " split_part(kv.key, ' => ', 1), " + " ', ')::integer[], " + " 'dependency', " + " split_part(kv.key, ' => ', 2)::integer, " + " 'degree', " + " kv.value::double precision )) " + "FROM json_each_text(e.dependencies::text::json) AS kv " + ") AS dependencies, "); + + /* MCV was introduced v13 */ + if (fout->remoteVersion >= 130000) + appendPQExpBufferStr(pq, + "e.most_common_vals, e.most_common_val_nulls, " + "e.most_common_freqs, e.most_common_base_freqs, "); + else + appendPQExpBufferStr(pq, + "NULL AS most_common_vals, NULL AS most_common_val_nulls, " + "NULL AS most_common_freqs, NULL AS most_common_base_freqs, "); + + /* Expressions were introduced in v14 */ + if (fout->remoteVersion >= 140000) + { + appendPQExpBufferStr(pq, + "( " + "SELECT array_agg( " + " ARRAY[ee.null_frac::text, ee.avg_width::text, " + " ee.n_distinct::text, ee.most_common_vals::text, " + " ee.most_common_freqs::text, ee.histogram_bounds::text, " + " ee.correlation::text, ee.most_common_elems::text, " + " ee.most_common_elem_freqs::text, " + " ee.elem_count_histogram::text]) " + "FROM pg_stats_ext_exprs AS ee " + "WHERE ee.statistics_schemaname = $1 " + "AND ee.statistics_name = $2 "); + + /* Inherited expressions introduced in v15 */ + if (fout->remoteVersion >= 150000) + appendPQExpBufferStr(pq, "AND ee.inherited = e.inherited"); + + appendPQExpBufferStr(pq, ") AS exprs "); + } + else + appendPQExpBufferStr(pq, "NULL AS exprs "); + + /* pg_stats_ext introduced in v12 */ + if (fout->remoteVersion >= 120000) + appendPQExpBufferStr(pq, + "FROM pg_catalog.pg_stats_ext AS e " + "WHERE e.statistics_schemaname = $1 " + "AND e.statistics_name = $2 "); + else + appendPQExpBufferStr(pq, + "FROM ( " + "SELECT s.stxndistinct AS n_distinct, " + " s.stxdependencies AS dependencies " + "FROM pg_catalog.pg_statistics_ext AS s " + "JOIN pg_catalog.pg_namespace AS n " + "ON n.oid = s.stxnamespace " + "WHERE n.nspname = $1 " + "AND e.stxname = $2 " + ") AS e "); + + /* we always have an inherited column, but it may be a constant */ + appendPQExpBufferStr(pq, "ORDER BY inherited"); + + ExecuteSqlStatement(fout, pq->data); + + fout->is_prepared[PREPQUERY_DUMPEXTSTATSOBJSTATS] = true; + + destroyPQExpBuffer(pq); + } + + query = createPQExpBuffer(); + + appendPQExpBufferStr(query, "EXECUTE getExtStatsStats("); + appendStringLiteralAH(query, statsextinfo->dobj.namespace->dobj.name, fout); + appendPQExpBufferStr(query, "::pg_catalog.name, "); + appendStringLiteralAH(query, statsextinfo->dobj.name, fout); + appendPQExpBufferStr(query, "::pg_catalog.name)"); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + destroyPQExpBuffer(query); + + nstats = PQntuples(res); + + if (nstats > 0) + { + PQExpBuffer out = createPQExpBuffer(); + + int i_inherited = PQfnumber(res, "inherited"); + int i_ndistinct = PQfnumber(res, "n_distinct"); + int i_dependencies = PQfnumber(res, "dependencies"); + int i_mcv = PQfnumber(res, "most_common_vals"); + int i_mcv_nulls = PQfnumber(res, "most_common_val_nulls"); + int i_mcf = PQfnumber(res, "most_common_freqs"); + int i_mcbf = PQfnumber(res, "most_common_base_freqs"); + int i_exprs = PQfnumber(res, "exprs"); + + for (int i = 0; i < nstats; i++) + { + if (PQgetisnull(res, i, i_inherited)) + pg_fatal("inherited cannot be NULL"); + + appendPQExpBufferStr(out, + "SELECT * FROM pg_catalog.pg_restore_extended_stats(\n"); + appendPQExpBuffer(out, "\t'version', '%d'::integer,\n", + fout->remoteVersion); + appendPQExpBufferStr(out, "\t'statistics_schemaname', "); + appendStringLiteralAH(out, statsextinfo->dobj.namespace->dobj.name, fout); + appendPQExpBufferStr(out, ",\n\t'statistics_name', "); + appendStringLiteralAH(out, statsextinfo->dobj.name, fout); + appendNamedArgument(out, fout, "inherited", "boolean", + PQgetvalue(res, i, i_inherited)); + + if (!PQgetisnull(res, i, i_ndistinct)) + appendNamedArgument(out, fout, "n_distinct", "pg_ndistinct", + PQgetvalue(res, i, i_ndistinct)); + + if (!PQgetisnull(res, i, i_dependencies)) + appendNamedArgument(out, fout, "dependencies", "pg_dependencies", + PQgetvalue(res, i, i_dependencies)); + + if (!PQgetisnull(res, i, i_mcv)) + appendNamedArgument(out, fout, "most_common_vals", "text[]", + PQgetvalue(res, i, i_mcv)); + + if (!PQgetisnull(res, i, i_mcv_nulls)) + appendNamedArgument(out, fout, "most_common_val_nulls", "boolean[]", + PQgetvalue(res, i, i_mcv_nulls)); + + if (!PQgetisnull(res, i, i_mcf)) + appendNamedArgument(out, fout, "most_common_freqs", "double precision[]", + PQgetvalue(res, i, i_mcf)); + + if (!PQgetisnull(res, i, i_mcbf)) + appendNamedArgument(out, fout, "most_common_base_freqs", "double precision[]", + PQgetvalue(res, i, i_mcbf)); + + if (!PQgetisnull(res, i, i_exprs)) + appendNamedArgument(out, fout, "exprs", "text[]", + PQgetvalue(res, i, i_exprs)); + + appendPQExpBufferStr(out, "\n);\n"); + } + + ArchiveEntry(fout, nilCatalogId, createDumpId(), + ARCHIVE_OPTS(.tag = statsextinfo->dobj.name, + .namespace = statsextinfo->dobj.namespace->dobj.name, + .owner = statsextinfo->rolname, + .description = "EXTENDED STATISTICS DATA", + .section = SECTION_POST_DATA, + .createStmt = out->data, + .deps = &statsextinfo->dobj.dumpId, + .nDeps = 1)); + destroyPQExpBuffer(out); + } + PQclear(res); +} + /* * dumpConstraint * write out to fout a user-defined constraint diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index e33aa95f6ff..9ab82f97277 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -4772,6 +4772,34 @@ my %tests = ( }, }, + # + # EXTENDED stats will end up in SECTION_POST_DATA. + # + 'extended_statistics_import' => { + create_sql => ' + CREATE TABLE dump_test.has_ext_stats + AS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g); + CREATE STATISTICS dump_test.es1 ON x, (y % 2) FROM dump_test.has_ext_stats; + ANALYZE dump_test.has_ext_stats;', + regexp => qr/^ + \QSELECT * FROM pg_catalog.pg_restore_extended_stats(\E\s+/xm, + like => { + %full_runs, + %dump_test_schema_runs, + no_data_no_schema => 1, + no_schema => 1, + section_post_data => 1, + statistics_only => 1, + schema_only_with_statistics => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + no_statistics => 1, + only_dump_measurement => 1, + schema_only => 1, + }, + }, + # # While attribute stats (aka pg_statistic stats) only appear for tables # that have been analyzed, all tables will have relation stats because -- 2.52.0