From e435c033cd574b0a07f08577880695de4290747d Mon Sep 17 00:00:00 2001
From: TatsuyaKawata
Date: Sat, 10 Jan 2026 16:51:40 +0900
Subject: [PATCH v3] Add sampling statistics to autoanalyze log output

Previously, autoanalyze log messages showed only buffer usage, WAL
usage, and system usage statistics, whereas ANALYZE VERBOSE also
reported sampling statistics: the pages scanned and the live and dead
rows found during sampling. This made it difficult to understand the
sampling behavior from autoanalyze logs alone.

This patch unifies the logging by adding sampling statistics to the
autoanalyze log output. The new log format includes:

- Number of pages scanned out of total pages
- Live rows and dead rows found during sampling
- Number of rows in sample and estimated total rows

For foreign tables, only the number of rows in the sample and the
estimated total rows are reported. For an inheritance tree, the page
and row counts are the sums over all sampled children. The separate
per-relation message previously emitted by acquire_sample_rows() is
removed, since its contents are now part of the final log message.

Additionally, this patch adds "inheritance tree" to the autoanalyze log
message when analyzing inherited statistics, making it easier to
distinguish between single-table analyze and inheritance tree analyze.

To support this change, a new SamplingStats struct is introduced in
vacuum.h to collect and pass sampling statistics. The
AcquireSampleRowsFunc callback signature is updated to include this new
parameter.
Author: Tatsuya Kawata Reviewed-by: Fujii Masao Reviewed-by: Sami Imseih Reviewed-by: Chao Li Discussion: https://www.postgresql.org/message-id/flat/CAHza6qcN%3DPaGqo8CGgrqd%2BnaOwY_pLGiwEq6u%3D%2BASZZNL9zi9A%40mail.gmail.com#26a70a815cc922b7513e71fc0c445ff3 --- contrib/file_fdw/file_fdw.c | 6 ++- contrib/postgres_fdw/postgres_fdw.c | 7 +++- src/backend/commands/analyze.c | 65 ++++++++++++++++++++--------- src/include/commands/vacuum.h | 13 ++++++ src/include/foreign/fdwapi.h | 6 ++- 5 files changed, 73 insertions(+), 24 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 33a37d832ce..a131d0e93ce 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -171,7 +171,8 @@ static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel, Cost *startup_cost, Cost *total_cost); static int file_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); + double *totalrows, double *totaldeadrows, + SamplingStats *sampling_stats); /* @@ -1185,7 +1186,8 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel, static int file_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows) + double *totalrows, double *totaldeadrows, + SamplingStats *sampling_stats) { int numrows = 0; double rowstoskip = -1; /* -1 means not set yet */ diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 3572689e33b..08d573b9705 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -21,6 +21,7 @@ #include "commands/defrem.h" #include "commands/explain_format.h" #include "commands/explain_state.h" +#include "commands/vacuum.h" #include "executor/execAsync.h" #include "foreign/fdwapi.h" #include "funcapi.h" @@ -504,7 +505,8 @@ static void process_query_params(ExprContext *econtext, static int postgresAcquireSampleRowsFunc(Relation 
relation, int elevel, HeapTuple *rows, int targrows, double *totalrows, - double *totaldeadrows); + double *totaldeadrows, + SamplingStats *sampling_stats); static void analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate); static void produce_tuple_asynchronously(AsyncRequest *areq, bool fetch); @@ -5008,7 +5010,8 @@ static int postgresAcquireSampleRowsFunc(Relation relation, int elevel, HeapTuple *rows, int targrows, double *totalrows, - double *totaldeadrows) + double *totaldeadrows, + SamplingStats *sampling_stats) { PgFdwAnalyzeState astate; ForeignTable *table; diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index a483424152c..3475fc2a947 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -86,11 +86,13 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum, Node *index_expr); static int acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); + double *totalrows, double *totaldeadrows, + SamplingStats *sampling_stats); static int compare_rows(const void *a, const void *b, void *arg); static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); + double *totalrows, double *totaldeadrows, + SamplingStats *sampling_stats); static void update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats); static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); @@ -302,6 +304,7 @@ do_analyze_rel(Relation onerel, const VacuumParams params, double totalrows, totaldeadrows; HeapTuple *rows; + SamplingStats sampling_stats = {0}; PGRUsage ru0; TimestampTz starttime = 0; MemoryContext caller_context; @@ -535,11 +538,13 @@ do_analyze_rel(Relation onerel, const VacuumParams params, if (inh) numrows = acquire_inherited_sample_rows(onerel, elevel, rows, targrows, - &totalrows, &totaldeadrows); + 
&totalrows, &totaldeadrows, + &sampling_stats); else numrows = (*acquirefunc) (onerel, elevel, rows, targrows, - &totalrows, &totaldeadrows); + &totalrows, &totaldeadrows, + &sampling_stats); /* * Compute the statistics. Temporary results during the calculations for @@ -805,7 +810,12 @@ do_analyze_rel(Relation onerel, const VacuumParams params, initStringInfo(&buf); if (AmAutoVacuumWorkerProcess()) - msgfmt = _("automatic analyze of table \"%s.%s.%s\"\n"); + { + if (inh) + msgfmt = _("automatic analyze of table \"%s.%s.%s\" inheritance tree\n"); + else + msgfmt = _("automatic analyze of table \"%s.%s.%s\"\n"); + } else msgfmt = _("finished analyzing table \"%s.%s.%s\"\n"); @@ -813,6 +823,18 @@ do_analyze_rel(Relation onerel, const VacuumParams params, get_database_name(MyDatabaseId), get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel)); + if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + appendStringInfo(&buf, + _("sampling: %d rows in sample, %.0f estimated total rows\n"), + numrows, totalrows); + else + appendStringInfo(&buf, + _("sampling: scanned %u of %u pages, " + "containing %.0f live rows and %.0f dead rows; " + "%d rows in sample, %.0f estimated total rows\n"), + sampling_stats.scannedpages, sampling_stats.totalpages, + sampling_stats.liverows, sampling_stats.deadrows, + numrows, totalrows); if (track_cost_delay_timing) { /* @@ -1204,7 +1226,8 @@ block_sampling_read_stream_next(ReadStream *stream, static int acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows) + double *totalrows, double *totaldeadrows, + SamplingStats *sampling_stats) { int numrows = 0; /* # rows now in reservoir */ double samplerows = 0; /* total # rows collected */ @@ -1345,17 +1368,11 @@ acquire_sample_rows(Relation onerel, int elevel, *totaldeadrows = 0.0; } - /* - * Emit some interesting relation info - */ - ereport(elevel, - (errmsg("\"%s\": scanned %d of %u pages, " - 
"containing %.0f live rows and %.0f dead rows; " - "%d rows in sample, %.0f estimated total rows", - RelationGetRelationName(onerel), - bs.m, totalblocks, - liverows, deadrows, - numrows, *totalrows))); + /* Populate sampling statistics output parameters */ + sampling_stats->totalpages = totalblocks; + sampling_stats->scannedpages = bs.m; + sampling_stats->liverows = liverows; + sampling_stats->deadrows = deadrows; return numrows; } @@ -1396,7 +1413,8 @@ compare_rows(const void *a, const void *b, void *arg) static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows) + double *totalrows, double *totaldeadrows, + SamplingStats *sampling_stats) { List *tableOIDs; Relation *rels; @@ -1408,10 +1426,12 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, i; ListCell *lc; bool has_child; + SamplingStats child_sampling_stats; /* Initialize output parameters to zero now, in case we exit early */ *totalrows = 0; *totaldeadrows = 0; + memset(sampling_stats, 0, sizeof(SamplingStats)); /* * Find all members of inheritance set. 
We only need AccessShareLock on @@ -1588,7 +1608,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, /* Fetch a random sample of the child's rows */ childrows = (*acquirefunc) (childrel, elevel, rows + numrows, childtargrows, - &trows, &tdrows); + &trows, &tdrows, + &child_sampling_stats); /* We may need to convert from child's rowtype to parent's */ if (childrows > 0 && @@ -1619,6 +1640,12 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, numrows += childrows; *totalrows += trows; *totaldeadrows += tdrows; + + /* Accumulate sampling statistics */ + sampling_stats->totalpages += child_sampling_stats.totalpages; + sampling_stats->scannedpages += child_sampling_stats.scannedpages; + sampling_stats->liverows += child_sampling_stats.liverows; + sampling_stats->deadrows += child_sampling_stats.deadrows; } } diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index e885a4b9c77..ffb6990199a 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -300,6 +300,19 @@ typedef struct VacDeadItemsInfo int64 num_items; /* current # of entries */ } VacDeadItemsInfo; +/* + * SamplingStats stores sampling statistics collected during ANALYZE. + * This is used to report sampling information for both manual ANALYZE VERBOSE + * and autoanalyze logging. 
+ */ +typedef struct SamplingStats +{ + BlockNumber totalpages; /* total pages in relation */ + BlockNumber scannedpages; /* pages actually scanned */ + double liverows; /* live rows found during sampling */ + double deadrows; /* dead rows found during sampling */ +} SamplingStats; + /* GUC parameters */ extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */ extern PGDLLIMPORT int vacuum_freeze_min_age; diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index 96b6f692d2a..87e980c2997 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -19,6 +19,9 @@ /* avoid including explain_state.h here */ typedef struct ExplainState ExplainState; +/* avoid including vacuum.h here */ +typedef struct SamplingStats SamplingStats; + /* * Callback function signatures --- see fdwhandler.sgml for more info. @@ -151,7 +154,8 @@ typedef void (*ExplainDirectModify_function) (ForeignScanState *node, typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel, HeapTuple *rows, int targrows, double *totalrows, - double *totaldeadrows); + double *totaldeadrows, + SamplingStats *sampling_stats); typedef bool (*AnalyzeForeignTable_function) (Relation relation, AcquireSampleRowsFunc *func, -- 2.34.1