From 22e6eace8ce10dd017513ca935d2e86c04b400b6 Mon Sep 17 00:00:00 2001 From: Yugo Nagata Date: Fri, 18 Jul 2025 09:58:56 +0900 Subject: [PATCH v6] Allow creating extended statistics on virtual generated columns This allows users to define extended statistics on virtual generated columns. Expressions including such columns are stored in the catalog as-is, and expanded at ANALYZE time. Extended statistics on a single virtual generated column are also allowed, since it is treated as a single expression. To enable the optimizer to make use of these statistics, expressions are also expanded at planning time. --- doc/src/sgml/ref/alter_table.sgml | 9 +---- src/backend/commands/statscmds.c | 49 ++++++++++++++----------- src/backend/optimizer/util/plancat.c | 5 +++ src/backend/statistics/extended_stats.c | 15 ++++++-- src/test/regress/expected/stats_ext.out | 25 ++++--------- src/test/regress/sql/stats_ext.sql | 21 ++++------- 6 files changed, 59 insertions(+), 65 deletions(-) diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 8591a6b5014..f8006e9d72a 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -216,8 +216,6 @@ WITH ( MODULUS numeric_literal, REM When this form is used, the column's statistics are removed, so running ANALYZE on the table afterwards is recommended. - For a virtual generated column, ANALYZE - is not necessary because such columns never have statistics. @@ -284,12 +282,9 @@ WITH ( MODULUS numeric_literal, REM - When this form is used on a stored generated column, its statistics - are removed, so running - ANALYZE + When this form is used, the column's statistics are removed, + so running ANALYZE on the table afterwards is recommended. - For a virtual generated column, ANALYZE - is not necessary because such columns never have statistics. diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c index c1da79f36ba..8a910ba1cd0 100644 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -28,6 +28,7 @@ #include "commands/comment.h" #include "commands/defrem.h" #include "miscadmin.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "statistics/statistics.h" @@ -268,12 +269,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("statistics creation on system columns is not supported"))); - /* Disallow use of virtual generated columns in extended stats */ - if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("statistics creation on virtual generated columns is not supported"))); - /* Disallow data types without a less-than operator */ type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); if (type->lt_opr == InvalidOid) @@ -282,8 +277,22 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", attname, format_type_be(attForm->atttypid)))); - attnums[nattnums] = attForm->attnum; - nattnums++; + /* Treat virtual generated columns as expressions */ + if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) + { + Node *expr = (Node *) makeVar(1, + attForm->attnum, + attForm->atttypid, + attForm->atttypmod, + attForm->attcollation, + 0); + stxexprs = lappend(stxexprs, expr); + } + else + { + attnums[nattnums] = attForm->attnum; + nattnums++; + } ReleaseSysCache(atttuple); } else if (IsA(selem->expr, Var)) /* column reference in parens */ @@ -297,12 +306,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("statistics creation on system columns is not supported"))); - /* Disallow use of virtual generated columns in extended stats */ - if (get_attgenerated(relid, var->varattno) == ATTRIBUTE_GENERATED_VIRTUAL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("statistics creation on virtual generated columns is not supported"))); - /* Disallow data types without a less-than operator */ type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR); if (type->lt_opr == InvalidOid) @@ -311,8 +314,16 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class", get_attname(relid, var->varattno, false), format_type_be(var->vartype)))); - attnums[nattnums] = var->varattno; - nattnums++; + /* Treat virtual generated columns as expressions */ + if (get_attgenerated(relid, var->varattno) == ATTRIBUTE_GENERATED_VIRTUAL) + { + stxexprs = lappend(stxexprs, (Node *) var); + } + else + { + attnums[nattnums] = var->varattno; + nattnums++; + } } else /* expression */ { @@ -336,12 +347,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("statistics creation on system columns is not supported"))); - - /* Disallow use of virtual generated columns in extended stats */ - if (get_attgenerated(relid, attnum) == ATTRIBUTE_GENERATED_VIRTUAL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("statistics creation on virtual generated columns is not supported"))); } /* diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index b2fbd6a082b..ea8faeff2ac 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1804,6 +1804,11 @@ get_relation_statistics(PlannerInfo *root, RelOptInfo *rel, if (varno != 1) ChangeVarNodes((Node *) exprs, 1, varno, 0); + /* + * Expand virtual generated columns in the expressions. + */ + exprs = (List *) expand_generated_columns_in_expr((Node *) exprs, relation, 1); + /* * Run the expressions through eval_const_expressions. This is * not just an optimization, but is necessary, because the diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 334c6498581..9225a168a8a 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -32,6 +32,7 @@ #include "parser/parsetree.h" #include "pgstat.h" #include "postmaster/autovacuum.h" +#include "rewrite/rewriteHandler.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" #include "utils/acl.h" @@ -73,7 +74,7 @@ typedef struct StatExtEntry } StatExtEntry; -static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); +static List *fetch_statentries_for_relation(Relation pg_statext, Relation rel); static VacAttrStats **lookup_var_attr_stats(Bitmapset *attrs, List *exprs, int nvacatts, VacAttrStats **vacatts); static void statext_store(Oid statOid, bool inh, @@ -125,7 +126,7 @@ BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows, /* the list of stats has to be allocated outside the memory context */ pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock); - statslist = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel)); + statslist = fetch_statentries_for_relation(pg_stext, onerel); /* memory context for building each statistics object */ cxt = AllocSetContextCreate(CurrentMemoryContext, @@ -279,7 +280,7 @@ ComputeExtStatisticsRows(Relation onerel, oldcxt = MemoryContextSwitchTo(cxt); pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock); - lstats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel)); + lstats = fetch_statentries_for_relation(pg_stext, onerel); foreach(lc, lstats) { @@ -416,12 +417,13 @@ statext_is_kind_built(HeapTuple htup, char type) * Return a list (of StatExtEntry) of statistics objects for the given relation. */ static List * -fetch_statentries_for_relation(Relation pg_statext, Oid relid) +fetch_statentries_for_relation(Relation pg_statext, Relation rel) { SysScanDesc scan; ScanKeyData skey; HeapTuple htup; List *result = NIL; + Oid relid = RelationGetRelid(rel); /* * Prepare to scan pg_statistic_ext for entries having stxrelid = this @@ -491,6 +493,11 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid) pfree(exprsString); + /* + * Expand virtual generated columns in the expressions. + */ + exprs = (List *) expand_generated_columns_in_expr((Node *) exprs, rel, 1); + /* * Run the expressions through eval_const_expressions. This is not * just an optimization, but is necessary, because the planner diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index b6431d1ee95..b1c472eb716 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -25,7 +25,7 @@ begin end; $$; -- Verify failures -CREATE TABLE ext_stats_test (x text, y int, z int); +CREATE TABLE ext_stats_test (x text, y int, z int, w xid); CREATE STATISTICS tst; ERROR: syntax error at or near ";" LINE 1: CREATE STATISTICS tst; @@ -84,30 +84,19 @@ CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression ERROR: syntax error at or near "," LINE 1: CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; ^ -DROP TABLE ext_stats_test; --- statistics on virtual generated column not allowed -CREATE TABLE ext_stats_test1 (x int, y int, z int GENERATED ALWAYS AS (x+y) VIRTUAL, w xid); -CREATE STATISTICS tst on z from ext_stats_test1; -ERROR: statistics creation on virtual generated columns is not supported -CREATE STATISTICS tst on (z) from ext_stats_test1; -ERROR: statistics creation on virtual generated columns is not supported -CREATE STATISTICS tst on (z+1) from ext_stats_test1; -ERROR: statistics creation on virtual generated columns is not supported -CREATE STATISTICS tst (ndistinct) ON z from ext_stats_test1; -ERROR: statistics creation on virtual generated columns is not supported -- statistics on system column not allowed -CREATE STATISTICS tst on tableoid from ext_stats_test1; +CREATE STATISTICS tst on tableoid from ext_stats_test; ERROR: statistics creation on system columns is not supported -CREATE STATISTICS tst on (tableoid) from ext_stats_test1; +CREATE STATISTICS tst on (tableoid) from ext_stats_test; ERROR: statistics creation on system columns is not supported -CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test1; +CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test; ERROR: statistics creation on system columns is not supported -CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test1; +CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test; ERROR: statistics creation on system columns is not supported -- statistics without a less-than operator not supported -CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test1; +CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test; ERROR: column "w" cannot be used in statistics because its type xid has no default btree operator class -DROP TABLE ext_stats_test1; +DROP TABLE ext_stats_test; -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1; diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 9dcce3440c8..22ffd74a654 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -28,7 +28,7 @@ end; $$; -- Verify failures -CREATE TABLE ext_stats_test (x text, y int, z int); +CREATE TABLE ext_stats_test (x text, y int, z int, w xid); CREATE STATISTICS tst; CREATE STATISTICS tst ON a, b; CREATE STATISTICS tst FROM sometab; @@ -56,21 +56,14 @@ DROP FUNCTION tftest; CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression -DROP TABLE ext_stats_test; --- statistics on virtual generated column not allowed -CREATE TABLE ext_stats_test1 (x int, y int, z int GENERATED ALWAYS AS (x+y) VIRTUAL, w xid); -CREATE STATISTICS tst on z from ext_stats_test1; -CREATE STATISTICS tst on (z) from ext_stats_test1; -CREATE STATISTICS tst on (z+1) from ext_stats_test1; -CREATE STATISTICS tst (ndistinct) ON z from ext_stats_test1; -- statistics on system column not allowed -CREATE STATISTICS tst on tableoid from ext_stats_test1; -CREATE STATISTICS tst on (tableoid) from ext_stats_test1; -CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test1; -CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test1; +CREATE STATISTICS tst on tableoid from ext_stats_test; +CREATE STATISTICS tst on (tableoid) from ext_stats_test; +CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test; +CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test; -- statistics without a less-than operator not supported -CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test1; -DROP TABLE ext_stats_test1; +CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test; +DROP TABLE ext_stats_test; -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); -- 2.43.0