From 8049d2b7e5d1636d5fb2b7d421d6b29a39389fb3 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 20 Jan 2023 20:50:41 +0100 Subject: [PATCH 1/2] Display length and bounds histograms in pg_stats Values corresponding to STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM and STATISTIC_KIND_BOUNDS_HISTOGRAM were not exposed to pg_stats when these slot kinds were introduced in 918eee0c49. This commit adds the missing fields to pg_stats. TODO: catalog version bump --- doc/src/sgml/catalogs.sgml | 40 ++++++ src/backend/catalog/system_views.sql | 30 ++++- src/backend/utils/adt/rangetypes_typanalyze.c | 118 ++++++++++++++++++ src/include/catalog/pg_proc.dat | 10 ++ src/include/catalog/pg_statistic.h | 3 + src/test/regress/expected/rules.out | 34 ++++- 6 files changed, 233 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index c1e4048054..e0851c52b4 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -9634,6 +9634,46 @@ SCRAM-SHA-256$<iteration count>:&l User mapping specific options, as keyword=value strings + + + + range_length_histogram anyarray + + + A histogram of the lengths of non-empty and non-null range values of a + range type column. (Null for non-range types.) + + + + + + range_empty_frac float4 + + + Fraction of column entries whose values are empty ranges. + (Null for non-range types.) + + + + + + range_lower_histogram anyarray + + + A histogram of lower bounds of non-empty and non-null range values. + (Null for non-range types.) + + + + + + range_upper_histogram anyarray + + + A histogram of upper bounds of non-empty and non-null range values. + (Null for non-range types.) + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 8608e3fa5b..b3b9e64ae2 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -243,7 +243,35 @@ CREATE VIEW pg_stats WITH (security_barrier) AS WHEN stakind3 = 5 THEN stanumbers3 WHEN stakind4 = 5 THEN stanumbers4 WHEN stakind5 = 5 THEN stanumbers5 - END AS elem_count_histogram + END AS elem_count_histogram, + CASE + WHEN stakind1 = 6 THEN stavalues1 + WHEN stakind2 = 6 THEN stavalues2 + WHEN stakind3 = 6 THEN stavalues3 + WHEN stakind4 = 6 THEN stavalues4 + WHEN stakind5 = 6 THEN stavalues5 + END AS range_length_histogram, + CASE + WHEN stakind1 = 6 THEN stanumbers1[1] + WHEN stakind2 = 6 THEN stanumbers2[1] + WHEN stakind3 = 6 THEN stanumbers3[1] + WHEN stakind4 = 6 THEN stanumbers4[1] + WHEN stakind5 = 6 THEN stanumbers5[1] + END AS range_empty_frac, + CASE + WHEN stakind1 = 7 THEN ranges_lower_bounds(stavalues1) + WHEN stakind2 = 7 THEN ranges_lower_bounds(stavalues2) + WHEN stakind3 = 7 THEN ranges_lower_bounds(stavalues3) + WHEN stakind4 = 7 THEN ranges_lower_bounds(stavalues4) + WHEN stakind5 = 7 THEN ranges_lower_bounds(stavalues5) + END AS range_lower_histogram, + CASE + WHEN stakind1 = 7 THEN ranges_upper_bounds(stavalues1) + WHEN stakind2 = 7 THEN ranges_upper_bounds(stavalues2) + WHEN stakind3 = 7 THEN ranges_upper_bounds(stavalues3) + WHEN stakind4 = 7 THEN ranges_upper_bounds(stavalues4) + WHEN stakind5 = 7 THEN ranges_upper_bounds(stavalues5) + END AS range_upper_histogram FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid) JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum) LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c index 86810a1a6e..a0097282fc 100644 --- a/src/backend/utils/adt/rangetypes_typanalyze.c +++ b/src/backend/utils/adt/rangetypes_typanalyze.c @@ -26,6 +26,8 @@ #include "catalog/pg_operator.h" #include "commands/vacuum.h" +#include "utils/array.h" +#include "utils/arrayaccess.h" #include "utils/float.h" #include "utils/fmgrprotos.h" #include "utils/lsyscache.h" @@ -427,3 +429,119 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, * hashtable should also go away, as it used a child memory context. */ } + +Datum +ranges_lower_bounds(PG_FUNCTION_ARGS) +{ + AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0); + int ndims = AARR_NDIM(array); + int *dims = AARR_DIMS(array); + Oid element_type = AARR_ELEMTYPE(array); + int i; + array_iter iter; + int nelems; + Datum *elems; + TypeCacheEntry *typentry; + TypeCacheEntry *typentry_element; + + /* Get information about range type; note column might be a domain */ + typentry = range_get_typcache(fcinfo, getBaseType(element_type)); + + if (typentry->typtype != TYPTYPE_RANGE) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("expected array of ranges"))); + + ndims = AARR_NDIM(array); + dims = AARR_DIMS(array); + nelems = ArrayGetNItems(ndims, dims); + + elems = (Datum *) palloc(nelems * sizeof(Datum)); + + array_iter_setup(&iter, array); + + for (i = 0; i < nelems; i++) + { + Datum itemvalue; + bool isnull; + RangeBound lower; + RangeBound upper; + bool empty; + + /* Get source element, checking for NULL */ + itemvalue = array_iter_next(&iter, &isnull, i, + typentry->typlen, typentry->typbyval, + typentry->typalign); + + Assert(!isnull); + + range_deserialize(typentry, (RangeType *) itemvalue, &lower, &upper, &empty); + elems[i] = lower.val; + } + + typentry_element = typentry->rngelemtype; + + PG_RETURN_ARRAYTYPE_P(construct_array(elems, nelems, + typentry_element->type_id, + typentry_element->typlen, + typentry_element->typbyval, + typentry_element->typalign)); +} + +Datum +ranges_upper_bounds(PG_FUNCTION_ARGS) +{ + AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0); + int ndims = AARR_NDIM(array); + int *dims = AARR_DIMS(array); + Oid element_type = AARR_ELEMTYPE(array); + int i; + array_iter iter; + int nelems; + Datum *elems; + TypeCacheEntry *typentry; + TypeCacheEntry *typentry_element; + + /* Get information about range type; note column might be a domain */ + typentry = range_get_typcache(fcinfo, getBaseType(element_type)); + + if (typentry->typtype != TYPTYPE_RANGE) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("expected array of ranges"))); + + ndims = AARR_NDIM(array); + dims = AARR_DIMS(array); + nelems = ArrayGetNItems(ndims, dims); + + elems = (Datum *) palloc(nelems * sizeof(Datum)); + + array_iter_setup(&iter, array); + + for (i = 0; i < nelems; i++) + { + Datum itemvalue; + bool isnull; + RangeBound lower; + RangeBound upper; + bool empty; + + /* Get source element, checking for NULL */ + itemvalue = array_iter_next(&iter, &isnull, i, + typentry->typlen, typentry->typbyval, + typentry->typalign); + + Assert(!isnull); + + range_deserialize(typentry, (RangeType *) itemvalue, &lower, &upper, &empty); + elems[i] = upper.val; + } + + typentry_element = typentry->rngelemtype; + + PG_RETURN_ARRAYTYPE_P(construct_array(elems, nelems, + typentry_element->type_id, + typentry_element->typlen, + typentry_element->typbyval, + typentry_element->typalign)); +} diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 86eb8e8c58..161557e4cb 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11891,4 +11891,14 @@ prorettype => 'bytea', proargtypes => 'pg_brin_minmax_multi_summary', prosrc => 'brin_minmax_multi_summary_send' }, +{ oid => '9693', descr => 'lower bounds of ranges', + proname => 'ranges_lower_bounds', provolatile => 's', + prorettype => 'anyarray', proargtypes => 'anyarray', + prosrc => 'ranges_lower_bounds' }, + +{ oid => '9694', descr => 'upper bounds of ranges', + proname => 'ranges_upper_bounds', provolatile => 's', + prorettype => 'anyarray', proargtypes => 'anyarray', + prosrc => 'ranges_upper_bounds' }, + ] diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h index 8770c5b4c6..10401dece0 100644 --- a/src/include/catalog/pg_statistic.h +++ b/src/include/catalog/pg_statistic.h @@ -152,6 +152,9 @@ DECLARE_FOREIGN_KEY((starelid, staattnum), pg_attribute, (attrelid, attnum)); * data "kind" will appear in any particular slot. Instead, search the * stakind fields to see if the desired data is available. (The standard * function get_attstatsslot() may be used for this.) + * + * Note: The pg_stats view needs to be modified whenever a new slot kind is + * added to core. */ /* diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index e7a2f5856a..26087dfdf5 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2436,7 +2436,39 @@ pg_stats| SELECT n.nspname AS schemaname, WHEN (s.stakind4 = 5) THEN s.stanumbers4 WHEN (s.stakind5 = 5) THEN s.stanumbers5 ELSE NULL::real[] - END AS elem_count_histogram + END AS elem_count_histogram, + CASE + WHEN (s.stakind1 = 6) THEN s.stavalues1 + WHEN (s.stakind2 = 6) THEN s.stavalues2 + WHEN (s.stakind3 = 6) THEN s.stavalues3 + WHEN (s.stakind4 = 6) THEN s.stavalues4 + WHEN (s.stakind5 = 6) THEN s.stavalues5 + ELSE NULL::anyarray + END AS range_length_histogram, + CASE + WHEN (s.stakind1 = 6) THEN s.stanumbers1[1] + WHEN (s.stakind2 = 6) THEN s.stanumbers2[1] + WHEN (s.stakind3 = 6) THEN s.stanumbers3[1] + WHEN (s.stakind4 = 6) THEN s.stanumbers4[1] + WHEN (s.stakind5 = 6) THEN s.stanumbers5[1] + ELSE NULL::real + END AS range_empty_frac, + CASE + WHEN (s.stakind1 = 7) THEN ranges_lower_bounds(s.stavalues1) + WHEN (s.stakind2 = 7) THEN ranges_lower_bounds(s.stavalues2) + WHEN (s.stakind3 = 7) THEN ranges_lower_bounds(s.stavalues3) + WHEN (s.stakind4 = 7) THEN ranges_lower_bounds(s.stavalues4) + WHEN (s.stakind5 = 7) THEN ranges_lower_bounds(s.stavalues5) + ELSE NULL::anyarray + END AS range_lower_histogram, + CASE + WHEN (s.stakind1 = 7) THEN ranges_upper_bounds(s.stavalues1) + WHEN (s.stakind2 = 7) THEN ranges_upper_bounds(s.stavalues2) + WHEN (s.stakind3 = 7) THEN ranges_upper_bounds(s.stavalues3) + WHEN (s.stakind4 = 7) THEN ranges_upper_bounds(s.stavalues4) + WHEN (s.stakind5 = 7) THEN ranges_upper_bounds(s.stavalues5) + ELSE NULL::anyarray + END AS range_upper_histogram FROM (((pg_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) -- 2.39.0