From b83aa230fad447e02f4398f13523f59de63e4d5a Mon Sep 17 00:00:00 2001 From: Alexandre Felipe Date: Sun, 5 Apr 2026 01:01:18 +0100 Subject: [PATCH 5/6] SLOPE: Planner support Update the planner to take advantage of the order of an expression that can be inferred from the order of a subexpression. If f(x) is a monotonic function and x is known to be ordered, we can infer the order of f(x) from the order of x. The analysis is performed in two stages: 1. During plan creation, for every pathkey expression compute the source of variation, defined as the innermost subexpression that causes all variation in the expression. If the source of variation is not the full expression, and depends on a single table, a slope_info entry is created. 2. In build_index_pathkeys, in addition to the usual pathkeys, create pathkeys for slope_info entries where the index key is the source of variation. Changes: - Add SlopeInfo struct to cache monotonicity information per pathkey - Add precompute_slope_cache() called during plan construction - Add get_variation_source() to find the innermost varying expression - Add get_expr_slope_wrt() to verify monotonicity by calling prosupport - Modify build_index_pathkeys() to check for monotonicity on index keys - Handle both increasing and decreasing monotonic functions, with reversed pathkey emission for decreasing functions so that backward index scans are correctly selected Adds tests in sql/slope.sql covering GROUP BY, ORDER BY, involving a few arithmetic operations, increasing and decreasing functions. 
--- src/backend/optimizer/path/pathkeys.c | 474 ++++++++++++++++++++++++-- src/backend/optimizer/plan/planner.c | 3 + src/include/nodes/pathnodes.h | 6 + src/include/nodes/plannodes.h | 5 + src/include/optimizer/paths.h | 1 + src/test/regress/expected/slope.out | 281 +++++++++++++++ src/test/regress/parallel_schedule | 3 + src/test/regress/sql/slope.sql | 143 ++++++++ 8 files changed, 891 insertions(+), 25 deletions(-) create mode 100644 src/test/regress/expected/slope.out create mode 100644 src/test/regress/sql/slope.sql diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 3e3eb720c1f..8aceccb4b2e 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -19,24 +19,46 @@ #include "access/stratnum.h" #include "catalog/pg_opfamily.h" +#include "fmgr.h" #include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "parser/parse_oper.h" #include "partitioning/partbounds.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" +#include "utils/typcache.h" /* Consider reordering of GROUP BY keys? */ bool enable_group_by_reordering = true; +/* + * SlopeInfo - cached information about a query pathkey for SLOPE optimization. + * Stored in PlannerInfo.slope_info array. 
+ */ +typedef struct SlopeInfo +{ + MonotonicFunction slope; /* cached monotonicity result */ + Index relid; /* relid of the table, or 0 if multi-table */ + Expr *expr; /* variation source (inner expression) */ + PathKey *pathkey; /* the query pathkey this info belongs to */ +} SlopeInfo; + static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, int partkeycol); static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); +static MonotonicFunction get_expr_slope_wrt(Expr *expr, Expr *target); +static PathKey *match_slope_pathkey_for_index_col(PlannerInfo *root, + IndexOptInfo *index, + int colno, + Expr *indexkey, + bool reverse_sort); /**************************************************************************** @@ -760,6 +782,391 @@ get_cheapest_parallel_safe_total_inner(List *paths) return NULL; } +/* + * get_variation_source + * Find the source of variation in an expression. + * + * Descends through function calls to find the innermost non-constant + * expression that determines the variation of the whole expression. + * For f(x) returns x. For f(g(x)) returns x. For f(x, y) returns f(x, y). + * For a plain Var, returns the Var itself. + * + * This is a cheap extraction that doesn't check monotonicity - that's + * deferred until we find an index column matching the variation source. + * Also extracts the relid if all Vars are from the same table. 
+ */ +static void +get_variation_source(Expr *expr, Expr **inner_out, Index *relid_out) +{ + *inner_out = NULL; + *relid_out = 0; + + for (;;) + { + List *args; + Expr *non_const_arg = NULL; + int non_const_count = 0; + ListCell *lc; + + /* Skip RelabelType (no-op coercion) */ + if (IsA(expr, RelabelType)) + { + expr = (Expr *) ((RelabelType *) expr)->arg; + continue; + } + + /* Handle FuncExpr - skip through casts */ + if (IsA(expr, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) expr; + + if ((fexpr->funcformat == COERCE_IMPLICIT_CAST || + fexpr->funcformat == COERCE_EXPLICIT_CAST) && + list_length(fexpr->args) == 1) + { + expr = (Expr *) linitial(fexpr->args); + continue; + } + args = fexpr->args; + } + else if (IsA(expr, OpExpr)) + { + args = ((OpExpr *) expr)->args; + } + else if (IsA(expr, Var)) + { + /* Reached a Var - this is our inner expression */ + *inner_out = expr; + *relid_out = ((Var *) expr)->varno; + return; + } + else + { + /* Unsupported node type */ + return; + } + + /* Find non-constant arguments */ + foreach(lc, args) + { + Expr *arg = (Expr *) lfirst(lc); + + if (!IsA(arg, Const)) + { + non_const_count++; + if (non_const_count > 1) + { + /* Multivariate - return this expression as inner */ + *inner_out = expr; + *relid_out = 0; /* unknown, will use equal() */ + return; + } + non_const_arg = arg; + } + } + + if (non_const_arg == NULL) + { + /* All constant - no inner expression */ + return; + } + + expr = non_const_arg; + } +} + +/* + * get_expr_slope_wrt + * Determine the monotonicity slope of an expression with respect to + * a specific target subexpression. 
+ * + * Returns the slope of 'expr' with respect to 'target' + * MONOTONICFUNC_INCREASING: monotonically increasing + * MONOTONICFUNC_DECREASING: monotonically decreasing + * MONOTONICFUNC_NONE: cannot determine monotonicity + */ +static MonotonicFunction +get_expr_slope_wrt(Expr *expr, Expr *target) +{ + MonotonicFunction slope = MONOTONICFUNC_INCREASING; + + for (;;) + { + Oid funcid; + List *args; + Oid prosupport; + SupportRequestMonotonic req; + ListCell *lc; + int i; + Expr *next_expr = NULL; + MonotonicFunction func_arg_slope = MONOTONICFUNC_INCREASING; + + /* Check if we've reached the target */ + if (equal(expr, target)) + return slope; + + /* Skip RelabelType (no-op coercion) */ + if (IsA(expr, RelabelType)) + { + expr = (Expr *) ((RelabelType *) expr)->arg; + continue; + } + + /* Handle FuncExpr - skip through casts */ + if (IsA(expr, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) expr; + + if ((fexpr->funcformat == COERCE_IMPLICIT_CAST || + fexpr->funcformat == COERCE_EXPLICIT_CAST) && + list_length(fexpr->args) == 1) + { + expr = (Expr *) linitial(fexpr->args); + continue; + } + funcid = fexpr->funcid; + args = fexpr->args; + } + else if (IsA(expr, OpExpr)) + { + OpExpr *opexpr = (OpExpr *) expr; + + set_opfuncid(opexpr); + funcid = opexpr->opfuncid; + args = opexpr->args; + } + else + { + /* Reached a leaf without finding target */ + return MONOTONICFUNC_NONE; + } + + /* Check for prosupport function */ + prosupport = get_func_support(funcid); + if (!OidIsValid(prosupport)) + return MONOTONICFUNC_NONE; + + /* Call prosupport to get slope pattern */ + req.type = T_SupportRequestMonotonic; + req.expr = (Node *) expr; + req.slopes = NULL; + req.nslopes = 0; + + if (DatumGetPointer(OidFunctionCall1(prosupport, PointerGetDatum(&req))) == NULL) + return MONOTONICFUNC_NONE; + + if (req.slopes == NULL || req.nslopes <= 0) + return MONOTONICFUNC_NONE; + + /* Find the single non-constant argument */ + i = 0; + foreach(lc, args) + { + Expr *arg = (Expr *) 
lfirst(lc); + + if (!IsA(arg, Const)) + { + if (next_expr != NULL) + { + /* Multivariate - check if this is the target */ + return equal(expr, target) ? slope : MONOTONICFUNC_NONE; + } + next_expr = arg; + if (likely(i < req.nslopes)) + { + if (req.slopes[i] == MONOTONICFUNC_DECREASING) + func_arg_slope = MONOTONICFUNC_DECREASING; + else if (req.slopes[i] != MONOTONICFUNC_INCREASING) + return MONOTONICFUNC_NONE; + } + else + return MONOTONICFUNC_NONE; + } + i++; + } + + if (next_expr == NULL) + return MONOTONICFUNC_NONE; /* all constant */ + + /* Compose slopes */ + if (func_arg_slope == MONOTONICFUNC_DECREASING) + { + slope = (slope == MONOTONICFUNC_INCREASING) ? + MONOTONICFUNC_DECREASING : MONOTONICFUNC_INCREASING; + } + + expr = next_expr; + } +} + +/* + * precompute_slope_cache + * For each pathkey, extract the source of variation and check if + * it is usable, i.e. depends on a single table, and is not the + * complete expression. + * + * This is called once after query_pathkeys is set. We store only entries + * where there's a useful variation source (relid != 0), making the array + * compact for efficient iteration. Monotonicity is checked later only + * when an index column matches the variation source. + * + * We also check if the next pathkey equals the variation source, enabling + * patterns like [f(x), x] to be satisfied by a single index column. 
+ */ +void +precompute_slope_cache(PlannerInfo *root) +{ + int nqpk; + int count; + ListCell *lc; + + root->num_slope_entries = 0; + root->slope_info = NULL; + + if (root->query_pathkeys == NIL) + return; + + nqpk = list_length(root->query_pathkeys); + root->slope_info = (SlopeInfo *) palloc(nqpk * sizeof(SlopeInfo)); + + count = 0; + foreach(lc, root->query_pathkeys) + { + PathKey *qpk = lfirst_node(PathKey, lc); + EquivalenceMember *em; + Expr *inner; + Index relid; + + if (qpk->pk_eclass->ec_has_volatile || + qpk->pk_eclass->ec_members == NIL) + continue; + + em = linitial(qpk->pk_eclass->ec_members); + + /* Simple Vars don't need slope analysis */ + if (IsA(em->em_expr, Var)) + continue; + + get_variation_source(em->em_expr, &inner, &relid); + + /* + * Only store if we found a useful variation source from a single + * table that differs from the original expression. + * relid == 0 means multivariate or unknown source. + */ + if (inner != NULL && relid != 0 && inner != em->em_expr) + { + root->slope_info[count].slope = MONOTONICFUNC_BOTH; + root->slope_info[count].relid = relid; + root->slope_info[count].expr = inner; + root->slope_info[count].pathkey = qpk; + count++; + } + } + + if (count > 0) + root->num_slope_entries = count; + else + { + pfree(root->slope_info); + root->slope_info = NULL; + } +} + +/* + * match_slope_pathkey_for_index_col + * If a precomputed slope entry matches this index column, return its + * PathKey; otherwise NULL. Monotonicity is computed on first match and + * cached in slope_info. + * + * The emitted pathkey reflects the sort direction that the forward scan + * actually produces, determined by the index column direction and the + * function's monotonicity. This mirrors how make_pathkey_from_sortinfo + * always emits for the given scan direction. 
+ */ +static PathKey * +match_slope_pathkey_for_index_col(PlannerInfo *root, + IndexOptInfo *index, + int colno, + Expr *indexkey, + bool reverse_sort) +{ + TypeCacheEntry *tce = NULL; + + Assert(colno >= 0 && colno < index->nkeycolumns); + + if (root->num_slope_entries == 0 || + index->rel->reloptkind != RELOPT_BASEREL) + return NULL; + + for (int j = 0; j < root->num_slope_entries; j++) + { + SlopeInfo *si = &root->slope_info[j]; + bool need_desc; + bool produces_desc; + + if (si->relid != index->rel->relid) + continue; + + /* Variation source must be this index column (Var fast path, else equal()) */ + if (likely(IsA(indexkey, Var))) + { + Var *v1 = (Var *) si->expr; + Var *v2 = (Var *) indexkey; + + if (unlikely(!IsA(v1, Var) || + v1->varno != v2->varno || + v1->varattno != v2->varattno)) + continue; + } + else if (!equal(si->expr, indexkey)) + continue; + + /* Index sort opfamily must equal the type cache's btree_opf, else give up */ + if (tce == NULL) + { + tce = lookup_type_cache(index->opcintype[colno], + TYPECACHE_BTREE_OPFAMILY); + if (unlikely(!OidIsValid(tce->btree_opf) || + tce->btree_opf != index->sortopfamily[colno])) + break; + } + + /* MONOTONICFUNC_BOTH marks "not yet computed": compute and cache the slope */ + if (si->slope == MONOTONICFUNC_BOTH) + { + EquivalenceMember *em; + + em = linitial(si->pathkey->pk_eclass->ec_members); + si->slope = get_expr_slope_wrt(em->em_expr, si->expr); + } + + if (si->slope == MONOTONICFUNC_NONE) + continue; + + /* + * Emit a pathkey for the direction the forward scan produces + * (index column direction combined with the function's slope): + * index function pathkey + * ASC ASC ASC + * ASC DESC DESC + * DESC ASC DESC + * DESC DESC ASC + * NOTE(review): index->nulls_first[colno] is never consulted here, + * so NULL placement of the emitted pathkey is unverified -- confirm. + */ + need_desc = (si->pathkey->pk_cmptype == COMPARE_GT); + produces_desc = (reverse_sort != (si->slope == MONOTONICFUNC_DECREASING)); + + if (need_desc == produces_desc) + return si->pathkey; + else + return make_reversed_pathkey(root, si->pathkey); + } + + return NULL; +} + 
/**************************************************************************** * NEW PATHKEY FORMATION ****************************************************************************/ @@ -819,42 +1226,66 @@ build_index_pathkeys(PlannerInfo *root, nulls_first = index->nulls_first[i]; /* - * OK, try to make a canonical pathkey for this sort key. + * First, try SLOPE: check if a monotonic function wraps this + * index column. This is checked before make_pathkey_from_sortinfo + * so that f(x) pathkeys that have no direct EquivalenceClass for + * the index column can still be matched. + * + * The emitted pathkey reflects what the forward scan produces, + * based on the index column direction and the function's + * monotonicity. */ - cpathkey = make_pathkey_from_sortinfo(root, - indexkey, - index->sortopfamily[i], - index->opcintype[i], - index->indexcollations[i], - reverse_sort, - nulls_first, - 0, - index->rel->relids, - false); - + cpathkey = match_slope_pathkey_for_index_col(root, index, i, + indexkey, + reverse_sort); if (cpathkey) { - /* - * We found the sort key in an EquivalenceClass, so it's relevant - * for this query. Add it to list, unless it's redundant. - */ if (!pathkey_is_redundant(cpathkey, retval)) retval = lappend(retval, cpathkey); + + /* + * f(x) need not be strictly monotonic (floor() maps many x to one + * value), so rows with equal f(x) are not ordered by lower-order + * index columns. Stop examining this index after a SLOPE match. + */ + break; } else { /* - * Boolean index keys might be redundant even if they do not - * appear in an EquivalenceClass, because of our special treatment - * of boolean equality conditions --- see the comment for - * indexcol_is_bool_constant_for_query(). If that applies, we can - * continue to examine lower-order index columns. Otherwise, the - * sort key is not an interesting sort order for this query, so we - * should stop considering index columns; any lower-order sort - * keys won't be useful either. + * No SLOPE match. Try to make a canonical pathkey. 
*/ - if (!indexcol_is_bool_constant_for_query(root, index, i)) + cpathkey = make_pathkey_from_sortinfo(root, + indexkey, + index->sortopfamily[i], + index->opcintype[i], + index->indexcollations[i], + reverse_sort, + nulls_first, + 0, + index->rel->relids, + false); + + if (cpathkey) + { + if (!pathkey_is_redundant(cpathkey, retval)) + retval = lappend(retval, cpathkey); + } + else if (!indexcol_is_bool_constant_for_query(root, index, i)) + { + /* + * Boolean index keys might be redundant even if they do not + * appear in an EquivalenceClass, because of our special + * treatment of boolean equality conditions --- see the + * comment for indexcol_is_bool_constant_for_query(). If that + * applies, we can continue to examine lower-order index + * columns. Otherwise, the sort key is not an interesting + * sort order for this query, so we should stop considering + * index columns; any lower-order sort keys won't be useful + * either. + */ break; + } } i++; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 07944612668..cf05f99d7d4 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3739,6 +3739,9 @@ standard_qp_callback(PlannerInfo *root, void *extra) root->query_pathkeys = root->setop_pathkeys; else root->query_pathkeys = NIL; + + /* Precompute SLOPE cache for monotonic function optimization */ + precompute_slope_cache(root); } /* diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 7947d83d584..ce49ddd1279 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -297,6 +297,8 @@ typedef struct PlannerGlobal * correctly replaced with the keeping one. 
*---------- */ +struct SlopeInfo; /* private to pathkeys.c */ + typedef struct PlannerInfo PlannerInfo; struct PlannerInfo @@ -515,6 +517,10 @@ struct PlannerInfo /* desired pathkeys for query_planner() */ List *query_pathkeys; + /* SLOPE optimization: cached info about monotonic pathkeys */ + struct SlopeInfo *slope_info pg_node_attr(read_write_ignore); + int num_slope_entries; + /* groupClause pathkeys, if any */ List *group_pathkeys; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index e2c00576d41..dfe15f05897 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1831,6 +1831,11 @@ typedef struct PlanInvalItem * than the previous call. A monotonically decreasing function cannot yield a * higher value on subsequent calls, and a function which is both must return * the same value on each call. + * + * Used both for window function run conditions (SupportRequestWFuncMonotonic) + * and for per-argument monotonicity of scalar functions + * (SupportRequestMonotonic), where it enables the planner to use an index + * on 'x' to satisfy ORDER BY / GROUP BY on 'f(x)'. 
*/ typedef enum MonotonicFunction { diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 7564e232e65..192b29f8eb2 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -236,6 +236,7 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, Relids required_outer, double fraction); extern Path *get_cheapest_parallel_safe_total_inner(List *paths); +extern void precompute_slope_cache(PlannerInfo *root); extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index); extern PathKey *make_reversed_pathkey(PlannerInfo *root, PathKey *pathkey); extern List *reverse_pathkeys(PlannerInfo *root, List *pathkeys); diff --git a/src/test/regress/expected/slope.out b/src/test/regress/expected/slope.out new file mode 100644 index 00000000000..f33299a9120 --- /dev/null +++ b/src/test/regress/expected/slope.out @@ -0,0 +1,281 @@ +-- +-- SLOPE (Scalar function Leveraging Ordered Path Evaluation) +-- Test that monotonic functions can use indexes for ordering +-- +-- Create test table with various data types +CREATE TABLE slope_src ( + id serial PRIMARY KEY, + v_int2 int2, + v_int4 int4, + v_int8 int8, + v_float4 float4, + v_float8 float8, + v_numeric numeric, + ts timestamp, + tstz timestamptz +); +-- Insert some test data +INSERT INTO slope_src (v_int2, v_int4, v_int8, v_float4, v_float8, v_numeric, ts, tstz) +SELECT + (i % 100)::int2, + i, + i::int8, + i::float4, + i::float8, + i::numeric, + '2020-01-01'::timestamp + (i || ' hours')::interval, + '2020-01-01'::timestamptz + (i || ' hours')::interval +FROM generate_series(1, 1000) i; +-- Create indexes on the columns we'll test +CREATE INDEX slope_src_v_int4_idx ON slope_src (v_int4); +CREATE INDEX slope_src_v_int8_idx ON slope_src (v_int8); +CREATE INDEX slope_src_v_float8_idx ON slope_src (v_float8); +CREATE INDEX slope_src_v_numeric_idx ON slope_src (v_numeric); +CREATE INDEX slope_src_ts_idx ON slope_src (ts); +CREATE INDEX slope_src_tstz_idx ON 
slope_src (tstz); +-- Analyze to get good statistics +ANALYZE slope_src; +-- Disable hash aggregation to force group aggregate plan +SET enable_hashagg = off; +-- +-- Test GROUP BY with monotonic function +-- +-- Basic: floor(float8) should use index on v_float8 +explain (costs off, verbose) +select floor(v_float8), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------ + GroupAggregate + Output: (floor(v_float8)), count(*) + Group Key: floor(slope_src.v_float8) + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: floor(v_float8) +(5 rows) + +-- ceil(float8) should use index on v_float8 +explain (costs off, verbose) +select ceil(v_float8), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------ + GroupAggregate + Output: (ceil(v_float8)), count(*) + Group Key: ceil(slope_src.v_float8) + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: ceil(v_float8) +(5 rows) + +-- floor(numeric) should use index on v_numeric +explain (costs off, verbose) +select floor(v_numeric), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------- + GroupAggregate + Output: (floor(v_numeric)), count(*) + Group Key: floor(slope_src.v_numeric) + -> Index Only Scan using slope_src_v_numeric_idx on public.slope_src + Output: floor(v_numeric) +(5 rows) + +-- timestamp::date cast should use index on ts +explain (costs off, verbose) +select ts::date, count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------ + GroupAggregate + Output: ((ts)::date), count(*) + Group Key: (slope_src.ts)::date + -> Index Only Scan using slope_src_ts_idx on public.slope_src + Output: (ts)::date +(5 rows) + +-- date_trunc on timestamp should use index +explain (costs off, verbose) +select date_trunc('day', ts), 
count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------ + GroupAggregate + Output: (date_trunc('day'::text, ts)), count(*) + Group Key: date_trunc('day'::text, slope_src.ts) + -> Index Only Scan using slope_src_ts_idx on public.slope_src + Output: date_trunc('day'::text, ts) +(5 rows) + +-- date_trunc on timestamptz should use index +explain (costs off, verbose) +select date_trunc('day', tstz), count(*) from slope_src group by 1; + QUERY PLAN +-------------------------------------------------------------------- + GroupAggregate + Output: (date_trunc('day'::text, tstz)), count(*) + Group Key: date_trunc('day'::text, slope_src.tstz) + -> Index Only Scan using slope_src_tstz_idx on public.slope_src + Output: date_trunc('day'::text, tstz) +(5 rows) + +-- +-- Test arithmetic operations +-- +-- Addition: v_int4 + 10 is increasing in v_int4 +explain (costs off, verbose) +select v_int4 + 10, count(*) from slope_src group by 1; + QUERY PLAN +---------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 + 10)), count(*) + Group Key: (slope_src.v_int4 + 10) + -> Index Only Scan using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 + 10) +(5 rows) + +-- Subtraction: v_int4 - 10 is increasing in v_int4 +explain (costs off, verbose) +select v_int4 - 10, count(*) from slope_src group by 1; + QUERY PLAN +---------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 - 10)), count(*) + Group Key: (slope_src.v_int4 - 10) + -> Index Only Scan using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 - 10) +(5 rows) + +-- Multiplication by positive constant: v_int4 * 2 is increasing +explain (costs off, verbose) +select v_int4 * 2, count(*) from slope_src group by 1; + QUERY PLAN +---------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 * 2)), count(*) + Group Key: 
(slope_src.v_int4 * 2) + -> Index Only Scan using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 * 2) +(5 rows) + +-- Division by positive constant: v_int4 / 2 is increasing +explain (costs off, verbose) +select v_int4 / 2, count(*) from slope_src group by 1; + QUERY PLAN +---------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 / 2)), count(*) + Group Key: (slope_src.v_int4 / 2) + -> Index Only Scan using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 / 2) +(5 rows) + +-- +-- Test decreasing functions (should use backward scan) +-- +-- Unary minus: -v_int4 is decreasing in v_int4 +explain (costs off, verbose) +select -v_int4, count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------------- + GroupAggregate + Output: ((- v_int4)), count(*) + Group Key: (- slope_src.v_int4) + -> Index Only Scan Backward using slope_src_v_int4_idx on public.slope_src + Output: (- v_int4) +(5 rows) + +-- Subtraction from constant: 1000 - v_int4 is decreasing in v_int4 +explain (costs off, verbose) +select 1000 - v_int4, count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------------- + GroupAggregate + Output: ((1000 - v_int4)), count(*) + Group Key: (1000 - slope_src.v_int4) + -> Index Only Scan Backward using slope_src_v_int4_idx on public.slope_src + Output: (1000 - v_int4) +(5 rows) + +-- Multiplication by negative constant: v_int4 * (-2) is decreasing +explain (costs off, verbose) +select v_int4 * (-2), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 * '-2'::integer)), count(*) + Group Key: (slope_src.v_int4 * '-2'::integer) + -> Index Only Scan Backward using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 * '-2'::integer) +(5 rows) + +-- Division by 
negative constant: v_int4 / (-2) is decreasing +explain (costs off, verbose) +select v_int4 / (-2), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 / '-2'::integer)), count(*) + Group Key: (slope_src.v_int4 / '-2'::integer) + -> Index Only Scan Backward using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 / '-2'::integer) +(5 rows) + +-- +-- Test ORDER BY with monotonic function +-- +-- ORDER BY floor(v_float8) should use index +explain (costs off, verbose) +select floor(v_float8), v_float8 from slope_src order by 1 limit 10; + QUERY PLAN +------------------------------------------------------------------------ + Limit + Output: (floor(v_float8)), v_float8 + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: floor(v_float8), v_float8 +(4 rows) + +-- ORDER BY -v_int4 DESC should use forward scan (decreasing + DESC = forward) +explain (costs off, verbose) +select -v_int4 from slope_src order by 1 desc limit 10; + QUERY PLAN +---------------------------------------------------------------------- + Limit + Output: ((- v_int4)) + -> Index Only Scan using slope_src_v_int4_idx on public.slope_src + Output: (- v_int4) +(4 rows) + +-- ORDER BY -v_int4 ASC should use backward scan (decreasing + ASC = backward) +explain (costs off, verbose) +select -v_int4 from slope_src order by 1 limit 10; + QUERY PLAN +------------------------------------------------------------------------------- + Limit + Output: ((- v_int4)) + -> Index Only Scan Backward using slope_src_v_int4_idx on public.slope_src + Output: (- v_int4) +(4 rows) + +-- +-- Test nested monotonic function +-- +-- floor(floor(x)) should still use index +explain (costs off, verbose) +select floor(floor(v_float8)), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------ + GroupAggregate + Output: 
(floor(floor(v_float8))), count(*) + Group Key: floor(floor(slope_src.v_float8)) + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: floor(floor(v_float8)) +(5 rows) + +-- floor(v + 1) should use index +explain (costs off, verbose) +select floor(v_float8 + 1), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------ + GroupAggregate + Output: (floor((v_float8 + '1'::double precision))), count(*) + Group Key: floor((slope_src.v_float8 + '1'::double precision)) + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: floor((v_float8 + '1'::double precision)) +(5 rows) + +-- Cleanup +RESET enable_hashagg; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 734da057c34..9dd2ef31517 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -63,6 +63,9 @@ test: sanity_check # ---------- test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update delete namespace prepared_xacts +# SLOPE optimization test +test: slope + # ---------- # Another group of parallel tests # ---------- diff --git a/src/test/regress/sql/slope.sql b/src/test/regress/sql/slope.sql new file mode 100644 index 00000000000..9fb48f56ab2 --- /dev/null +++ b/src/test/regress/sql/slope.sql @@ -0,0 +1,143 @@ +-- +-- SLOPE (Scalar function Leveraging Ordered Path Evaluation) +-- Test that monotonic functions can use indexes for ordering +-- + +-- Create test table with various data types +CREATE TABLE slope_src ( + id serial PRIMARY KEY, + v_int2 int2, + v_int4 int4, + v_int8 int8, + v_float4 float4, + v_float8 float8, + v_numeric numeric, + ts timestamp, + tstz timestamptz +); + +-- Insert some test data +INSERT INTO slope_src (v_int2, v_int4, v_int8, v_float4, v_float8, v_numeric, ts, tstz) +SELECT 
+ (i % 100)::int2, + i, + i::int8, + i::float4, + i::float8, + i::numeric, + '2020-01-01'::timestamp + (i || ' hours')::interval, + '2020-01-01'::timestamptz + (i || ' hours')::interval +FROM generate_series(1, 1000) i; + +-- Create indexes on the columns we'll test +CREATE INDEX slope_src_v_int4_idx ON slope_src (v_int4); +CREATE INDEX slope_src_v_int8_idx ON slope_src (v_int8); +CREATE INDEX slope_src_v_float8_idx ON slope_src (v_float8); +CREATE INDEX slope_src_v_numeric_idx ON slope_src (v_numeric); +CREATE INDEX slope_src_ts_idx ON slope_src (ts); +CREATE INDEX slope_src_tstz_idx ON slope_src (tstz); + +-- Analyze to get good statistics +ANALYZE slope_src; + +-- Disable hash aggregation to force group aggregate plan +SET enable_hashagg = off; + +-- +-- Test GROUP BY with monotonic function +-- + +-- Basic: floor(float8) should use index on v_float8 +explain (costs off, verbose) +select floor(v_float8), count(*) from slope_src group by 1; + +-- ceil(float8) should use index on v_float8 +explain (costs off, verbose) +select ceil(v_float8), count(*) from slope_src group by 1; + +-- floor(numeric) should use index on v_numeric +explain (costs off, verbose) +select floor(v_numeric), count(*) from slope_src group by 1; + +-- timestamp::date cast should use index on ts +explain (costs off, verbose) +select ts::date, count(*) from slope_src group by 1; + +-- date_trunc on timestamp should use index +explain (costs off, verbose) +select date_trunc('day', ts), count(*) from slope_src group by 1; + +-- date_trunc on timestamptz should use index +explain (costs off, verbose) +select date_trunc('day', tstz), count(*) from slope_src group by 1; + +-- +-- Test arithmetic operations +-- + +-- Addition: v_int4 + 10 is increasing in v_int4 +explain (costs off, verbose) +select v_int4 + 10, count(*) from slope_src group by 1; + +-- Subtraction: v_int4 - 10 is increasing in v_int4 +explain (costs off, verbose) +select v_int4 - 10, count(*) from slope_src group by 1; + +-- 
Multiplication by positive constant: v_int4 * 2 is increasing +explain (costs off, verbose) +select v_int4 * 2, count(*) from slope_src group by 1; + +-- Division by positive constant: v_int4 / 2 is increasing +explain (costs off, verbose) +select v_int4 / 2, count(*) from slope_src group by 1; + +-- +-- Test decreasing functions (should use backward scan) +-- + +-- Unary minus: -v_int4 is decreasing in v_int4 +explain (costs off, verbose) +select -v_int4, count(*) from slope_src group by 1; + +-- Subtraction from constant: 1000 - v_int4 is decreasing in v_int4 +explain (costs off, verbose) +select 1000 - v_int4, count(*) from slope_src group by 1; + +-- Multiplication by negative constant: v_int4 * (-2) is decreasing +explain (costs off, verbose) +select v_int4 * (-2), count(*) from slope_src group by 1; + +-- Division by negative constant: v_int4 / (-2) is decreasing +explain (costs off, verbose) +select v_int4 / (-2), count(*) from slope_src group by 1; + +-- +-- Test ORDER BY with monotonic function +-- + +-- ORDER BY floor(v_float8) should use index +explain (costs off, verbose) +select floor(v_float8), v_float8 from slope_src order by 1 limit 10; + +-- ORDER BY -v_int4 DESC should use forward scan (decreasing + DESC = forward) +explain (costs off, verbose) +select -v_int4 from slope_src order by 1 desc limit 10; + +-- ORDER BY -v_int4 ASC should use backward scan (decreasing + ASC = backward) +explain (costs off, verbose) +select -v_int4 from slope_src order by 1 limit 10; + +-- +-- Test nested monotonic function +-- + +-- floor(floor(x)) should still use index +explain (costs off, verbose) +select floor(floor(v_float8)), count(*) from slope_src group by 1; + +-- floor(v + 1) should use index +explain (costs off, verbose) +select floor(v_float8 + 1), count(*) from slope_src group by 1; + +-- Cleanup +RESET enable_hashagg; -- 2.53.0