From 9f3914039663e593322b2ed990dc8eeaf4a46716 Mon Sep 17 00:00:00 2001 From: Alexandre Felipe Date: Sun, 5 Apr 2026 01:01:18 +0100 Subject: [PATCH v7 5/6] SLOPE: Planner support Update the planner to take advantage of the order of an expression that can be inferred from the order of a subexpression. If f(x) is a monotonic function and x is known to be ordered, we can infer the order of f(x) from the order of x. The analysis is performed in two stages: 1. During plan creation, for every query pathkey expression, compute the source of variation, defined as the innermost subexpression that causes all variation in the expression. If the source of variation is not the full expression and depends on a single table, it is recorded on the pathkey (pk_var, pk_varrelid). 2. In build_index_pathkeys(), when an index key is the source of variation of a query pathkey and the expression is monotonic in it, emit that query pathkey in place of the index column's own pathkey. Changes: - Add pk_var, pk_varrelid and pk_slope fields to PathKey to cache monotonicity information per pathkey - Add precompute_slope_pathkeys() called during plan construction - Add get_variation_source() to find the innermost varying expression - Add get_expr_slope_wrt() to verify monotonicity by calling prosupport - Modify build_index_pathkeys() to check for monotonicity on index keys - Handle both increasing and decreasing monotonic functions, with reversed pathkey emission for decreasing functions so that backward index scans are correctly selected Adds tests in sql/slope.sql covering GROUP BY and ORDER BY, involving a few arithmetic operations and both increasing and decreasing functions. 
--- src/backend/optimizer/path/pathkeys.c | 351 ++++++++++++++++++++++ src/backend/optimizer/plan/planner.c | 3 + src/include/nodes/pathnodes.h | 10 + src/include/nodes/plannodes.h | 5 + src/include/optimizer/paths.h | 1 + src/test/regress/expected/slope.out | 403 ++++++++++++++++++++++++++ src/test/regress/parallel_schedule | 3 + src/test/regress/sql/slope.sql | 217 ++++++++++++++ 8 files changed, 993 insertions(+) create mode 100644 src/test/regress/expected/slope.out create mode 100644 src/test/regress/sql/slope.sql diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 3e3eb720c1f..0c0d1138cac 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -19,14 +19,18 @@ #include "access/stratnum.h" #include "catalog/pg_opfamily.h" +#include "fmgr.h" #include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "parser/parse_oper.h" #include "partitioning/partbounds.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" +#include "utils/typcache.h" /* Consider reordering of GROUP BY keys? */ bool enable_group_by_reordering = true; @@ -37,6 +41,10 @@ static bool matches_boolean_partition_clause(RestrictInfo *rinfo, int partkeycol); static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); +static MonotonicFunction get_expr_slope_wrt(Expr *expr, Expr *target); +static PathKey *slope_emit_pathkey(PlannerInfo *root, PathKey *pk, + Expr *indexkey, bool reverse_sort, + bool nulls_first); /**************************************************************************** @@ -760,6 +768,305 @@ get_cheapest_parallel_safe_total_inner(List *paths) return NULL; } +/* + * get_variation_source + * Find the source of variation in an expression. 
+ * + * Descends through function calls to find the innermost non-constant + * expression that determines the variation of the whole expression. + * For f(x) returns x. For f(g(x)) returns x. For f(x, y) returns f(x, y). + * For a plain Var, returns the Var itself. + * + * This is a cheap extraction that doesn't check monotonicity - that's + * deferred until we find an index column matching the variation source. + * Also extracts the relid if all Vars are from the same table. + */ +static void +get_variation_source(Expr *expr, Expr **inner_out, Index *relid_out) +{ + *inner_out = NULL; + *relid_out = 0; + + for (;;) + { + List *args; + Expr *non_const_arg = NULL; + int non_const_count = 0; + ListCell *lc; + + /* Skip RelabelType: descend into the relabeled argument (no-op coercion) */ + if (IsA(expr, RelabelType)) + { + expr = (Expr *) ((RelabelType *) expr)->arg; + continue; + } + + /* Handle FuncExpr - single-argument implicit/explicit casts are skipped; other calls have their arguments scanned below */ + if (IsA(expr, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) expr; + + if ((fexpr->funcformat == COERCE_IMPLICIT_CAST || + fexpr->funcformat == COERCE_EXPLICIT_CAST) && + list_length(fexpr->args) == 1) + { + expr = (Expr *) linitial(fexpr->args); + continue; + } + args = fexpr->args; + } + else if (IsA(expr, OpExpr)) + { + args = ((OpExpr *) expr)->args; + } + else if (IsA(expr, Var)) + { + /* Reached a Var - this is our inner expression; its varno is reported as the relid */ + *inner_out = expr; + *relid_out = ((Var *) expr)->varno; + return; + } + else + { + /* Unsupported node type - outputs keep their initial NULL / 0 values */ + return; + } + + /* Find non-constant arguments (anything that is not a Const node counts) */ + foreach(lc, args) + { + Expr *arg = (Expr *) lfirst(lc); + + if (!IsA(arg, Const)) + { + non_const_count++; + if (non_const_count > 1) + { + /* Multivariate - return this expression as inner */ + *inner_out = expr; + *relid_out = 0; /* unknown; NOTE(review): precompute_slope_pathkeys discards results with relid 0, so this case appears unused - confirm */ + return; + } + non_const_arg = arg; + } + } + + if (non_const_arg == NULL) + { + /* All arguments are Const - no inner expression, outputs stay NULL / 0 */ + return; + } + + expr = non_const_arg; + } +} + +/* + * get_expr_slope_wrt + * Determine 
the monotonicity slope of an expression with respect to + * a specific target subexpression. + * + * Returns the slope of 'expr' with respect to 'target' + * MONOTONICFUNC_INCREASING: monotonically increasing + * MONOTONICFUNC_DECREASING: monotonically decreasing + * MONOTONICFUNC_NONE: cannot determine monotonicity + */ +static MonotonicFunction +get_expr_slope_wrt(Expr *expr, Expr *target) +{ + MonotonicFunction slope = MONOTONICFUNC_INCREASING; + + for (;;) + { + Oid funcid; + List *args; + Oid prosupport; + SupportRequestMonotonic req; + ListCell *lc; + int i; + Expr *next_expr = NULL; + MonotonicFunction func_arg_slope = MONOTONICFUNC_INCREASING; + + /* Check if we've reached the target */ + if (equal(expr, target)) + return slope; + + /* Skip RelabelType (no-op coercion); NOTE(review): the slope is left unchanged, which assumes both types sort alike (int4 relabeled to oid may not) - confirm */ + if (IsA(expr, RelabelType)) + { + expr = (Expr *) ((RelabelType *) expr)->arg; + continue; + } + + /* Handle FuncExpr - skip through casts; NOTE(review): cast functions are treated as increasing without asking prosupport, yet some casts (e.g. timestamp to time) do not preserve order - confirm */ + if (IsA(expr, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) expr; + + if ((fexpr->funcformat == COERCE_IMPLICIT_CAST || + fexpr->funcformat == COERCE_EXPLICIT_CAST) && + list_length(fexpr->args) == 1) + { + expr = (Expr *) linitial(fexpr->args); + continue; + } + funcid = fexpr->funcid; + args = fexpr->args; + } + else if (IsA(expr, OpExpr)) + { + OpExpr *opexpr = (OpExpr *) expr; + + set_opfuncid(opexpr); + funcid = opexpr->opfuncid; + args = opexpr->args; + } + else + { + /* Reached a leaf without finding target */ + return MONOTONICFUNC_NONE; + } + + /* Without a prosupport function monotonicity cannot be established */ + prosupport = get_func_support(funcid); + if (!OidIsValid(prosupport)) + return MONOTONICFUNC_NONE; + + /* Call prosupport to get slope pattern: one slope per argument position, read back from req.slopes / req.nslopes */ + req.type = T_SupportRequestMonotonic; + req.expr = (Node *) expr; + req.slopes = NULL; + req.nslopes = 0; + + if (DatumGetPointer(OidFunctionCall1(prosupport, PointerGetDatum(&req))) == NULL) + return MONOTONICFUNC_NONE; + + if (req.slopes == NULL || req.nslopes <= 0) + return MONOTONICFUNC_NONE; + + /* Find the 
single non-constant argument; i is the argument position used to index req.slopes */ + i = 0; + foreach(lc, args) + { + Expr *arg = (Expr *) lfirst(lc); + + if (!IsA(arg, Const)) + { + if (next_expr != NULL) + { + /* Multivariate - check if this is the target; equal(expr, target) was already tested at the top of this iteration, so this always yields MONOTONICFUNC_NONE */ + return equal(expr, target) ? slope : MONOTONICFUNC_NONE; + } + next_expr = arg; + if (likely(i < req.nslopes)) + { + if (req.slopes[i] == MONOTONICFUNC_DECREASING) + func_arg_slope = MONOTONICFUNC_DECREASING; + else if (req.slopes[i] != MONOTONICFUNC_INCREASING) + return MONOTONICFUNC_NONE; + } + else + return MONOTONICFUNC_NONE; + } + i++; + } + + if (next_expr == NULL) + return MONOTONICFUNC_NONE; /* all constant */ + + /* Compose slopes: a decreasing step flips the accumulated direction, an increasing step keeps it */ + if (func_arg_slope == MONOTONICFUNC_DECREASING) + { + slope = (slope == MONOTONICFUNC_INCREASING) ? + MONOTONICFUNC_DECREASING : MONOTONICFUNC_INCREASING; + } + + expr = next_expr; + } +} + +/* + * precompute_slope_pathkeys + * For each query pathkey, extract the source of variation and store + * it directly on the PathKey (pk_var, pk_varrelid). + * + * Called once after query_pathkeys is set. Pathkeys whose expression + * is a plain Var or that have no usable variation source are left with + * pk_var = NULL. Monotonicity (pk_slope) is computed lazily on first + * index match. 
+ */ +void +precompute_slope_pathkeys(PlannerInfo *root) +{ + ListCell *lc; + + foreach(lc, root->query_pathkeys) + { + PathKey *pk = lfirst_node(PathKey, lc); + EquivalenceMember *em; + + pk->pk_var = NULL; + pk->pk_varrelid = 0; + pk->pk_slope = MONOTONICFUNC_BOTH; /* sentinel: slope not yet computed */ + + if (pk->pk_eclass->ec_has_volatile || + pk->pk_eclass->ec_members == NIL) + continue; + + em = linitial(pk->pk_eclass->ec_members); + + if (IsA(em->em_expr, Var)) + continue; + + get_variation_source(em->em_expr, &pk->pk_var, &pk->pk_varrelid); + + /* Discard if no useful source, no single relid, or it equals the full expression (pointer comparison) */ + if (pk->pk_var == NULL || pk->pk_varrelid == 0 || + pk->pk_var == em->em_expr) + pk->pk_var = NULL; + } +} + +/* + * slope_emit_pathkey + * Return the canonical pathkey for what a forward index scan actually + * produces for an expression that is monotonic in the index column. + * + * The result reflects both the direction and null ordering that the + * forward scan generates. f(NULL) is NULL, so nulls appear at the + * position dictated by the index's null ordering. NOTE(review): this assumes f is strict, which is not checked here - confirm. + * + * Returns NULL if pk_slope indicates no monotonicity. The indexkey argument is currently unused. + * + * index function pathkey + * ASC ASC ASC + * ASC DESC DESC + * DESC ASC DESC + * DESC DESC ASC + */ +static PathKey * +slope_emit_pathkey(PlannerInfo *root, + PathKey *pk, + Expr *indexkey, + bool reverse_sort, + bool nulls_first) +{ + MonotonicFunction slope; + bool produces_desc; + + slope = (MonotonicFunction) pk->pk_slope; + if (slope == MONOTONICFUNC_NONE) + return NULL; + + produces_desc = (reverse_sort != (slope == MONOTONICFUNC_DECREASING)); + + return make_canonical_pathkey(root, + pk->pk_eclass, + pk->pk_opfamily, + produces_desc ? 
COMPARE_GT : COMPARE_LT, + nulls_first); +} + /**************************************************************************** * NEW PATHKEY FORMATION ****************************************************************************/ @@ -832,6 +1139,50 @@ build_index_pathkeys(PlannerInfo *root, index->rel->relids, false); + /* + * SLOPE: if the first unmatched query pathkey is a monotonic + * function of this index column, use that pathkey instead of + * the column's own pathkey so the index can satisfy the query + * ordering without a Sort. Only the first non-redundant query pathkey is examined; pk_slope is computed here on first match (MONOTONICFUNC_BOTH means not yet computed). + */ + if (index->rel->reloptkind == RELOPT_BASEREL) + { + ListCell *lc2; + + foreach(lc2, root->query_pathkeys) + { + PathKey *qpk = lfirst_node(PathKey, lc2); + + if (pathkey_is_redundant(qpk, retval)) + continue; + + if (cpathkey && qpk->pk_eclass == cpathkey->pk_eclass) + break; + + if (qpk->pk_var != NULL && + !qpk->pk_eclass->ec_has_volatile && + qpk->pk_varrelid == index->rel->relid && + equal(qpk->pk_var, indexkey)) + { + PathKey *spk; + + if (qpk->pk_slope == MONOTONICFUNC_BOTH) + { + EquivalenceMember *em; + + em = linitial(qpk->pk_eclass->ec_members); + qpk->pk_slope = get_expr_slope_wrt(em->em_expr, + qpk->pk_var); + } + spk = slope_emit_pathkey(root, qpk, indexkey, + reverse_sort, nulls_first); + if (spk) + cpathkey = spk; + } + break; + } + } + if (cpathkey) { /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 4ec76ce31a9..a6dda82109b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3740,6 +3740,9 @@ standard_qp_callback(PlannerInfo *root, void *extra) root->query_pathkeys = root->setop_pathkeys; else root->query_pathkeys = NIL; + + /* Annotate query pathkeys with variation sources for SLOPE */ + precompute_slope_pathkeys(root); } /* diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 693b879f76d..142ce73dbdb 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1814,6 
+1814,16 @@ typedef struct PathKey Oid pk_opfamily; /* index opfamily defining the ordering */ CompareType pk_cmptype; /* sort direction (ASC or DESC) */ bool pk_nulls_first; /* do NULLs come before normal values? */ + + /* + * SLOPE: innermost source of variation, filled by + * precompute_slope_pathkeys(). NULL if this pathkey is a plain Var + * or cannot benefit from SLOPE. pk_slope stores a MonotonicFunction + * value (from plannodes.h) as int to avoid a header dependency; MONOTONICFUNC_BOTH marks it as not yet computed. pk_varrelid holds the varno of the pk_var Var. + */ + Expr *pk_var pg_node_attr(read_write_ignore, equal_ignore); + Index pk_varrelid pg_node_attr(read_write_ignore, equal_ignore); + int pk_slope pg_node_attr(read_write_ignore, equal_ignore); } PathKey; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 14a1dfed2b9..c9e374f92bf 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1833,6 +1833,11 @@ typedef struct PlanInvalItem * than the previous call. A monotonically decreasing function cannot yield a * higher value on subsequent calls, and a function which is both must return * the same value on each call. + * + * Used both for window function run conditions (SupportRequestWFuncMonotonic) + * and for per-argument monotonicity of scalar functions + * (SupportRequestMonotonic), where it enables the planner to use an index + * on 'x' to satisfy ORDER BY / GROUP BY on 'f(x)'. 
*/ typedef enum MonotonicFunction { diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 4ad3e8eaa89..af7d1f54517 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -235,6 +235,7 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, Relids required_outer, double fraction); extern Path *get_cheapest_parallel_safe_total_inner(List *paths); +extern void precompute_slope_pathkeys(PlannerInfo *root); extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index); extern PathKey *make_reversed_pathkey(PlannerInfo *root, PathKey *pathkey); extern List *reverse_pathkeys(PlannerInfo *root, List *pathkeys); diff --git a/src/test/regress/expected/slope.out b/src/test/regress/expected/slope.out new file mode 100644 index 00000000000..1e1e62350b1 --- /dev/null +++ b/src/test/regress/expected/slope.out @@ -0,0 +1,403 @@ +-- +-- SLOPE (Scalar function Leveraging Ordered Path Evaluation) +-- Test that monotonic functions can use indexes for ordering +-- +-- Create test table with various data types +CREATE TABLE slope_src ( + id serial PRIMARY KEY, + v_int2 int2, + v_int4 int4, + v_int8 int8, + v_float4 float4, + v_float8 float8, + v_numeric numeric, + ts timestamp, + tstz timestamptz +); +-- Insert some test data +INSERT INTO slope_src (v_int2, v_int4, v_int8, v_float4, v_float8, v_numeric, ts, tstz) +SELECT + (i % 100)::int2, + i, + i::int8, + i::float4, + i::float8, + i::numeric, + '2020-01-01'::timestamp + (i || ' hours')::interval, + '2020-01-01'::timestamptz + (i || ' hours')::interval +FROM generate_series(1, 1000) i; +-- Create indexes on the columns we'll test +CREATE INDEX slope_src_v_int4_idx ON slope_src (v_int4); +CREATE INDEX slope_src_v_int8_idx ON slope_src (v_int8); +CREATE INDEX slope_src_v_float8_idx ON slope_src (v_float8); +CREATE INDEX slope_src_v_numeric_idx ON slope_src (v_numeric); +CREATE INDEX slope_src_ts_idx ON slope_src (ts); +CREATE INDEX slope_src_tstz_idx ON 
slope_src (tstz); +-- Analyze to get good statistics +ANALYZE slope_src; +-- Disable hash aggregation to force group aggregate plan +SET enable_hashagg = off; +-- +-- Test GROUP BY with monotonic function +-- +-- Basic: floor(float8) should use index on v_float8 +explain (costs off, verbose) +select floor(v_float8), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------ + GroupAggregate + Output: (floor(v_float8)), count(*) + Group Key: floor(slope_src.v_float8) + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: floor(v_float8) +(5 rows) + +-- ceil(float8) should use index on v_float8 +explain (costs off, verbose) +select ceil(v_float8), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------ + GroupAggregate + Output: (ceil(v_float8)), count(*) + Group Key: ceil(slope_src.v_float8) + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: ceil(v_float8) +(5 rows) + +-- floor(numeric) should use index on v_numeric +explain (costs off, verbose) +select floor(v_numeric), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------- + GroupAggregate + Output: (floor(v_numeric)), count(*) + Group Key: floor(slope_src.v_numeric) + -> Index Only Scan using slope_src_v_numeric_idx on public.slope_src + Output: floor(v_numeric) +(5 rows) + +-- timestamp::date cast should use index on ts +explain (costs off, verbose) +select ts::date, count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------ + GroupAggregate + Output: ((ts)::date), count(*) + Group Key: (slope_src.ts)::date + -> Index Only Scan using slope_src_ts_idx on public.slope_src + Output: (ts)::date +(5 rows) + +-- date_trunc on timestamp should use index +explain (costs off, verbose) +select date_trunc('day', ts), 
count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------ + GroupAggregate + Output: (date_trunc('day'::text, ts)), count(*) + Group Key: date_trunc('day'::text, slope_src.ts) + -> Index Only Scan using slope_src_ts_idx on public.slope_src + Output: date_trunc('day'::text, ts) +(5 rows) + +-- date_trunc on timestamptz should use index +explain (costs off, verbose) +select date_trunc('day', tstz), count(*) from slope_src group by 1; + QUERY PLAN +-------------------------------------------------------------------- + GroupAggregate + Output: (date_trunc('day'::text, tstz)), count(*) + Group Key: date_trunc('day'::text, slope_src.tstz) + -> Index Only Scan using slope_src_tstz_idx on public.slope_src + Output: date_trunc('day'::text, tstz) +(5 rows) + +-- +-- Test arithmetic operations +-- +-- Addition: v_int4 + 10 is increasing in v_int4 +explain (costs off, verbose) +select v_int4 + 10, count(*) from slope_src group by 1; + QUERY PLAN +---------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 + 10)), count(*) + Group Key: (slope_src.v_int4 + 10) + -> Index Only Scan using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 + 10) +(5 rows) + +-- Subtraction: v_int4 - 10 is increasing in v_int4 +explain (costs off, verbose) +select v_int4 - 10, count(*) from slope_src group by 1; + QUERY PLAN +---------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 - 10)), count(*) + Group Key: (slope_src.v_int4 - 10) + -> Index Only Scan using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 - 10) +(5 rows) + +-- Multiplication by positive constant: v_int4 * 2 is increasing +explain (costs off, verbose) +select v_int4 * 2, count(*) from slope_src group by 1; + QUERY PLAN +---------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 * 2)), count(*) + Group Key: 
(slope_src.v_int4 * 2) + -> Index Only Scan using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 * 2) +(5 rows) + +-- Division by positive constant: v_int4 / 2 is increasing +explain (costs off, verbose) +select v_int4 / 2, count(*) from slope_src group by 1; + QUERY PLAN +---------------------------------------------------------------------- + GroupAggregate + Output: ((v_int4 / 2)), count(*) + Group Key: (slope_src.v_int4 / 2) + -> Index Only Scan using slope_src_v_int4_idx on public.slope_src + Output: (v_int4 / 2) +(5 rows) + +-- +-- Test decreasing functions (should use backward scan) +-- +-- Unary minus: -v_int4 is decreasing in v_int4 +explain (costs off, verbose) +select -v_int4, count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------ + GroupAggregate + Output: ((- v_int4)), count(*) + Group Key: ((- slope_src.v_int4)) + -> Sort + Output: ((- v_int4)) + Sort Key: ((- slope_src.v_int4)) + -> Seq Scan on public.slope_src + Output: (- v_int4) +(8 rows) + +-- Subtraction from constant: 1000 - v_int4 is decreasing in v_int4 +explain (costs off, verbose) +select 1000 - v_int4, count(*) from slope_src group by 1; + QUERY PLAN +----------------------------------------------- + GroupAggregate + Output: ((1000 - v_int4)), count(*) + Group Key: ((1000 - slope_src.v_int4)) + -> Sort + Output: ((1000 - v_int4)) + Sort Key: ((1000 - slope_src.v_int4)) + -> Seq Scan on public.slope_src + Output: (1000 - v_int4) +(8 rows) + +-- Multiplication by negative constant: v_int4 * (-2) is decreasing +explain (costs off, verbose) +select v_int4 * (-2), count(*) from slope_src group by 1; + QUERY PLAN +-------------------------------------------------------- + GroupAggregate + Output: ((v_int4 * '-2'::integer)), count(*) + Group Key: ((slope_src.v_int4 * '-2'::integer)) + -> Sort + Output: ((v_int4 * '-2'::integer)) + Sort Key: ((slope_src.v_int4 * '-2'::integer)) + -> Seq Scan on public.slope_src + Output: (v_int4 * '-2'::integer) 
+(8 rows) + +-- Division by negative constant: v_int4 / (-2) is decreasing +explain (costs off, verbose) +select v_int4 / (-2), count(*) from slope_src group by 1; + QUERY PLAN +-------------------------------------------------------- + GroupAggregate + Output: ((v_int4 / '-2'::integer)), count(*) + Group Key: ((slope_src.v_int4 / '-2'::integer)) + -> Sort + Output: ((v_int4 / '-2'::integer)) + Sort Key: ((slope_src.v_int4 / '-2'::integer)) + -> Seq Scan on public.slope_src + Output: (v_int4 / '-2'::integer) +(8 rows) + +-- +-- Test ORDER BY with monotonic function +-- +-- ORDER BY floor(v_float8) should use index +explain (costs off, verbose) +select floor(v_float8), v_float8 from slope_src order by 1 limit 10; + QUERY PLAN +------------------------------------------------------------------------ + Limit + Output: (floor(v_float8)), v_float8 + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: floor(v_float8), v_float8 +(4 rows) + +-- ORDER BY -v_int4 DESC should use forward scan (decreasing + DESC = forward) +explain (costs off, verbose) +select -v_int4 from slope_src order by 1 desc limit 10; + QUERY PLAN +----------------------------------------------- + Limit + Output: ((- v_int4)) + -> Sort + Output: ((- v_int4)) + Sort Key: ((- slope_src.v_int4)) DESC + -> Seq Scan on public.slope_src + Output: (- v_int4) +(7 rows) + +-- ORDER BY -v_int4 ASC should use backward scan (decreasing + ASC = backward) +explain (costs off, verbose) +select -v_int4 from slope_src order by 1 limit 10; + QUERY PLAN +------------------------------------------ + Limit + Output: ((- v_int4)) + -> Sort + Output: ((- v_int4)) + Sort Key: ((- slope_src.v_int4)) + -> Seq Scan on public.slope_src + Output: (- v_int4) +(7 rows) + +-- +-- Test nested monotonic function +-- +-- floor(floor(x)) should still use index +explain (costs off, verbose) +select floor(floor(v_float8)), count(*) from slope_src group by 1; + QUERY PLAN 
+------------------------------------------------------------------------ + GroupAggregate + Output: (floor(floor(v_float8))), count(*) + Group Key: floor(floor(slope_src.v_float8)) + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: floor(floor(v_float8)) +(5 rows) + +-- floor(v + 1) should use index +explain (costs off, verbose) +select floor(v_float8 + 1), count(*) from slope_src group by 1; + QUERY PLAN +------------------------------------------------------------------------ + GroupAggregate + Output: (floor((v_float8 + '1'::double precision))), count(*) + Group Key: floor((slope_src.v_float8 + '1'::double precision)) + -> Index Only Scan using slope_src_v_float8_idx on public.slope_src + Output: floor((v_float8 + '1'::double precision)) +(5 rows) + +-- +-- Test all index/query direction+nulls combinations for SLOPE. +-- For an increasing function like floor(), the scan uses the index when both +-- direction and nulls agree (Forward) or both are flipped (Backward). +-- When only one differs, a Sort is required. 
+-- +CREATE TABLE slope_nulls_tmp (v float8); +INSERT INTO slope_nulls_tmp VALUES (1), (NULL), (2); +ANALYZE slope_nulls_tmp; +CREATE TEMPORARY TABLE slope_nulls_results ( + sign text, index_order text, query_order text, scan_method text, + example text +); +SET enable_seqscan = off; +DO $$ +DECLARE + r record; + plan_json json; + node_type text; + query text; + r1 text; + r2 text; +BEGIN + FOR r IN + SELECT idx_dir, idx_nf, qry_dir, qry_nf, sign + FROM unnest(ARRAY['+','-']) AS sign, + unnest(ARRAY['ASC','DESC']) AS idx_dir, + unnest(ARRAY['FIRST','LAST']) AS idx_nf, + unnest(ARRAY['ASC','DESC']) AS qry_dir, + unnest(ARRAY['FIRST','LAST']) AS qry_nf + LOOP + EXECUTE format('CREATE INDEX slope_nulls_tmp_idx ON slope_nulls_tmp (v %s NULLS %s)', + r.idx_dir, r.idx_nf); + query := format('SELECT floor(0.5 %s v) as x FROM slope_nulls_tmp ORDER BY 1 %s NULLS %s', + r.sign, r.qry_dir, r.qry_nf); + EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) + INTO plan_json; + + set enable_seqscan = on; + set enable_indexscan = off; + set enable_indexonlyscan = off; + execute 'SELECT string_agg(coalesce(x::text, ''NULL''), '','') FROM (' || query || ') tmp(x)' into r1; + set enable_seqscan = off; + set enable_indexscan = on; + set enable_indexonlyscan = on; + execute 'SELECT string_agg(coalesce(x::text, ''NULL''), '','') FROM (' || query || ') tmp(x)' into r2; + if r1 <> r2 then + raise exception 'r1 <> r2'; + end if; + node_type := plan_json->0->'Plan'->>'Node Type'; + INSERT INTO slope_nulls_results VALUES ( + r.sign, + r.idx_dir || ' NULLS ' || r.idx_nf, + r.qry_dir || ' NULLS ' || r.qry_nf, + CASE WHEN node_type IN ('Index Only Scan', 'Index Scan') + THEN plan_json->0->'Plan'->>'Scan Direction' + ELSE node_type + END, + r2 + ); + EXECUTE format('DROP INDEX slope_nulls_tmp_idx'); + END LOOP; +END; +$$; +SELECT * FROM slope_nulls_results ORDER BY query_order, sign, index_order; + sign | index_order | query_order | scan_method | example 
+------+------------------+------------------+-------------+------------ + + | ASC NULLS FIRST | ASC NULLS FIRST | Forward | NULL,1,2 + + | ASC NULLS LAST | ASC NULLS FIRST | Sort | NULL,1,2 + + | DESC NULLS FIRST | ASC NULLS FIRST | Sort | NULL,1,2 + + | DESC NULLS LAST | ASC NULLS FIRST | Backward | NULL,1,2 + - | ASC NULLS FIRST | ASC NULLS FIRST | Sort | NULL,-2,-1 + - | ASC NULLS LAST | ASC NULLS FIRST | Backward | NULL,-2,-1 + - | DESC NULLS FIRST | ASC NULLS FIRST | Forward | NULL,-2,-1 + - | DESC NULLS LAST | ASC NULLS FIRST | Sort | NULL,-2,-1 + + | ASC NULLS FIRST | ASC NULLS LAST | Sort | 1,2,NULL + + | ASC NULLS LAST | ASC NULLS LAST | Forward | 1,2,NULL + + | DESC NULLS FIRST | ASC NULLS LAST | Backward | 1,2,NULL + + | DESC NULLS LAST | ASC NULLS LAST | Sort | 1,2,NULL + - | ASC NULLS FIRST | ASC NULLS LAST | Backward | -2,-1,NULL + - | ASC NULLS LAST | ASC NULLS LAST | Sort | -2,-1,NULL + - | DESC NULLS FIRST | ASC NULLS LAST | Sort | -2,-1,NULL + - | DESC NULLS LAST | ASC NULLS LAST | Forward | -2,-1,NULL + + | ASC NULLS FIRST | DESC NULLS FIRST | Sort | NULL,2,1 + + | ASC NULLS LAST | DESC NULLS FIRST | Backward | NULL,2,1 + + | DESC NULLS FIRST | DESC NULLS FIRST | Forward | NULL,2,1 + + | DESC NULLS LAST | DESC NULLS FIRST | Sort | NULL,2,1 + - | ASC NULLS FIRST | DESC NULLS FIRST | Forward | NULL,-1,-2 + - | ASC NULLS LAST | DESC NULLS FIRST | Sort | NULL,-1,-2 + - | DESC NULLS FIRST | DESC NULLS FIRST | Sort | NULL,-1,-2 + - | DESC NULLS LAST | DESC NULLS FIRST | Backward | NULL,-1,-2 + + | ASC NULLS FIRST | DESC NULLS LAST | Backward | 2,1,NULL + + | ASC NULLS LAST | DESC NULLS LAST | Sort | 2,1,NULL + + | DESC NULLS FIRST | DESC NULLS LAST | Sort | 2,1,NULL + + | DESC NULLS LAST | DESC NULLS LAST | Forward | 2,1,NULL + - | ASC NULLS FIRST | DESC NULLS LAST | Sort | -1,-2,NULL + - | ASC NULLS LAST | DESC NULLS LAST | Forward | -1,-2,NULL + - | DESC NULLS FIRST | DESC NULLS LAST | Backward | -1,-2,NULL + - | DESC NULLS LAST | DESC NULLS LAST | 
Sort | -1,-2,NULL +(32 rows) + +RESET enable_seqscan; +DROP TABLE slope_nulls_results; +DROP TABLE slope_nulls_tmp; +-- Cleanup +RESET enable_hashagg; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index fef89848d25..d35b20a6bf9 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -63,6 +63,9 @@ test: sanity_check # ---------- test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update delete namespace prepared_xacts +# SLOPE optimization test +test: slope + # ---------- # Another group of parallel tests # ---------- diff --git a/src/test/regress/sql/slope.sql b/src/test/regress/sql/slope.sql new file mode 100644 index 00000000000..58cc4f1114c --- /dev/null +++ b/src/test/regress/sql/slope.sql @@ -0,0 +1,217 @@ +-- +-- SLOPE (Scalar function Leveraging Ordered Path Evaluation) +-- Test that monotonic functions can use indexes for ordering +-- + +-- Create test table with various data types +CREATE TABLE slope_src ( + id serial PRIMARY KEY, + v_int2 int2, + v_int4 int4, + v_int8 int8, + v_float4 float4, + v_float8 float8, + v_numeric numeric, + ts timestamp, + tstz timestamptz +); + +-- Insert some test data +INSERT INTO slope_src (v_int2, v_int4, v_int8, v_float4, v_float8, v_numeric, ts, tstz) +SELECT + (i % 100)::int2, + i, + i::int8, + i::float4, + i::float8, + i::numeric, + '2020-01-01'::timestamp + (i || ' hours')::interval, + '2020-01-01'::timestamptz + (i || ' hours')::interval +FROM generate_series(1, 1000) i; + +-- Create indexes on the columns we'll test +CREATE INDEX slope_src_v_int4_idx ON slope_src (v_int4); +CREATE INDEX slope_src_v_int8_idx ON slope_src (v_int8); +CREATE INDEX slope_src_v_float8_idx ON slope_src (v_float8); +CREATE INDEX slope_src_v_numeric_idx ON slope_src (v_numeric); +CREATE INDEX slope_src_ts_idx ON slope_src (ts); +CREATE INDEX 
slope_src_tstz_idx ON slope_src (tstz); + +-- Analyze to get good statistics +ANALYZE slope_src; + +-- Disable hash aggregation to force group aggregate plan +SET enable_hashagg = off; + +-- +-- Test GROUP BY with monotonic function +-- + +-- Basic: floor(float8) should use index on v_float8 +explain (costs off, verbose) +select floor(v_float8), count(*) from slope_src group by 1; + +-- ceil(float8) should use index on v_float8 +explain (costs off, verbose) +select ceil(v_float8), count(*) from slope_src group by 1; + +-- floor(numeric) should use index on v_numeric +explain (costs off, verbose) +select floor(v_numeric), count(*) from slope_src group by 1; + +-- timestamp::date cast should use index on ts +explain (costs off, verbose) +select ts::date, count(*) from slope_src group by 1; + +-- date_trunc on timestamp should use index +explain (costs off, verbose) +select date_trunc('day', ts), count(*) from slope_src group by 1; + +-- date_trunc on timestamptz should use index +explain (costs off, verbose) +select date_trunc('day', tstz), count(*) from slope_src group by 1; + +-- +-- Test arithmetic operations +-- + +-- Addition: v_int4 + 10 is increasing in v_int4 +explain (costs off, verbose) +select v_int4 + 10, count(*) from slope_src group by 1; + +-- Subtraction: v_int4 - 10 is increasing in v_int4 +explain (costs off, verbose) +select v_int4 - 10, count(*) from slope_src group by 1; + +-- Multiplication by positive constant: v_int4 * 2 is increasing +explain (costs off, verbose) +select v_int4 * 2, count(*) from slope_src group by 1; + +-- Division by positive constant: v_int4 / 2 is increasing +explain (costs off, verbose) +select v_int4 / 2, count(*) from slope_src group by 1; + +-- +-- Test decreasing functions (should use backward scan) +-- + +-- Unary minus: -v_int4 is decreasing in v_int4 +explain (costs off, verbose) +select -v_int4, count(*) from slope_src group by 1; + +-- Subtraction from constant: 1000 - v_int4 is decreasing in v_int4 +explain 
(costs off, verbose) +select 1000 - v_int4, count(*) from slope_src group by 1; + +-- Multiplication by negative constant: v_int4 * (-2) is decreasing +explain (costs off, verbose) +select v_int4 * (-2), count(*) from slope_src group by 1; + +-- Division by negative constant: v_int4 / (-2) is decreasing +explain (costs off, verbose) +select v_int4 / (-2), count(*) from slope_src group by 1; + +-- +-- Test ORDER BY with monotonic function +-- + +-- ORDER BY floor(v_float8) should use index +explain (costs off, verbose) +select floor(v_float8), v_float8 from slope_src order by 1 limit 10; + +-- ORDER BY -v_int4 DESC should use forward scan (decreasing + DESC = forward) +explain (costs off, verbose) +select -v_int4 from slope_src order by 1 desc limit 10; + +-- ORDER BY -v_int4 ASC should use backward scan (decreasing + ASC = backward) +explain (costs off, verbose) +select -v_int4 from slope_src order by 1 limit 10; + +-- +-- Test nested monotonic function +-- + +-- floor(floor(x)) should still use index +explain (costs off, verbose) +select floor(floor(v_float8)), count(*) from slope_src group by 1; + +-- floor(v + 1) should use index +explain (costs off, verbose) +select floor(v_float8 + 1), count(*) from slope_src group by 1; + +-- +-- Test all index/query direction+nulls combinations for SLOPE. +-- Increasing case (sign '+'): the scan uses the index when direction and nulls +-- both agree (Forward) or are both flipped (Backward); otherwise a Sort is +-- required. Decreasing case (sign '-'): only the direction test is inverted. 
+-- +CREATE TABLE slope_nulls_tmp (v float8); +INSERT INTO slope_nulls_tmp VALUES (1), (NULL), (2); +ANALYZE slope_nulls_tmp; + +CREATE TEMPORARY TABLE slope_nulls_results ( + sign text, index_order text, query_order text, scan_method text, + example text +); + +SET enable_seqscan = off; + +DO $$ +DECLARE + r record; + plan_json json; + node_type text; + query text; + r1 text; + r2 text; +BEGIN + FOR r IN + SELECT idx_dir, idx_nf, qry_dir, qry_nf, sign + FROM unnest(ARRAY['+','-']) AS sign, + unnest(ARRAY['ASC','DESC']) AS idx_dir, + unnest(ARRAY['FIRST','LAST']) AS idx_nf, + unnest(ARRAY['ASC','DESC']) AS qry_dir, + unnest(ARRAY['FIRST','LAST']) AS qry_nf + LOOP + EXECUTE format('CREATE INDEX slope_nulls_tmp_idx ON slope_nulls_tmp (v %s NULLS %s)', + r.idx_dir, r.idx_nf); + query := format('SELECT floor(0.5 %s v) as x FROM slope_nulls_tmp ORDER BY 1 %s NULLS %s', + r.sign, r.qry_dir, r.qry_nf); + EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) + INTO plan_json; + + set enable_seqscan = on; + set enable_indexscan = off; + set enable_indexonlyscan = off; + execute 'SELECT string_agg(coalesce(x::text, ''NULL''), '','') FROM (' || query || ') tmp(x)' into r1; + set enable_seqscan = off; + set enable_indexscan = on; + set enable_indexonlyscan = on; + execute 'SELECT string_agg(coalesce(x::text, ''NULL''), '','') FROM (' || query || ') tmp(x)' into r2; + if r1 <> r2 then + raise exception 'r1 <> r2'; + end if; + node_type := plan_json->0->'Plan'->>'Node Type'; + INSERT INTO slope_nulls_results VALUES ( + r.sign, + r.idx_dir || ' NULLS ' || r.idx_nf, + r.qry_dir || ' NULLS ' || r.qry_nf, + CASE WHEN node_type IN ('Index Only Scan', 'Index Scan') + THEN plan_json->0->'Plan'->>'Scan Direction' + ELSE node_type + END, + r2 + ); + EXECUTE format('DROP INDEX slope_nulls_tmp_idx'); + END LOOP; +END; +$$; + +SELECT * FROM slope_nulls_results ORDER BY query_order, sign, index_order; + +RESET enable_seqscan; +DROP TABLE slope_nulls_results; +DROP TABLE slope_nulls_tmp; + +-- 
Cleanup +RESET enable_hashagg; -- 2.53.0