From 75f4fc95c840843e87d6f1a5425077c360e21316 Mon Sep 17 00:00:00 2001 From: Alexandre Felipe Date: Wed, 11 Feb 2026 20:34:28 +0000 Subject: [PATCH 1/3] SLOPE Analysis Machinery This commit introduces a data structure that describes how a function varies with respect to each of its arguments, and functions in pathkeys.c that analyse an expression and determine whether it is monotonic with respect to some variable. The main goal of this change is to signal to the planner that an expression can be sorted indirectly. This allows the planner to find better plans. Motivation: Consider a table data with an indexed timestamp column ts. The query `SELECT ts::date, count(1) FROM data GROUP BY 1;` requires ts::date to be sorted, but the planner is not aware of the fact that ts::date will be ordered whenever ts is ordered. One can specify how a function varies with respect to each of its arguments, using the MonotonicFunction enum MONOTONICFUNC_* BOTH: Doesn't depend on the argument (here for completeness) INCREASING: The function is never decreasing. DECREASING: The function is never increasing. NONE: The function can be either increasing or decreasing. The focus here is on INCREASING and DECREASING, as those allow taking advantage of the order of the underlying variables to satisfy the sort order of the expression. Added one line to the .gitignore file because I like to keep data related to this in the project work area, e.g. ".install", ".dbdata", ".patches", and I don't want those cluttering my git diffs. 
---
 .gitignore                            |   3 +
 src/backend/optimizer/path/pathkeys.c | 359 +++++++++++++++++++++++++-
 src/include/nodes/plannodes.h         |   5 +
 src/include/nodes/supportnodes.h      |  34 +++
 4 files changed, 394 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4e911395fe3..2f38a9a9077 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,6 @@ lib*.pc
 /Release/
 /tmp_install/
 /portlock/
+
+# ignore hidden files
+.*
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 5eb71635d15..75455dafffc 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -19,11 +19,14 @@
 
 #include "access/stratnum.h"
 #include "catalog/pg_opfamily.h"
+#include "fmgr.h"
 #include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
+#include "parser/parse_oper.h"
 #include "partitioning/partbounds.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/lsyscache.h"
@@ -1352,6 +1355,275 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 	return result;
 }
 
+/*
+ * expr_matches_index_col
+ *		Check if an expression matches any index column for any relation.
+ *
+ * This is used to determine if an expression can be satisfied directly by
+ * an index without requiring monotonic unwrapping.
+ */
+static bool
+expr_matches_index_col(PlannerInfo *root, Expr *expr)
+{
+	int			rti;
+
+	/* Check all base relations */
+	for (rti = 1; rti < root->simple_rel_array_size; rti++)
+	{
+		RelOptInfo *rel = root->simple_rel_array[rti];
+		ListCell   *lc;
+
+		if (rel == NULL)
+			continue;
+
+		/* Skip non-baserels */
+		if (rel->reloptkind != RELOPT_BASEREL)
+			continue;
+
+		/* Check each index on this relation */
+		foreach(lc, rel->indexlist)
+		{
+			IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+			ListCell   *indexlc;
+			int			col = 0;
+
+			/* Check each column of the index */
+			foreach(indexlc, index->indextlist)
+			{
+				TargetEntry *indextle = (TargetEntry *) lfirst(indexlc);
+
+				/* Only check key columns, not INCLUDE columns */
+				if (col >= index->nkeycolumns)
+					break;
+
+				/* Check if the expression matches this index column */
+				if (equal(expr, indextle->expr))
+					return true;
+
+				col++;
+			}
+		}
+	}
+
+	return false;
+}
+
+/*
+ * get_expr_slope
+ *		Recursively determine the monotonicity slope of an expression.
+ *
+ * Returns the slope of 'expr' with respect to some underlying variable:
+ *	 MONOTONICFUNC_INCREASING: monotonically increasing
+ *	 MONOTONICFUNC_DECREASING: monotonically decreasing
+ *	 MONOTONICFUNC_BOTH: constant (no non-const variables)
+ *	 MONOTONICFUNC_NONE: cannot determine monotonicity
+ *
+ * For an INCREASING or DECREASING result, *underlying_expr is set to the
+ * variable the slope refers to; otherwise its value is not meaningful.
+ *
+ * The contribution of each argument to the final slope of the function is
+ * determined by the slope of the function with respect to an argument
+ * and the slope of the underlying expression passed to it, combined as
+ * follows (BOTH is tested first, then NONE):
+ *
+ *	 BOTH + any  = BOTH  (argument is constant or ignored -> no effect)
+ *	 NONE + NONE = NONE  (one undefined -> undefined)
+ *	 NONE + INC  = NONE
+ *	 NONE + DEC  = NONE
+ *	 INC  + INC  = INC   (same direction -> preserves)
+ *	 DEC  + DEC  = INC   (both flip -> preserves)
+ *	 INC  + DEC  = DEC   (opposite direction -> flips)
+ *	 DEC  + INC  = DEC   (opposite direction -> flips)
+ *	 (a + b) = (b + a)
+ *
+ * Contributions of several non-constant arguments are accepted only if they
+ * all refer to the same variable with the same combined direction.
+ */
+static MonotonicFunction
+get_expr_slope(Expr *expr, Expr **underlying_expr)
+{
+	Oid			funcid;
+	List	   *args;
+	Oid			prosupport;
+	SupportRequestMonotonic req;
+	ListCell   *lc;
+	int			i;
+	MonotonicFunction func_slope;
+	Expr	   *found_expr;
+
+	/* Constants have no slope - they don't vary */
+	if (IsA(expr, Const))
+		return MONOTONICFUNC_BOTH;
+
+	/* A variable is ascending with respect to itself */
+	if (IsA(expr, Var))
+	{
+		*underlying_expr = expr;
+		return MONOTONICFUNC_INCREASING;
+	}
+
+	/*
+	 * RelabelType is a no-op type coercion (binary compatible types).
+	 * It preserves monotonicity of the underlying expression.
+	 */
+	if (IsA(expr, RelabelType))
+	{
+		RelabelType *rt = (RelabelType *) expr;
+
+		return get_expr_slope((Expr *) rt->arg, underlying_expr);
+	}
+
+	/* Handle FuncExpr and OpExpr */
+	if (IsA(expr, FuncExpr))
+	{
+		FuncExpr   *fexpr = (FuncExpr *) expr;
+
+		funcid = fexpr->funcid;
+		args = fexpr->args;
+	}
+	else if (IsA(expr, OpExpr))
+	{
+		OpExpr	   *opexpr = (OpExpr *) expr;
+
+		set_opfuncid(opexpr);
+		funcid = opexpr->opfuncid;
+		args = opexpr->args;
+	}
+	else
+	{
+		/* Unknown expression type - can't determine slope */
+		return MONOTONICFUNC_NONE;
+	}
+
+	/*
+	 * A call without arguments is constant for the whole query unless it is
+	 * volatile: now() qualifies as a constant, random() does not.
+	 */
+	if (args == NIL)
+		return contain_volatile_functions((Node *) expr) ?
+			MONOTONICFUNC_NONE : MONOTONICFUNC_BOTH;
+
+	/*
+	 * The NULL placement requested for the sort key carries over to the
+	 * underlying variable only if NULL inputs produce NULL outputs, so
+	 * accept strict functions only.
+	 */
+	if (!func_strict(funcid))
+		return MONOTONICFUNC_NONE;
+
+	/* Check for prosupport function */
+	prosupport = get_func_support(funcid);
+	if (!OidIsValid(prosupport))
+		return MONOTONICFUNC_NONE;
+
+	/* Call prosupport to get slope pattern */
+	req.type = T_SupportRequestMonotonic;
+	req.expr = (Node *) expr;
+	req.slopes = NULL;
+	req.nslopes = 0;
+
+	if (DatumGetPointer(OidFunctionCall1(prosupport, PointerGetDatum(&req))) == NULL)
+		return MONOTONICFUNC_NONE;
+
+	if (req.slopes == NULL || req.nslopes <= 0)
+		return MONOTONICFUNC_NONE;
+
+	/*
+	 * Combine slopes from all arguments.  Start assuming the result is
+	 * constant, then update as we find non-constant arguments.
+	 */
+	func_slope = MONOTONICFUNC_BOTH;
+	found_expr = NULL;
+	i = 0;
+
+	foreach(lc, args)
+	{
+		Expr	   *arg = (Expr *) lfirst(lc);
+		Expr	   *arg_underlying = NULL;
+		MonotonicFunction arg_expr_slope;
+		MonotonicFunction func_arg_slope;
+		MonotonicFunction combined;
+
+		/* Get the slope of this argument expression (recursive) */
+		arg_expr_slope = get_expr_slope(arg, &arg_underlying);
+
+		/* Get the function's declared slope for this argument */
+		func_arg_slope = (i < req.nslopes) ? req.slopes[i] : MONOTONICFUNC_NONE;
+
+		/* If either is BOTH (constant), this arg doesn't affect the result */
+		if (arg_expr_slope == MONOTONICFUNC_BOTH ||
+			func_arg_slope == MONOTONICFUNC_BOTH)
+		{
+			i++;
+			continue;
+		}
+
+		/* If either is NONE, we can't determine the result */
+		if (arg_expr_slope == MONOTONICFUNC_NONE ||
+			func_arg_slope == MONOTONICFUNC_NONE)
+			return MONOTONICFUNC_NONE;
+
+		/* Combine: same direction -> INC, opposite -> DEC */
+		combined = (func_arg_slope == arg_expr_slope) ?
+			MONOTONICFUNC_INCREASING : MONOTONICFUNC_DECREASING;
+
+		/* Check for conflicting slopes from different arguments */
+		if (func_slope == MONOTONICFUNC_BOTH)
+		{
+			func_slope = combined;
+			found_expr = arg_underlying;
+		}
+		else
+		{
+			/*
+			 * We already have a non-constant underlying variable.
+			 * Check if the new contribution is from the same variable
+			 * in the same direction.
+			 */
+			if (func_slope != combined || !equal(found_expr, arg_underlying))
+				return MONOTONICFUNC_NONE;
+		}
+
+		i++;
+	}
+
+	*underlying_expr = found_expr;
+	return func_slope;
+}
+
+/*
+ * get_expr_monotonic_var
+ *		If 'expr' is monotonic in exactly one variable, return that variable
+ *		and set *is_decreasing accordingly.  Otherwise return NULL.
+ *
+ * This is the main entry point for determining if an expression like
+ * "round(1 + (2 - x), 2)" is monotonic, which would allow using an index
+ * on 'x' to satisfy an ORDER BY on the expression.
+ */
+static Expr *
+get_expr_monotonic_var(Expr *expr, bool *is_decreasing)
+{
+	Expr	   *underlying = NULL;
+	MonotonicFunction slope;
+
+	*is_decreasing = false;
+
+	slope = get_expr_slope(expr, &underlying);
+
+	if (slope == MONOTONICFUNC_INCREASING)
+	{
+		*is_decreasing = false;
+		return underlying;
+	}
+	else if (slope == MONOTONICFUNC_DECREASING)
+	{
+		*is_decreasing = true;
+		return underlying;
+	}
+
+	return NULL;
+}
+
 /*
  * make_pathkeys_for_sortclauses_extended
  *		Generate a pathkeys list that represents the sort order specified
@@ -1410,13 +1682,86 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 									  bms_make_singleton(root->group_rtindex),
 									  NULL);
 		}
-		pathkey = make_pathkey_from_sortop(root,
-										   sortkey,
-										   sortcl->sortop,
-										   sortcl->reverse_sort,
-										   sortcl->nulls_first,
-										   sortcl->tleSortGroupRef,
-										   true);
+		/*
+		 * If the sort key is a monotonic expression, consider unwrapping it
+		 * to build the pathkey from the underlying variable instead.  This
+		 * allows an index on 'x' to satisfy ORDER BY / GROUP BY on 'f(x)'
+		 * when f is monotonic.
+		 *
+		 * However, only do this if the original expression doesn't already
+		 * match an index directly.  If there's an index on 'f(x)', we want
+		 * to use it without unwrapping (e.g., ORDER BY -a with index on -a).
+		 *
+		 * This handles composed monotonic functions recursively, e.g.
+		 * round(1 + (2 - x), 4) unwraps to x with descending slope.
+		 *
+		 * IMPORTANT: Only unwrap the LAST sort key.  For earlier keys, a
+		 * non-injective monotonic function (like round, floor, date_trunc)
+		 * can map multiple distinct index values to the same output value.
+		 * If we unwrap such a function, subsequent sort keys won't be
+		 * properly ordered within those groups.  For example, with index
+		 * on (x, y) and ORDER BY round(x), y: distinct x values that map
+		 * to the same round(x) won't have their y values properly sorted.
+		 */
+		{
+			bool		mono_decreasing;
+			Expr	   *mono_var;
+			Oid			sortop = sortcl->sortop;
+			bool		reverse_sort = sortcl->reverse_sort;
+			bool		nulls_first = sortcl->nulls_first;
+			bool		is_last_sortclause = (lnext(*sortclauses, l) == NULL);
+
+			/*
+			 * Only try monotonic unwrapping if:
+			 * 1. This is the last sort clause (to avoid ordering issues)
+			 * 2. The original expression doesn't match an index directly
+			 * 3. The unwrapped expression does match an index
+			 */
+			mono_var = get_expr_monotonic_var(sortkey, &mono_decreasing);
+			if (mono_var != NULL &&
+				is_last_sortclause &&
+				!expr_matches_index_col(root, sortkey) &&
+				expr_matches_index_col(root, mono_var))
+			{
+				Oid			mono_sortop = sortop;
+
+				/*
+				 * Re-derive the sort operator for the underlying type.  If
+				 * the types match we can keep the existing operator.  Do not
+				 * demand an operator (needLT = false): a type without a
+				 * default ordering must merely disable the unwrapping, not
+				 * make the query fail.
+				 */
+				if (exprType((Node *) mono_var) != exprType((Node *) sortkey))
+					get_sort_group_operators(exprType((Node *) mono_var),
+											 false, false, false,
+											 &mono_sortop, NULL, NULL,
+											 NULL);
+
+				if (OidIsValid(mono_sortop))
+				{
+					/*
+					 * A decreasing function reverses the direction only.
+					 * NULL inputs still produce the NULL values of the sort
+					 * key, which must stay where the query asked for them,
+					 * so nulls_first is left untouched.
+					 */
+					if (mono_decreasing)
+						reverse_sort = !reverse_sort;
+
+					sortop = mono_sortop;
+					sortkey = mono_var;
+				}
+			}
+
+			pathkey = make_pathkey_from_sortop(root,
+											   sortkey,
+											   sortop,
+											   reverse_sort,
+											   nulls_first,
+											   sortcl->tleSortGroupRef,
+											   true);
+		}
 		if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
 		{
 			/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index b6185825fcb..221ab29a3ee 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1829,6 +1829,11 @@ typedef struct PlanInvalItem
  * than the previous call.  A monotonically decreasing function cannot yield a
  * higher value on subsequent calls, and a function which is both must return
  * the same value on each call.
+ *
+ * Used both for window function run conditions (SupportRequestWFuncMonotonic)
+ * and for per-argument monotonicity of scalar functions
+ * (SupportRequestMonotonic), where it enables the planner to use an index
+ * on 'x' to satisfy ORDER BY / GROUP BY on 'f(x)'.
  */
 typedef enum MonotonicFunction
 {
diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h
index 132a2bcd8af..561758736e8 100644
--- a/src/include/nodes/supportnodes.h
+++ b/src/include/nodes/supportnodes.h
@@ -445,4 +445,38 @@ typedef struct SupportRequestModifyInPlace
 	int			paramid;		/* ID of Param(s) representing variable */
 } SupportRequestModifyInPlace;
 
+/* ----------
+ * The Monotonic request allows the support function to indicate that a
+ * scalar function is monotonically increasing or decreasing with respect
+ * to one or more of its arguments.  This is used by the planner to
+ * recognize that an index ordering on 'x' can satisfy an ORDER BY or
+ * GROUP BY on 'f(x)' when f is monotonic in x, eliminating unnecessary
+ * Sort nodes.
+ *
+ * The support function should point 'slopes' at a MonotonicFunction array
+ * with one entry per function argument, indicating the monotonicity of the
+ * function in that argument.
+ *
+ * Inputs:
+ *	'expr' is the FuncExpr or OpExpr node for the function/operator call.
+ *
+ * Outputs (set by prosupport function before returning):
+ *	'nslopes' is the number of slopes defined in the array.
+ *	'slopes' points to a MonotonicFunction array (one per argument up to
+ *	nslopes).  Arguments beyond nslopes are treated as MONOTONICFUNC_NONE.
+ *	Return the request pointer on success, or NULL if not monotonic.
+ * ----------
+ */
+typedef struct SupportRequestMonotonic
+{
+	NodeTag		type;
+
+	/* Input fields: */
+	Node	   *expr;			/* FuncExpr or OpExpr */
+
+	/* Output fields (set by prosupport function): */
+	int			nslopes;		/* number of slopes in array */
+	const MonotonicFunction *slopes;	/* array of slopes, one per arg */
+} SupportRequestMonotonic;
+
 #endif							/* SUPPORTNODES_H */
-- 
2.53.0