From b0ee52b6e091ee93f4a35a2983f69592a95e6742 Mon Sep 17 00:00:00 2001 From: Alexandre Felipe Date: Wed, 11 Feb 2026 20:34:28 +0000 Subject: [PATCH 1/3] SLOPE Analysis Machinery This commit introduces a datastructure to describe a function with respect to its different arguments. And functions in pathkeys.c to analyse expressions and determine whether it is monotonic with respect to some variable. The main goal of this change is to signal to the planner that an expression can be sorted indirectly. This allows the planner to find better plans. Motivation: Consider a table data with an indexed timestamp column ts, the query `SELECT ts::date, count(1) GROUP BY 1;` requires ts::date to be sorted, but the planner is not aware of the fact that ts::date will be ordered whenever ts is ordered. One can specify how a function varies with respect to some underlying variable CONST: Doesn't depend on the argument (here for completeness) INC: The function is never decreasing. DEC: The function is never increasing. ANY: The function can be either increasing or decreasing. The focus here is INC and DEC, as those will allow to sort expressions indirectly. Added one line to the gitignore file because I like to keep data related to this in the project workarea, e.g. ".install", ".dbdata", ".patches", and I don't want those cluttering my git diffs. Hopefully more people find it useful. 
--- .gitignore | 3 + src/backend/optimizer/path/pathkeys.c | 329 +++++++++++++++++++++++++- src/include/nodes/supportnodes.h | 42 ++++ 3 files changed, 367 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 4e911395fe3..2f38a9a9077 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,6 @@ lib*.pc /Release/ /tmp_install/ /portlock/ + +# ignore hidden files +.* \ No newline at end of file diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 5eb71635d15..dfc8db511fd 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -19,11 +19,14 @@ #include "access/stratnum.h" #include "catalog/pg_opfamily.h" +#include "fmgr.h" #include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "parser/parse_oper.h" #include "partitioning/partbounds.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" @@ -1352,6 +1355,255 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, return result; } +/* + * expr_matches_index_col + * Check if an expression matches a key column of any index on a base rel. + * + * This is used to determine if an expression can be satisfied directly by + * an index without requiring monotonic unwrapping. 
+ */ +static bool +expr_matches_index_col(PlannerInfo *root, Expr *expr) +{ + int rti; + + /* Check all base relations */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + ListCell *lc; + + if (rel == NULL) + continue; + + /* Skip non-baserels */ + if (rel->reloptkind != RELOPT_BASEREL) + continue; + + /* Check each index on this relation */ + foreach(lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + ListCell *indexlc; + int col = 0; + + /* Check each column of the index */ + foreach(indexlc, index->indextlist) + { + TargetEntry *indextle = (TargetEntry *) lfirst(indexlc); + + /* Only check key columns, not INCLUDE columns */ + if (col >= index->nkeycolumns) + break; + + /* Check if the expression matches this index column */ + if (equal(expr, indextle->expr)) + return true; + + col++; + } + } + } + + return false; +} + +/* + * get_expr_slope + * Recursively determine the monotonicity slope of an expression. 
+ * + * Returns the slope of 'expr' with respect to some underlying variable: + * SLOPE_ASC: monotonically increasing + * SLOPE_DESC: monotonically decreasing + * SLOPE_CONST: constant (no non-const variables) + * SLOPE_ANY: cannot determine monotonicity + * + * If the result is SLOPE_ASC or SLOPE_DESC, *underlying_expr is set to the + * underlying variable. The result is computed by combining each argument's + * declared slope with the slope of the expression passed, as follows: + * + * ANY + ANY = ANY + * ANY + ASC = ANY + * ANY + DESC = ANY + * CONST + ANY = CONST + * CONST + CONST = CONST + * CONST + ASC = CONST + * CONST + DESC = CONST + * ASC + ASC = ASC + * DESC + DESC = ASC + * ASC + DESC = DESC + * DESC + ASC = DESC + * (a + b) = (b + a) + */ +static Slope +get_expr_slope(Expr *expr, Expr **underlying_expr) +{ + Oid funcid; + List *args; + Oid prosupport; + SupportRequestMonotonic req; + ListCell *lc; + int i; + Slope func_slope; + Expr *found_expr; + + /* Constants have no slope - they don't vary */ + if (IsA(expr, Const)) + return SLOPE_CONST; + + /* A variable is ascending with respect to itself */ + if (IsA(expr, Var)) + { + *underlying_expr = expr; + return SLOPE_ASC; + } + + /* + * RelabelType is a no-op type coercion (binary compatible types). + * It preserves monotonicity of the underlying expression. 
+ */ + if (IsA(expr, RelabelType)) + { + RelabelType *rt = (RelabelType *) expr; + + return get_expr_slope((Expr *) rt->arg, underlying_expr); + } + + /* Handle FuncExpr and OpExpr */ + if (IsA(expr, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) expr; + + funcid = fexpr->funcid; + args = fexpr->args; + } + else if (IsA(expr, OpExpr)) + { + OpExpr *opexpr = (OpExpr *) expr; + + set_opfuncid(opexpr); + funcid = opexpr->opfuncid; + args = opexpr->args; + } + else + { + /* Unknown expression type - can't determine slope */ + return SLOPE_ANY; + } + + if (list_length(args) == 0) + return SLOPE_CONST; + + /* Check for prosupport function */ + prosupport = get_func_support(funcid); + if (!OidIsValid(prosupport)) + return SLOPE_ANY; + + /* Call prosupport to get slope pattern */ + req.type = T_SupportRequestMonotonic; + req.expr = (Node *) expr; + req.slopes = NULL; + req.nslopes = 0; + + if (DatumGetPointer(OidFunctionCall1(prosupport, PointerGetDatum(&req))) == NULL) + return SLOPE_ANY; + + if (req.slopes == NULL || req.nslopes <= 0) + return SLOPE_ANY; + + /* + * Combine slopes from all arguments. Start assuming the result is + * constant, then update as we find non-constant arguments. + */ + func_slope = SLOPE_CONST; + found_expr = NULL; + i = 0; + + foreach(lc, args) + { + Expr *arg = (Expr *) lfirst(lc); + Expr *arg_underlying = NULL; + Slope arg_expr_slope; + Slope func_arg_slope; + Slope combined; + + /* Get the slope of this argument expression (recursive) */ + arg_expr_slope = get_expr_slope(arg, &arg_underlying); + + /* Get the function's declared slope for this argument */ + func_arg_slope = (i < req.nslopes) ? 
req.slopes[i] : SLOPE_ANY; + + /* If either is CONST, this arg doesn't affect the result slope */ + if (arg_expr_slope == SLOPE_CONST || func_arg_slope == SLOPE_CONST) + { + i++; + continue; + } + + /* If either is ANY, we can't determine the result */ + if (arg_expr_slope == SLOPE_ANY || func_arg_slope == SLOPE_ANY) + return SLOPE_ANY; + + /* Combine slope */ + combined = (func_arg_slope == arg_expr_slope) ? SLOPE_ASC : SLOPE_DESC; + + /* Check for conflicting slopes from different arguments */ + if (func_slope == SLOPE_CONST) + { + func_slope = combined; + found_expr = arg_underlying; + } + else + { + /* + * We already have a non-constant underlying variable. + * Check that the new contribution is from the same variable + * and in the same direction. + */ + if (func_slope != combined || !equal(found_expr, arg_underlying)) + return SLOPE_ANY; + } + + i++; + } + + *underlying_expr = found_expr; + return func_slope; +} + +/* + * get_expr_monotonic_var + * If 'expr' is monotonic in exactly one variable, return that variable + * and set *is_decreasing accordingly. Otherwise return NULL. + * + * This is the main entry point for determining if an expression like + * "round(1 + (2 - x), 2)" is monotonic, which would allow using an index + * on 'x' to satisfy an ORDER BY on the expression. 
+ */ +static Expr * +get_expr_monotonic_var(Expr *expr, bool *is_decreasing) +{ + Expr *underlying = NULL; + Slope slope; + + *is_decreasing = false; + + slope = get_expr_slope(expr, &underlying); + + if (slope == SLOPE_ASC) + { + *is_decreasing = false; + return underlying; + } + else if (slope == SLOPE_DESC) + { + *is_decreasing = true; + return underlying; + } + + return NULL; +} + /* * make_pathkeys_for_sortclauses_extended * Generate a pathkeys list that represents the sort order specified @@ -1410,13 +1662,76 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root, bms_make_singleton(root->group_rtindex), NULL); } - pathkey = make_pathkey_from_sortop(root, - sortkey, - sortcl->sortop, - sortcl->reverse_sort, - sortcl->nulls_first, - sortcl->tleSortGroupRef, - true); + /* + * If the sort key is a monotonic expression, consider unwrapping it + * to build the pathkey from the underlying variable instead. This + * allows an index on 'x' to satisfy ORDER BY / GROUP BY on 'f(x)' + * when f is monotonic. + * + * However, only do this if the original expression doesn't already + * match an index directly. If there's an index on 'f(x)', we want + * to use it without unwrapping (e.g., ORDER BY -a with index on -a). + * + * This handles composed monotonic functions recursively, e.g. + * round(1 + (2 - x), 4) unwraps to x with descending slope. + * + * IMPORTANT: Only unwrap the LAST sort key. For earlier keys, a + * non-injective monotonic function (like round, floor, date_trunc) + * can map multiple distinct index values to the same output value. + * If we unwrap such a function, subsequent sort keys won't be + * properly ordered within those groups. For example, with index + * on (x, y) and ORDER BY round(x), y: distinct x values that map + * to the same round(x) won't have their y values properly sorted. 
+ */ + { + bool mono_decreasing; + Expr *mono_var; + Oid sortop = sortcl->sortop; + bool reverse_sort = sortcl->reverse_sort; + bool nulls_first = sortcl->nulls_first; + bool is_last_sortclause = (lnext(*sortclauses, l) == NULL); + + /* + * Only try monotonic unwrapping if: + * 1. This is the last sort clause (to avoid ordering issues) + * 2. The original expression doesn't match an index directly + * 3. The unwrapped expression does match an index + */ + mono_var = get_expr_monotonic_var(sortkey, &mono_decreasing); + if (mono_var != NULL && + is_last_sortclause && + !expr_matches_index_col(root, sortkey) && + expr_matches_index_col(root, mono_var)) + { + if (mono_decreasing) + { + reverse_sort = !reverse_sort; + nulls_first = !nulls_first; + } + + /* + * Re-derive the sort operator for the underlying type. + * If the types match we can keep the existing operator. + */ + if (exprType((Node *) mono_var) != exprType((Node *) sortkey)) + { + get_sort_group_operators(exprType((Node *) mono_var), + true, false, false, + &sortop, NULL, NULL, + NULL); + } + + sortkey = mono_var; + } + + pathkey = make_pathkey_from_sortop(root, + sortkey, + sortop, + reverse_sort, + nulls_first, + sortcl->tleSortGroupRef, + true); + } if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref) { /* diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h index 132a2bcd8af..383ba25a138 100644 --- a/src/include/nodes/supportnodes.h +++ b/src/include/nodes/supportnodes.h @@ -445,4 +445,46 @@ typedef struct SupportRequestModifyInPlace int paramid; /* ID of Param(s) representing variable */ } SupportRequestModifyInPlace; +/* ---------- + * The Monotonic request allows the support function to indicate that a + * scalar function is monotonically increasing or decreasing with respect + * to one or more of its arguments. 
This is used by the planner to + * recognize that an index ordering on 'x' can satisfy an ORDER BY or + * GROUP BY on 'f(x)' when f is monotonic in x, eliminating unnecessary + * Sort nodes. + * + * The support function should supply a const Slope * array with one entry + * per function argument, indicating the monotonicity in each argument. + * The planner combines these entries and gives up on conflicting slopes. + * + * Inputs: + * 'expr' is the FuncExpr or OpExpr node for the function/operator call. + * + * Outputs (set by prosupport function before returning): + * 'nslopes' is the number of slopes defined in the array. + * 'slopes' points to a Slope array (one entry per argument up to nslopes). + * Arguments beyond nslopes are treated as SLOPE_ANY. + * Return the request pointer on success, or NULL if not monotonic. + * ---------- + */ + +/* Slope type for representing monotonicity */ +typedef int8 Slope; +#define SLOPE_ANY 0 /* 0b00 - unknown/either (safe default) */ +#define SLOPE_ASC 1 /* 0b01 - ascending (descending blocked) */ +#define SLOPE_DESC 2 /* 0b10 - descending (ascending blocked) */ +#define SLOPE_CONST 3 /* 0b11 - constant (both blocked) */ + +typedef struct SupportRequestMonotonic +{ + NodeTag type; + + /* Input fields: */ + Node *expr; /* FuncExpr or OpExpr */ + + /* Output fields (set by prosupport function): */ + int nslopes; /* number of slopes in array */ + const Slope *slopes; /* array of slopes, one per argument */ +} SupportRequestMonotonic; + #endif /* SUPPORTNODES_H */ -- 2.40.0