From 9bb423ae562b383192267ad12448c9ff7b250138 Mon Sep 17 00:00:00 2001 From: Alexandre Felipe Date: Wed, 11 Feb 2026 20:34:28 +0000 Subject: [PATCH v4 1/3] SLOPE Analysis Machinery This commit introduces a data structure to describe how a function behaves with respect to each of its arguments, and functions in pathkeys.c to analyse an expression and determine whether it is monotonic with respect to some variable. The main goal of this change is to signal to the planner that an expression can be sorted indirectly. This allows the planner to find better plans. Motivation: Consider a table data with an indexed timestamp column ts. The query `SELECT ts::date, count(1) FROM data GROUP BY 1;` requires ts::date to be sorted, but the planner is not aware of the fact that ts::date will be ordered whenever ts is ordered. One can specify how a function varies with respect to each of its arguments, using the MonotonicFunction enum MONOTONICFUNC_* BOTH: Doesn't depend on the argument (here for completeness) INCREASING: The function is never decreasing. DECREASING: The function is never increasing. NONE: The function can be either increasing or decreasing. The focus here is INCREASING and DECREASING, as those allow taking advantage of the order of underlying variables to satisfy the sort order of the expression. Added one line to the .gitignore file because I like to keep data related to this in the project work area, e.g. ".install", ".dbdata", ".patches", and I don't want those cluttering my git diffs. 
--- .gitignore | 3 + src/backend/optimizer/path/pathkeys.c | 419 ++++++++++++++++++++++++-- src/include/nodes/plannodes.h | 5 + src/include/nodes/supportnodes.h | 34 +++ 4 files changed, 429 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 4e911395fe3..2f38a9a9077 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,6 @@ lib*.pc /Release/ /tmp_install/ /portlock/ + +# ignore hidden files +.* \ No newline at end of file diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 5eb71635d15..e67284465c7 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -19,11 +19,14 @@ #include "access/stratnum.h" #include "catalog/pg_opfamily.h" +#include "fmgr.h" #include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "parser/parse_oper.h" #include "partitioning/partbounds.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" @@ -37,6 +40,10 @@ static bool matches_boolean_partition_clause(RestrictInfo *rinfo, int partkeycol); static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); +static MonotonicFunction get_expr_slope(Expr *expr, Expr **underlying_expr); +static bool ec_member_is_monotonic_in(EquivalenceClass *ec, Expr *target, + MonotonicFunction *slope_out); +static bool pathkey_is_monotonic_of(PathKey *pk_query, PathKey *pk_path); /**************************************************************************** @@ -338,19 +345,16 @@ compare_pathkeys(List *keys1, List *keys2) * pathkeys_contained_in * Common special case of compare_pathkeys: we just want to know * if keys2 are at least as well sorted as keys1. + * + * This also recognizes monotonic function relationships via + * pathkeys_count_contained_in. 
*/ bool pathkeys_contained_in(List *keys1, List *keys2) { - switch (compare_pathkeys(keys1, keys2)) - { - case PATHKEYS_EQUAL: - case PATHKEYS_BETTER2: - return true; - default: - break; - } - return false; + int n_common; + + return pathkeys_count_contained_in(keys1, keys2, &n_common); } /* @@ -549,10 +553,116 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path) return infos; } +/* + * ec_member_is_monotonic_in + * Check if any member of 'ec' is a monotonic function of 'target'. + * + * If found, sets *slope_out to the slope (INCREASING or DECREASING) and + * returns true. The EC must not be volatile. + */ +static pg_attribute_always_inline bool +ec_member_is_monotonic_in(EquivalenceClass *ec, Expr *target, + MonotonicFunction *slope_out) +{ + ListCell *lc; + + if (ec->ec_has_volatile) + return false; + + foreach(lc, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc); + Expr *underlying = NULL; + MonotonicFunction slope; + + slope = get_expr_slope(em->em_expr, &underlying); + if (slope != MONOTONICFUNC_INCREASING && + slope != MONOTONICFUNC_DECREASING) + continue; + + if (equal(underlying, target)) + { + *slope_out = slope; + return true; + } + } + + return false; +} + +/* + * pathkey_is_monotonic_of + * Check if pk_query (desired sort key) is a monotonic function of pk_path + * (available sort key), considering sort direction and NULLS ordering. + * + * For example, if pk_query is for "val + 1 ASC" and pk_path is for "val ASC", + * this returns true because val + 1 is monotonically increasing in val, so + * data sorted by val is also sorted by val + 1. + * + * The sort direction and nulls_first must be compatible: a monotonically + * increasing function preserves ASC ordering; a decreasing function reverses + * it. NULLS ordering must also match after accounting for direction reversal. 
+ */ +static bool +pathkey_is_monotonic_of(PathKey *pk_query, PathKey *pk_path) +{ + EquivalenceClass *ec_path = pk_path->pk_eclass; + + if (ec_path->ec_has_volatile) + return false; + + foreach_node(EquivalenceMember, em_p, ec_path->ec_members) + { + MonotonicFunction slope; + + if (!ec_member_is_monotonic_in(pk_query->pk_eclass, + em_p->em_expr, &slope)) + continue; + + /* + * Verify sort direction and nulls_first compatibility. + * + * For INCREASING: the path's sort direction directly satisfies + * the query's direction. For DECREASING: the path must have + * the opposite direction. + */ + if (slope == MONOTONICFUNC_INCREASING) + { + if (pk_query->pk_cmptype == pk_path->pk_cmptype && + pk_query->pk_nulls_first == pk_path->pk_nulls_first) + return true; + } + else + { + CompareType reverse_cmptype; + + if (pk_path->pk_cmptype == COMPARE_LT) + reverse_cmptype = COMPARE_GT; + else if (pk_path->pk_cmptype == COMPARE_GT) + reverse_cmptype = COMPARE_LT; + else + continue; + + if (pk_query->pk_cmptype == reverse_cmptype && + pk_query->pk_nulls_first != pk_path->pk_nulls_first) + return true; + } + } + + return false; +} + /* * pathkeys_count_contained_in * Same as pathkeys_contained_in, but also sets length of longest * common prefix of keys1 and keys2. + * + * This also recognizes monotonic function relationships: if a desired + * pathkey (in keys1) is a monotonic function of an available pathkey + * (in keys2), the ordering requirement is considered satisfied. + * However, after a monotonic match we stop, because non-injective + * monotonic functions (like round, date_trunc) can map multiple input + * values to the same output, so subsequent pathkeys may not be ordered. */ bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common) @@ -592,6 +702,19 @@ pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common) if (pathkey1 != pathkey2) { + /* + * Not an exact match. 
Check if pathkey1 (desired) is a monotonic + * function of pathkey2 (available). If so, the available ordering + * satisfies this key, but we must stop here: subsequent keys may + * not be properly ordered due to non-injectivity. + */ + if (pathkey_is_monotonic_of(pathkey1, pathkey2)) + { + n++; + *n_common = n; + return (lnext(keys1, key1) == NULL); + } + *n_common = n; return false; } @@ -804,17 +927,64 @@ build_index_pathkeys(PlannerInfo *root, else { /* - * Boolean index keys might be redundant even if they do not - * appear in an EquivalenceClass, because of our special treatment - * of boolean equality conditions --- see the comment for - * indexcol_is_bool_constant_for_query(). If that applies, we can - * continue to examine lower-order index columns. Otherwise, the - * sort key is not an interesting sort order for this query, so we - * should stop considering index columns; any lower-order sort - * keys won't be useful either. + * The index column has no existing EquivalenceClass. Check if + * any query pathkey is a monotonic function of this index column. + * If so, create an EC for the index column so the index can be + * recognized as providing useful ordering. + * + * We check the monotonic relationship first (without creating an + * EC) by directly examining query pathkey expressions, and only + * create the EC if we find a match. 
*/ - if (!indexcol_is_bool_constant_for_query(root, index, i)) - break; + bool found_monotonic = false; + + if (root->query_pathkeys != NIL && + index->rel->reloptkind == RELOPT_BASEREL) + { + foreach_node(PathKey, qpk, root->query_pathkeys) + { + MonotonicFunction slope; + + if (!ec_member_is_monotonic_in(qpk->pk_eclass, + indexkey, &slope)) + continue; + + cpathkey = make_pathkey_from_sortinfo(root, + indexkey, + index->sortopfamily[i], + index->opcintype[i], + index->indexcollations[i], + reverse_sort, + nulls_first, + 0, + index->rel->relids, + true); + + if (cpathkey && + !pathkey_is_redundant(cpathkey, retval)) + retval = lappend(retval, cpathkey); + + found_monotonic = true; + break; + } + } + + if (!found_monotonic) + { + /* + * Boolean index keys might be redundant even if they do not + * appear in an EquivalenceClass, because of our special + * treatment of boolean equality conditions --- see the + * comment for indexcol_is_bool_constant_for_query(). If that + * applies, we can continue to examine lower-order index + * columns. Otherwise, the sort key is not an interesting + * sort order for this query, so we should stop considering + * index columns; any lower-order sort keys won't be useful + * either. + */ + if (!indexcol_is_bool_constant_for_query(root, index, i)) + break; + } } i++; @@ -1352,6 +1522,176 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, return result; } +/* + * get_expr_slope + * Recursively determine the monotonicity slope of an expression. 
+ * + * Returns the slope of 'expr' with respect to some underlying variable: + * MONOTONICFUNC_INCREASING: monotonically increasing + * MONOTONICFUNC_DECREASING: monotonically decreasing + * MONOTONICFUNC_BOTH: constant (no non-const variables) + * MONOTONICFUNC_NONE: cannot determine monotonicity + * + * + * The contribution of each argument to the final slope of the function is + * determined by the slope of the function with respect to an argument + * and the slope of the underlying expression passed to it, combined as + * follows: + * + * NONE + NONE = NONE (one undefined -> undefined) + * NONE + INC = NONE + * NONE + DEC = NONE + * BOTH + NONE = NONE + * BOTH + BOTH = BOTH + * BOTH + INC = BOTH (one constant -> constant) + * BOTH + DEC = BOTH + * INC + INC = INC (same direction -> preserves) + * DEC + DEC = INC (both flip -> preserves) + * INC + DEC = DEC (opposite direction -> flips) + * (a + b) = (b + a) + */ +static MonotonicFunction +get_expr_slope(Expr *expr, Expr **underlying_expr) +{ + Oid funcid; + List *args; + Oid prosupport; + SupportRequestMonotonic req; + ListCell *lc; + int i; + MonotonicFunction func_slope; + Expr *found_expr; + + /* Constants have no slope - they don't vary */ + if (IsA(expr, Const)) + return MONOTONICFUNC_BOTH; + + /* A variable is ascending with respect to itself */ + if (IsA(expr, Var)) + { + *underlying_expr = expr; + return MONOTONICFUNC_INCREASING; + } + + /* + * RelabelType is a no-op type coercion (binary compatible types). + * It preserves monotonicity of the underlying expression. 
+ */ + if (IsA(expr, RelabelType)) + { + RelabelType *rt = (RelabelType *) expr; + + return get_expr_slope((Expr *) rt->arg, underlying_expr); + } + + /* Handle FuncExpr and OpExpr */ + if (IsA(expr, FuncExpr)) + { + FuncExpr *fexpr = (FuncExpr *) expr; + + funcid = fexpr->funcid; + args = fexpr->args; + } + else if (IsA(expr, OpExpr)) + { + OpExpr *opexpr = (OpExpr *) expr; + + set_opfuncid(opexpr); + funcid = opexpr->opfuncid; + args = opexpr->args; + } + else + { + /* Not supported */ + return MONOTONICFUNC_NONE; + } + + if (list_length(args) == 0) + return MONOTONICFUNC_BOTH; + + /* Check for prosupport function */ + prosupport = get_func_support(funcid); + if (!OidIsValid(prosupport)) + return MONOTONICFUNC_NONE; + + /* Call prosupport to get slope pattern */ + req.type = T_SupportRequestMonotonic; + req.expr = (Node *) expr; + /* prosupport function sets req.slopes to a static + * pointer, no pfree is required. + */ + req.slopes = NULL; + req.nslopes = 0; + + if (DatumGetPointer(OidFunctionCall1(prosupport, PointerGetDatum(&req))) == NULL) + return MONOTONICFUNC_NONE; + + if (req.slopes == NULL || req.nslopes <= 0) + return MONOTONICFUNC_NONE; + + /* + * Combine slopes from all arguments. Start assuming the result is + * constant, then update as we find non-constant arguments. + */ + func_slope = MONOTONICFUNC_BOTH; + found_expr = NULL; + i = 0; + + foreach(lc, args) + { + Expr *arg = (Expr *) lfirst(lc); + Expr *arg_underlying = NULL; + MonotonicFunction arg_expr_slope; + MonotonicFunction func_arg_slope; + MonotonicFunction combined; + + /* Get the slope of this argument expression (recursive) */ + arg_expr_slope = get_expr_slope(arg, &arg_underlying); + + /* Get the function's declared slope for this argument */ + func_arg_slope = (i < req.nslopes) ? 
req.slopes[i] : MONOTONICFUNC_NONE; + + /* If either is BOTH (constant), this arg doesn't affect the result */ + if (arg_expr_slope == MONOTONICFUNC_BOTH || + func_arg_slope == MONOTONICFUNC_BOTH) + { + i++; + continue; + } + + /* If either is NONE, we can't determine the result */ + if (arg_expr_slope == MONOTONICFUNC_NONE || + func_arg_slope == MONOTONICFUNC_NONE) + return MONOTONICFUNC_NONE; + + /* Combine: same direction -> INC, opposite -> DEC */ + combined = (func_arg_slope == arg_expr_slope) ? + MONOTONICFUNC_INCREASING : MONOTONICFUNC_DECREASING; + + if (func_slope == MONOTONICFUNC_BOTH) + { + /* first non-constant contribution */ + func_slope = combined; + found_expr = arg_underlying; + } + else + { + /* + * We already have a non-constant underlying variable. + * Check if the new contribution is from the same variable + * in the same direction. + */ + if (func_slope != combined || !equal(found_expr, arg_underlying)) + return MONOTONICFUNC_NONE; + } + + i++; + } + + *underlying_expr = found_expr; + return func_slope; +} + /* * make_pathkeys_for_sortclauses_extended * Generate a pathkeys list that represents the sort order specified @@ -1411,12 +1751,12 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root, NULL); } pathkey = make_pathkey_from_sortop(root, - sortkey, - sortcl->sortop, - sortcl->reverse_sort, - sortcl->nulls_first, - sortcl->tleSortGroupRef, - true); + sortkey, + sortcl->sortop, + sortcl->reverse_sort, + sortcl->nulls_first, + sortcl->tleSortGroupRef, + true); if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref) { /* @@ -2163,7 +2503,7 @@ count_common_leading_pathkeys_ordered(List *keys1, List *keys2) /* * count_common_leading_pathkeys_unordered * Returns the number of leading PathKeys in 'keys2' which exist in - * 'keys1'. + * 'keys1'. Also recognizes monotonic function relationships. 
*/ static int count_common_leading_pathkeys_unordered(List *keys1, List *keys2) @@ -2175,13 +2515,28 @@ count_common_leading_pathkeys_unordered(List *keys1, List *keys2) return 0; /* walk keys2 and search for matching PathKeys in keys1 */ - foreach_node(PathKey, pathkey, keys2) + foreach_node(PathKey, pathkey2, keys2) { - /* - * return the number of matches so far as soon as keys1 doesn't - * contain the given keys2 key. - */ - if (!list_member_ptr(keys1, pathkey)) + bool found = false; + + if (list_member_ptr(keys1, pathkey2)) + { + found = true; + } + else + { + /* Check if any key in keys1 is a monotonic function of pathkey2 */ + foreach_node(PathKey, pathkey1, keys1) + { + if (pathkey_is_monotonic_of(pathkey1, pathkey2)) + { + found = true; + break; + } + } + } + + if (!found) break; ncommon++; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index b6185825fcb..221ab29a3ee 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1829,6 +1829,11 @@ typedef struct PlanInvalItem * than the previous call. A monotonically decreasing function cannot yield a * higher value on subsequent calls, and a function which is both must return * the same value on each call. + * + * Used both for window function run conditions (SupportRequestWFuncMonotonic) + * and for per-argument monotonicity of scalar functions + * (SupportRequestMonotonic), where it enables the planner to use an index + * on 'x' to satisfy ORDER BY / GROUP BY on 'f(x)'. 
*/ typedef enum MonotonicFunction { diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h index 132a2bcd8af..561758736e8 100644 --- a/src/include/nodes/supportnodes.h +++ b/src/include/nodes/supportnodes.h @@ -445,4 +445,38 @@ typedef struct SupportRequestModifyInPlace int paramid; /* ID of Param(s) representing variable */ } SupportRequestModifyInPlace; +/* ---------- + * The Monotonic request allows the support function to indicate that a + * scalar function is monotonically increasing or decreasing with respect + * to one or more of its arguments. This is used by the planner to + * recognize that an index ordering on 'x' can satisfy an ORDER BY or + * GROUP BY on 'f(x)' when f is monotonic in x, eliminating unnecessary + * Sort nodes. + * + * The support function should return a const MonotonicFunction * array with + * one entry per function argument, indicating the monotonicity of each + * argument. + * + * Inputs: + * 'expr' is the FuncExpr or OpExpr node for the function/operator call. + * + * Outputs (set by prosupport function before returning): + * 'nslopes' is the number of slopes defined in the array. + * 'slopes' points to a MonotonicFunction array (one per argument up to + * nslopes). Arguments beyond nslopes are treated as MONOTONICFUNC_NONE. + * Return the request pointer on success, or NULL if not monotonic. + * ---------- + */ +typedef struct SupportRequestMonotonic +{ + NodeTag type; + + /* Input fields: */ + Node *expr; /* FuncExpr or OpExpr */ + + /* Output fields (set by prosupport function): */ + int nslopes; /* number of slopes in array */ + const MonotonicFunction *slopes; /* array of slopes, one per arg */ +} SupportRequestMonotonic; + #endif /* SUPPORTNODES_H */ -- 2.53.0