From 7d668752ceb49b901571a96d156e0219da4e7c1f Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia
Date: Wed, 25 Feb 2026 23:00:32 +0300
Subject: [PATCH v6 2/3] Use O(1) selectivity formula for eqsel/neqsel IN/ALL

Replace per-element iteration in ScalarArrayOpExpr selectivity
estimation with a closed-form probability formula when all elements
share the same eqsel()/neqsel() semantics. Preserves existing
independence/disjoint models while reducing planning cost for large
IN/ALL lists from O(N) to O(1). Special handling added for unique
columns using 1/reltuples.
---
 src/backend/utils/adt/selfuncs.c | 157 +++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index eef3f0375a5..f6091a576d8 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -184,6 +184,9 @@ get_relation_stats_hook_type get_relation_stats_hook = NULL;
 get_index_stats_hook_type get_index_stats_hook = NULL;
 
 static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
+static Selectivity calculate_combined_selectivity(Selectivity s2, int num_elems,
+												  bool useOr,
+												  bool isEquality, bool isInequality);
 static double eqjoinsel_inner(FmgrInfo *eqproc, Oid collation,
 							  Oid hashLeft, Oid hashRight,
 							  VariableStatData *vardata1, VariableStatData *vardata2,
@@ -1893,6 +1896,61 @@ strip_array_coercion(Node *node)
 	return node;
 }
 
+/*
+ * calculate_combined_selectivity
+ *
+ * Combine selectivities of N identical ScalarArrayOpExpr elements.
+ *
+ * This function assumes that all elements of the IN/ANY or ALL list
+ * have the same per-element selectivity s2, and computes the overall
+ * selectivity without iterating over the elements.
+ *
+ * With OR semantics (x = ANY (...)):
+ *		independence model:	1 - (1 - s2)^N
+ *		disjoint model:		N * s2
+ *
+ * With AND semantics (x <> ALL (...)):
+ *		independence model:	s2^N
+ *		disjoint model:		1 - N * (1 - s2)
+ *
+ * The disjoint result is used only for "= ANY" / "<> ALL" and only when
+ * it lies in [0,1]; otherwise the independence model applies.
+ */
+static Selectivity
+calculate_combined_selectivity(Selectivity s2, int num_elems, bool useOr, bool isEquality, bool isInequality)
+{
+	Selectivity result;
+	Selectivity disjoint;
+	bool		disjoint_applies;
+
+	/*
+	 * Work out the disjoint-model estimate for this combinator and note
+	 * whether the operator kind permits using it at all.
+	 */
+	if (useOr)
+	{
+		disjoint_applies = isEquality;
+		disjoint = s2 * num_elems;
+	}
+	else
+	{
+		disjoint_applies = isInequality;
+		disjoint = 1.0 + num_elems * (s2 - 1.0);
+	}
+
+	/* The disjoint estimate wins only if it is a valid probability. */
+	if (disjoint_applies && disjoint >= 0.0 && disjoint <= 1.0)
+		result = disjoint;
+	else if (useOr)
+		result = 1.0 - pow(1.0 - s2, num_elems);
+	else
+		result = pow(s2, num_elems);
+
+	CLAMP_PROBABILITY(result);
+
+	return result;
+}
+
 /*
  * scalararraysel - Selectivity of ScalarArrayOpExpr Node.
  */
@@ -2030,6 +2088,72 @@ scalararraysel(PlannerInfo *root,
 						  elmlen, elmbyval, elmalign,
 						  &elem_values, &elem_nulls, &num_elems);
 
+		/*
+		 * Shortcut for equality/inequality operators in restriction clauses.
+		 *
+		 * Rather than estimating every array element separately, try to
+		 * derive one per-element selectivity (s2) that holds for all of
+		 * them and combine it in O(1) time with a closed-form formula.
+		 * s2 stays negative until such an estimate has been found.
+		 */
+		if (!is_join_clause && (isEquality || isInequality))
+		{
+			Selectivity s2 = -1.0;
+			VariableStatData vardata;
+			bool		var_on_left;
+			Node	   *other_op = NULL;
+
+			/*
+			 * Look for a clause shaped like "var OP something" or
+			 * "something OP var" and fetch the variable's statistics.
+			 * Failing that, a default per-element estimate is used.
+			 */
+			if (get_restriction_variable(root, clause->args, varRelid, &vardata, &other_op, &var_on_left))
+			{
+				/*
+				 * Match neqsel() first: a "<>" operator without a
+				 * negator always gets the default estimate.
+				 *
+				 * Otherwise, fast path for unique columns: "=" selects
+				 * at most one row, i.e. 1 / reltuples, and "<>" is the
+				 * complement of that minus the NULL fraction, which is
+				 * taken as zero when there is no statistics tuple.  If
+				 * neither case applies, s2 stays negative.
+				 */
+				if (isInequality && !OidIsValid(get_negator(operator)))
+					s2 = 1.0 - DEFAULT_EQ_SEL;
+				else if (vardata.isunique && vardata.rel && vardata.rel->tuples >= 1.0)
+				{
+					double		nullfrac = 0.0;
+
+					if (HeapTupleIsValid(vardata.statsTuple))
+						nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
+
+					s2 = 1.0 / vardata.rel->tuples;
+					if (isInequality)
+						s2 = 1.0 - s2 - nullfrac;
+				}
+
+				ReleaseVariableStats(vardata);
+
+				if (s2 >= 0.0)
+				{
+					CLAMP_PROBABILITY(s2);
+
+					s1 = calculate_combined_selectivity(s2, num_elems, useOr, isEquality, isInequality);
+
+					return s1;
+				}
+			}
+			else
+			{
+				s2 = (isInequality) ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
+				s1 = calculate_combined_selectivity(s2, num_elems, useOr, isEquality, isInequality);
+
+				return s1;
+			}
+		}
+
 		/*
 		 * For generic operators, we assume the probability of success is
 		 * independent for each array element. But for "= ANY" or "<> ALL",
@@ -2105,6 +2229,39 @@ scalararraysel(PlannerInfo *root,
 		get_typlenbyval(arrayexpr->element_typeid,
 						&elmlen, &elmbyval);
 
+		/*
+		 * Try to avoid per-element selectivity calculation for ScalarArrayOpExpr.
+		 *
+		 * For equality/inequality operators in restriction clauses,
+		 * attempt to derive a single per-element selectivity (s2) and
+		 * combine it in O(1) time using a closed-form formula instead
+		 * of iterating over all elements.
+		 */
+		if (!is_join_clause && (isEquality || isInequality))
+		{
+			int			num_elems = list_length(arrayexpr->elements);
+			Node	   *other_operand = NULL;
+			bool		var_is_left;
+			VariableStatData var_stats;
+
+			/*
+			 * When the clause cannot be matched to "variable OP something"
+			 * (in either order), every element gets the same default
+			 * estimate, so combine them at once.  Otherwise just release
+			 * the statistics and let the code below handle the clause.
+			 */
+			if (get_restriction_variable(root, clause->args, varRelid,
+										 &var_stats, &other_operand, &var_is_left))
+				ReleaseVariableStats(var_stats);
+			else
+			{
+				Selectivity s2;
+
+				s2 = isInequality ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
+				return calculate_combined_selectivity(s2, num_elems, useOr, isEquality, isInequality);
+			}
+		}
+
 		/*
 		 * We use the assumption of disjoint probabilities here too, although
 		 * the odds of equal array elements are rather higher if the elements
-- 
2.34.1