diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index af2934a..7453329 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -23,23 +23,39 @@
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 
+#define CACHESEL_LOBOUND		0x0001	/* has var > something selectivity */
+#define CACHESEL_HIBOUND		0x0002	/* has var < something selectivity */
+#define CACHESEL_NULLTEST		0x0004	/* has var IS NULL selectivity */
+#define CACHESEL_NOTNULLTEST	0x0008	/* has var IS NOT NULL selectivity */
+#define CACHESEL_OTHERSTRICT	0x0010	/* has another OpExpr selectivity */
 
 /*
- * Data structure for accumulating info about possible range-query
- * clause pairs in clauselist_selectivity.
+ * Data structure for caching selectivity for clauselist_selectivity.
  */
-typedef struct RangeQueryClause
+typedef struct CachedSelectivityClause
 {
-	struct RangeQueryClause *next;		/* next in linked list */
+	struct CachedSelectivityClause *next;		/* next in linked list */
 	Node	   *var;			/* The common variable of the clauses */
-	bool		have_lobound;	/* found a low-bound clause yet? */
-	bool		have_hibound;	/* found a high-bound clause yet? */
+	int			selmask;		/* Bitmask of which sel types are stored */
 	Selectivity lobound;		/* Selectivity of a var > something clause */
 	Selectivity hibound;		/* Selectivity of a var < something clause */
-} RangeQueryClause;
+	Selectivity nullsel;		/* Selectivity of an IS NULL test */
+	Selectivity notnullsel;		/* Selectivity of an IS NOT NULL test */
+	Selectivity otherstrictsel; /* Selectivity of any other strict clauses */
+} CachedSelectivityClause;
 
-static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
-			   bool varonleft, bool isLTsel, Selectivity s2);
+
+static CachedSelectivityClause *findCachedSelectivityVar(
+						CachedSelectivityClause **cslist, Node *expr);
+static void addCachedSelectivityNullTest(CachedSelectivityClause **cslist,
+						Node *expr, Selectivity s2);
+static void addCachedSelectivityNotNullTest(CachedSelectivityClause **cslist,
+						Node *expr, Selectivity s2);
+static void addCachedSelectivityRangeVar(CachedSelectivityClause **cslist,
+						Node *expr, bool varonleft, bool isLTsel, Selectivity s2);
+static void addCachedSelectivityOtherStrictClause(
+						CachedSelectivityClause **cslist,
+						Node *expr, Selectivity s2);
 
 
 /****************************************************************************
@@ -60,27 +76,38 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
  * subclauses.  However, that's only right if the subclauses have independent
  * probabilities, and in reality they are often NOT independent.  So,
  * we want to be smarter where we can.
-
- * Currently, the only extra smarts we have is to recognize "range queries",
- * such as "x > 34 AND x < 42".  Clauses are recognized as possible range
- * query components if they are restriction opclauses whose operators have
+ *
+ * Currently we only handle OpExprs with 2 operands, and IS [NOT] NULL tests.
+ * We have extra smarts to recognize "range queries", such as
+ * "x > 34 AND x < 42".  These clauses are recognized as possible range query
+ * components if they are restriction opclauses whose operators have
  * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
- * We pair up clauses of this form that refer to the same variable.  An
- * unpairable clause of this kind is simply multiplied into the selectivity
- * product in the normal way.  But when we find a pair, we know that the
- * selectivities represent the relative positions of the low and high bounds
- * within the column's range, so instead of figuring the selectivity as
- * hisel * losel, we can figure it as hisel + losel - 1.  (To visualize this,
- * see that hisel is the fraction of the range below the high bound, while
- * losel is the fraction above the low bound; so hisel can be interpreted
- * directly as a 0..1 value but we need to convert losel to 1-losel before
- * interpreting it as a value.  Then the available range is 1-losel to hisel.
- * However, this calculation double-excludes nulls, so really we need
- * hisel + losel + null_frac - 1.)
+ * IS [NOT] NULL tests are handled too; it's a common enough, and perhaps
+ * innocent enough looking mistake to include an IS NOT NULL test along with
+ * another condition for the same var in a query, so it's useful in this case
+ * to treat the "IS NOT NULL" as a no-op.  However, since we only collect
+ * OpExprs and NullTests, we're only going to detect this NullTest issue in
+ * the cases where the other found qual is an OpExpr.
+ *
+ * We collect up clauses so that we're able to conditionally apply each
+ * selectivity based on the other clauses which have been seen for the same
+ * Var.  Any clause which is unpaired is simply multiplied into the
+ * selectivity product in the normal way.  But when we find a pair, we can
+ * conditionally apply the selectivities.  This also allows us to detect weird
+ * corner cases such as "x = 1 AND x IS NULL".  With range pairs, we know that
+ * the selectivities represent the relative positions of the low and high
+ * bounds within the column's range, so instead of figuring the selectivity
+ * as hisel * losel, we can figure it as hisel + losel - 1.  (To visualize
+ * this, see that hisel is the fraction of the range below the high bound,
+ * while losel is the fraction above the low bound; so hisel can be
+ * interpreted directly as a 0..1 value but we need to convert losel to
+ * 1-losel before interpreting it as a value.  Then the available range is
+ * 1-losel to hisel.  However, this calculation double-excludes nulls, so
+ * really we need hisel + losel + null_frac - 1.)
  *
- * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
- * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
- * yields an impossible (negative) result.
+ * For range pairs, if either selectivity is exactly DEFAULT_INEQ_SEL, we
+ * forget this equation and instead use DEFAULT_RANGE_INEQ_SEL.  The same
+ * applies if the equation yields an impossible (negative) result.
  *
  * A free side-effect is that we can recognize redundant inequalities such
  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
@@ -96,7 +123,7 @@ clauselist_selectivity(PlannerInfo *root,
 					   SpecialJoinInfo *sjinfo)
 {
 	Selectivity s1 = 1.0;
-	RangeQueryClause *rqlist = NULL;
+	CachedSelectivityClause *cslist = NULL;
 	ListCell   *l;
 
 	/*
@@ -140,6 +167,20 @@ clauselist_selectivity(PlannerInfo *root,
 		else
 			rinfo = NULL;
 
+		if (IsA(clause, NullTest))
+		{
+			NullTestType nulltesttype = ((NullTest *) clause)->nulltesttype;
+
+			if (nulltesttype == IS_NULL)
+				addCachedSelectivityNullTest(&cslist,
+									(Node *) ((NullTest *) clause)->arg, s2);
+			else if (nulltesttype == IS_NOT_NULL)
+				addCachedSelectivityNotNullTest(&cslist,
+									(Node *) ((NullTest *) clause)->arg, s2);
+
+			continue;		/* drop to loop bottom */
+		}
+
 		/*
 		 * See if it looks like a restriction clause with a pseudoconstant on
 		 * one side.  (Anything more complicated than that might not behave in
@@ -151,26 +192,35 @@ clauselist_selectivity(PlannerInfo *root,
 			OpExpr	   *expr = (OpExpr *) clause;
 			bool		varonleft = true;
 			bool		ok;
+			Node	   *leftexpr = (Node *) linitial(expr->args);
+			Node	   *rightexpr = (Node *) lsecond(expr->args);
 
 			if (rinfo)
 			{
 				ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
-					(is_pseudo_constant_clause_relids(lsecond(expr->args),
+					(is_pseudo_constant_clause_relids(rightexpr,
 													  rinfo->right_relids) ||
 					 (varonleft = false,
-					  is_pseudo_constant_clause_relids(linitial(expr->args),
+					  is_pseudo_constant_clause_relids(leftexpr,
 													   rinfo->left_relids)));
 			}
 			else
 			{
 				ok = (NumRelids(clause) == 1) &&
-					(is_pseudo_constant_clause(lsecond(expr->args)) ||
+					(is_pseudo_constant_clause(rightexpr) ||
 					 (varonleft = false,
-					  is_pseudo_constant_clause(linitial(expr->args))));
+					  is_pseudo_constant_clause(leftexpr)));
 			}
 
 			if (ok)
 			{
+				Node	   *var;
+
+				if (varonleft)
+					var = leftexpr;
+				else
+					var = rightexpr;
+
 				/*
 				 * If it's not a "<" or ">" operator, just merge the
 				 * selectivity in generically.  But if it's the right oprrest,
@@ -179,16 +229,25 @@ clauselist_selectivity(PlannerInfo *root,
 				switch (get_oprrest(expr->opno))
 				{
 					case F_SCALARLTSEL:
-						addRangeClause(&rqlist, clause,
-									   varonleft, true, s2);
+						addCachedSelectivityRangeVar(&cslist, var,
+													 varonleft, true, s2);
 						break;
 					case F_SCALARGTSEL:
-						addRangeClause(&rqlist, clause,
-									   varonleft, false, s2);
+						addCachedSelectivityRangeVar(&cslist, var,
+													 varonleft, false, s2);
 						break;
 					default:
-						/* Just merge the selectivity in generically */
-						s1 = s1 * s2;
+
+						/*
+						 * Cache the selectivity of all other strict clauses.
+						 * For anything non-strict we just apply the
+						 * selectivity now, since we're currently unable to do
+						 * anything particularly smart with it.
+						 */
+						if (op_strict(expr->opno))
+							addCachedSelectivityOtherStrictClause(&cslist, var, s2);
+						else
+							s1 = s1 * s2;
 						break;
 				}
 				continue;		/* drop to loop bottom */
@@ -200,176 +259,259 @@ clauselist_selectivity(PlannerInfo *root,
 	}
 
 	/*
-	 * Now scan the rangequery pair list.
+	 * Now scan the cached selectivity list.
 	 */
-	while (rqlist != NULL)
+	while (cslist != NULL)
 	{
-		RangeQueryClause *rqnext;
+		CachedSelectivityClause *csnext;
 
-		if (rqlist->have_lobound && rqlist->have_hibound)
+		if ((cslist->selmask & CACHESEL_NULLTEST))
 		{
-			/* Successfully matched a pair of range clauses */
-			Selectivity s2;
-
 			/*
-			 * Exact equality to the default value probably means the
-			 * selectivity function punted.  This is not airtight but should
-			 * be good enough.
+			 * If the null test is not the only flag set then there can be no
+			 * matching rows at all.
 			 */
-			if (rqlist->hibound == DEFAULT_INEQ_SEL ||
-				rqlist->lobound == DEFAULT_INEQ_SEL)
+			if (cslist->selmask != CACHESEL_NULLTEST)
 			{
-				s2 = DEFAULT_RANGE_INEQ_SEL;
+				/* No break: keep looping so every list cell gets pfree'd */
+				s1 = 0.0;
 			}
 			else
-			{
-				s2 = rqlist->hibound + rqlist->lobound - 1.0;
+				s1 *= cslist->nullsel;
+		}
 
-				/* Adjust for double-exclusion of NULLs */
-				s2 += nulltestsel(root, IS_NULL, rqlist->var,
-								  varRelid, jointype, sjinfo);
+		/*
+		 * An IS NOT NULL test is a no-op if there are any other strict quals
+		 * on the same var, so only apply its selectivity when it is the sole
+		 * clause seen for this var; otherwise ignore it.
+		 */
+		else if (cslist->selmask == CACHESEL_NOTNULLTEST)
+			s1 *= cslist->notnullsel;
+
+		else
+		{
+			/* Check if both lobound and hibound were seen */
+			if ((cslist->selmask & (CACHESEL_LOBOUND | CACHESEL_HIBOUND)) ==
+				(CACHESEL_LOBOUND | CACHESEL_HIBOUND))
+			{
+				/* Successfully matched a pair of range clauses */
+				Selectivity s2;
 
 				/*
-				 * A zero or slightly negative s2 should be converted into a
-				 * small positive value; we probably are dealing with a very
-				 * tight range and got a bogus result due to roundoff errors.
-				 * However, if s2 is very negative, then we probably have
-				 * default selectivity estimates on one or both sides of the
-				 * range that we failed to recognize above for some reason.
+				 * Exact equality to the default value probably means the
+				 * selectivity function punted.  This is not airtight but
+				 * should be good enough.
 				 */
-				if (s2 <= 0.0)
+				if (cslist->hibound == DEFAULT_INEQ_SEL ||
+					cslist->lobound == DEFAULT_INEQ_SEL)
 				{
-					if (s2 < -0.01)
-					{
-						/*
-						 * No data available --- use a default estimate that
-						 * is small, but not real small.
-						 */
-						s2 = DEFAULT_RANGE_INEQ_SEL;
-					}
-					else
+					s2 = DEFAULT_RANGE_INEQ_SEL;
+				}
+				else
+				{
+					s2 = cslist->hibound + cslist->lobound - 1.0;
+
+					/* Adjust for double-exclusion of NULLs */
+					s2 += nulltestsel(root, IS_NULL, cslist->var,
+									  varRelid, jointype, sjinfo);
+
+					/*
+					 * A zero or slightly negative s2 should be converted into
+					 * a small positive value; we probably are dealing with a
+					 * very tight range and got a bogus result due to roundoff
+					 * errors.  However, if s2 is very negative, then we
+					 * probably have default selectivity estimates on one or
+					 * both sides of the range that we failed to recognize
+					 * above for some reason.
+					 */
+					if (s2 <= 0.0)
 					{
-						/*
-						 * It's just roundoff error; use a small positive
-						 * value
-						 */
-						s2 = 1.0e-10;
+						if (s2 < -0.01)
+						{
+							/*
+							 * No data available --- use a default estimate
+							 * that is small, but not real small.
+							 */
+							s2 = DEFAULT_RANGE_INEQ_SEL;
+						}
+						else
+						{
+							/*
+							 * It's just roundoff error; use a small positive
+							 * value
+							 */
+							s2 = 1.0e-10;
+						}
 					}
 				}
+				/* Merge in the selectivity of the pair of clauses */
+				s1 *= s2;
 			}
-			/* Merge in the selectivity of the pair of clauses */
-			s1 *= s2;
-		}
-		else
-		{
-			/* Only found one of a pair, merge it in generically */
-			if (rqlist->have_lobound)
-				s1 *= rqlist->lobound;
 			else
-				s1 *= rqlist->hibound;
+			{
+				/* Only found one of a range pair, merge it in generically */
+				if ((cslist->selmask & CACHESEL_LOBOUND))
+					s1 *= cslist->lobound;
+				else if ((cslist->selmask & CACHESEL_HIBOUND))
+					s1 *= cslist->hibound;
+			}
+
+			/* apply the selectivity for any other seen strict qual */
+			if ((cslist->selmask & CACHESEL_OTHERSTRICT))
+				s1 *= cslist->otherstrictsel;
 		}
+
 		/* release storage and advance */
-		rqnext = rqlist->next;
-		pfree(rqlist);
-		rqlist = rqnext;
+		csnext = cslist->next;
+		pfree(cslist);
+		cslist = csnext;
 	}
 
 	return s1;
 }
 
 /*
- * addRangeClause --- add a new range clause for clauselist_selectivity
+ * findCachedSelectivityVar
+ *		Find existing selectivity var, or add this var to the list.
+ */
+static CachedSelectivityClause *
+findCachedSelectivityVar(CachedSelectivityClause **cslist, Node *expr)
+{
+	CachedSelectivityClause *cselem;
+
+	for (cselem = *cslist; cselem; cselem = cselem->next)
+	{
+		/*
+		 * We use full equal() here because the "var" might be a function of
+		 * one or more attributes of the same relation...
+		 */
+		if (equal(expr, cselem->var))
+			return cselem;
+	}
+
+	/* not found -- add it */
+	cselem = (CachedSelectivityClause *) palloc(sizeof(CachedSelectivityClause));
+	cselem->var = expr;
+	cselem->selmask = 0;
+
+	cselem->next = *cslist;
+	*cslist = cselem;
+	return cselem;
+}
+
+
+/*
+ * addCachedSelectivityNullTest
+ *		Cache selectivity for an IS NULL test.
+ */
+static void
+addCachedSelectivityNullTest(CachedSelectivityClause **cslist, Node *expr,
+							 Selectivity s2)
+{
+	CachedSelectivityClause *cselem;
+
+	cselem = findCachedSelectivityVar(cslist, expr);
+
+	/* We can simply overwrite any previously cached selectivity here */
+	cselem->nullsel = s2;
+	cselem->selmask |= CACHESEL_NULLTEST;
+}
+
+/*
+ * addCachedSelectivityNotNullTest
+ *		Cache selectivity for an IS NOT NULL test.
+ */
+static void
+addCachedSelectivityNotNullTest(CachedSelectivityClause **cslist, Node *expr,
+								Selectivity s2)
+{
+	CachedSelectivityClause *cselem;
+
+	cselem = findCachedSelectivityVar(cslist, expr);
+
+	/* We can simply overwrite any previously cached selectivity here */
+	cselem->notnullsel = s2;
+	cselem->selmask |= CACHESEL_NOTNULLTEST;
+}
+
+/*
+ * addCachedSelectivityRangeVar
+ *		add a new range clause for clauselist_selectivity
  *
  * Here is where we try to match up pairs of range-query clauses
  */
 static void
-addRangeClause(RangeQueryClause **rqlist, Node *clause,
-			   bool varonleft, bool isLTsel, Selectivity s2)
+addCachedSelectivityRangeVar(CachedSelectivityClause **cslist, Node *expr,
+							 bool varonleft, bool isLTsel, Selectivity s2)
 {
-	RangeQueryClause *rqelem;
-	Node	   *var;
+	CachedSelectivityClause *cselem;
 	bool		is_lobound;
 
-	if (varonleft)
+	is_lobound = (varonleft != isLTsel);
+
+	cselem = findCachedSelectivityVar(cslist, expr);
+
+	if (is_lobound)
 	{
-		var = get_leftop((Expr *) clause);
-		is_lobound = !isLTsel;	/* x < something is high bound */
+		if (!(cselem->selmask & CACHESEL_LOBOUND))
+		{
+			cselem->selmask |= CACHESEL_LOBOUND;
+			cselem->lobound = s2;
+		}
+		else
+		{
+			/*------
+			 * We have found two similar low-bound clauses, such as
+			 * x > y AND x > z.
+			 * Keep only the more restrictive one.
+			 *------
+			 */
+			if (cselem->lobound > s2)
+				cselem->lobound = s2;
+		}
 	}
 	else
 	{
-		var = get_rightop((Expr *) clause);
-		is_lobound = isLTsel;	/* something < x is low bound */
-	}
-
-	for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
-	{
-		/*
-		 * We use full equal() here because the "var" might be a function of
-		 * one or more attributes of the same relation...
-		 */
-		if (!equal(var, rqelem->var))
-			continue;
-		/* Found the right group to put this clause in */
-		if (is_lobound)
+		if (!(cselem->selmask & CACHESEL_HIBOUND))
 		{
-			if (!rqelem->have_lobound)
-			{
-				rqelem->have_lobound = true;
-				rqelem->lobound = s2;
-			}
-			else
-			{
-
-				/*------
-				 * We have found two similar clauses, such as
-				 * x < y AND x < z.
-				 * Keep only the more restrictive one.
-				 *------
-				 */
-				if (rqelem->lobound > s2)
-					rqelem->lobound = s2;
-			}
+			cselem->selmask |= CACHESEL_HIBOUND;
+			cselem->hibound = s2;
 		}
 		else
 		{
-			if (!rqelem->have_hibound)
-			{
-				rqelem->have_hibound = true;
-				rqelem->hibound = s2;
-			}
-			else
-			{
 
-				/*------
-				 * We have found two similar clauses, such as
-				 * x > y AND x > z.
-				 * Keep only the more restrictive one.
-				 *------
-				 */
-				if (rqelem->hibound > s2)
-					rqelem->hibound = s2;
-			}
+			/*------
+			 * We have found two similar high-bound clauses, such as
+			 * x < y AND x < z.
+			 * Keep only the more restrictive one.
+			 *------
+			 */
+			if (cselem->hibound > s2)
+				cselem->hibound = s2;
 		}
-		return;
 	}
+}
 
-	/* No matching var found, so make a new clause-pair data structure */
-	rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
-	rqelem->var = var;
-	if (is_lobound)
-	{
-		rqelem->have_lobound = true;
-		rqelem->have_hibound = false;
-		rqelem->lobound = s2;
-	}
+/*
+ * addCachedSelectivityOtherStrictClause
+ *		Cache the selectivity of other OpExpr type expressions which are
+ *		strict
+ */
+static void
+addCachedSelectivityOtherStrictClause(CachedSelectivityClause **cslist, Node *expr,
+									  Selectivity s2)
+{
+	CachedSelectivityClause *cselem;
+
+	cselem = findCachedSelectivityVar(cslist, expr);
+
+	if ((cselem->selmask & CACHESEL_OTHERSTRICT))
+		cselem->otherstrictsel = cselem->otherstrictsel * s2;
 	else
 	{
-		rqelem->have_lobound = false;
-		rqelem->have_hibound = true;
-		rqelem->hibound = s2;
+		cselem->otherstrictsel = s2;
+		cselem->selmask |= CACHESEL_OTHERSTRICT;
 	}
-	rqelem->next = *rqlist;
-	*rqlist = rqelem;
 }
 
 /*