From 8c497b61344c499c69791571b2c8b2589c4facde Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Tue, 19 Jul 2022 22:05:36 +0200 Subject: [PATCH 4/4] Don't treat Var op Var as simple clauses in MCV Until now, statext_mcv_clauselist_selectivity defined simple clauses as those with a single attnum. This worked fine for (Var op Const) clauses, but for (Var op Var) that's no longer correct. We need to consider them complex even if both sides reference the same attribute. Note: Perhaps we should just treat (Var op Var) clauses referencing the same attribute as incompatible with extended statistics. After all it's not providing any information about other attributes. Although, it does restrict the relevant part of MCV, because (Var = Var) implies non-NULL. --- src/backend/statistics/extended_stats.c | 95 ++++++++++++++++--- src/backend/statistics/mcv.c | 4 +- .../statistics/extended_stats_internal.h | 2 +- 3 files changed, 83 insertions(+), 18 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 7bfde068aff..bbf564075c2 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1320,11 +1320,14 @@ choose_best_statistics(List *stats, char requiredkind, bool inh, * statext_is_compatible_clause. It needs to be split like this because * of recursion. The attnums bitmap is an input/output parameter collecting * attribute numbers from all compatible clauses (recursively). + * + * XXX The issimple variable is expected to be initialized by the caller, we + * just update it while recursively analyzing the current clause. */ static bool statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, Index relid, Bitmapset **attnums, - List **exprs) + List **exprs, bool *issimple) { /* Look inside any binary-compatible relabeling (as in examine_variable) */ if (IsA(clause, RelabelType)) @@ -1372,7 +1375,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, * Check if the expression has the right shape. This returns either one * or two expressions, depending on whether there is a Const. */ - if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL)) + if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL, issimple)) return false; /* @@ -1421,20 +1424,37 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, /* * Check all expressions by recursing. Var expressions are handled as * a special case (to match it to attnums etc.) + * + * An opclause is simple if it's (Expr op Const) or (Const op Expr). We + * have already checked the overall shape in examine_opclause_args, but + * we haven't checked the expressions are simple (i.e. pretty much Var), + * so we need to check that now. If we discover a complex expression, we + * consider the whole clause complex. */ foreach (lc, clause_exprs) { Node *clause_expr = (Node *) lfirst(lc); + /* + * XXX Shouldn't this try removing RelabelType, just like we do in + * statext_is_compatible_clause_internal? Otherwise we might treat + * Var (with a RelabelType on top) as complex expression. + */ if (IsA(clause_expr, Var)) { /* if the Var is incompatible, the whole clause is incompatible */ if (!statext_is_compatible_clause_internal(root, clause_expr, - relid, attnums, exprs)) + relid, attnums, exprs, + issimple)) return false; } else /* generic expression */ + { *exprs = lappend(*exprs, clause_expr); + + /* switch to false if there are any complex clauses */ + *issimple = false; + } } return true; @@ -1453,7 +1473,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, return false; /* Check if the expression has the right shape (one Var, one Const) */ - if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL)) + if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL, issimple)) return false; /* There has to be one expression exactly. */ @@ -1501,7 +1521,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, /* Check Var IN Array clauses by recursing. */ if (IsA(clause_expr, Var)) return statext_is_compatible_clause_internal(root, clause_expr, - relid, attnums, exprs); + relid, attnums, exprs, + issimple); /* Otherwise we have Expr IN Array. */ *exprs = lappend(*exprs, clause_expr); @@ -1530,6 +1551,18 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, BoolExpr *expr = (BoolExpr *) clause; ListCell *lc; + /* + * All AND/OR clauses are considered complex, even if all arguments are + * simple clauses. For NOT clauses we need to check the argument and then + * we can update the flag. + * + * XXX Maybe for AND/OR we should check if all arguments reference the + * same attnum, and consider them complex only when there are multiple + * attnum values (i.e. different Vars)? + */ + if (!is_notclause(clause)) + *issimple = false; + foreach(lc, expr->args) { /* @@ -1538,7 +1571,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, */ if (!statext_is_compatible_clause_internal(root, (Node *) lfirst(lc), - relid, attnums, exprs)) + relid, attnums, exprs, + issimple)) return false; } @@ -1553,7 +1587,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, /* Check Var IS NULL clauses by recursing. */ if (IsA(nt->arg, Var)) return statext_is_compatible_clause_internal(root, (Node *) (nt->arg), - relid, attnums, exprs); + relid, attnums, exprs, + issimple); /* Otherwise we have Expr IS NULL. */ *exprs = lappend(*exprs, nt->arg); @@ -1589,22 +1624,35 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, */ static bool statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, - Bitmapset **attnums, List **exprs) + Bitmapset **attnums, List **exprs, bool *issimple) { RangeTblEntry *rte = root->simple_rte_array[relid]; RestrictInfo *rinfo = (RestrictInfo *) clause; int clause_relid; Oid userid; + /* + * Clauses are considered simple by default, and we mark them as complex + * when we discover a complex part. + */ + *issimple = true; + /* * Special-case handling for bare BoolExpr AND clauses, because the * restrictinfo machinery doesn't build RestrictInfos on top of AND * clauses. + * + * AND clauses are considered complex, even if all arguments are + * simple clauses. */ if (is_andclause(clause)) { BoolExpr *expr = (BoolExpr *) clause; ListCell *lc; + bool tmp = false; /* ignored result */ + + /* AND clauses are complex, even if the arguments are simple. */ + *issimple = false; /* * Check that each sub-clause is compatible. We expect these to be @@ -1613,7 +1661,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, foreach(lc, expr->args) { if (!statext_is_compatible_clause(root, (Node *) lfirst(lc), - relid, attnums, exprs)) + relid, attnums, exprs, &tmp)) return false; } @@ -1635,7 +1683,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, /* Check the clause and determine what attributes it references. */ if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause, - relid, attnums, exprs)) + relid, attnums, exprs, issimple)) return false; /* @@ -1725,6 +1773,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli ListCell *l; Bitmapset **list_attnums; /* attnums extracted from the clause */ List **list_exprs; /* expressions matched to any statistic */ + bool *list_simple; /* marks simple expressions */ int listidx; Selectivity sel = (is_or) ? 0.0 : 1.0; RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); @@ -1739,6 +1788,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli /* expressions extracted from complex expressions */ list_exprs = (List **) palloc(sizeof(Node *) * list_length(clauses)); + /* expressions determined to be simple (single expression) */ + list_simple = (bool *) palloc(sizeof(bool) * list_length(clauses)); + /* * Pre-process the clauses list to extract the attnums and expressions * seen in each item. We need to determine if there are any clauses which @@ -1756,17 +1808,21 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli Node *clause = (Node *) lfirst(l); Bitmapset *attnums = NULL; List *exprs = NIL; + bool issimple = false; if (!bms_is_member(listidx, *estimatedclauses) && - statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs)) + statext_is_compatible_clause(root, clause, rel->relid, + &attnums, &exprs, &issimple)) { list_attnums[listidx] = attnums; list_exprs[listidx] = exprs; + list_simple[listidx] = issimple; } else { list_attnums[listidx] = NULL; list_exprs[listidx] = NIL; + list_simple[listidx] = false; } listidx++; @@ -1803,6 +1859,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli listidx = -1; foreach(l, clauses) { + Node *clause = (Node *) lfirst(l); + /* Increment the index before we decide if to skip the clause. */ listidx++; @@ -1841,13 +1899,12 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli /* record simple clauses (single column or expression) */ if ((list_attnums[listidx] == NULL && list_length(list_exprs[listidx]) == 1) || - (list_exprs[listidx] == NIL && - bms_membership(list_attnums[listidx]) == BMS_SINGLETON)) + list_simple[listidx]) simple_clauses = bms_add_member(simple_clauses, list_length(stat_clauses)); /* add clause to list and mark it as estimated */ - stat_clauses = lappend(stat_clauses, (Node *) lfirst(l)); + stat_clauses = lappend(stat_clauses, clause); *estimatedclauses = bms_add_member(*estimatedclauses, listidx); /* @@ -2056,13 +2113,15 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, * specifies on which side of the operator we found the expression node. */ bool -examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp) +examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp, + bool *issimplep) { List *exprs = NIL; Const *cst = NULL; bool expronleft; Node *leftop, *rightop; + bool issimple; /* enforced by statext_is_compatible_clause_internal */ Assert(list_length(args) == 2); @@ -2082,18 +2141,21 @@ examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp exprs = lappend(exprs, leftop); cst = (Const *) rightop; expronleft = true; + issimple = true; } else if (IsA(leftop, Const)) { exprs = lappend(exprs, rightop); cst = (Const *) leftop; expronleft = false; + issimple = true; } else { exprs = lappend(exprs, leftop); exprs = lappend(exprs, rightop); expronleft = false; + issimple = false; } /* return pointers to the extracted parts if requested */ @@ -2106,6 +2168,9 @@ examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp if (expronleftp) *expronleftp = expronleft; + if (issimplep) + *issimplep = (*issimplep && issimple); + return true; } diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 5806cfbb564..09a30dc1122 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1654,7 +1654,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, fmgr_info(get_opcode(expr->opno), &opproc); /* extract the var/expr and const from the expression */ - if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft)) + if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft, NULL)) elog(ERROR, "incompatible clause"); if (cst) /* Expr op Const */ @@ -1820,7 +1820,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, fmgr_info(get_opcode(expr->opno), &opproc); /* extract the var/expr and const from the expression */ - if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft)) + if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft, NULL)) elog(ERROR, "incompatible clause"); /* ScalarArrayOpExpr has the Var always on the left */ diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index bd4bf7fdf14..bfe8fdf264a 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -100,7 +100,7 @@ extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems, int numattrs, AttrNumber *attnums); extern bool examine_opclause_args(List *args, List **exprs, Const **cstp, - bool *expronleftp); + bool *expronleftp, bool *issimplep); extern Selectivity mcv_combine_selectivities(Selectivity simple_sel, Selectivity mcv_sel, -- 2.34.3