From 87fb7e95f6694c7f9f79c28fc660211af9cfa15b Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Wed, 18 Aug 2021 11:53:45 +0200 Subject: [PATCH 5/5] Don't treat Var op Var as simple clauses --- src/backend/statistics/extended_stats.c | 86 +++++++++++++++---- src/backend/statistics/mcv.c | 4 +- .../statistics/extended_stats_internal.h | 2 +- 3 files changed, 74 insertions(+), 18 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index e1fcfd0711..bf4aa682ae 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1319,11 +1319,14 @@ choose_best_statistics(List *stats, char requiredkind, * statext_is_compatible_clause. It needs to be split like this because * of recursion. The attnums bitmap is an input/output parameter collecting * attribute numbers from all compatible clauses (recursively). + * + * XXX The issimple variable is expected to be initialized by the caller, we + * just update it while recursively analyzing the current clause. */ static bool statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, Index relid, Bitmapset **attnums, - List **exprs) + List **exprs, bool *issimple) { /* Look inside any binary-compatible relabeling (as in examine_variable) */ if (IsA(clause, RelabelType)) @@ -1371,7 +1374,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, * Check if the expression has the right shape. This returns either one * or two expressions, depending on whether there is a Const. */ - if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL)) + if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL, issimple)) return false; /* @@ -1420,6 +1423,12 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, /* * Check all expressions by recursing. Var expressions are handled as * a special case (to match it to attnums etc.) + * + * An opclause is simple if it's (Expr op Const) or (Const op Expr). We + * have already checked the overall shape in examine_opclause_args, but + * we haven't checked the expressions are simple (i.e. pretty much Var), + * so we need to check that now. If we discover a complex expression, we + * consider the whole clause complex. */ foreach (lc, clause_exprs) { @@ -1429,11 +1438,17 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, { /* if the Var is incompatible, the whole clause is incompatible */ if (!statext_is_compatible_clause_internal(root, clause_expr, - relid, attnums, exprs)) + relid, attnums, exprs, + issimple)) return false; } else /* generic expression */ + { *exprs = lappend(*exprs, clause_expr); + + /* switch to false if there are any complex clauses */ + *issimple = false; + } } return true; @@ -1452,7 +1467,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, return false; /* Check if the expression has the right shape (one Var, one Const) */ - if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL)) + if (!examine_opclause_args(expr->args, &clause_exprs, NULL, NULL, issimple)) return false; /* There has to be one expression exactly. */ @@ -1500,7 +1515,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, /* Check Var IN Array clauses by recursing. */ if (IsA(clause_expr, Var)) return statext_is_compatible_clause_internal(root, clause_expr, - relid, attnums, exprs); + relid, attnums, exprs, + issimple); /* Otherwise we have Expr IN Array. */ *exprs = lappend(*exprs, clause_expr); @@ -1529,6 +1545,14 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, BoolExpr *expr = (BoolExpr *) clause; ListCell *lc; + /* + * All AND/OR clauses are considered complex, even if all arguments are + * simple clauses. For NOT clauses we need to check the argument and then + * we can update the flag. + */ + if (!is_notclause(clause)) + *issimple = false; + foreach(lc, expr->args) { /* @@ -1537,7 +1561,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, */ if (!statext_is_compatible_clause_internal(root, (Node *) lfirst(lc), - relid, attnums, exprs)) + relid, attnums, exprs, + issimple)) return false; } @@ -1552,7 +1577,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, /* Check Var IS NULL clauses by recursing. */ if (IsA(nt->arg, Var)) return statext_is_compatible_clause_internal(root, (Node *) (nt->arg), - relid, attnums, exprs); + relid, attnums, exprs, + issimple); /* Otherwise we have Expr IS NULL. */ *exprs = lappend(*exprs, nt->arg); @@ -1588,22 +1614,35 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, */ static bool statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, - Bitmapset **attnums, List **exprs) + Bitmapset **attnums, List **exprs, bool *issimple) { RangeTblEntry *rte = root->simple_rte_array[relid]; RestrictInfo *rinfo = (RestrictInfo *) clause; int clause_relid; Oid userid; + /* + * Clauses are considered simple by default, and we mark them as complex + * when we discover a complex part. + */ + *issimple = true; + /* * Special-case handling for bare BoolExpr AND clauses, because the * restrictinfo machinery doesn't build RestrictInfos on top of AND * clauses. + * + * AND clauses are considered complex, even if all arguments are + * simple clauses. */ if (is_andclause(clause)) { BoolExpr *expr = (BoolExpr *) clause; ListCell *lc; + bool tmp = false; /* ignored result */ + + /* AND clauses are complex, even if the arguments are simple. */ + *issimple = false; /* * Check that each sub-clause is compatible. We expect these to be @@ -1612,7 +1651,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, foreach(lc, expr->args) { if (!statext_is_compatible_clause(root, (Node *) lfirst(lc), - relid, attnums, exprs)) + relid, attnums, exprs, &tmp)) return false; } @@ -1634,7 +1673,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, /* Check the clause and determine what attributes it references. */ if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause, - relid, attnums, exprs)) + relid, attnums, exprs, issimple)) return false; /* @@ -1724,6 +1763,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli ListCell *l; Bitmapset **list_attnums; /* attnums extracted from the clause */ List **list_exprs; /* expressions matched to any statistic */ + bool *list_simple; /* marks simple expressions */ int listidx; Selectivity sel = (is_or) ? 0.0 : 1.0; @@ -1737,6 +1777,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli /* expressions extracted from complex expressions */ list_exprs = (List **) palloc(sizeof(Node *) * list_length(clauses)); + /* expressions determined to be simple (single expression) */ + list_simple = (bool *) palloc(sizeof(bool) * list_length(clauses)); + /* * Pre-process the clauses list to extract the attnums and expressions * seen in each item. We need to determine if there are any clauses which @@ -1754,17 +1797,21 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli Node *clause = (Node *) lfirst(l); Bitmapset *attnums = NULL; List *exprs = NIL; + bool issimple = false; if (!bms_is_member(listidx, *estimatedclauses) && - statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs)) + statext_is_compatible_clause(root, clause, rel->relid, + &attnums, &exprs, &issimple)) { list_attnums[listidx] = attnums; list_exprs[listidx] = exprs; + list_simple[listidx] = issimple; } else { list_attnums[listidx] = NULL; list_exprs[listidx] = NIL; + list_simple[listidx] = false; } listidx++; @@ -1801,6 +1848,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli listidx = -1; foreach(l, clauses) { + Node *clause = (Node *) lfirst(l); + /* Increment the index before we decide if to skip the clause. */ listidx++; @@ -1839,13 +1888,12 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli /* record simple clauses (single column or expression) */ if ((list_attnums[listidx] == NULL && list_length(list_exprs[listidx]) == 1) || - (list_exprs[listidx] == NIL && - bms_membership(list_attnums[listidx]) == BMS_SINGLETON)) + list_simple[listidx]) simple_clauses = bms_add_member(simple_clauses, list_length(stat_clauses)); /* add clause to list and mark it as estimated */ - stat_clauses = lappend(stat_clauses, (Node *) lfirst(l)); + stat_clauses = lappend(stat_clauses, clause); *estimatedclauses = bms_add_member(*estimatedclauses, listidx); /* @@ -2054,13 +2102,15 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, * specifies on which side of the operator we found the expression node. */ bool -examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp) +examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp, + bool *issimplep) { List *exprs = NIL; Const *cst = NULL; bool expronleft; Node *leftop, *rightop; + bool issimple; /* enforced by statext_is_compatible_clause_internal */ Assert(list_length(args) == 2); @@ -2080,18 +2130,21 @@ examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp exprs = lappend(exprs, leftop); cst = (Const *) rightop; expronleft = true; + issimple = true; } else if (IsA(leftop, Const)) { exprs = lappend(exprs, rightop); cst = (Const *) leftop; expronleft = false; + issimple = true; } else { exprs = lappend(exprs, leftop); exprs = lappend(exprs, rightop); expronleft = false; + issimple = false; } /* return pointers to the extracted parts if requested */ @@ -2104,6 +2157,9 @@ examine_opclause_args(List *args, List **exprsp, Const **cstp, bool *expronleftp if (expronleftp) *expronleftp = expronleft; + if (issimplep) + *issimplep = (*issimplep && issimple); + return true; } diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index ac7d7b8978..eb3267b164 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1653,7 +1653,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, fmgr_info(get_opcode(expr->opno), &opproc); /* extract the var/expr and const from the expression */ - if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft)) + if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft, NULL)) elog(ERROR, "incompatible clause"); if (cst) /* Expr op Const */ @@ -1819,7 +1819,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, fmgr_info(get_opcode(expr->opno), &opproc); /* extract the var/expr and const from the expression */ - if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft)) + if (!examine_opclause_args(expr->args, &clause_exprs, &cst, &expronleft, NULL)) elog(ERROR, "incompatible clause"); /* ScalarArrayOpExpr has the Var always on the left */ diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 1f30fa9060..d86cc4184b 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -100,7 +100,7 @@ extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems, int numattrs, AttrNumber *attnums); extern bool examine_opclause_args(List *args, List **exprs, Const **cstp, - bool *expronleftp); + bool *expronleftp, bool *issimplep); extern Selectivity mcv_combine_selectivities(Selectivity simple_sel, Selectivity mcv_sel, -- 2.31.1