From 97f5f3d1af05729dff567c07b56a486924871d95 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Wed, 27 Jan 2021 10:30:02 +0000 Subject: [PATCH 4/4] Review comments. --- src/backend/catalog/system_views.sql | 10 +- src/backend/statistics/extended_stats.c | 884 ++++-------------- src/backend/statistics/mcv.c | 40 +- .../statistics/extended_stats_internal.h | 10 +- src/test/regress/expected/rules.out | 8 +- 5 files changed, 221 insertions(+), 731 deletions(-) diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 32ad93db3f..8238515bfa 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -297,7 +297,7 @@ CREATE VIEW pg_stats_ext_exprs WITH (security_barrier) AS sn.nspname AS statistics_schemaname, s.stxname AS statistics_name, pg_get_userbyid(s.stxowner) AS statistics_owner, - stat_exprs.expr, + stat.expr, (stat.a).stanullfrac AS null_frac, (stat.a).stawidth AS avg_width, (stat.a).stadistinct AS n_distinct, @@ -355,11 +355,9 @@ CREATE VIEW pg_stats_ext_exprs WITH (security_barrier) AS LEFT JOIN pg_namespace cn ON (cn.oid = c.relnamespace) LEFT JOIN pg_namespace sn ON (sn.oid = s.stxnamespace) JOIN LATERAL ( - SELECT unnest(pg_get_statisticsobjdef_expressions(s.oid)) AS expr - ) stat_exprs ON (stat_exprs.expr IS NOT NULL) - LEFT JOIN LATERAL ( - SELECT unnest(sd.stxdexpr)::pg_statistic AS a - ) stat ON (TRUE); + SELECT unnest(pg_get_statisticsobjdef_expressions(s.oid)) AS expr, + unnest(sd.stxdexpr)::pg_statistic AS a + ) stat ON (stat.expr IS NOT NULL); -- unprivileged users may read pg_statistic_ext but not pg_statistic_ext_data REVOKE ALL on pg_statistic_ext_data FROM public; diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index fd6e160ff4..6ed938d6ab 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -118,6 +118,10 @@ BuildRelationExtStatistics(Relation onerel, double totalrows, MemoryContext oldcxt; int64 ext_cnt; + /* Do nothing if there are no columns to analyze. */ + if (!natts) + return; + cxt = AllocSetContextCreate(CurrentMemoryContext, "BuildRelationExtStatistics", ALLOCSET_DEFAULT_SIZES); @@ -265,10 +269,7 @@ ComputeExtStatisticsRows(Relation onerel, MemoryContext oldcxt; int result = 0; - /* - * When there are no columns to analyze, just return 0. That's enough - * for the callers to not build anything. - */ + /* If there are no columns to analyze, just return 0. */ if (!natts) return 0; @@ -1069,6 +1070,63 @@ has_stats_of_kind(List *stats, char requiredkind) return false; } +/* + * stat_find_expression + * Search for an expression in statistics object's list of expressions. + * + * Returns the index of the expression in the statistics object's list of + * expressions, or -1 if not found. + */ +static int +stat_find_expression(StatisticExtInfo *stat, Node *expr) +{ + ListCell *lc; + int idx; + + idx = 0; + foreach(lc, stat->exprs) + { + Node *stat_expr = (Node *) lfirst(lc); + + if (equal(stat_expr, expr)) + return idx; + idx++; + } + + /* Expression not found */ + return -1; +} + +/* + * stat_covers_expressions + * Test whether a statistics object covers all expressions in a list. + * + * Returns true if all expressions are covered. If expr_idxs is non-NULL, it + * is populated with the indexes of the expressions found. + */ +static bool +stat_covers_expressions(StatisticExtInfo *stat, List *exprs, + Bitmapset **expr_idxs) +{ + ListCell *lc; + + foreach (lc, exprs) + { + Node *expr = (Node *) lfirst(lc); + int expr_idx; + + expr_idx = stat_find_expression(stat, expr); + if (expr_idx == -1) + return false; + + if (expr_idxs != NULL) + *expr_idxs = bms_add_member(*expr_idxs, expr_idx); + } + + /* If we reach here, all expressions are covered */ + return true; +} + /* * choose_best_statistics * Look for and return statistics with the specified 'requiredkind' which @@ -1101,9 +1159,9 @@ choose_best_statistics(List *stats, char requiredkind, { int i; StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); - Bitmapset *matched = NULL; + Bitmapset *matched_attnums = NULL; + Bitmapset *matched_exprs = NULL; int num_matched; - int num_matched_exprs; int numkeys; /* skip statistics that are not of the correct type */ @@ -1111,74 +1169,49 @@ choose_best_statistics(List *stats, char requiredkind, continue; /* - * Collect attributes in remaining (unestimated) clauses fully covered - * by this statistic object. + * Collect attributes and expressions in remaining (unestimated) + * clauses fully covered by this statistic object. */ for (i = 0; i < nclauses; i++) { + Bitmapset *expr_idxs = NULL; + /* ignore incompatible/estimated clauses */ - if (!clause_attnums[i]) + if (!clause_attnums[i] && !clause_exprs[i]) continue; /* ignore clauses that are not covered by this object */ - if (!bms_is_subset(clause_attnums[i], info->keys)) + if (!bms_is_subset(clause_attnums[i], info->keys) || + !stat_covers_expressions(info, clause_exprs[i], &expr_idxs)) continue; - matched = bms_add_members(matched, clause_attnums[i]); + /* record attnums and indexes of expressions covered */ + matched_attnums = bms_add_members(matched_attnums, clause_attnums[i]); + matched_exprs = bms_add_members(matched_exprs, expr_idxs); } - num_matched = bms_num_members(matched); - bms_free(matched); + num_matched = bms_num_members(matched_attnums) + bms_num_members(matched_exprs); - /* - * Collect expressions in remaining (unestimated) expressions, covered - * by an expression in this statistic object. - */ - num_matched_exprs = 0; - for (i = 0; i < nclauses; i++) - { - ListCell *lc3; - - /* ignore incompatible/estimated expressions */ - if (!clause_exprs[i]) - continue; - - /* ignore expressions that are not covered by this object */ - foreach (lc3, clause_exprs[i]) - { - ListCell *lc2; - Node *expr = (Node *) lfirst(lc3); - - foreach(lc2, info->exprs) - { - Node *stat_expr = (Node *) lfirst(lc2); - - if (equal(expr, stat_expr)) - { - num_matched_exprs++; - break; - } - } - } - } + bms_free(matched_attnums); + bms_free(matched_exprs); /* * save the actual number of keys in the stats so that we can choose * the narrowest stats with the most matching keys. */ - numkeys = bms_num_members(info->keys); + numkeys = bms_num_members(info->keys) + list_length(info->exprs); /* - * Use this object when it increases the number of matched clauses or - * when it matches the same number of attributes but these stats have - * fewer keys than any previous match. + * Use this object when it increases the number of matched attributes + * and expressions or when it matches the same number of attributes + * and expressions but these stats have fewer keys than any previous + * match. */ - if (num_matched + num_matched_exprs > best_num_matched || - ((num_matched + num_matched_exprs) == best_num_matched && - numkeys < best_match_keys)) + if (num_matched > best_num_matched || + (num_matched == best_num_matched && numkeys < best_match_keys)) { best_match = info; - best_num_matched = num_matched + num_matched_exprs; + best_num_matched = num_matched; best_match_keys = numkeys; } } @@ -1197,7 +1230,8 @@ choose_best_statistics(List *stats, char requiredkind, */ static bool statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, - Index relid, Bitmapset **attnums) + Index relid, Bitmapset **attnums, + List **exprs) { /* Look inside any binary-compatible relabeling (as in examine_variable) */ if (IsA(clause, RelabelType)) @@ -1225,19 +1259,19 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, return true; } - /* (Var op Const) or (Const op Var) */ + /* (Var/Expr op Const) or (Const op Var/Expr) */ if (is_opclause(clause)) { RangeTblEntry *rte = root->simple_rte_array[relid]; OpExpr *expr = (OpExpr *) clause; - Var *var; + Node *clause_expr; /* Only expressions with two arguments are considered compatible. */ if (list_length(expr->args) != 2) return false; - /* Check if the expression has the right shape (one Var, one Const) */ - if (!examine_opclause_expression(expr, &var, NULL, NULL)) + /* Check if the expression has the right shape */ + if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL)) return false; /* @@ -1255,7 +1289,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, case F_SCALARLESEL: case F_SCALARGTSEL: case F_SCALARGESEL: - /* supported, will continue with inspection of the Var */ + /* supported, will continue with inspection of the Var/Expr */ break; default: @@ -1277,23 +1311,29 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, !get_func_leakproof(get_opcode(expr->opno))) return false; - return statext_is_compatible_clause_internal(root, (Node *) var, - relid, attnums); + /* Check (Var op Const) or (Const op Var) clauses by recursing. */ + if (IsA(clause_expr, Var)) + return statext_is_compatible_clause_internal(root, clause_expr, + relid, attnums, exprs); + + /* Otherwise we have (Expr op Const) or (Const op Expr). */ + *exprs = lappend(*exprs, clause_expr); + return true; } - /* Var IN Array */ + /* Var/Expr IN Array */ if (IsA(clause, ScalarArrayOpExpr)) { RangeTblEntry *rte = root->simple_rte_array[relid]; ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; - Var *var; + Node *clause_expr; /* Only expressions with two arguments are considered compatible. */ if (list_length(expr->args) != 2) return false; /* Check if the expression has the right shape (one Var, one Const) */ - if (!examine_clause_args(expr->args, &var, NULL, NULL)) + if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL)) return false; /* @@ -1311,7 +1351,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, case F_SCALARLESEL: case F_SCALARGTSEL: case F_SCALARGESEL: - /* supported, will continue with inspection of the Var */ + /* supported, will continue with inspection of the Var/Expr */ break; default: @@ -1333,8 +1373,14 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, !get_func_leakproof(get_opcode(expr->opno))) return false; - return statext_is_compatible_clause_internal(root, (Node *) var, - relid, attnums); + /* Check Var IN Array clauses by recursing. */ + if (IsA(clause_expr, Var)) + return statext_is_compatible_clause_internal(root, clause_expr, + relid, attnums, exprs); + + /* Otherwise we have Expr IN Array. */ + *exprs = lappend(*exprs, clause_expr); + return true; } /* AND/OR/NOT clause */ @@ -1367,264 +1413,62 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, */ if (!statext_is_compatible_clause_internal(root, (Node *) lfirst(lc), - relid, attnums)) + relid, attnums, exprs)) return false; } return true; } - /* Var IS NULL */ + /* Var/Expr IS NULL */ if (IsA(clause, NullTest)) { NullTest *nt = (NullTest *) clause; - /* - * Only simple (Var IS NULL) expressions supported for now. Maybe we - * could use examine_variable to fix this? - */ - if (!IsA(nt->arg, Var)) - return false; - - return statext_is_compatible_clause_internal(root, (Node *) (nt->arg), - relid, attnums); - } - - return false; -} - -/* - * statext_extract_expression_internal - * Extract parts of an expressions to match against extended stats. - * - * Given an expression, decompose it into "parts" that will be analyzed and - * matched against extended statistics. If the expression is not considered - * compatible (supported by extended statistics), this returns NIL. - * - * There's a certain amount of ambiguity, because some expressions may be - * split into parts in multiple ways. For example, consider expression - * - * (a + b) = 1 - * - * which may be either considered as a single boolean expression, or it may - * be split into expression (a + b) and a constant. So this might return - * either ((a+b)=1) or (a+b) as valid expressions, but this does affect - * matching to extended statistics, because the expressions have to match - * the definition exactly. So ((a+b)=1) would match statistics defined as - * - * CREATE STATISTICS s ON ((a+b) = 1) FROM t; - * - * but not - * - * CREATE STATISTICS s ON (a+b) FROM t; - * - * which might be a bit confusing. We might enhance this to track those - * alternative decompositions somehow, and then modify the matching to - * extended statistics. But it seems non-trivial, because the AND/OR - * clauses make it "recursive". - * - * in which expressions might be extracted. - */ -static List * -statext_extract_expression_internal(PlannerInfo *root, Node *clause, Index relid) -{ - /* Look inside any binary-compatible relabeling (as in examine_variable) */ - if (IsA(clause, RelabelType)) - clause = (Node *) ((RelabelType *) clause)->arg; - - /* plain Var references (boolean Vars or recursive checks) */ - if (IsA(clause, Var)) - { - Var *var = (Var *) clause; - - /* Ensure var is from the correct relation */ - if (var->varno != relid) - return NIL; - - /* we also better ensure the Var is from the current level */ - if (var->varlevelsup > 0) - return NIL; - - /* Also skip system attributes (we don't allow stats on those). */ - if (!AttrNumberIsForUserDefinedAttr(var->varattno)) - return NIL; - - return list_make1(clause); - } - - /* (Var op Const) or (Const op Var) */ - if (is_opclause(clause)) - { - RangeTblEntry *rte = root->simple_rte_array[relid]; - OpExpr *expr = (OpExpr *) clause; - Node *expr2 = NULL; - - /* Only expressions with two arguments are considered compatible. */ - if (list_length(expr->args) != 2) - return NIL; - - /* Check if the expression has the right shape (one Expr, one Const) */ - if (!examine_opclause_expression2(expr, &expr2, NULL, NULL)) - return NIL; + /* Check Var IS NULL clauses by recursing. */ + if (IsA(nt->arg, Var)) + return statext_is_compatible_clause_internal(root, (Node *) (nt->arg), + relid, attnums, exprs); - /* - * If it's not one of the supported operators ("=", "<", ">", etc.), - * just ignore the clause, as it's not compatible with MCV lists. - * - * This uses the function for estimating selectivity, not the operator - * directly (a bit awkward, but well ...). - */ - switch (get_oprrest(expr->opno)) - { - case F_EQSEL: - case F_NEQSEL: - case F_SCALARLTSEL: - case F_SCALARLESEL: - case F_SCALARGTSEL: - case F_SCALARGESEL: - /* supported, will continue with inspection of the Var */ - break; - - default: - /* other estimators are considered unknown/unsupported */ - return NIL; - } - - /* - * If there are any securityQuals on the RTE from security barrier - * views or RLS policies, then the user may not have access to all the - * table's data, and we must check that the operator is leak-proof. - * - * If the operator is leaky, then we must ignore this clause for the - * purposes of estimating with MCV lists, otherwise the operator might - * reveal values from the MCV list that the user doesn't have - * permission to see. - */ - if (rte->securityQuals != NIL && - !get_func_leakproof(get_opcode(expr->opno))) - return NIL; - - return list_make1(expr2); - } - - if (IsA(clause, ScalarArrayOpExpr)) - { - RangeTblEntry *rte = root->simple_rte_array[relid]; - ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; - Node *expr2 = NULL; - - /* Only expressions with two arguments are considered compatible. */ - if (list_length(expr->args) != 2) - return NIL; - - /* Check if the expression has the right shape (one Expr, one Const) */ - if (!examine_clause_args2(expr->args, &expr2, NULL, NULL)) - return NIL; - - /* - * If there are any securityQuals on the RTE from security barrier - * views or RLS policies, then the user may not have access to all the - * table's data, and we must check that the operator is leak-proof. - * - * If the operator is leaky, then we must ignore this clause for the - * purposes of estimating with MCV lists, otherwise the operator might - * reveal values from the MCV list that the user doesn't have - * permission to see. - */ - if (rte->securityQuals != NIL && - !get_func_leakproof(get_opcode(expr->opno))) - return NIL; - - return list_make1(expr2); - } - - /* AND/OR/NOT clause */ - if (is_andclause(clause) || - is_orclause(clause) || - is_notclause(clause)) - { - /* - * AND/OR/NOT-clauses are supported if all sub-clauses are supported - * - * Perhaps we could improve this by handling mixed cases, when some of - * the clauses are supported and some are not. Selectivity for the - * supported subclauses would be computed using extended statistics, - * and the remaining clauses would be estimated using the traditional - * algorithm (product of selectivities). - * - * It however seems overly complex, and in a way we already do that - * because if we reject the whole clause as unsupported here, it will - * be eventually passed to clauselist_selectivity() which does exactly - * this (split into supported/unsupported clauses etc). - */ - BoolExpr *expr = (BoolExpr *) clause; - ListCell *lc; - List *exprs = NIL; - - foreach(lc, expr->args) - { - List *tmp; - - /* - * Had we found incompatible clause in the arguments, treat the - * whole clause as incompatible. - */ - tmp = statext_extract_expression_internal(root, - (Node *) lfirst(lc), - relid); - - if (!tmp) - return NIL; - - exprs = list_concat(exprs, tmp); - } - - return exprs; - } - - /* Var IS NULL */ - if (IsA(clause, NullTest)) - { - NullTest *nt = (NullTest *) clause; - - /* - * Only simple (Var IS NULL) expressions supported for now. Maybe we - * could use examine_variable to fix this? - */ - if (!IsA(nt->arg, Var)) - return NIL; - - return statext_extract_expression_internal(root, (Node *) (nt->arg), - relid); + /* Otherwise we have Expr IS NULL. */ + *exprs = lappend(*exprs, nt->arg); + return true; } - return NIL; + /* + * Treat any other expressions as bare expressions to be matched against + * expressions in statistics objects. + */ + *exprs = lappend(*exprs, clause); + return true; } /* * statext_is_compatible_clause * Determines if the clause is compatible with MCV lists. * - * Currently, we only support three types of clauses: + * Currently, we only support the following types of clauses: * - * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op - * is one of ("=", "<", ">", ">=", "<=") + * (a) OpExprs of the form (Var/Expr op Const), or (Const op Var/Expr), where + * the op is one of ("=", "<", ">", ">=", "<=") * - * (b) (Var IS [NOT] NULL) + * (b) (Var/Expr IS [NOT] NULL) * * (c) combinations using AND/OR/NOT * - * (d) ScalarArrayOpExprs of the form (Var op ANY (array)) or (Var op ALL (array)) + * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (array)) or (Var/Expr + * op ALL (array)) * * In the future, the range of supported clauses may be expanded to more * complex cases, for example (Var op Var). */ static bool statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, - Bitmapset **attnums) + Bitmapset **attnums, List **exprs) { RangeTblEntry *rte = root->simple_rte_array[relid]; RestrictInfo *rinfo = (RestrictInfo *) clause; + int clause_relid; Oid userid; /* @@ -1644,7 +1488,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, foreach(lc, expr->args) { if (!statext_is_compatible_clause(root, (Node *) lfirst(lc), - relid, attnums)) + relid, attnums, exprs)) return false; } @@ -1659,142 +1503,59 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, if (rinfo->pseudoconstant) return false; - /* clauses referencing multiple varnos are incompatible */ - if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) + /* Clauses referencing other varnos are incompatible. */ + if (!bms_get_singleton_member(rinfo->clause_relids, &clause_relid) || + clause_relid != relid) return false; /* Check the clause and determine what attributes it references. */ if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause, - relid, attnums)) + relid, attnums, exprs)) return false; /* - * Check that the user has permission to read all these attributes. Use - * checkAsUser if it's set, in case we're accessing the table via a view. + * Check that the user has permission to read all required attributes. + * Use checkAsUser if it's set, in case we're accessing the table via a + * view. */ userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK) { + Bitmapset *clause_attnums; + /* Don't have table privilege, must check individual columns */ - if (bms_is_member(InvalidAttrNumber, *attnums)) + if (*exprs != NIL) { - /* Have a whole-row reference, must have access to all columns */ - if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT, - ACLMASK_ALL) != ACLCHECK_OK) - return false; + pull_varattnos((Node *) exprs, relid, &clause_attnums); + clause_attnums = bms_add_members(clause_attnums, *attnums); } else - { - /* Check the columns referenced by the clause */ - int attnum = -1; - - while ((attnum = bms_next_member(*attnums, attnum)) >= 0) - { - if (pg_attribute_aclcheck(rte->relid, attnum, userid, - ACL_SELECT) != ACLCHECK_OK) - return false; - } - } - } - - /* If we reach here, the clause is OK */ - return true; -} - -/* - * statext_extract_expression - * Determines if the clause is compatible with extended statistics. - * - * Currently, we only support three types of clauses: - * - * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op - * is one of ("=", "<", ">", ">=", "<=") - * - * (b) (Var IS [NOT] NULL) - * - * (c) combinations using AND/OR/NOT - * - * (d) ScalarArrayOpExprs of the form (Var op ANY (array)) or (Var op ALL (array)) - * - * In the future, the range of supported clauses may be expanded to more - * complex cases, for example (Var op Var). - */ -static List * -statext_extract_expression(PlannerInfo *root, Node *clause, Index relid) -{ - RestrictInfo *rinfo = (RestrictInfo *) clause; - RangeTblEntry *rte = root->simple_rte_array[relid]; - List *exprs; - Oid userid; - - if (!IsA(rinfo, RestrictInfo)) - return NIL; - - /* Pseudoconstants are not really interesting here. */ - if (rinfo->pseudoconstant) - return NIL; - - /* clauses referencing multiple varnos are incompatible */ - if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) - return NIL; - - /* Check the clause and extract expressions it's composed of. */ - exprs = statext_extract_expression_internal(root, (Node *) rinfo->clause, relid); - - /* - * If there are no potentially interesting expressions (supported by - * extended statistics), we're done; - */ - if (!exprs) - return NIL; - - /* - * Check that the user has permission to read all these attributes. Use - * checkAsUser if it's set, in case we're accessing the table via a view. - */ - userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); - - if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK) - { - Bitmapset *attnums = NULL; + clause_attnums = *attnums; - /* Extract all attribute numbers from the expressions. */ - pull_varattnos((Node *) exprs, relid, &attnums); - - /* Don't have table privilege, must check individual columns */ - if (bms_is_member(InvalidAttrNumber, attnums)) + if (bms_is_member(InvalidAttrNumber, clause_attnums)) { /* Have a whole-row reference, must have access to all columns */ if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT, ACLMASK_ALL) != ACLCHECK_OK) - return NIL; + return false; } else { /* Check the columns referenced by the clause */ int attnum = -1; - while ((attnum = bms_next_member(attnums, attnum)) >= 0) + while ((attnum = bms_next_member(clause_attnums, attnum)) >= 0) { - AttrNumber tmp; - - /* Adjust for system attributes (offset for bitmap). */ - tmp = attnum + FirstLowInvalidHeapAttributeNumber; - - /* Ignore system attributes, those can't have statistics. */ - if (!AttrNumberIsForUserDefinedAttr(tmp)) - return NIL; - - if (pg_attribute_aclcheck(rte->relid, tmp, userid, + if (pg_attribute_aclcheck(rte->relid, attnum, userid, ACL_SELECT) != ACLCHECK_OK) - return NIL; + return false; } } } /* If we reach here, the clause is OK */ - return exprs; + return true; } /* @@ -1854,12 +1615,12 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli list_exprs = (List **) palloc(sizeof(Node *) * list_length(clauses)); /* - * Pre-process the clauses list to extract the attnums seen in each item. - * We need to determine if there's any clauses which will be useful for - * selectivity estimations with extended stats. Along the way we'll record - * all of the attnums for each clause in a list which we'll reference - * later so we don't need to repeat the same work again. We'll also keep - * track of all attnums seen. + * Pre-process the clauses list to extract the attnums and expressions + * seen in each item. We need to determine if there are any clauses which + * will be useful for selectivity estimations with extended stats. Along + * the way we'll record all of the attnums and expressions for each clause + * in lists which we'll reference later so we don't need to repeat the + * same work again. * * We also skip clauses that we already estimated using different types of * statistics (we treat them as incompatible). @@ -1869,100 +1630,18 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli { Node *clause = (Node *) lfirst(l); Bitmapset *attnums = NULL; + List *exprs = NIL; - /* the clause is considered incompatible by default */ - list_attnums[listidx] = NULL; - - /* and it's also not covered exactly by the statistic */ - list_exprs[listidx] = NULL; - - /* - * First see if the clause is simple enough to be covered directly - * by the attributes. If not, see if there's at least one statistic - * object using the expression as-is. - */ if (!bms_is_member(listidx, *estimatedclauses) && - statext_is_compatible_clause(root, clause, rel->relid, &attnums)) + statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs)) { - /* simple expression, covered through attnum(s) */ list_attnums[listidx] = attnums; + list_exprs[listidx] = exprs; } else { - ListCell *lc; - List *exprs; - - /* - * XXX This is kinda dubious, because we extract the smallest - * clauses - e.g. from (Var op Const) we extract Var. But maybe - * the statistics covers larger expressions, so maybe this will - * skip that. For example give ((a+b) + (c+d)) it's not clear - * if we should extract the whole clause or some smaller parts. - * OTOH we need (Expr op Const) so maybe we only care about the - * clause as a whole? - */ - exprs = statext_extract_expression(root, clause, rel->relid); - - /* complex expression, search for statistic covering all parts */ - foreach(lc, rel->statlist) - { - ListCell *le; - StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); - - /* - * Assume all parts are covered by this statistics, we'll - * stop if we found part that is not covered. - */ - bool covered = true; - - /* have we already matched the expression to a statistic? */ - Assert(!list_exprs[listidx]); - - /* no expressions in the statistic */ - if (!info->exprs) - continue; - - foreach(le, exprs) - { - ListCell *lc2; - Node *expr = (Node *) lfirst(le); - bool found = false; - - /* - * Walk the expressions, see if all expressions extracted from - * the clause are covered by the extended statistic object. - */ - foreach (lc2, info->exprs) - { - Node *stat_expr = (Node *) lfirst(lc2); - - if (equal(expr, stat_expr)) - { - found = true; - break; - } - } - - /* found expression not covered by the statistics, stop */ - if (!found) - { - covered = false; - break; - } - } - - /* - * OK, we found a statistics covering this clause, stop looking - * for another one - */ - if (covered) - { - /* XXX should this add the original expression instead? */ - list_exprs[listidx] = exprs; - break; - } - - } + list_attnums[listidx] = NULL; + list_exprs[listidx] = NIL; } listidx++; @@ -1993,69 +1672,39 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli /* now filter the clauses to be estimated using the selected MCV */ stat_clauses = NIL; - /* record which clauses are simple (single column) */ + /* record which clauses are simple (single column or expression) */ simple_clauses = NULL; listidx = 0; foreach(l, clauses) { /* - * If the clause is compatible with the selected statistics, mark - * it as estimated and add it to the list to estimate. It may be - * either a simple clause, or an expression. + * If the clause is not already estimated and is compatible with + * the selected statistics object (all attributes and expressions + * covered), mark it as estimated and add it to the list to + * estimate. */ - if (list_attnums[listidx] != NULL && - bms_is_subset(list_attnums[listidx], stat->keys)) + if (!bms_is_member(listidx, *estimatedclauses) && + bms_is_subset(list_attnums[listidx], stat->keys) && + stat_covers_expressions(stat, list_exprs[listidx], NULL)) { - /* simple clause (single Var) */ - if (bms_membership(list_attnums[listidx]) == BMS_SINGLETON) + /* record simple clauses (single column or expression) */ + if ((list_attnums[listidx] == NULL && + list_length(list_exprs[listidx]) == 1) || + (list_exprs[listidx] == NIL && + bms_membership(list_attnums[listidx]) == BMS_SINGLETON)) simple_clauses = bms_add_member(simple_clauses, list_length(stat_clauses)); + /* add clause to list and mark as estimated */ stat_clauses = lappend(stat_clauses, (Node *) lfirst(l)); *estimatedclauses = bms_add_member(*estimatedclauses, listidx); bms_free(list_attnums[listidx]); list_attnums[listidx] = NULL; - } - else if (list_exprs[listidx] != NIL) - { - /* are all parts of the expression covered by the statistic? */ - ListCell *lc; - int ncovered = 0; - - foreach (lc, list_exprs[listidx]) - { - ListCell *lc2; - Node *expr = (Node *) lfirst(lc); - bool found = false; - - foreach (lc2, stat->exprs) - { - Node *stat_expr = (Node *) lfirst(lc2); - - if (equal(expr, stat_expr)) - { - found = true; - break; - } - } - - /* count it as covered and continue to the next expression */ - if (found) - ncovered++; - } - - /* all parts of the expression are covered by this statistics */ - if (ncovered == list_length(list_exprs[listidx])) - { - stat_clauses = lappend(stat_clauses, (Node *) lfirst(l)); - *estimatedclauses = bms_add_member(*estimatedclauses, listidx); - - list_free(list_exprs[listidx]); - list_exprs[listidx] = NULL; - } + list_free(list_exprs[listidx]); + list_exprs[listidx] = NULL; } listidx++; @@ -2244,69 +1893,20 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, } /* - * examine_opclause_expression - * Split expression into Var and Const parts. + * examine_opclause_args + * Split an operator expression's arguments into Expr and Const parts. * - * Attempts to match the arguments to either (Var op Const) or (Const op Var), - * possibly with a RelabelType on top. When the expression matches this form, - * returns true, otherwise returns false. + * Attempts to match the arguments to either (Expr op Const) or (Const op + * Expr), possibly with a RelabelType on top. When the expression matches this + * form, returns true, otherwise returns false. * - * Optionally returns pointers to the extracted Var/Const nodes, when passed - * non-null pointers (varp, cstp and varonleftp). The varonleftp flag specifies - * on which side of the operator we found the Var node. + * Optionally returns pointers to the extracted Expr/Const nodes, when passed + * non-null pointers (exprp, cstp and expronleftp). The expronleftp flag + * specifies on which side of the operator we found the expression node. */ bool -examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp) -{ - Var *var; - Const *cst; - bool varonleft; - Node *leftop, - *rightop; - - /* enforced by statext_is_compatible_clause_internal */ - Assert(list_length(args) == 2); - - leftop = linitial(args); - rightop = lsecond(args); - - /* strip RelabelType from either side of the expression */ - if (IsA(leftop, RelabelType)) - leftop = (Node *) ((RelabelType *) leftop)->arg; - - if (IsA(rightop, RelabelType)) - rightop = (Node *) ((RelabelType *) rightop)->arg; - - if (IsA(leftop, Var) && IsA(rightop, Const)) - { - var = (Var *) leftop; - cst = (Const *) rightop; - varonleft = true; - } - else if (IsA(leftop, Const) && IsA(rightop, Var)) - { - var = (Var *) rightop; - cst = (Const *) leftop; - varonleft = false; - } - else - return false; - - /* return pointers to the extracted parts if requested */ - if (varp) - *varp = var; - - if (cstp) - *cstp = cst; - - if (varonleftp) - *varonleftp = varonleft; - - return true; -} - -bool -examine_clause_args2(List *args, Node **exprp, Const **cstp, bool *expronleftp) +examine_opclause_args(List *args, Node **exprp, Const **cstp, + bool *expronleftp) { Node *expr; Const *cst; @@ -2355,106 +1955,6 @@ examine_clause_args2(List *args, Node **exprp, Const **cstp, bool *expronleftp) return true; } -bool -examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp) -{ - Var *var; - Const *cst; - bool varonleft; - Node *leftop, - *rightop; - - /* enforced by statext_is_compatible_clause_internal */ - Assert(list_length(expr->args) == 2); - - leftop = linitial(expr->args); - rightop = lsecond(expr->args); - - /* strip RelabelType from either side of the expression */ - if (IsA(leftop, RelabelType)) - leftop = (Node *) ((RelabelType *) leftop)->arg; - - if (IsA(rightop, RelabelType)) - rightop = (Node *) ((RelabelType *) rightop)->arg; - - if (IsA(leftop, Var) && IsA(rightop, Const)) - { - var = (Var *) leftop; - cst = (Const *) rightop; - varonleft = true; - } - else if (IsA(leftop, Const) && IsA(rightop, Var)) - { - var = (Var *) rightop; - cst = (Const *) leftop; - varonleft = false; - } - else - return false; - - /* return pointers to the extracted parts if requested */ - if (varp) - *varp = var; - - if (cstp) - *cstp = cst; - - if (varonleftp) - *varonleftp = varonleft; - - return true; -} - -bool -examine_opclause_expression2(OpExpr *expr, Node **exprp, Const **cstp, bool *expronleftp) -{ - Node *expr2; - Const *cst; - bool expronleft; - Node *leftop, - *rightop; - - /* enforced by statext_is_compatible_clause_internal */ - Assert(list_length(expr->args) == 2); - - leftop = linitial(expr->args); - rightop = lsecond(expr->args); - - /* strip RelabelType from either side of the expression */ - if (IsA(leftop, RelabelType)) - leftop = (Node *) ((RelabelType *) leftop)->arg; - - if (IsA(rightop, RelabelType)) - rightop = (Node *) ((RelabelType *) rightop)->arg; - - if (IsA(rightop, Const)) - { - expr2 = (Node *) leftop; - cst = (Const *) rightop; - expronleft = true; - } - else if (IsA(leftop, Const)) - { - expr2 = (Node *) rightop; - cst = (Const *) leftop; - expronleft = false; - } - else - return false; - - /* return pointers to the extracted parts if requested */ - if (exprp) - *exprp = expr2; - - if (cstp) - *cstp = cst; - - if (expronleftp) - *expronleftp = expronleft; - - return true; -} - /* * Compute statistics about expressions of a relation. diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 0c27ee395e..9720e49ab4 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1612,18 +1612,20 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, OpExpr *expr = (OpExpr *) clause; FmgrInfo opproc; - /* valid only after examine_clause_args returns true */ - Var *var; + /* valid only after examine_opclause_args returns true */ Node *clause_expr; Const *cst; - bool varonleft; bool expronleft; fmgr_info(get_opcode(expr->opno), &opproc); - /* extract the var and const from the expression */ - if (examine_clause_args(expr->args, &var, &cst, &varonleft)) + /* extract the var/expr and const from the expression */ + if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) + elog(ERROR, "incompatible clause"); + + if (IsA(clause_expr, Var)) { + Var *var = (Var *) clause_expr; int idx; /* match the attribute to a dimension of the statistic */ @@ -1671,7 +1673,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, * this is OK. We may need to relax this after allowing * extended statistics on expressions. */ - if (varonleft) + if (expronleft) match = DatumGetBool(FunctionCall2Coll(&opproc, var->varcollid, item->values[idx], @@ -1686,8 +1688,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, matches[i] = RESULT_MERGE(matches[i], is_or, match); } } - /* extract the expr and const from the expression */ - else if (examine_clause_args2(expr->args, &clause_expr, &cst, &expronleft)) + else { ListCell *lc; int idx; @@ -1767,26 +1768,26 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, matches[i] = RESULT_MERGE(matches[i], is_or, match); } } - else - elog(ERROR, "incompatible clause"); } else if (IsA(clause, ScalarArrayOpExpr)) { ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; FmgrInfo opproc; - /* valid only after examine_clause_args returns true */ - Var *var; + /* valid only after examine_opclause_args returns true */ Node *clause_expr; Const *cst; - bool varonleft; bool expronleft; fmgr_info(get_opcode(expr->opno), &opproc); - /* extract the var and const from the expression */ - if (examine_clause_args(expr->args, &var, &cst, &varonleft)) + /* extract the var/expr and const from the expression */ + if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) + elog(ERROR, "incompatible clause"); + + if (IsA(clause_expr, Var)) { + Var *var = (Var *) clause_expr; int idx; ArrayType *arrayval; @@ -1798,7 +1799,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, bool *elem_nulls; /* ScalarArrayOpExpr has the Var always on the left */ - Assert(varonleft); + Assert(expronleft); if (!cst->constisnull) { @@ -1878,8 +1879,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, matches[i] = RESULT_MERGE(matches[i], is_or, match); } } - /* extract the expr and const from the expression */ - else if (examine_clause_args2(expr->args, &clause_expr, &cst, &expronleft)) + else { ListCell *lc; int idx; @@ -1893,7 +1893,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, bool *elem_nulls; Oid collid = exprCollation(clause_expr); - /* ScalarArrayOpExpr has the Var always on the left */ + /* ScalarArrayOpExpr has the Expr always on the left */ Assert(expronleft); if (!cst->constisnull) @@ -1988,8 +1988,6 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, matches[i] = RESULT_MERGE(matches[i], is_or, match); } } - else - elog(ERROR, "incompatible clause"); } else if (IsA(clause, NullTest)) { diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 092bc3eb8a..b2e59f9bc5 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -113,14 +113,8 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows, MultiSortSupport mss, int numattrs, AttrNumber *attnums); -extern bool examine_clause_args(List *args, Var **varp, - Const **cstp, bool *varonleftp); -extern bool examine_clause_args2(List *args, Node **exprp, - Const **cstp, bool *expronleftp); -extern bool examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, - bool *varonleftp); -extern bool examine_opclause_expression2(OpExpr *expr, Node **exprp, Const **cstp, - bool *expronleftp); +extern bool examine_opclause_args(List *args, Node **exprp, + Const **cstp, bool *expronleftp); extern Selectivity mcv_combine_selectivities(Selectivity simple_sel, Selectivity mcv_sel, diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index e5e40f92e0..ac4ba9d0a2 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2427,7 +2427,7 @@ pg_stats_ext_exprs| SELECT cn.nspname AS schemaname, sn.nspname AS statistics_schemaname, s.stxname AS statistics_name, pg_get_userbyid(s.stxowner) AS statistics_owner, - stat_exprs.expr, + stat.expr, (stat.a).stanullfrac AS null_frac, (stat.a).stawidth AS avg_width, (stat.a).stadistinct AS n_distinct, @@ -2487,13 +2487,13 @@ pg_stats_ext_exprs| SELECT cn.nspname AS schemaname, WHEN ((stat.a).stakind5 = 5) THEN (stat.a).stanumbers5 ELSE NULL::real[] END AS elem_count_histogram - FROM ((((((pg_statistic_ext s + FROM (((((pg_statistic_ext s JOIN pg_class c ON ((c.oid = s.stxrelid))) LEFT JOIN pg_statistic_ext_data sd ON ((s.oid = sd.stxoid))) LEFT JOIN pg_namespace cn ON ((cn.oid = c.relnamespace))) LEFT JOIN pg_namespace sn ON ((sn.oid = s.stxnamespace))) - JOIN LATERAL ( SELECT unnest(pg_get_statisticsobjdef_expressions(s.oid)) AS expr) stat_exprs ON ((stat_exprs.expr IS NOT NULL))) - LEFT JOIN LATERAL ( SELECT unnest(sd.stxdexpr) AS a) stat ON (true)); + JOIN LATERAL ( SELECT unnest(pg_get_statisticsobjdef_expressions(s.oid)) AS expr, + unnest(sd.stxdexpr) AS a) stat ON ((stat.expr IS NOT NULL))); pg_tables| SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, -- 2.26.2