From 73f52d0909374446cd689457f0a4ef52addb035e Mon Sep 17 00:00:00 2001 From: Andy Fan Date: Tue, 1 Feb 2022 14:54:07 +0800 Subject: [PATCH v2 2/6] After distributing the new derived RestrictInfo into RelOptInfo, the row estimation is wrong at the joinrel part. The reason is well described at [1] and [2]. To fix this issue, I added a new field "EquivalenceClass *derived" to the RestrictInfo struct to indicate how this qual was generated. We ignore such quals when estimating the row count. All the set_xx_size functions should be taken care of, but for now only set_plain_rel_size is taken care of, for PoC purposes. [1] https://www.postgresql.org/message-id/flat/CAKJS1f9FK_X_5HKcPcSeimy16Owe3EmPmmGsGWLcKkj_rW9s6A%40mail.gmail.com [2] https://www.postgresql.org/message-id/flat/1727507.1620948117%40sss.pgh.pa.us#52ac3f46cf614acb0bdbddb7128f5bd2 --- src/backend/optimizer/path/clausesel.c | 34 +++++++++++++++++-------- src/backend/optimizer/path/costsize.c | 25 ++++++++++-------- src/backend/optimizer/path/equivclass.c | 20 ++++++++------- src/backend/optimizer/util/inherit.c | 31 ++++++++++++---------- src/backend/statistics/dependencies.c | 3 ++- src/backend/statistics/extended_stats.c | 5 ++-- src/include/nodes/pathnodes.h | 1 + src/include/optimizer/optimizer.h | 6 +++-- 8 files changed, 77 insertions(+), 48 deletions(-) diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 06f836308d0..8961e66ea4e 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -106,7 +106,7 @@ clauselist_selectivity(PlannerInfo *root, SpecialJoinInfo *sjinfo) { return clauselist_selectivity_ext(root, clauses, varRelid, - jointype, sjinfo, true); + jointype, sjinfo, true, true); } /* @@ -121,7 +121,8 @@ clauselist_selectivity_ext(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, - bool use_extended_stats) + bool use_extended_stats, + bool include_derived) { Selectivity s1 = 1.0; 
RelOptInfo *rel; @@ -137,7 +138,8 @@ clauselist_selectivity_ext(PlannerInfo *root, if (list_length(clauses) == 1) return clause_selectivity_ext(root, (Node *) linitial(clauses), varRelid, jointype, sjinfo, - use_extended_stats); + use_extended_stats, + include_derived); /* * Determine if these clauses reference a single relation. If so, and if @@ -183,7 +185,7 @@ clauselist_selectivity_ext(PlannerInfo *root, /* Compute the selectivity of this clause in isolation */ s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo, - use_extended_stats); + use_extended_stats, include_derived); /* * Check for being passed a RestrictInfo. @@ -412,7 +414,9 @@ clauselist_selectivity_or(PlannerInfo *root, continue; s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid, - jointype, sjinfo, use_extended_stats); + jointype, sjinfo, use_extended_stats, + true /* we never push a derived under or clause */ + ); s1 = s1 + s2 - s1 * s2; } @@ -694,7 +698,7 @@ clause_selectivity(PlannerInfo *root, SpecialJoinInfo *sjinfo) { return clause_selectivity_ext(root, clause, varRelid, - jointype, sjinfo, true); + jointype, sjinfo, true, true); } /* @@ -709,7 +713,8 @@ clause_selectivity_ext(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, - bool use_extended_stats) + bool use_extended_stats, + bool include_derived) { Selectivity s1 = 0.5; /* default for any unhandled clause type */ RestrictInfo *rinfo = NULL; @@ -742,6 +747,9 @@ clause_selectivity_ext(PlannerInfo *root, if (rinfo->norm_selec > 1) return (Selectivity) 1.0; + if (rinfo->derived && !include_derived) + return (Selectivity) 1.0; + /* * If possible, cache the result of the selectivity calculation for * the clause. 
We can cache if varRelid is zero or the clause @@ -830,7 +838,8 @@ clause_selectivity_ext(PlannerInfo *root, varRelid, jointype, sjinfo, - use_extended_stats); + use_extended_stats, + include_derived); } else if (is_andclause(clause)) { @@ -840,7 +849,8 @@ clause_selectivity_ext(PlannerInfo *root, varRelid, jointype, sjinfo, - use_extended_stats); + use_extended_stats, + include_derived); } else if (is_orclause(clause)) { @@ -959,7 +969,8 @@ clause_selectivity_ext(PlannerInfo *root, varRelid, jointype, sjinfo, - use_extended_stats); + use_extended_stats, + include_derived); } else if (IsA(clause, CoerceToDomain)) { @@ -969,7 +980,8 @@ clause_selectivity_ext(PlannerInfo *root, varRelid, jointype, sjinfo, - use_extended_stats); + use_extended_stats, + include_derived); } else { diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 8dc7dd4ca26..9e303877af7 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -4928,11 +4928,13 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel) Assert(rel->relid > 0); nrows = rel->tuples * - clauselist_selectivity(root, - rel->baserestrictinfo, - 0, - JOIN_INNER, - NULL); + clauselist_selectivity_ext(root, + rel->baserestrictinfo, + 0, + JOIN_INNER, + NULL, + true, + false /* include_derived */); rel->rows = clamp_row_est(nrows); @@ -4964,11 +4966,14 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel, */ allclauses = list_concat_copy(param_clauses, rel->baserestrictinfo); nrows = rel->tuples * - clauselist_selectivity(root, - allclauses, - rel->relid, /* do not use 0! */ - JOIN_INNER, - NULL); + clauselist_selectivity_ext(root, + allclauses, + rel->relid, /* do not use 0! 
*/ + JOIN_INNER, + NULL, + true, + false /* doesn't include the derived clause */ + ); nrows = clamp_row_est(nrows); /* For safety, make sure result is not more than the base estimate */ if (nrows > rel->rows) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index f9ae2785d60..6ed9e8c9064 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1366,6 +1366,7 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, int strategy; Oid opno; Oid family; + RestrictInfo *rinfo; if (ef->ef_source_rel == relid) continue; @@ -1393,15 +1394,16 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, if (opno == InvalidOid) continue; - process_implied_equality(root, opno, - ec->ec_collation, - leftexpr, - rightexpr, - bms_copy(ec->ec_relids), - bms_copy(cur_em->em_nullable_relids), - ec->ec_min_security, - ec->ec_below_outer_join, - false); + rinfo = process_implied_equality(root, opno, + ec->ec_collation, + leftexpr, + rightexpr, + bms_copy(ec->ec_relids), + bms_copy(cur_em->em_nullable_relids), + ec->ec_min_security, + ec->ec_below_outer_join, + false); + rinfo->derived = ec; } prev_ems[relid] = cur_em; diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index 7e134822f36..8a5863c4da4 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -797,6 +797,7 @@ apply_child_basequals(PlannerInfo *root, RelOptInfo *parentrel, { Node *onecq = (Node *) lfirst(lc2); bool pseudoconstant; + RestrictInfo *child_rinfo; /* check for pseudoconstant (no Vars or volatile functions) */ pseudoconstant = @@ -807,15 +808,19 @@ apply_child_basequals(PlannerInfo *root, RelOptInfo *parentrel, /* tell createplan.c to check for gating quals */ root->hasPseudoConstantQuals = true; } + + child_rinfo = make_restrictinfo(root, + (Expr *) onecq, + rinfo->is_pushed_down, + rinfo->outerjoin_delayed, + pseudoconstant, + 
rinfo->security_level, + NULL, NULL, NULL); + + child_rinfo->derived = rinfo->derived; /* reconstitute RestrictInfo with appropriate properties */ - childquals = lappend(childquals, - make_restrictinfo(root, - (Expr *) onecq, - rinfo->is_pushed_down, - rinfo->outerjoin_delayed, - pseudoconstant, - rinfo->security_level, - NULL, NULL, NULL)); + childquals = lappend(childquals, child_rinfo); + /* track minimum security level among child quals */ cq_min_security = Min(cq_min_security, rinfo->security_level); } @@ -844,13 +849,13 @@ apply_child_basequals(PlannerInfo *root, RelOptInfo *parentrel, foreach(lc2, qualset) { Expr *qual = (Expr *) lfirst(lc2); + RestrictInfo *rinfo = make_restrictinfo(root, qual, + true, false, false, + security_level, + NULL, NULL, NULL); /* not likely that we'd see constants here, so no check */ - childquals = lappend(childquals, - make_restrictinfo(root, qual, - true, false, false, - security_level, - NULL, NULL, NULL)); + childquals = lappend(childquals, rinfo); cq_min_security = Min(cq_min_security, security_level); } security_level++; diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 34326d55619..838446a220d 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -1076,7 +1076,8 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses, } simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid, - jointype, sjinfo, false); + jointype, sjinfo, false, + true /* probably no reasonable */); attr_sel[attidx++] = simple_sel; } diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index ca48395d5c5..38836f58c4e 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1870,7 +1870,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli * columns/clauses. 
*/ simple_sel = clause_selectivity_ext(root, clause, varRelid, - jointype, sjinfo, false); + jointype, sjinfo, false, + true); overlap_simple_sel = simple_or_sel * simple_sel; @@ -1943,7 +1944,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli */ simple_sel = clauselist_selectivity_ext(root, stat_clauses, varRelid, jointype, - sjinfo, false); + sjinfo, false, true); /* * Multi-column estimate using MCV statistics, along with base and diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index e73fef057a4..42368e10b8e 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2165,6 +2165,7 @@ typedef struct RestrictInfo /* hash equality operators used for memoize nodes, else InvalidOid */ Oid left_hasheqoperator; Oid right_hasheqoperator; + EquivalenceClass *derived; } RestrictInfo; /* diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 6b8ee0c69fa..a3385ae51ff 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -68,7 +68,8 @@ extern Selectivity clause_selectivity_ext(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, - bool use_extended_stats); + bool use_extended_stats, + bool include_derived); extern Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, @@ -79,7 +80,8 @@ extern Selectivity clauselist_selectivity_ext(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, - bool use_extended_stats); + bool use_extended_stats, + bool include_derived); /* in path/costsize.c: */ -- 2.21.0