From dd8b4392085d27104f159bbb9df13070a28c6d8d Mon Sep 17 00:00:00 2001 From: David Rowley Date: Thu, 18 May 2017 20:10:31 +1200 Subject: [PATCH 2/2] Apply nullfrac during foreign key join estimations --- src/backend/optimizer/path/costsize.c | 33 ++++++++++++++++++++++++++++++--- src/backend/utils/cache/lsyscache.c | 24 ++++++++++++++++++++++++ src/include/utils/lsyscache.h | 1 + 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d7bdd41..8e14614 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -4317,11 +4317,14 @@ get_foreign_key_join_selectivity(PlannerInfo *root, { ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); RelOptInfo *ref_rel; + RangeTblEntry *con_rte; bool ref_is_outer; List *removedlist; ListCell *cell; ListCell *prev; ListCell *next; + Selectivity nullfrac; + int i; /* * This FK is not relevant unless it connects a baserel on one side of @@ -4351,7 +4354,6 @@ get_foreign_key_join_selectivity(PlannerInfo *root, { RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); bool remove_it = false; - int i; next = lnext(cell); /* Drop this clause if it matches any column of the FK */ @@ -4450,6 +4452,30 @@ get_foreign_key_join_selectivity(PlannerInfo *root, */ ref_rel = find_base_rel(root, fkinfo->ref_relid); + con_rte = planner_rt_fetch(fkinfo->con_relid, root); + + /* + * Any rows on the referencing side of the foreign key which have + * NULL values in the foreign key columns won't be joined to. Here + * we'll estimate how many that's likely to be. Note that we take the + * maximum nullfrac out of each column in the foreign key. 
It's not + * certain that this is correct, as, in the situation where a foreign + * key is made up of say, two columns, one column may have 50% of rows + * with a NULL value and the other column may have the other 50% of + * rows with a NULL value, which would mean no rows would be joined + * to, but here we'd estimate 50% of rows would be joined to, which + * is of course, wrong, but it seems that using the maximum nullfrac + * column has a higher chance of being correct. We may want to + * revisit this and use the average of the sum (capped at 1.0) and + * the max. + */ + nullfrac = 0.0; + for (i = 0; i < fkinfo->nkeys; i++) + { + float4 attnullfrac = get_attnullfrac(con_rte->relid, + fkinfo->conkey[i]); + nullfrac = Max(nullfrac, attnullfrac); + } if (!ref_is_outer && (jointype == JOIN_SEMI || jointype == JOIN_ANTI)) { @@ -4466,7 +4492,8 @@ get_foreign_key_join_selectivity(PlannerInfo *root, * we'd better just handle this case properly in case it does * arise. */ - fkselec *= ref_rel->rows / Max(ref_rel->tuples, 1.0); + fkselec *= ref_rel->rows * (1.0 - nullfrac) / + Max(ref_rel->tuples, 1.0); } else { @@ -4475,7 +4502,7 @@ get_foreign_key_join_selectivity(PlannerInfo *root, * guard against tuples == 0. Note we should use the raw table * tuple count, not any estimate of its filtered or joined size. */ - fkselec *= 1.0 / Max(ref_rel->tuples, 1.0); + fkselec *= (1.0 - nullfrac) / Max(ref_rel->tuples, 1.0); } } diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 4def73d..5f98b70 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -2812,6 +2812,30 @@ type_is_collatable(Oid typid) /* ---------- STATISTICS CACHE ---------- */ /* + * get_attnullfrac + * + * Given the table and attribute number of a column, get the nullfrac. + * Return 0.0 if no data is available. 
+ */ +float4 +get_attnullfrac(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + float4 nullfrac = 0.0; + + tp = SearchSysCache3(STATRELATTINH, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum), + BoolGetDatum(false)); + if (HeapTupleIsValid(tp)) + { + nullfrac = ((Form_pg_statistic) GETSTRUCT(tp))->stanullfrac; + ReleaseSysCache(tp); + } + return nullfrac; +} + +/* * get_attavgwidth * * Given the table and attribute number of a column, get the average diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 93588df..31b2cb0 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -169,6 +169,7 @@ extern bool type_is_collatable(Oid typid); extern Oid getBaseType(Oid typid); extern Oid getBaseTypeAndTypmod(Oid typid, int32 *typmod); extern int32 get_typavgwidth(Oid typid, int32 typmod); +extern float4 get_attnullfrac(Oid relid, AttrNumber attnum); extern int32 get_attavgwidth(Oid relid, AttrNumber attnum); extern bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags); -- 1.9.5.msysgit.1