I wrote:
> However, in the case at hand, the complaint basically is why aren't we
> treating the boolean function expression like a boolean variable, and
> looking to see if there are stats available for it, like this other
> bit in clause_selectivity:
> /*
> * A Var at the top of a clause must be a bool Var. This is
> * equivalent to the clause reln.attribute = 't', so we compute
> * the selectivity as if that is what we have.
> */
> s1 = restriction_selectivity(root,
> BooleanEqualOperator,
> list_make2(var,
> makeBoolConst(true,
> false)),
> InvalidOid,
> varRelid);
> Indeed you could argue that this ought to be the fallback behavior for
> *any* unhandled case, not just function expressions. Not sure if we'd
> need to restrict it to single-relation expressions or not.
> The implication of doing it like this would be that the default estimate
> in the absence of any matching stats would be 0.5 (since eqsel defaults
> to 1/ndistinct, and get_variable_numdistinct will report 2.0 for any
> boolean-type expression it has no stats for). That's not a huge change
> from the existing 0.3333333 estimate, which seems pretty unprincipled
> anyway ... but it would probably be enough to annoy people if we did it in
> stable branches. So I'd be inclined to propose changing this in HEAD and
> maybe 9.5, but not further back. (For non-function expressions, 0.5 is
> the default already, so those would not change behavior.)
I experimented with the attached patch. The change in the default
estimate for a function results in just one change in the standard
regression test results, so far as I can find.
Comments, objections?
regards, tom lane
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index dcac1c1..c862b70 100644
*** a/src/backend/optimizer/path/clausesel.c
--- b/src/backend/optimizer/path/clausesel.c
***************
*** 14,20 ****
*/
#include "postgres.h"
- #include "catalog/pg_operator.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
--- 14,19 ----
*************** clause_selectivity(PlannerInfo *root,
*** 568,585 ****
if (var->varlevelsup == 0 &&
(varRelid == 0 || varRelid == (int) var->varno))
{
! /*
! * A Var at the top of a clause must be a bool Var. This is
! * equivalent to the clause reln.attribute = 't', so we compute
! * the selectivity as if that is what we have.
! */
! s1 = restriction_selectivity(root,
! BooleanEqualOperator,
! list_make2(var,
! makeBoolConst(true,
! false)),
! InvalidOid,
! varRelid);
}
}
else if (IsA(clause, Const))
--- 567,574 ----
if (var->varlevelsup == 0 &&
(varRelid == 0 || varRelid == (int) var->varno))
{
! /* Use the restriction selectivity function for a bool Var */
! s1 = boolvarsel(root, (Node *) var, varRelid);
}
}
else if (IsA(clause, Const))
*************** clause_selectivity(PlannerInfo *root,
*** 680,704 ****
if (IsA(clause, DistinctExpr))
s1 = 1.0 - s1;
}
- else if (is_funcclause(clause))
- {
- /*
- * This is not an operator, so we guess at the selectivity. THIS IS A
- * HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE ABLE TO HAVE
- * SELECTIVITIES THEMSELVES. -- JMH 7/9/92
- */
- s1 = (Selectivity) 0.3333333;
- }
- #ifdef NOT_USED
- else if (IsA(clause, SubPlan) ||
- IsA(clause, AlternativeSubPlan))
- {
- /*
- * Just for the moment! FIX ME! - vadim 02/04/98
- */
- s1 = (Selectivity) 0.5;
- }
- #endif
else if (IsA(clause, ScalarArrayOpExpr))
{
/* Use node specific selectivity calculation function */
--- 669,674 ----
*************** clause_selectivity(PlannerInfo *root,
*** 766,771 ****
--- 736,752 ----
jointype,
sjinfo);
}
+ else
+ {
+ /*
+ * For anything else, see if we can consider it as a boolean variable.
+ * This only works if it's an immutable expression in Vars of a single
+ * relation; but there's no point in us checking that here because
+ * boolvarsel() will do it internally, and return a suitable default
+ * selectivity (0.5) if not.
+ */
+ s1 = boolvarsel(root, clause, varRelid);
+ }
/* Cache the result if possible */
if (cacheable)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 72bc502..a643c6f 100644
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
***************
*** 105,110 ****
--- 105,111 ----
#include "access/sysattr.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
+ #include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
*************** icnlikesel(PG_FUNCTION_ARGS)
*** 1440,1445 ****
--- 1441,1479 ----
}
/*
+ * boolvarsel - Selectivity of Boolean variable.
+ *
+ * This can actually be called on any expression involving only Vars of
+ * the specified relation. If there are statistics about the Var or
+ * expression (the latter can happen, if it's indexed) then we'll produce
+ * a real estimate; otherwise we default to 0.5.
+ */
+ Selectivity
+ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
+ {
+ VariableStatData vardata;
+ double selec;
+
+ examine_variable(root, arg, varRelid, &vardata);
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ /*
+ * A boolean variable V is equivalent to the clause V = 't', so we
+ * compute the selectivity as if that is what we have.
+ */
+ selec = var_eq_const(&vardata, BooleanEqualOperator,
+ BoolGetDatum(true), false, true);
+ }
+ else
+ {
+ /* Punt and estimate 0.5 */
+ selec = 0.5;
+ }
+ ReleaseVariableStats(vardata);
+ return selec;
+ }
+
+ /*
* booltestsel - Selectivity of BooleanTest Node.
*/
Selectivity
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index b3d8017..a7433d9 100644
*** a/src/include/utils/selfuncs.h
--- b/src/include/utils/selfuncs.h
*************** extern Datum icregexnejoinsel(PG_FUNCTIO
*** 164,169 ****
--- 164,170 ----
extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
+ extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid);
extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
Node *arg, int varRelid,
JoinType jointype, SpecialJoinInfo *sjinfo);
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index 9d3540f..88dcca3 100644
*** a/src/test/regress/expected/rowsecurity.out
--- b/src/test/regress/expected/rowsecurity.out
*************** EXPLAIN (COSTS OFF) SELECT * FROM docume
*** 186,204 ****
(7 rows)
EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
! QUERY PLAN
! ----------------------------------------------------------------------
Hash Join
! Hash Cond: (category.cid = document.cid)
! -> Seq Scan on category
-> Hash
! -> Subquery Scan on document
! Filter: f_leak(document.dtitle)
! -> Seq Scan on document document_1
! Filter: (dlevel <= $0)
! InitPlan 1 (returns $0)
! -> Index Scan using uaccount_pkey on uaccount
! Index Cond: (pguser = "current_user"())
(11 rows)
-- only owner can change policies
--- 186,204 ----
(7 rows)
EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
! QUERY PLAN
! ----------------------------------------------------------------
Hash Join
! Hash Cond: (document.cid = category.cid)
! -> Subquery Scan on document
! Filter: f_leak(document.dtitle)
! -> Seq Scan on document document_1
! Filter: (dlevel <= $0)
! InitPlan 1 (returns $0)
! -> Index Scan using uaccount_pkey on uaccount
! Index Cond: (pguser = "current_user"())
-> Hash
! -> Seq Scan on category
(11 rows)
-- only owner can change policies