From 28543dda9febe8d8b5fc91060a4323c08f3c4a8a Mon Sep 17 00:00:00 2001 From: Marti Raudsepp Date: Wed, 1 Oct 2014 02:17:21 +0300 Subject: [PATCH] Simplify EXISTS subqueries containing LIMIT --- src/backend/optimizer/plan/subselect.c | 42 ++++++++++++++++++++++----- src/test/regress/expected/subselect.out | 51 +++++++++++++++++++++++++++++++++ src/test/regress/sql/subselect.sql | 14 +++++++++ 3 files changed, 100 insertions(+), 7 deletions(-) diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 3e7dc85..66d1b90 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -70,7 +70,7 @@ static Node *convert_testexpr_mutator(Node *node, static bool subplan_is_hashable(Plan *plan); static bool testexpr_is_hashable(Node *testexpr); static bool hash_ok_operator(OpExpr *expr); -static bool simplify_EXISTS_query(Query *query); +static bool simplify_EXISTS_query(PlannerInfo *root, Query *query); static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, Node **testexpr, List **paramIds); static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root); @@ -452,7 +452,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, * If it's an EXISTS subplan, we might be able to simplify it. */ if (subLinkType == EXISTS_SUBLINK) - simple_exists = simplify_EXISTS_query(subquery); + simple_exists = simplify_EXISTS_query(root, subquery); /* * For an EXISTS subplan, tell lower-level planner to expect that only the @@ -518,7 +518,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, /* Make a second copy of the original subquery */ subquery = (Query *) copyObject(orig_subquery); /* and re-simplify */ - simple_exists = simplify_EXISTS_query(subquery); + simple_exists = simplify_EXISTS_query(root, subquery); Assert(simple_exists); /* See if it can be converted to an ANY query */ subquery = convert_EXISTS_to_ANY(root, subquery, @@ -1359,7 +1359,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * targetlist, we have to fail, because the pullup operation leaves us * with noplace to evaluate the targetlist. */ - if (!simplify_EXISTS_query(subselect)) + if (!simplify_EXISTS_query(root, subselect)) return NULL; /* @@ -1486,11 +1486,11 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * Returns TRUE if was able to discard the targetlist, else FALSE. */ static bool -simplify_EXISTS_query(Query *query) +simplify_EXISTS_query(PlannerInfo *root, Query *query) { /* * We don't try to simplify at all if the query uses set operations, - * aggregates, modifying CTEs, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; + * aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE; * none of these seem likely in normal usage and their possible effects * are complex. */ @@ -1501,7 +1501,6 @@ simplify_EXISTS_query(Query *query) query->hasModifyingCTE || query->havingQual || query->limitOffset || - query->limitCount || query->rowMarks) return false; @@ -1513,6 +1512,35 @@ simplify_EXISTS_query(Query *query) return false; /* + * A subquery that has a LIMIT clause with a positive value or NULL causes + * no behavioral change to the query. With EXISTS we only care about the + * first row anyway, so we'll simply remove the LIMIT clause. If the LIMIT + * value does not reduce to a constant that's positive or NULL then do not + * touch it. + */ + if (query->limitCount) + { + /* + * eval_const_expressions has not been called yet by subquery_planner, + * may still contain int64 coercions etc. + */ + Node *node = eval_const_expressions(root, query->limitCount); + Const *limit; + + if (! IsA(node, Const)) + return false; + + limit = (Const *) node; + Assert(limit->consttype == INT8OID); + + /* If it's not NULL and not positive, keep it as a regular subquery */ + if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0) + return false; + + query->limitCount = NULL; + } + + /* * Otherwise, we can throw away the targetlist, as well as any GROUP, * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will * change a nonzero-rows result to zero rows or vice versa. (Furthermore, diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 01c9130..e3b871b 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -775,6 +775,57 @@ select * from int4_tbl o where (f1, f1) in (1 row) -- +-- Check EXISTS simplification with LIMIT +-- +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit null); + QUERY PLAN +------------------------------------ + Hash Semi Join + Hash Cond: (o.f1 = i.f1) + -> Seq Scan on int4_tbl o + -> Hash + -> Seq Scan on int4_tbl i +(5 rows) + +explain (costs off) +select * from int4_tbl o where not exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 1); + QUERY PLAN +------------------------------------ + Hash Anti Join + Hash Cond: (o.f1 = i.f1) + -> Seq Scan on int4_tbl o + -> Hash + -> Seq Scan on int4_tbl i +(5 rows) + +explain (costs off) +select * from int4_tbl o where exists (select 1 limit -1); + QUERY PLAN +------------------------------ + Result + One-Time Filter: $0 + InitPlan 1 (returns $0) + -> Limit + -> Result + -> Seq Scan on int4_tbl o +(6 rows) + +explain (costs off) +select * from int4_tbl o where exists (select 1 limit 0); + QUERY PLAN +------------------------------ + Result + One-Time Filter: $0 + InitPlan 1 (returns $0) + -> Limit + -> Result + -> Seq Scan on int4_tbl o +(6 rows) + +-- -- check for over-optimization of whole-row Var referencing an Append plan -- select (select q from diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 56707e2..3f04137 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -433,6 +433,20 @@ select * from int4_tbl o where (f1, f1) in (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); -- +-- Check EXISTS simplification with LIMIT +-- +explain (costs off) +select * from int4_tbl o where exists + (select 1 from int4_tbl i where i.f1=o.f1 limit null); +explain (costs off) +select * from int4_tbl o where not exists + (select 1 from int4_tbl i where i.f1=o.f1 limit 1); +explain (costs off) +select * from int4_tbl o where exists (select 1 limit -1); +explain (costs off) +select * from int4_tbl o where exists (select 1 limit 0); + +-- -- check for over-optimization of whole-row Var referencing an Append plan -- select (select q from -- 2.1.2