Join removal and attr_needed cleanup - Mailing list pgsql-hackers
From | Antonin Houska |
---|---|
Subject | Join removal and attr_needed cleanup |
Date | |
Msg-id | 63490.1714386843@antos Whole thread Raw |
Responses |
Re: Join removal and attr_needed cleanup
|
List | pgsql-hackers |
The attached patch tries to fix a corner case where attr_needed of an inner relation of an OJ contains the join relid only because another, already-removed OJ, needed some of its attributes. The unnecessary presence of the join relid in attr_needed can prevent the planner from further join removals. Do cases like this seem worth the effort and is the logic I use correct? -- Antonin Houska Web: https://www.cybertec-postgresql.com From 279856bf97ce08c0c2e0c736a00831bf6324713b Mon Sep 17 00:00:00 2001 From: Antonin Houska <ah@cybertec.at> Date: Mon, 29 Apr 2024 11:34:30 +0200 Subject: [PATCH] Cleanup attr_needed properly after join removal. If an outer join (J1) was removed and if the remaining outer relation is also an outer join (J2), the inner relation of J2 may still have the J2's relid in the attr_needed set, because its attribute was referenced by J1. However, after removal of J1, it's possible that no other join (nor the query targetlist) actually needs any attribute of J2, and so the J2's relid should not be in attr_needed anymore. This patch tries to spend more effort on checking the contents of attr_needed after removal of J1 so that J2 can potentially be removed as well. --- src/backend/optimizer/plan/analyzejoins.c | 117 +++++++++++++++++++++- src/test/regress/expected/join.out | 9 ++ src/test/regress/sql/join.sql | 5 + 3 files changed, 130 insertions(+), 1 deletion(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 506fccd20c..1b9623efaa 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -46,6 +46,10 @@ bool enable_self_join_removal; /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool innerrel_attrs_needed_above_outer_join(PlannerInfo *root, + RelOptInfo *innerrel, + SpecialJoinInfo *sjinfo, + Relids joinrelids); static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo); static void remove_rel_from_restrictinfo(RestrictInfo *rinfo, @@ -183,6 +187,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) List *clause_list = NIL; ListCell *l; int attroff; + Oid remove_ojrelid; + bool remove_ojrelid_valid; /* * Must be a left join to a single baserel, else we aren't going to be @@ -218,6 +224,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) joinrelids = bms_copy(inputrelids); joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid); + /* + * By default there's no reason to remove sjinfo->ojrelid from + * attr_needed, see below. + */ + remove_ojrelid = InvalidOid; + remove_ojrelid_valid = false; + /* * We can't remove the join if any inner-rel attributes are used above the * join. Here, "above" the join includes pushed-down conditions, so we @@ -233,7 +246,38 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) attroff >= 0; attroff--) { - if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids)) + Relids attr_needed = innerrel->attr_needed[attroff]; + + /* + * If this join was among outer relations of an already-removed join, + * attr_needed may still contain the relid of the current join because + * join clauses of the removed join referenced it. Re-check if another + * join can still reference this join and if not, remove it from + * attr_needed. + */ + if (bms_is_member(sjinfo->ojrelid, attr_needed)) + { + /* Do the following check only once per inner relation. */ + if (!remove_ojrelid_valid) + { + /* + * If no clause requires the join relid anymore, remember that + * we should remove it from attr_needed. + */ + if (!innerrel_attrs_needed_above_outer_join(root, innerrel, + sjinfo, + joinrelids)) + remove_ojrelid = sjinfo->ojrelid; + + remove_ojrelid_valid = true; + } + + if (OidIsValid(remove_ojrelid)) + innerrel->attr_needed[attroff] = bms_del_member(attr_needed, + remove_ojrelid); + } + + if (!bms_is_subset(attr_needed, inputrelids)) return false; } @@ -333,6 +377,77 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; } +/* + * innerrel_attrs_needed_above_outer_join + * Re-check whether attributes of inner relation of an OJ are still needed + * above that OJ. + */ +static bool +innerrel_attrs_needed_above_outer_join(PlannerInfo *root, RelOptInfo *innerrel, + SpecialJoinInfo *sjinfo, + Relids joinrelids) +{ + bool found = false; + ListCell *l; + + /* Check the join clauses */ + foreach(l, innerrel->joininfo) + { + RestrictInfo *ri = lfirst_node(RestrictInfo, l); + + if (bms_is_member(sjinfo->ojrelid, ri->required_relids)) + return true; + } + + /* + * Also check if any clause generated from EC may need the ojrelid. + */ + foreach(l, root->eq_classes) + { + EquivalenceClass *ec = lfirst_node(EquivalenceClass, l); + + /* + * Ignore ECs which cannot generate join clauses for this join. + */ + if (!bms_is_member(sjinfo->ojrelid, ec->ec_relids)) + continue; + + if (bms_is_subset(ec->ec_relids, joinrelids)) + { + ListCell *l2; + + /* + * Even if the EC does not exceed joinrelids, it might have been + * created from a pushed-down clause. In such a case, the + * attribute may be needed above the join, so we cannot remove the + * ojrelid from attr_needed. + */ + foreach(l2, ec->ec_sources) + { + RestrictInfo *ri = lfirst_node(RestrictInfo, l2); + + if (RINFO_IS_PUSHED_DOWN(ri, joinrelids)) + { + found = true; + break; + } + } + if (found) + break; + } + else + { + /* + * The EC can generate a join clause between this join and + * some other relation. + */ + found = true; + break; + } + } + + return found; +} /* * Remove the target rel->relid and references to the target join from the diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 8b640c2fc2..82a905bcdd 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5483,6 +5483,15 @@ select 1 from a t1 -> Seq Scan on a t3 (7 rows) +explain (costs off) +select a.id from a +left join b on a.id = b.id +left join c on c.id = a.id and c.id = b.id; + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + -- another example (bug #17781) explain (costs off) select ss1.f1 diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index c4c6c7b8ba..8e81e09ec6 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1967,6 +1967,11 @@ select 1 from a t1 inner join a t3 on true left join a t4 on t2.id = t4.id and t2.id = t3.id; +explain (costs off) +select a.id from a +left join b on a.id = b.id +left join c on c.id = a.id and c.id = b.id; + -- another example (bug #17781) explain (costs off) select ss1.f1 -- 2.44.0
pgsql-hackers by date: