Join removal and attr_needed cleanup - Mailing list pgsql-hackers

From Antonin Houska
Subject Join removal and attr_needed cleanup
Date
Msg-id 63490.1714386843@antos
Whole thread Raw
List pgsql-hackers
The attached patch tries to fix a corner case where attr_needed of an inner
relation of an OJ contains the join relid only because another,
already-removed OJ, needed some of its attributes. The unnecessary presence of
the join relid in attr_needed can prevent the planner from further join
removals.

Do cases like this seem worth the effort and is the logic I use correct?

-- 
Antonin Houska
Web: https://www.cybertec-postgresql.com

From 279856bf97ce08c0c2e0c736a00831bf6324713b Mon Sep 17 00:00:00 2001
From: Antonin Houska <ah@cybertec.at>
Date: Mon, 29 Apr 2024 11:34:30 +0200
Subject: [PATCH] Cleanup attr_needed properly after join removal.

If an outer join (J1) was removed and if the remaining outer relation is also
an outer join (J2), the inner relation of J2 may still have the J2's relid in
the attr_needed set, because its attribute was referenced by J1. However,
after removal of J1, it's possible that no other join (nor the query
targetlist) actually needs any attribute of J2, and so the J2's relid should
not be in attr_needed anymore.

This patch tries to spend more effort on checking the contents of attr_needed
after removal of J1 so that J2 can potentially be removed as well.
---
 src/backend/optimizer/plan/analyzejoins.c | 117 +++++++++++++++++++++-
 src/test/regress/expected/join.out        |   9 ++
 src/test/regress/sql/join.sql             |   5 +
 3 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 506fccd20c..1b9623efaa 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -46,6 +46,10 @@ bool        enable_self_join_removal;
 /* local functions */
 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
 
+static bool innerrel_attrs_needed_above_outer_join(PlannerInfo *root,
+                                                   RelOptInfo *innerrel,
+                                                   SpecialJoinInfo *sjinfo,
+                                                   Relids joinrelids);
 static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
                                           SpecialJoinInfo *sjinfo);
 static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
@@ -183,6 +187,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
     List       *clause_list = NIL;
     ListCell   *l;
     int            attroff;
+    Oid            remove_ojrelid;
+    bool        remove_ojrelid_valid;
 
     /*
      * Must be a left join to a single baserel, else we aren't going to be
@@ -218,6 +224,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
     joinrelids = bms_copy(inputrelids);
     joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
 
+    /*
+     * By default there's no reason to remove sjinfo->ojrelid from
+     * attr_needed, see below.
+     */
+    remove_ojrelid = InvalidOid;
+    remove_ojrelid_valid = false;
+
     /*
      * We can't remove the join if any inner-rel attributes are used above the
      * join.  Here, "above" the join includes pushed-down conditions, so we
@@ -233,7 +246,38 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
          attroff >= 0;
          attroff--)
     {
-        if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
+        Relids    attr_needed = innerrel->attr_needed[attroff];
+
+        /*
+         * If this join was among outer relations of an already-removed join,
+         * attr_needed may still contain the relid of the current join because
+         * join clauses of the removed join referenced it. Re-check if another
+         * join can still reference this join and if not, remove it from
+         * attr_needed.
+         */
+        if (bms_is_member(sjinfo->ojrelid, attr_needed))
+        {
+            /* Do the following check only once per inner relation. */
+            if (!remove_ojrelid_valid)
+            {
+                /*
+                 * If no clause requires the join relid anymore, remember that
+                 * we should remove it from attr_needed.
+                 */
+                if (!innerrel_attrs_needed_above_outer_join(root, innerrel,
+                                                            sjinfo,
+                                                            joinrelids))
+                    remove_ojrelid = sjinfo->ojrelid;
+
+                remove_ojrelid_valid = true;
+            }
+
+            if (OidIsValid(remove_ojrelid))
+                innerrel->attr_needed[attroff] = bms_del_member(attr_needed,
+                                                                remove_ojrelid);
+        }
+
+        if (!bms_is_subset(attr_needed, inputrelids))
             return false;
     }
 
@@ -333,6 +377,77 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
     return false;
 }
 
+/*
+ * innerrel_attrs_needed_above_outer_join
+ *      Re-check whether attributes of inner relation of an OJ are still needed
+ *      above that OJ.
+ */
+static bool
+innerrel_attrs_needed_above_outer_join(PlannerInfo *root, RelOptInfo *innerrel,
+                                       SpecialJoinInfo *sjinfo,
+                                       Relids joinrelids)
+{
+    bool    found = false;
+    ListCell    *l;
+
+    /* Check the join clauses */
+    foreach(l, innerrel->joininfo)
+    {
+        RestrictInfo *ri = lfirst_node(RestrictInfo, l);
+
+        if (bms_is_member(sjinfo->ojrelid, ri->required_relids))
+            return true;
+    }
+
+    /*
+     * Also check if any clause generated from EC may need the ojrelid.
+     */
+    foreach(l, root->eq_classes)
+    {
+        EquivalenceClass    *ec = lfirst_node(EquivalenceClass, l);
+
+        /*
+         * Ignore ECs which cannot generate join clauses for this join.
+         */
+        if (!bms_is_member(sjinfo->ojrelid, ec->ec_relids))
+            continue;
+
+        if (bms_is_subset(ec->ec_relids, joinrelids))
+        {
+            ListCell    *l2;
+
+            /*
+             * Even if the EC does not exceed joinrelids, it might have been
+             * created from a pushed-down clause. In such a case, the
+             * attribute may be needed above the join, so we cannot remove the
+             * ojrelid from attr_needed.
+             */
+            foreach(l2, ec->ec_sources)
+            {
+                RestrictInfo    *ri = lfirst_node(RestrictInfo, l2);
+
+                if (RINFO_IS_PUSHED_DOWN(ri, joinrelids))
+                {
+                    found = true;
+                    break;
+                }
+            }
+            if (found)
+                break;
+        }
+        else
+        {
+            /*
+             * The EC can generate a join clause between this join and
+             * some other relation.
+             */
+            found = true;
+            break;
+        }
+    }
+
+    return found;
+}
 
 /*
  * Remove the target rel->relid and references to the target join from the
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 8b640c2fc2..82a905bcdd 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5483,6 +5483,15 @@ select 1 from a t1
          ->  Seq Scan on a t3
 (7 rows)
 
+explain (costs off)
+select a.id from a
+left join b on a.id = b.id
+left join c on c.id = a.id and c.id = b.id;
+  QUERY PLAN   
+---------------
+ Seq Scan on a
+(1 row)
+
 -- another example (bug #17781)
 explain (costs off)
 select ss1.f1
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c4c6c7b8ba..8e81e09ec6 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1967,6 +1967,11 @@ select 1 from a t1
    inner join a t3 on true
     left join a t4 on t2.id = t4.id and t2.id = t3.id;
 
+explain (costs off)
+select a.id from a
+left join b on a.id = b.id
+left join c on c.id = a.id and c.id = b.id;
+
 -- another example (bug #17781)
 explain (costs off)
 select ss1.f1
-- 
2.44.0


pgsql-hackers by date:

Previous
From: Amit Kapila
Date:
Subject: Re: speed up a logical replica setup
Next
From: Amit Kapila
Date:
Subject: Re: speed up a logical replica setup