Re: BUG #18953: Planner fails to build plan for complex query with LATERAL references - Mailing list pgsql-bugs

From Tom Lane
Subject Re: BUG #18953: Planner fails to build plan for complex query with LATERAL references
Date
Msg-id 1499514.1749848324@sss.pgh.pa.us
Whole thread Raw
In response to Re: BUG #18953: Planner fails to build plan for complex query with LATERAL references  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-bugs
I wrote:
> My thought is that have_dangerous_phv() was never more than a
> quick-n-dirty kludge, and what we really ought to do is remove it.

Here's a WIP patch along that line.  It's unfinished in that, for
testing purposes, I just lobotomized have_dangerous_phv() to return
constant false rather than taking it out entirely.  But of course
we'd want to clean up all the dead code if we go this way.

I have mixed feelings about whether to back-patch or just make
this change in HEAD.  While we're clearly fixing a bug here,
the bug's been there for 10 years, so the lack of field reports
suggests strongly that this is not something ordinary users write.
Two arguments against back-patching are that
(1) the odds of introducing a new bug aren't zero;
(2) removing the have_dangerous_phv() restriction will change
some plan choices, which we generally dislike doing in stable
branches.

I'm still comfortable with shoving this into v18, but maybe we
should leave the back branches alone.

            regards, tom lane

diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 60d65762b5d..d41702cd398 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -1308,22 +1308,6 @@ bool
 have_dangerous_phv(PlannerInfo *root,
                    Relids outer_relids, Relids inner_params)
 {
-    ListCell   *lc;
-
-    foreach(lc, root->placeholder_list)
-    {
-        PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
-
-        if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
-            continue;            /* ignore, could not be a nestloop param */
-        if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
-            continue;            /* ignore, not relevant to this join */
-        if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
-            continue;            /* safe, it can be eval'd within outerrel */
-        /* Otherwise, it's potentially unsafe, so reject the join */
-        return true;
-    }
-
     /* OK to perform the join */
     return false;
 }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 4ad30b7627e..8baf36ba4b7 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -4348,9 +4348,11 @@ create_nestloop_plan(PlannerInfo *root,
     List       *joinrestrictclauses = best_path->jpath.joinrestrictinfo;
     List       *joinclauses;
     List       *otherclauses;
-    Relids        outerrelids;
     List       *nestParams;
+    List       *outer_tlist;
+    bool        outer_parallel_safe;
     Relids        saveOuterRels = root->curOuterRels;
+    ListCell   *lc;

     /*
      * If the inner path is parameterized by the topmost parent of the outer
@@ -4412,9 +4414,47 @@ create_nestloop_plan(PlannerInfo *root,
      * Identify any nestloop parameters that should be supplied by this join
      * node, and remove them from root->curOuterParams.
      */
-    outerrelids = best_path->jpath.outerjoinpath->parent->relids;
-    nestParams = identify_current_nestloop_params(root, outerrelids);
+    nestParams = identify_current_nestloop_params(root,
+                                                  best_path->jpath.outerjoinpath);
+
+    /*
+     * While nestloop parameters that are Vars had better be available from
+     * the outer_plan already, there are edge cases where nestloop parameters
+     * that are PHVs won't be.  In such cases we must add them to the
+     * outer_plan's tlist, since the executor's NestLoopParam machinery
+     * requires the params to be simple outer-Var references to that tlist.
+     */
+    outer_tlist = outer_plan->targetlist;
+    outer_parallel_safe = outer_plan->parallel_safe;
+    foreach(lc, nestParams)
+    {
+        NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
+        TargetEntry *tle;
+
+        if (IsA(nlp->paramval, Var))
+            continue;            /* nothing to do for simple Vars */
+        if (tlist_member((Expr *) nlp->paramval, outer_tlist))
+            continue;            /* already available */
+
+        /* Make a shallow copy of outer_tlist, if we didn't already */
+        if (outer_tlist == outer_plan->targetlist)
+            outer_tlist = list_copy(outer_tlist);
+        /* ... and add the needed expression */
+        tle = makeTargetEntry((Expr *) copyObject(nlp->paramval),
+                              list_length(outer_tlist) + 1,
+                              NULL,
+                              true);
+        outer_tlist = lappend(outer_tlist, tle);
+        /* ... and track whether tlist is (still) parallel-safe */
+        if (outer_parallel_safe)
+            outer_parallel_safe = is_parallel_safe(root,
+                                                   (Node *) nlp->paramval);
+    }
+    if (outer_tlist != outer_plan->targetlist)
+        outer_plan = change_plan_targetlist(outer_plan, outer_tlist,
+                                            outer_parallel_safe);

+    /* And finally, we can build the join plan node */
     join_plan = make_nestloop(tlist,
                               joinclauses,
                               otherclauses,
diff --git a/src/backend/optimizer/util/paramassign.c b/src/backend/optimizer/util/paramassign.c
index 3bd3ce37c8f..9836abf9479 100644
--- a/src/backend/optimizer/util/paramassign.c
+++ b/src/backend/optimizer/util/paramassign.c
@@ -600,7 +600,7 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params)

 /*
  * Identify any NestLoopParams that should be supplied by a NestLoop plan
- * node with the specified lefthand rels.  Remove them from the active
+ * node with the specified lefthand input path.  Remove them from the active
  * root->curOuterParams list and return them as the result list.
  *
  * XXX Here we also hack up the returned Vars and PHVs so that they do not
@@ -626,11 +626,26 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params)
  * subquery, which'd be unduly expensive.
  */
 List *
-identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
+identify_current_nestloop_params(PlannerInfo *root, Path *leftpath)
 {
     List       *result;
+    Relids        leftrelids = leftpath->parent->relids;
+    Relids        outerrelids = PATH_REQ_OUTER(leftpath);
+    Relids        allleftrelids;
     ListCell   *cell;

+    /*
+     * We'll be able to evaluate a PHV in the lefthand path if it uses the
+     * lefthand rels plus any available required-outer rels.  But don't do so
+     * if it uses *only* required-outer rels; in that case it should be
+     * evaluated higher in the tree.  For Vars, no such hair-splitting is
+     * necessary since they depend on only one relid.
+     */
+    if (outerrelids)
+        allleftrelids = bms_union(leftrelids, outerrelids);
+    else
+        allleftrelids = leftrelids;
+
     result = NIL;
     foreach(cell, root->curOuterParams)
     {
@@ -653,18 +668,20 @@ identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
                                                 leftrelids);
             result = lappend(result, nlp);
         }
-        else if (IsA(nlp->paramval, PlaceHolderVar) &&
-                 bms_is_subset(find_placeholder_info(root,
-                                                     (PlaceHolderVar *) nlp->paramval)->ph_eval_at,
-                               leftrelids))
+        else if (IsA(nlp->paramval, PlaceHolderVar))
         {
             PlaceHolderVar *phv = (PlaceHolderVar *) nlp->paramval;
+            Relids        eval_at = find_placeholder_info(root, phv)->ph_eval_at;

-            root->curOuterParams = foreach_delete_current(root->curOuterParams,
-                                                          cell);
-            phv->phnullingrels = bms_intersect(phv->phnullingrels,
-                                               leftrelids);
-            result = lappend(result, nlp);
+            if (bms_is_subset(eval_at, allleftrelids) &&
+                bms_overlap(eval_at, leftrelids))
+            {
+                root->curOuterParams = foreach_delete_current(root->curOuterParams,
+                                                              cell);
+                phv->phnullingrels = bms_intersect(phv->phnullingrels,
+                                                   leftrelids);
+                result = lappend(result, nlp);
+            }
         }
     }
     return result;
diff --git a/src/include/optimizer/paramassign.h b/src/include/optimizer/paramassign.h
index 59dcb1ff053..d30d20de299 100644
--- a/src/include/optimizer/paramassign.h
+++ b/src/include/optimizer/paramassign.h
@@ -30,7 +30,7 @@ extern Param *replace_nestloop_param_placeholdervar(PlannerInfo *root,
 extern void process_subquery_nestloop_params(PlannerInfo *root,
                                              List *subplan_params);
 extern List *identify_current_nestloop_params(PlannerInfo *root,
-                                              Relids leftrelids);
+                                              Path *leftpath);
 extern Param *generate_new_exec_param(PlannerInfo *root, Oid paramtype,
                                       int32 paramtypmod, Oid paramcollation);
 extern int    assign_special_exec_param(PlannerInfo *root);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index f35a0b18c37..dfff24edf03 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4035,6 +4035,37 @@ select * from
  1 | 2 | 2
 (1 row)

+-- This example demonstrates the folly of our old "have_dangerous_phv" logic
+begin;
+set local from_collapse_limit to 2;
+explain (verbose, costs off)
+select * from int8_tbl t1
+  left join
+    (select coalesce(t2.q1 + x, 0) from int8_tbl t2,
+       lateral (select t3.q1 as x from int8_tbl t3,
+                  lateral (select t2.q1, t3.q1 offset 0) s))
+  on true;
+                            QUERY PLAN
+------------------------------------------------------------------
+ Nested Loop Left Join
+   Output: t1.q1, t1.q2, (COALESCE((t2.q1 + t3.q1), '0'::bigint))
+   ->  Seq Scan on public.int8_tbl t1
+         Output: t1.q1, t1.q2
+   ->  Materialize
+         Output: (COALESCE((t2.q1 + t3.q1), '0'::bigint))
+         ->  Nested Loop
+               Output: COALESCE((t2.q1 + t3.q1), '0'::bigint)
+               ->  Seq Scan on public.int8_tbl t2
+                     Output: t2.q1, t2.q2
+               ->  Nested Loop
+                     Output: t3.q1
+                     ->  Seq Scan on public.int8_tbl t3
+                           Output: t3.q1, t3.q2
+                     ->  Result
+                           Output: NULL::bigint, NULL::bigint
+(16 rows)
+
+rollback;
 -- Test proper handling of appendrel PHVs during useless-RTE removal
 explain (costs off)
 select * from
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index cc5128add4d..ebed8bdd152 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1332,6 +1332,18 @@ select * from
   (select 1 as x) ss1 left join (select 2 as y) ss2 on (true),
   lateral (select ss2.y as z limit 1) ss3;

+-- This example demonstrates the folly of our old "have_dangerous_phv" logic
+begin;
+set local from_collapse_limit to 2;
+explain (verbose, costs off)
+select * from int8_tbl t1
+  left join
+    (select coalesce(t2.q1 + x, 0) from int8_tbl t2,
+       lateral (select t3.q1 as x from int8_tbl t3,
+                  lateral (select t2.q1, t3.q1 offset 0) s))
+  on true;
+rollback;
+
 -- Test proper handling of appendrel PHVs during useless-RTE removal
 explain (costs off)
 select * from

pgsql-bugs by date:

Previous
From: "David G. Johnston"
Date:
Subject: Re: BUG #18958: "pg_ctl start" allows subsequent CTRL-C key in cmd.exe to unexpectedly terminate cluster on Windows
Next
From: Lowell Hought
Date:
Subject: Re: BUG #18950: pgsql function that worked in Postgresql 16 does not return in Postgresql 17