Re: d25ea01275 and partitionwise join - Mailing list pgsql-hackers

From Tom Lane
Subject Re: d25ea01275 and partitionwise join
Date
Msg-id 31424.1586194866@sss.pgh.pa.us
Whole thread Raw
In response to Re: d25ea01275 and partitionwise join  (Amit Langote <amitlangote09@gmail.com>)
Responses Re: d25ea01275 and partitionwise join  (Amit Langote <amitlangote09@gmail.com>)
List pgsql-hackers
Amit Langote <amitlangote09@gmail.com> writes:
> which does succeed in using partitionwise join.  Please see attached
> delta that applies on your v7 if that is what you'd rather have.

I figured these queries were cheap enough that we could afford to run
both.  With that and some revision of the comments (per attached),
I was feeling like we were ready to go.  However, re-reading the thread,
one of Richard's comments struck me as still relevant.  If you try, say,

create table p (k int, val int) partition by range(k);
create table p_1 partition of p for values from (1) to (10);
create table p_2 partition of p for values from (10) to (100);

set enable_partitionwise_join = 1;

explain
select * from (p as t1 full join p as t2 on t1.k = t2.k) as t12(k1,val1,k2,val2)
  full join p as t3 on COALESCE(t12.k1, t12.k2) = t3.k;

this patch will give you a partitioned join, with a different plan
than you get without enable_partitionwise_join.  This is scary,
because it's not immediately obvious that the transformation is
correct.

I *think* that it might be all right, because although what we
are matching to is a user-written COALESCE() not an actual
FULL JOIN USING column, it has to behave in somewhat the same
way.  In particular, by construction it must be a coalesce of
some representation of the matching partition columns of the
full join's inputs.  So, even though it might go to null in
different cases than an actual USING variable would do, it
does not break the ability to partition the join.

However, I have not spent a whole lot of time thinking about
partitionwise joins, so rather than go ahead and commit I am
going to toss that point back out for community consideration.
At the very least, what I'd written in the comment needs a
lot more defense than it has now.

Thoughts?

            regards, tom lane

diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index af1fb48..d190b4b 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -17,6 +17,7 @@
 #include <limits.h>

 #include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/appendinfo.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
@@ -1890,7 +1891,8 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
                                 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
                                 JoinType jointype)
 {
-    int            partnatts = joinrel->part_scheme->partnatts;
+    PartitionScheme part_scheme = joinrel->part_scheme;
+    int            partnatts = part_scheme->partnatts;

     joinrel->partexprs = (List **) palloc0(sizeof(List *) * partnatts);
     joinrel->nullable_partexprs =
@@ -1899,7 +1901,8 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
     /*
      * The joinrel's partition expressions are the same as those of the input
      * rels, but we must properly classify them as nullable or not in the
-     * joinrel's output.
+     * joinrel's output.  (Also, we add some more partition expressions if
+     * it's a FULL JOIN.)
      */
     for (int cnt = 0; cnt < partnatts; cnt++)
     {
@@ -1910,6 +1913,7 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
         const List *inner_null_expr = inner_rel->nullable_partexprs[cnt];
         List       *partexpr = NIL;
         List       *nullable_partexpr = NIL;
+        ListCell   *lc;

         switch (jointype)
         {
@@ -1969,6 +1973,38 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
                                                 outer_null_expr);
                 nullable_partexpr = list_concat(nullable_partexpr,
                                                 inner_null_expr);
+
+                /*
+                 * Also add CoalesceExprs corresponding to each possible
+                 * full-join output variable (that is, left side coalesced to
+                 * right side), so that we can match equijoin expressions
+                 * using those variables.  We really only need these for
+                 * columns merged by JOIN USING, and only with the pairs of
+                 * input items that correspond to the data structures that
+                 * parse analysis would build for such variables.  But it's
+                 * hard to tell which those are, so just make all the pairs.
+                 * Extra items in the nullable_partexprs list won't cause big
+                 * problems.  We assume no type coercions are needed to make
+                 * the coalesce expressions, since columns of different types
+                 * won't have gotten classified as the same PartitionScheme.
+                 */
+                foreach(lc, list_concat_copy(outer_expr, outer_null_expr))
+                {
+                    Node       *larg = (Node *) lfirst(lc);
+                    ListCell   *lc2;
+
+                    foreach(lc2, list_concat_copy(inner_expr, inner_null_expr))
+                    {
+                        Node       *rarg = (Node *) lfirst(lc2);
+                        CoalesceExpr *c = makeNode(CoalesceExpr);
+
+                        c->coalescetype = exprType(larg);
+                        c->coalescecollid = exprCollation(larg);
+                        c->args = list_make2(larg, rarg);
+                        c->location = -1;
+                        nullable_partexpr = lappend(nullable_partexpr, c);
+                    }
+                }
                 break;

             default:
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 469c686..39c7b2f 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -613,6 +613,9 @@ typedef struct PartitionSchemeData *PartitionScheme;
  * that expression goes in the partexprs[i] list if the base relation
  * is not nullable by this join or any lower outer join, or in the
  * nullable_partexprs[i] list if the base relation is nullable.
+ * Furthermore, FULL JOINs add extra nullable_partexprs expressions
+ * corresponding to COALESCE expressions of the left and right join columns,
+ * to simplify matching join clauses to those lists.
  *----------
  */
 typedef enum RelOptKind
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b3fbe47..a35e8e3 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -750,6 +750,116 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
  550 | 0550 |     |      |     1100 | 0
 (12 rows)

+--
+-- 3-way full join
+--
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b)
+  WHERE a BETWEEN 490 AND 510;
+                                                               QUERY PLAN
                  

+-----------------------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+   ->  Append
+         ->  Hash Full Join
+               Hash Cond: ((COALESCE(prt1_1.a, p2_1.a) = p3_1.a) AND (COALESCE(prt1_1.b, p2_1.b) = p3_1.b))
+               Filter: ((COALESCE(COALESCE(prt1_1.a, p2_1.a), p3_1.a) >= 490) AND (COALESCE(COALESCE(prt1_1.a,
p2_1.a),p3_1.a) <= 510)) 
+               ->  Hash Full Join
+                     Hash Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                     ->  Seq Scan on prt1_p1 prt1_1
+                     ->  Hash
+                           ->  Seq Scan on prt2_p1 p2_1
+               ->  Hash
+                     ->  Seq Scan on prt2_p1 p3_1
+         ->  Hash Full Join
+               Hash Cond: ((COALESCE(prt1_2.a, p2_2.a) = p3_2.a) AND (COALESCE(prt1_2.b, p2_2.b) = p3_2.b))
+               Filter: ((COALESCE(COALESCE(prt1_2.a, p2_2.a), p3_2.a) >= 490) AND (COALESCE(COALESCE(prt1_2.a,
p2_2.a),p3_2.a) <= 510)) 
+               ->  Hash Full Join
+                     Hash Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                     ->  Seq Scan on prt1_p2 prt1_2
+                     ->  Hash
+                           ->  Seq Scan on prt2_p2 p2_2
+               ->  Hash
+                     ->  Seq Scan on prt2_p2 p3_2
+         ->  Hash Full Join
+               Hash Cond: ((COALESCE(prt1_3.a, p2_3.a) = p3_3.a) AND (COALESCE(prt1_3.b, p2_3.b) = p3_3.b))
+               Filter: ((COALESCE(COALESCE(prt1_3.a, p2_3.a), p3_3.a) >= 490) AND (COALESCE(COALESCE(prt1_3.a,
p2_3.a),p3_3.a) <= 510)) 
+               ->  Hash Full Join
+                     Hash Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b))
+                     ->  Seq Scan on prt1_p3 prt1_3
+                     ->  Hash
+                           ->  Seq Scan on prt2_p3 p2_3
+               ->  Hash
+                     ->  Seq Scan on prt2_p3 p3_3
+(32 rows)
+
+SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b)
+  WHERE a BETWEEN 490 AND 510;
+ count
+-------
+    14
+(1 row)
+
+--
+-- 4-way full join
+--
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b) FULL JOIN prt1 p4
(a,b,c)USING (a, b) 
+  WHERE a BETWEEN 490 AND 510;
+                                                                                 QUERY PLAN
                                                      

+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+   ->  Append
+         ->  Hash Full Join
+               Hash Cond: ((COALESCE(COALESCE(prt1_1.a, p2_1.a), p3_1.a) = p4_1.a) AND (COALESCE(COALESCE(prt1_1.b,
p2_1.b),p3_1.b) = p4_1.b)) 
+               Filter: ((COALESCE(COALESCE(COALESCE(prt1_1.a, p2_1.a), p3_1.a), p4_1.a) >= 490) AND
(COALESCE(COALESCE(COALESCE(prt1_1.a,p2_1.a), p3_1.a), p4_1.a) <= 510)) 
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(prt1_1.a, p2_1.a) = p3_1.a) AND (COALESCE(prt1_1.b, p2_1.b) = p3_1.b))
+                     ->  Hash Full Join
+                           Hash Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+                           ->  Seq Scan on prt1_p1 prt1_1
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p1 p2_1
+                     ->  Hash
+                           ->  Seq Scan on prt2_p1 p3_1
+               ->  Hash
+                     ->  Seq Scan on prt1_p1 p4_1
+         ->  Hash Full Join
+               Hash Cond: ((COALESCE(COALESCE(prt1_2.a, p2_2.a), p3_2.a) = p4_2.a) AND (COALESCE(COALESCE(prt1_2.b,
p2_2.b),p3_2.b) = p4_2.b)) 
+               Filter: ((COALESCE(COALESCE(COALESCE(prt1_2.a, p2_2.a), p3_2.a), p4_2.a) >= 490) AND
(COALESCE(COALESCE(COALESCE(prt1_2.a,p2_2.a), p3_2.a), p4_2.a) <= 510)) 
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(prt1_2.a, p2_2.a) = p3_2.a) AND (COALESCE(prt1_2.b, p2_2.b) = p3_2.b))
+                     ->  Hash Full Join
+                           Hash Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+                           ->  Seq Scan on prt1_p2 prt1_2
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p2 p2_2
+                     ->  Hash
+                           ->  Seq Scan on prt2_p2 p3_2
+               ->  Hash
+                     ->  Seq Scan on prt1_p2 p4_2
+         ->  Hash Full Join
+               Hash Cond: ((COALESCE(COALESCE(prt1_3.a, p2_3.a), p3_3.a) = p4_3.a) AND (COALESCE(COALESCE(prt1_3.b,
p2_3.b),p3_3.b) = p4_3.b)) 
+               Filter: ((COALESCE(COALESCE(COALESCE(prt1_3.a, p2_3.a), p3_3.a), p4_3.a) >= 490) AND
(COALESCE(COALESCE(COALESCE(prt1_3.a,p2_3.a), p3_3.a), p4_3.a) <= 510)) 
+               ->  Hash Full Join
+                     Hash Cond: ((COALESCE(prt1_3.a, p2_3.a) = p3_3.a) AND (COALESCE(prt1_3.b, p2_3.b) = p3_3.b))
+                     ->  Hash Full Join
+                           Hash Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b))
+                           ->  Seq Scan on prt1_p3 prt1_3
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p3 p2_3
+                     ->  Hash
+                           ->  Seq Scan on prt2_p3 p3_3
+               ->  Hash
+                     ->  Seq Scan on prt1_p3 p4_3
+(44 rows)
+
+SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b) FULL JOIN prt1 p4
(a,b,c)USING (a, b) 
+  WHERE a BETWEEN 490 AND 510;
+ count
+-------
+    14
+(1 row)
+
 -- Cases with non-nullable expressions in subquery results;
 -- make sure these go to null as expected
 EXPLAIN (COSTS OFF)
diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql
index 575ba7b..dad1e07 100644
--- a/src/test/regress/sql/partition_join.sql
+++ b/src/test/regress/sql/partition_join.sql
@@ -145,6 +145,24 @@ EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3
ON(t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b; 
 SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3
ON(t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b; 

+--
+-- 3-way full join
+--
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b)
+  WHERE a BETWEEN 490 AND 510;
+SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b)
+  WHERE a BETWEEN 490 AND 510;
+
+--
+-- 4-way full join
+--
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b) FULL JOIN prt1 p4
(a,b,c)USING (a, b) 
+  WHERE a BETWEEN 490 AND 510;
+SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(b,a,c) USING (a, b) FULL JOIN prt1 p4
(a,b,c)USING (a, b) 
+  WHERE a BETWEEN 490 AND 510;
+
 -- Cases with non-nullable expressions in subquery results;
 -- make sure these go to null as expected
 EXPLAIN (COSTS OFF)

pgsql-hackers by date:

Previous
From: Peter Geoghegan
Date:
Subject: Re: pg_stat_statements issue with parallel maintenance (Was Re: WALusage calculation patch)
Next
From: Fujii Masao
Date:
Subject: Re: Don't try fetching future segment of a TLI.