From f1ee02c90f42dfc8a3f49d43ad2c60e6c0030e96 Mon Sep 17 00:00:00 2001 From: Tender Wang Date: Thu, 1 Jan 2026 13:44:18 +0800 Subject: [PATCH v4] Reduce JOIN_LEFT TO JOIN_ANTI. Since we have collected notnull constraints, so we can detect an anti-join, if we were to check whether Vars coming from the RHS must be non-null because of table constraints. For example, SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.z IS NULL; If the b.z has non-null table constraint, we can reduce JOIN_LEFT to JOIN_ANTI. --- src/backend/optimizer/prep/prepjointree.c | 90 ++++++++++++++++++----- src/test/regress/expected/join.out | 40 ++++++++++ src/test/regress/sql/join.sql | 21 ++++++ 3 files changed, 134 insertions(+), 17 deletions(-) diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index c80bfc88d82..d444637080a 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -177,6 +177,8 @@ static Node *find_jointree_node_for_rel(Node *jtnode, int relid); static nullingrel_info *get_nullingrels(Query *parse); static void get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels, nullingrel_info *info); +static bool have_var_is_notnull(PlannerInfo *root, Relids right_rels, + List *forced_null_vars); /* @@ -3230,6 +3232,57 @@ reduce_outer_joins(PlannerInfo *root) } } +/* + * have_var_is_notnull + * check whether the vars from RHS of join have notnull constraints. + */ +static bool +have_var_is_notnull(PlannerInfo *root, Relids right_rels, List *forced_null_vars) +{ + ListCell *lc; + Bitmapset *cols; + int i; + int index; + RangeTblEntry *rte; + Bitmapset *notnullattnums; + bool result = false; + + i = 0; + foreach(lc, forced_null_vars) + { + cols = lfirst_node(Bitmapset, lc); + if (cols == NULL) + { + i++; + continue; + } + /* Skip if the var doesn't belong to RHS of join */ + if (!bms_is_member(i, right_rels)) + { + i++; + continue; + } + rte = rt_fetch(i, root->parse->rtable); + notnullattnums = find_relation_notnullatts(root, rte->relid); + index = -1; + while ((index = bms_next_member(cols, index)) >= 0) + { + AttrNumber attno = index + FirstLowInvalidHeapAttributeNumber; + if (bms_is_member(attno, notnullattnums)) + { + result = true; + break; + } + } + + if (result) + break; + i++; + } + + return result; +} + /* * reduce_outer_joins_pass1 - phase 1 data collection * @@ -3441,29 +3494,32 @@ reduce_outer_joins_pass2(Node *jtnode, /* * See if we can reduce JOIN_LEFT to JOIN_ANTI. This is the case if * the join's own quals are strict for any var that was forced null by - * higher qual levels. NOTE: there are other ways that we could - * detect an anti-join, in particular if we were to check whether Vars + * higher qual levels. Since we have collected notnull constraints, so + * we can detect an anti-join, if we were to check whether Vars * coming from the RHS must be non-null because of table constraints. - * That seems complicated and expensive though (in particular, one - * would have to be wary of lower outer joins). For the moment this - * seems sufficient. + * For example, SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.z IS NULL; + * if the b.z had non-null table constraint, we can reduce JOIN_LEFT to JOIN_ANTI. */ if (jointype == JOIN_LEFT) { - List *nonnullable_vars; - Bitmapset *overlap; + if(forced_null_vars != NIL) { + List *nonnullable_vars; + Bitmapset *overlap; - /* Find Vars in j->quals that must be non-null in joined rows */ - nonnullable_vars = find_nonnullable_vars(j->quals); + /* Find Vars in j->quals that must be non-null in joined rows */ + nonnullable_vars = find_nonnullable_vars(j->quals); - /* - * It's not sufficient to check whether nonnullable_vars and - * forced_null_vars overlap: we need to know if the overlap - * includes any RHS variables. - */ - overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars); - if (bms_overlap(overlap, right_state->relids)) - jointype = JOIN_ANTI; + /* + * It's not sufficient to check whether nonnullable_vars and + * forced_null_vars overlap: we need to know if the overlap + * includes any RHS variables. + */ + overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars); + if (bms_overlap(overlap, right_state->relids)) + jointype = JOIN_ANTI; + else if (have_var_is_notnull(root, right_state->relids, forced_null_vars)) + jointype = JOIN_ANTI; + } } /* diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index d05a0ca0373..0d138d3d3c4 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3194,6 +3194,15 @@ reset enable_nestloop; -- -- basic semijoin and antijoin recognition tests -- +CREATE TABLE aj1 (id integer primary key); +CREATE TABLE aj2 (id integer primary key, aj1_id integer); +CREATE TABLE aj3 (id integer primary key, aj2_id integer); +INSERT INTO aj1 VALUES (1); +INSERT INTO aj1 VALUES (2); +INSERT INTO aj2 VALUES (1, 1); +INSERT INTO aj2 VALUES (2, 1); +INSERT INTO aj3 VALUES (1, 2); +INSERT INTO aj3 VALUES (2, 2); explain (costs off) select a.* from tenk1 a where unique1 in (select unique2 from tenk1 b); @@ -3254,6 +3263,37 @@ where b.unique2 is null; -> Index Only Scan using tenk1_unique2 on tenk1 b (5 rows) +explain (costs off) +select aj1.* from aj1 left join aj2 on aj1.id = aj2.aj1_id +where aj2.id is null; + QUERY PLAN +------------------------------------ + Hash Right Anti Join + Hash Cond: (aj2.aj1_id = aj1.id) + -> Seq Scan on aj2 + -> Hash + -> Seq Scan on aj1 +(5 rows) + +explain (costs off) +select aj1.* from aj1 + left join (select aj2.id aj2_id, aj2.aj1_id, aj3.id aj3_id + from aj2 left join aj3 on aj2.id = aj3.aj2_id) x + on aj1.id = x.aj1_id +where x.aj2_id is null; + QUERY PLAN +------------------------------------------ + Hash Right Anti Join + Hash Cond: (aj2.aj1_id = aj1.id) + -> Hash Right Join + Hash Cond: (aj3.aj2_id = aj2.id) + -> Seq Scan on aj3 + -> Hash + -> Seq Scan on aj2 + -> Hash + -> Seq Scan on aj1 +(9 rows) + -- check that we avoid de-duplicating columns redundantly set enable_memoize to off; explain (costs off) diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index b91fb7574df..c21312191fb 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -837,6 +837,16 @@ reset enable_nestloop; -- -- basic semijoin and antijoin recognition tests -- +CREATE TABLE aj1 (id integer primary key); +CREATE TABLE aj2 (id integer primary key, aj1_id integer); +CREATE TABLE aj3 (id integer primary key, aj2_id integer); + +INSERT INTO aj1 VALUES (1); +INSERT INTO aj1 VALUES (2); +INSERT INTO aj2 VALUES (1, 1); +INSERT INTO aj2 VALUES (2, 1); +INSERT INTO aj3 VALUES (1, 2); +INSERT INTO aj3 VALUES (2, 2); explain (costs off) select a.* from tenk1 a @@ -859,6 +869,17 @@ explain (costs off) select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2 where b.unique2 is null; +explain (costs off) +select aj1.* from aj1 left join aj2 on aj1.id = aj2.aj1_id +where aj2.id is null; + +explain (costs off) +select aj1.* from aj1 + left join (select aj2.id aj2_id, aj2.aj1_id, aj3.id aj3_id + from aj2 left join aj3 on aj2.id = aj3.aj2_id) x + on aj1.id = x.aj1_id +where x.aj2_id is null; + -- check that we avoid de-duplicating columns redundantly set enable_memoize to off; explain (costs off) -- 2.34.1