diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index efd0fbc21c..916220ef94 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -89,6 +89,9 @@ static bool contain_outer_selfref(Node *node); static bool contain_outer_selfref_walker(Node *node, Index *depth); static void inline_cte(PlannerInfo *root, CommonTableExpr *cte); static bool inline_cte_walker(Node *node, inline_cte_walker_context *context); +static bool is_NOTANY_compatible_with_antijoin(Query *outerquery, + SubLink * sublink, + Node *notnull_proofs); static bool simplify_EXISTS_query(PlannerInfo *root, Query *query); static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, Node **testexpr, List **paramIds); @@ -1174,6 +1177,98 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context) } +/* + * is_NOTANY_compatible_with_antijoin + * True if the NOT IN sublink can be safely converted into an ANTI JOIN. + * Per SQL spec, NOT IN is not ordinarily equivalent to an anti-join, + * however, if we can prove that all of the expressions on both sides of + * the, would be, join condition are all certainly not NULL and none of + * the join expressions can produce NULLs, then it's safe to convert NOT + * IN to an anti-join. + * + * To ensure that the join expressions cannot be NULL, we can't just check for + * strict operators since these only guarantee NULL on NULL input, we need a + * guarantee of NOT NULL on NOT NULL input. Fortunately we can just insist + * that the operator is a member of a btree or hash opfamily. + * + * 'outerquery' is the parse of the query that the NOT IN is present in. + * 'notnull_proofs' are quals from the same syntactical level as the NOT IN. + * These will be used to assist in proving the outer query's would be join + * expressions cannot be NULL. + * + * Note: + * This function is quite locked into the NOT IN syntax. Certain assumptions + * are made about the structure of the join conditions: + * + * 1. We assume that when more than 1 join condition exists that these are + * AND type conditions, i.e not OR conditions. + * + * 2. We assume that each join qual is an OpExpr with 2 arguments and the + * first arguments in each qual is the one that belongs to the outer side + * of the NOT IN clause. + */ +static bool +is_NOTANY_compatible_with_antijoin(Query *outerquery, SubLink *sublink, + Node *notnull_proofs) +{ + Node *testexpr = sublink->testexpr; + List *outerexpr; + ListCell *lc; + + /* Extract the outer rel's join condition expressions. */ + + /* if it's a single expression */ + if (IsA(testexpr, OpExpr)) + { + OpExpr *opexpr = (OpExpr *) testexpr; + + Assert(list_length(opexpr->args) == 2); + + /* Reject if op is not part of a btree or hash opfamily */ + if (!has_merge_or_hash_join_opfamilies(opexpr->opno)) + return false; + + outerexpr = list_make1(linitial(opexpr->args)); + } + + /* Extract exprs from multiple expressions ANDed together */ + else if (IsA(testexpr, BoolExpr)) + { + List *list = ((BoolExpr *) testexpr)->args; + + outerexpr = NIL; + + /* Build a list containing the lefthand expr from each OpExpr */ + foreach(lc, list) + { + OpExpr *opexpr = (OpExpr *) lfirst(lc); + + Assert(list_length(opexpr->args) == 2); + + /* Reject if op is not part of a btree or hash opfamily */ + if (!has_merge_or_hash_join_opfamilies(opexpr->opno)) + return false; + + outerexpr = lappend(outerexpr, linitial(opexpr->args)); + } + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(testexpr)); + + Assert(outerexpr != NIL); + + /* Check if any outer expressions can be NULL. */ + if (!expressions_are_not_nullable(outerquery, outerexpr, notnull_proofs)) + return false; + + /* Now validate the subquery targetlist to ensure no NULL are possible. */ + if (!query_outputs_are_not_nullable((Query *) sublink->subselect)) + return false; + + return true; /* supports ANTI JOIN */ +} + /* * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join * @@ -1183,11 +1278,22 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context) * If so, form a JoinExpr and return it. Return NULL if the SubLink cannot * be converted to a join. * - * The only non-obvious input parameter is available_rels: this is the set - * of query rels that can safely be referenced in the sublink expression. - * (We must restrict this to avoid changing the semantics when a sublink - * is present in an outer join's ON qual.) The conversion must fail if - * the converted qual would reference any but these parent-query relids. + * If under_not is true, the caller actually found NOT (ANY SubLink), + * so that what we must try to build is an ANTI not SEMI join. Per SQL spec, + * NOT IN is not ordinarily equivalent to an anti-join, so that by default we + * have to fail when under_not. However, if we can prove that all of the + * expressions on both sides of the, would be, join condition are all + * certainly not NULL and none of the join expressions can produce NULLs, then + * it's safe to convert NOT IN to an anti-join. To assist with nullability + * proofs for the join conditions on the outside of the join, we make use of + *'notnull_proofs'. It is the caller's responsibility to ensure these proofs + * come from the same syntactical level as the NOT IN sublink. + * + * available_rels is the set of query rels that can safely be referenced + * in the sublink expression. (We must restrict this to avoid changing the + * semantics when a sublink is present in an outer join's ON qual.) + * The conversion must fail if the converted qual would reference any but + * these parent-query relids. * * On success, the returned JoinExpr has larg = NULL and rarg = the jointree * item representing the pulled-up subquery. The caller must set larg to @@ -1210,7 +1316,8 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context) */ JoinExpr * convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, - Relids available_rels) + bool under_not, Relids available_rels, + Node *notnull_proofs) { JoinExpr *result; Query *parse = root->parse; @@ -1225,6 +1332,11 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, Assert(sublink->subLinkType == ANY_SUBLINK); + /* Check if NOT IN can be converted to an anti-join. */ + if (under_not && + !is_NOTANY_compatible_with_antijoin(parse, sublink, notnull_proofs)) + return NULL; + /* * The sub-select must not refer to any Vars of the parent query. (Vars of * higher levels should be okay, though.) @@ -1294,7 +1406,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, * And finally, build the JoinExpr node. */ result = makeNode(JoinExpr); - result->jointype = JOIN_SEMI; + result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI; result->isNatural = false; result->larg = NULL; /* caller must fill this in */ result->rarg = (Node *) rtr; @@ -1309,9 +1421,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, /* * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join * - * The API of this function is identical to convert_ANY_sublink_to_join's, - * except that we also support the case where the caller has found NOT EXISTS, - * so we need an additional input parameter "under_not". + * The API of this function is identical to convert_ANY_sublink_to_join's. */ JoinExpr * convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 67eeba938d..1ee2a3cd0a 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -64,7 +64,8 @@ static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, Relids *relids); static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, Node **jtlink1, Relids available_rels1, - Node **jtlink2, Relids available_rels2); + Node **jtlink2, Relids available_rels2, + Node *notnull_proofs); static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode, JoinExpr *lowest_outer_join, JoinExpr *lowest_nulling_outer_join, @@ -261,7 +262,7 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, /* Now process qual --- all children are available for use */ newf->quals = pull_up_sublinks_qual_recurse(root, f->quals, &jtlink, frelids, - NULL, NULL); + NULL, NULL, f->quals); /* * Note that the result will be either newf, or a stack of JoinExprs @@ -315,13 +316,13 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, &jtlink, bms_union(leftrelids, rightrelids), - NULL, NULL); + NULL, NULL, j->quals); break; case JOIN_LEFT: j->quals = pull_up_sublinks_qual_recurse(root, j->quals, &j->rarg, rightrelids, - NULL, NULL); + NULL, NULL, j->quals); break; case JOIN_FULL: /* can't do anything with full-join quals */ @@ -330,7 +331,7 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, j->quals = pull_up_sublinks_qual_recurse(root, j->quals, &j->larg, leftrelids, - NULL, NULL); + NULL, NULL, j->quals); break; default: elog(ERROR, "unrecognized join type: %d", @@ -370,12 +371,17 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, * and/or jtlink2 in the order we encounter them. We rely on subsequent * optimization to rearrange the stack if appropriate. * + * notnull_proofs, if not passed as NULL, is used to help prove that the + * would be outer rel's join condition exprs cannot be NULL when we attempt + * to convert NOT IN to use an antijoin. + * * Returns the replacement qual node, or NULL if the qual should be removed. */ static Node * pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, Node **jtlink1, Relids available_rels1, - Node **jtlink2, Relids available_rels2) + Node **jtlink2, Relids available_rels2, + Node *notnull_proofs) { if (node == NULL) return NULL; @@ -388,8 +394,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* Is it a convertible ANY or EXISTS clause? */ if (sublink->subLinkType == ANY_SUBLINK) { - if ((j = convert_ANY_sublink_to_join(root, sublink, - available_rels1)) != NULL) + if ((j = convert_ANY_sublink_to_join(root, sublink, false, + available_rels1, NULL)) != NULL) { /* Yes; insert the new join node into the join tree */ j->larg = *jtlink1; @@ -409,13 +415,14 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, &j->larg, available_rels1, &j->rarg, - child_rels); + child_rels, + notnull_proofs); /* Return NULL representing constant TRUE */ return NULL; } if (available_rels2 != NULL && - (j = convert_ANY_sublink_to_join(root, sublink, - available_rels2)) != NULL) + (j = convert_ANY_sublink_to_join(root, sublink, false, + available_rels2, NULL)) != NULL) { /* Yes; insert the new join node into the join tree */ j->larg = *jtlink2; @@ -435,7 +442,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, &j->larg, available_rels2, &j->rarg, - child_rels); + child_rels, + notnull_proofs); /* Return NULL representing constant TRUE */ return NULL; } @@ -463,7 +471,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, &j->larg, available_rels1, &j->rarg, - child_rels); + child_rels, + notnull_proofs); /* Return NULL representing constant TRUE */ return NULL; } @@ -489,7 +498,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, &j->larg, available_rels2, &j->rarg, - child_rels); + child_rels, + notnull_proofs); /* Return NULL representing constant TRUE */ return NULL; } @@ -499,14 +509,70 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, } if (is_notclause(node)) { - /* If the immediate argument of NOT is EXISTS, try to convert */ + /* If the immediate argument of NOT is ANY or EXISTS, try to convert */ SubLink *sublink = (SubLink *) get_notclausearg((Expr *) node); JoinExpr *j; Relids child_rels; if (sublink && IsA(sublink, SubLink)) { - if (sublink->subLinkType == EXISTS_SUBLINK) + if (sublink->subLinkType == ANY_SUBLINK) + { + if ((j = convert_ANY_sublink_to_join(root, sublink, true, + available_rels1, notnull_proofs)) != NULL) + { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink1; + *jtlink1 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ + j->rarg = pull_up_sublinks_jointree_recurse(root, + j->rarg, + &child_rels); + + /* + * Now recursively process the pulled-up quals. Because + * we are underneath a NOT, we can't pull up sublinks that + * reference the left-hand stuff, but it's still okay to + * pull up sublinks referencing j->rarg. + */ + j->quals = pull_up_sublinks_qual_recurse(root, + j->quals, + &j->rarg, + child_rels, + NULL, NULL, + notnull_proofs); + /* Return NULL representing constant TRUE */ + return NULL; + } + if (available_rels2 != NULL && + (j = convert_ANY_sublink_to_join(root, sublink, true, + available_rels2, notnull_proofs)) != NULL) + { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink2; + *jtlink2 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ + j->rarg = pull_up_sublinks_jointree_recurse(root, + j->rarg, + &child_rels); + + /* + * Now recursively process the pulled-up quals. Because + * we are underneath a NOT, we can't pull up sublinks that + * reference the left-hand stuff, but it's still okay to + * pull up sublinks referencing j->rarg. + */ + j->quals = pull_up_sublinks_qual_recurse(root, + j->quals, + &j->rarg, + child_rels, + NULL, NULL, + notnull_proofs); + /* Return NULL representing constant TRUE */ + return NULL; + } + } + else if (sublink->subLinkType == EXISTS_SUBLINK) { if ((j = convert_EXISTS_sublink_to_join(root, sublink, true, available_rels1)) != NULL) @@ -529,7 +595,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, j->quals, &j->rarg, child_rels, - NULL, NULL); + NULL, NULL, + notnull_proofs); /* Return NULL representing constant TRUE */ return NULL; } @@ -555,7 +622,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, j->quals, &j->rarg, child_rels, - NULL, NULL); + NULL, NULL, + notnull_proofs); /* Return NULL representing constant TRUE */ return NULL; } @@ -580,7 +648,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, jtlink1, available_rels1, jtlink2, - available_rels2); + available_rels2, + notnull_proofs); if (newclause) newclauses = lappend(newclauses, newclause); } diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 2e84d6b3b4..7d7e5b040d 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -42,6 +42,7 @@ #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "parser/parse_func.h" +#include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "tcop/tcopprot.h" #include "utils/acl.h" @@ -113,6 +114,8 @@ static bool contain_context_dependent_node_walker(Node *node, int *flags); static bool contain_leaked_vars_walker(Node *node, void *context); static Relids find_nonnullable_rels_walker(Node *node, bool top_level); static List *find_nonnullable_vars_walker(Node *node, bool top_level); +static void find_innerjoined_rels(Node *jtnode, + Relids *innerjoined_rels, List **usable_quals); static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK); static Node *eval_const_expressions_mutator(Node *node, eval_const_expressions_context *context); @@ -1467,6 +1470,10 @@ contain_leaked_vars_walker(Node *node, void *context) context); } +/***************************************************************************** + * Nullability analysis + *****************************************************************************/ + /* * find_nonnullable_rels * Determine which base rels are forced nonnullable by given clause. @@ -1710,7 +1717,7 @@ find_nonnullable_rels_walker(Node *node, bool top_level) * but here we assume that the input is a Boolean expression, and wish to * see if NULL inputs will provably cause a FALSE-or-NULL result. We expect * the expression to have been AND/OR flattened and converted to implicit-AND - * format. + * format (but the results are still good if it wasn't AND/OR flattened). * * The result is a palloc'd List, but we have not copied the member Var nodes. * Also, we don't bother trying to eliminate duplicate entries. @@ -2011,6 +2018,393 @@ find_forced_null_var(Node *node) return NULL; } +/* + * expressions_are_not_nullable + * Return TRUE if all 'exprs' are certainly not NULL. + * + * The reason this takes a Query, and not just a list of expressions, is so + * that we can determine which relations are INNER JOINed and make use of the + * query's WHERE/ON clauses to help prove that inner joined rel's Vars cannot + * be NULL in the absense of a NOT NULL constraint. + * + * 'query' is the query that the 'exprs' belong to. + * 'notnull_proofs' can be passed to assist in proving the non-nullability of + * 'exprs'. These may be used for outer joined Vars or for Vars that allow + * NULLs to help determine if each 'exprs' cannot be NULL. It is the callers + * responsibility to ensure 'notnull_proofs' come from the same syntactical + * level as 'exprs'. + * + * In current usage, the passed exprs haven't yet been through any planner + * processing. This means that applying find_nonnullable_vars() on them isn't + * really ideal: for lack of const-simplification, we might be unable to prove + * not-nullness in some cases where we could have proved it afterwards. + * However, we should not get any false positive results. + * + * Here we can err on the side of conservatism: if we're not sure then it's + * okay to return FALSE. + */ +bool +expressions_are_not_nullable(Query *query, List *exprs, Node *notnull_proofs) +{ + Relids innerjoined_rels = NULL; + List *innerjoined_useful_quals = NIL; + bool computed_innerjoined_rels = false; + List *nonnullable_vars = NIL; + bool computed_nonnullable_vars = false; + List *nonnullable_inner_vars = NIL; + bool computed_nonnullable_inner_vars = false; + ListCell *lc; + + foreach(lc, exprs) + { + Expr *expr = (Expr *) lfirst(lc); + + /* + * For the most part we don't try to deal with anything more complex + * than Consts and Vars; but it seems worthwhile to look through + * binary relabelings, since we know those don't introduce nulls. + */ + while (expr && IsA(expr, RelabelType)) + expr = ((RelabelType *) expr)->arg; + + if (expr == NULL) /* paranoia */ + return false; + + if (IsA(expr, Const)) + { + /* Consts are easy: they're either null or not. */ + if (((Const *) expr)->constisnull) + return false; + } + else if (IsA(expr, Var)) + { + Var *var = (Var *) expr; + + /* Currently, we punt for any nonlocal Vars */ + if (var->varlevelsup != 0) + return false; + + /* + * Since the subquery hasn't yet been through expression + * preprocessing, we must apply flatten_join_alias_vars to the + * given Var, and to any Vars found by find_nonnullable_vars, to + * avoid being fooled by join aliases. If we get something other + * than a plain Var out of the substitution, punt. + */ + var = (Var *) flatten_join_alias_vars(query, (Node *) var); + + if (!IsA(var, Var)) + return false; + Assert(var->varlevelsup == 0); + + /* + * We don't bother to compute innerjoined_rels until we've found a + * Var we must analyze. + */ + if (!computed_innerjoined_rels) + { + find_innerjoined_rels((Node *) query->jointree, + &innerjoined_rels, + &innerjoined_useful_quals); + computed_innerjoined_rels = true; + } + + /* Check if the Var is from an INNER JOINed rel */ + if (bms_is_member(var->varno, innerjoined_rels)) + { + RangeTblEntry *rte = rt_fetch(var->varno, query->rtable); + + /* + * If Var is from a plain relation and its column is marked + * NOT NULL according to the catalogs, it can't produce NULL. + */ + if (rte->rtekind == RTE_RELATION && + get_attnotnull(rte->relid, var->varattno)) + continue; /* cannot produce NULL */ + + /* + * Otherwise check for the existance of quals which filter + * out NULL values for this Var. We may need to compute the + * nonnullable_inner_vars, if not done already. + */ + if (!computed_nonnullable_inner_vars) + { + nonnullable_inner_vars = + find_nonnullable_vars((Node *) innerjoined_useful_quals); + nonnullable_inner_vars = (List *) + flatten_join_alias_vars(query, + (Node *) nonnullable_inner_vars); + /* We don't bother removing any non-Vars from the result */ + computed_nonnullable_inner_vars = true; + } + + if (list_member(nonnullable_inner_vars, var)) + continue; /* cannot produce NULL */ + } + + /* + * Even if that didn't work, we can conclude that the Var is not + * nullable if find_nonnullable_vars can find a "var IS NOT NULL" + * or similarly strict condition among the notnull_proofs. + * Compute the list of Vars having such quals if we didn't + * already. + */ + if (!computed_nonnullable_vars) + { + nonnullable_vars = find_nonnullable_vars(notnull_proofs); + nonnullable_vars = (List *) + flatten_join_alias_vars(query, + (Node *) nonnullable_vars); + /* We don't bother removing any non-Vars from the result */ + computed_nonnullable_vars = true; + } + + if (!list_member(nonnullable_vars, var)) + return false; /* we failed to prove the Var non-null */ + } + else + { + /* Not a Const or Var; punt */ + return false; + } + } + + return true; /* exprs cannot emit NULLs */ +} + +/* + * query_outputs_are_not_nullable + * Returns TRUE if the output values of the Query are certainly not NULL. + * All output columns must return non-NULL to answer TRUE. + * + * The reason this takes a Query, and not just an individual tlist expression, + * is so that we can make use of the query's WHERE/ON clauses to prove it does + * not return nulls. + * + * In current usage, the passed sub-Query hasn't yet been through any planner + * processing. This means that applying find_nonnullable_vars() to its WHERE + * clauses isn't really ideal: for lack of const-simplification, we might be + * unable to prove not-nullness in some cases where we could have proved it + * afterwards. However, we should not get any false positive results. + * + * Like the other forms of nullability analysis above, we can err on the + * side of conservatism: if we're not sure, it's okay to return FALSE. + */ +bool +query_outputs_are_not_nullable(Query *query) +{ + Relids innerjoined_rels = NULL; + bool computed_innerjoined_rels = false; + List *usable_quals = NIL; + List *nonnullable_vars = NIL; + bool computed_nonnullable_vars = false; + ListCell *tl; + + /* + * If the query contains set operations, punt. The set ops themselves + * couldn't introduce nulls that weren't in their inputs, but the tlist + * present in the top-level query is just dummy and won't give us useful + * info. We could get an answer by recursing to examine each leaf query, + * but for the moment it doesn't seem worth the extra complication. + * + * Note that we needn't consider other top-level operators such as + * DISTINCT, GROUP BY, etc, as those will not introduce nulls either. + */ + if (query->setOperations) + return false; + + /* + * Examine each targetlist entry to prove that it can't produce NULL. + */ + foreach(tl, query->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(tl); + Expr *expr = tle->expr; + + /* Resjunk columns can be ignored: they don't produce output values */ + if (tle->resjunk) + continue; + + /* + * For the most part we don't try to deal with anything more complex + * than Consts and Vars; but it seems worthwhile to look through + * binary relabelings, since we know those don't introduce nulls. + */ + while (expr && IsA(expr, RelabelType)) + expr = ((RelabelType *) expr)->arg; + + if (expr == NULL) /* paranoia */ + return false; + + if (IsA(expr, Const)) + { + /* Consts are easy: they're either null or not. */ + if (((Const *) expr)->constisnull) + return false; + } + else if (IsA(expr, Var)) + { + Var *var = (Var *) expr; + + /* Currently, we punt for any nonlocal Vars */ + if (var->varlevelsup != 0) + return false; + + /* + * Since the subquery hasn't yet been through expression + * preprocessing, we must apply flatten_join_alias_vars to the + * given Var, and to any Vars found by find_nonnullable_vars, to + * avoid being fooled by join aliases. If we get something other + * than a plain Var out of the substitution, punt. + */ + var = (Var *) flatten_join_alias_vars(query, (Node *) var); + + if (!IsA(var, Var)) + return false; + Assert(var->varlevelsup == 0); + + /* + * We don't bother to compute innerjoined_rels and usable_quals + * until we've found a Var we must analyze. + */ + if (!computed_innerjoined_rels) + { + find_innerjoined_rels((Node *) query->jointree, + &innerjoined_rels, &usable_quals); + computed_innerjoined_rels = true; + } + + /* + * If Var is from a plain relation, and that relation is not on + * the nullable side of any outer join, and its column is marked + * NOT NULL according to the catalogs, it can't produce NULL. + */ + if (bms_is_member(var->varno, innerjoined_rels)) + { + RangeTblEntry *rte = rt_fetch(var->varno, query->rtable); + + if (rte->rtekind == RTE_RELATION && + get_attnotnull(rte->relid, var->varattno)) + continue; /* cannot produce NULL */ + } + + /* + * Even if that didn't work, we can conclude that the Var is not + * nullable if find_nonnullable_vars can find a "var IS NOT NULL" + * or similarly strict condition among the usable_quals. Compute + * the list of Vars having such quals if we didn't already. + */ + if (!computed_nonnullable_vars) + { + nonnullable_vars = find_nonnullable_vars((Node *) usable_quals); + nonnullable_vars = (List *) + flatten_join_alias_vars(query, + (Node *) nonnullable_vars); + /* We don't bother removing any non-Vars from the result */ + computed_nonnullable_vars = true; + } + + if (!list_member(nonnullable_vars, var)) + return false; /* we failed to prove the Var non-null */ + } + else + { + /* Not a Const or Var; punt */ + return false; + } + } + + return true; /* query cannot emit NULLs */ +} + +/* + * find_innerjoined_rels + * Traverse jointree to locate non-outerjoined-rels and quals above them + * + * We fill innerjoined_rels with the relids of all rels that are not below + * the nullable side of any outer join (which would cause their Vars to be + * possibly NULL regardless of what's in the catalogs). In the same scan, + * we locate all WHERE and JOIN/ON quals that constrain these rels add them to + * the usable_quals list (forming a list with implicit-AND semantics). + * + * Top-level caller must initialize innerjoined_rels/usable_quals to NULL/NIL. + */ +static void +find_innerjoined_rels(Node *jtnode, + Relids *innerjoined_rels, List **usable_quals) +{ + if (jtnode == NULL) + return; + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + *innerjoined_rels = bms_add_member(*innerjoined_rels, varno); + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *lc; + + /* All elements of the FROM list are allowable */ + foreach(lc, f->fromlist) + find_innerjoined_rels((Node *) lfirst(lc), + innerjoined_rels, usable_quals); + /* ... and its WHERE quals are too */ + if (f->quals) + *usable_quals = lappend(*usable_quals, f->quals); + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + + switch (j->jointype) + { + case JOIN_INNER: + /* visit both children */ + find_innerjoined_rels(j->larg, + innerjoined_rels, usable_quals); + find_innerjoined_rels(j->rarg, + innerjoined_rels, usable_quals); + /* and grab the ON quals too */ + if (j->quals) + *usable_quals = lappend(*usable_quals, j->quals); + break; + + case JOIN_LEFT: + case JOIN_SEMI: + case JOIN_ANTI: + + /* + * Only the left input is possibly non-nullable; furthermore, + * the quals of this join don't constrain the left input. + * Note: we probably can't see SEMI or ANTI joins at this + * point, but if we do, we can treat them like LEFT joins. + */ + find_innerjoined_rels(j->larg, + innerjoined_rels, usable_quals); + break; + + case JOIN_RIGHT: + /* Reverse of the above case */ + find_innerjoined_rels(j->rarg, + innerjoined_rels, usable_quals); + break; + + case JOIN_FULL: + /* Neither side is non-nullable, so stop descending */ + break; + + default: + elog(ERROR, "unrecognized join type: %d", + (int) j->jointype); + } + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); +} + /* * Can we treat a ScalarArrayOpExpr as strict? * diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index b4f2d0f35a..ae1d9453c3 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -388,6 +388,45 @@ get_mergejoin_opfamilies(Oid opno) return result; } +/* + * has_merge_or_hash_join_opfamilies + * TRUE iif 'opno' belongs to any btree or hash opfamily's equality + * strategy. + */ +bool +has_merge_or_hash_join_opfamilies(Oid opno) +{ + bool result = false; + CatCList *catlist; + int i; + + /* + * Search pg_amop to see if the target operator is registered as the "=" + * operator of any btree opfamily. + */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + /* must be btree or hash equality */ + if ((aform->amopmethod == BTREE_AM_OID && + aform->amopstrategy == BTEqualStrategyNumber) || + (aform->amopmethod == HASH_AM_OID && + aform->amopstrategy == HTEqualStrategyNumber)) + { + result = true; + break; + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + /* * get_compatible_hash_operators * Get the OID(s) of hash equality operator(s) compatible with the given @@ -908,6 +947,35 @@ get_atttypetypmodcoll(Oid relid, AttrNumber attnum, ReleaseSysCache(tp); } +/* + * get_attnotnull + * + * Given the relation id and the attribute number, + * return the "attnotnull" field from the attribute relation. + * + * Returns false if the attr doesn't exist (or is dropped). + */ +bool +get_attnotnull(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + bool result; + + result = att_tup->attnotnull; + ReleaseSysCache(tp); + return result; + } + else + return false; +} + /* ---------- COLLATION CACHE ---------- */ /* diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 2f9aeec4a7..b8e933383c 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -44,6 +44,9 @@ extern Relids find_nonnullable_rels(Node *clause); extern List *find_nonnullable_vars(Node *clause); extern List *find_forced_null_vars(Node *clause); extern Var *find_forced_null_var(Node *clause); +extern bool expressions_are_not_nullable(Query *query, List *exprs, + Node *notnull_proofs); +extern bool query_outputs_are_not_nullable(Query *query); extern bool is_pseudo_constant_clause(Node *clause); extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids); diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h index 71a22eeb64..7c1062d299 100644 --- a/src/include/optimizer/subselect.h +++ b/src/include/optimizer/subselect.h @@ -19,7 +19,9 @@ extern void SS_process_ctes(PlannerInfo *root); extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, - Relids available_rels); + bool under_not, + Relids available_rels, + Node *notnull_proofs); extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, bool under_not, diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index c8df5bff9f..2664196800 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -76,6 +76,7 @@ extern bool get_ordering_op_properties(Oid opno, extern Oid get_equality_op_for_ordering_op(Oid opno, bool *reverse); extern Oid get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type); extern List *get_mergejoin_opfamilies(Oid opno); +extern bool has_merge_or_hash_join_opfamilies(Oid opno); extern bool get_compatible_hash_operators(Oid opno, Oid *lhs_opno, Oid *rhs_opno); extern bool get_op_hash_functions(Oid opno, @@ -90,6 +91,7 @@ extern char get_attgenerated(Oid relid, AttrNumber attnum); extern Oid get_atttype(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); +extern bool get_attnotnull(Oid relid, AttrNumber attnum); extern char *get_collation_name(Oid colloid); extern bool get_collation_isdeterministic(Oid colloid); extern char *get_constraint_name(Oid conoid); diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 2d1963d12a..797d40a2ce 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1442,3 +1442,382 @@ select * from x for update; Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3 (2 rows) +-- +-- Check NOT IN performs an ANTI JOIN when NULLs are not possible +-- on either side of the, would be, join condition. +-- +BEGIN; +CREATE TEMP TABLE a (id INT PRIMARY KEY); +CREATE TEMP TABLE b (x INT NOT NULL, y INT); +CREATE TEMP TABLE c (z INT NOT NULL); +INSERT INTO b VALUES(1,1),(2,2),(3,NULL); +-- No ANTI JOIN, b.x is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a +LEFT OUTER JOIN b ON a.id = b.x +WHERE b.x NOT IN(SELECT z FROM c); + QUERY PLAN +------------------------------------ + Hash Right Join + Hash Cond: (b.x = a.id) + Filter: (NOT (hashed SubPlan 1)) + -> Seq Scan on b + -> Hash + -> Seq Scan on a + SubPlan 1 + -> Seq Scan on c +(8 rows) + +-- ANTI JOIN, b.x is from an outer join but b.x > 100 +-- forces the join not to produce NULL on the righthand +-- side. +EXPLAIN (COSTS OFF) +SELECT * FROM a +LEFT OUTER JOIN b ON a.id = b.x +WHERE b.x NOT IN(SELECT z FROM c) + AND b.x > 100; + QUERY PLAN +--------------------------------- + Hash Join + Hash Cond: (b.x = a.id) + -> Hash Anti Join + Hash Cond: (b.x = c.z) + -> Seq Scan on b + Filter: (x > 100) + -> Hash + -> Seq Scan on c + -> Hash + -> Seq Scan on a +(10 rows) + +-- No ANTI JOIN, b.x is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a +FULL OUTER JOIN b ON a.id = b.x +WHERE b.x NOT IN(SELECT y FROM c); + QUERY PLAN +----------------------------- + Hash Full Join + Hash Cond: (b.x = a.id) + Filter: (NOT (SubPlan 1)) + -> Seq Scan on b + -> Hash + -> Seq Scan on a + SubPlan 1 + -> Seq Scan on c +(8 rows) + +-- No ANTI JOIN. y can have NULLs +EXPLAIN (COSTS OFF) +SELECT * FROM b WHERE y NOT IN (SELECT z FROM c); + QUERY PLAN +------------------------------------ + Seq Scan on b + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on c +(4 rows) + +-- c is an empty relation so should cause no filtering on b +SELECT * FROM b WHERE y NOT IN (SELECT z FROM c); + x | y +---+--- + 1 | 1 + 2 | 2 + 3 | +(3 rows) + +INSERT INTO c VALUES(1); +-- Records where y is NULL should be filtered out. +SELECT * FROM b WHERE y NOT IN (SELECT z FROM c); + x | y +---+--- + 2 | 2 +(1 row) + +-- No ANTI JOIN, y can be NULL +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on b +(4 rows) + +-- No ANTI JOIN, x is NOT NULL, but we don't know if + 1 will change that. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT x+1 FROM b); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on b +(4 rows) + +-- ANTI JOIN 1 is a Const that is not null. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT 1 FROM b); + QUERY PLAN +--------------------------- + Nested Loop Anti Join + Join Filter: (a.id = 1) + -> Seq Scan on a + -> Materialize + -> Seq Scan on b +(5 rows) + +-- No ANTI JOIN, results contain a NULL Const +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT NULL::int FROM b); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on b +(4 rows) + +-- ANTI JOIN y = 1 means y can't be NULL +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE y = 1); + QUERY PLAN +------------------------------- + Hash Anti Join + Hash Cond: (a.id = b.y) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: (y = 1) +(6 rows) + +-- No ANTI JOIN, OR condition does not ensure y = 1 +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE y = 1 OR x = 1); + QUERY PLAN +---------------------------------------- + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on b + Filter: ((y = 1) OR (x = 1)) +(5 rows) + +-- No ANTI JOIN, OR condition does not ensure y = 1 or y = 2 +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE (y = 1 OR x = 1) AND (y = 2 OR x = 2)); + QUERY PLAN +------------------------------------------------------------------- + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on b + Filter: (((y = 1) OR (x = 1)) AND ((y = 2) OR (x = 2))) +(5 rows) + +-- ANTI JOIN y must be 2, so can't be NULL +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE (y = 1 OR x = 1) AND y = 2); + QUERY PLAN +---------------------------------------------------------- + Hash Anti Join + Hash Cond: (a.id = b.y) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: ((y = 2) AND ((y = 1) OR (x = 1))) +(6 rows) + +-- ANTI JOIN y can be 1 or 2, but can't be null. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE (y = 1 OR y = 2)); + QUERY PLAN +-------------------------------------------- + Hash Anti Join + Hash Cond: (a.id = b.y) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + Filter: ((y = 1) OR (y = 2)) +(6 rows) + +-- No ANTI JOIN c.z is from a left outer join so it can have nulls. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b LEFT JOIN c ON b.x = c.z); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Merge Left Join + Merge Cond: (b.x = c.z) + -> Sort + Sort Key: b.x + -> Seq Scan on b + -> Sort + Sort Key: c.z + -> Seq Scan on c +(11 rows) + +-- ANTI JOIN, c.z is not from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b RIGHT JOIN c ON b.x = c.z); + QUERY PLAN +----------------------------------------- + Merge Anti Join + Merge Cond: (a.id = c.z) + -> Index Only Scan using a_pkey on a + -> Merge Left Join + Merge Cond: (c.z = b.x) + -> Sort + Sort Key: c.z + -> Seq Scan on c + -> Sort + Sort Key: b.x + -> Seq Scan on b +(11 rows) + +-- No ANTI JOIN, b.x is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT b.x FROM b RIGHT JOIN c ON b.x = c.z); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Merge Right Join + Merge Cond: (b.x = c.z) + -> Sort + Sort Key: b.x + -> Seq Scan on b + -> Sort + Sort Key: c.z + -> Seq Scan on c +(11 rows) + +-- No ANTI JOIN, c.z is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b FULL JOIN c ON b.x = c.z); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Merge Full Join + Merge Cond: (b.x = c.z) + -> Sort + Sort Key: b.x + -> Seq Scan on b + -> Sort + Sort Key: c.z + -> Seq Scan on c +(11 rows) + +-- No ANTI JOIN, b.x is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT b.x FROM b FULL JOIN c ON b.x = c.z); + QUERY PLAN +------------------------------------ + Seq Scan on a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Merge Full Join + Merge Cond: (b.x = c.z) + -> Sort + Sort Key: b.x + -> Seq Scan on b + -> Sort + Sort Key: c.z + -> Seq Scan on c +(11 rows) + +-- ANTI JOIN, c.z is from an inner join and has a NOT NULL constraint. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b INNER JOIN c ON b.x = c.z); + QUERY PLAN +----------------------------------------- + Merge Anti Join + Merge Cond: (a.id = c.z) + -> Index Only Scan using a_pkey on a + -> Merge Join + Merge Cond: (b.x = c.z) + -> Sort + Sort Key: b.x + -> Seq Scan on b + -> Sort + Sort Key: c.z + -> Seq Scan on c +(11 rows) + +-- ANTI JOIN, c.z must be 1 +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b LEFT JOIN c ON b.x = c.z WHERE c.z = 1); + QUERY PLAN +------------------------------------------- + Hash Anti Join + Hash Cond: (a.id = c.z) + -> Seq Scan on a + -> Hash + -> Nested Loop + -> Seq Scan on c + Filter: (z = 1) + -> Materialize + -> Seq Scan on b + Filter: (x = 1) +(10 rows) + +-- ANTI JOIN, c.z can't be NULL +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b LEFT JOIN c ON b.x = c.z WHERE c.z IS NOT NULL); + QUERY PLAN +--------------------------------------------- + Merge Anti Join + Merge Cond: (a.id = c.z) + -> Index Only Scan using a_pkey on a + -> Merge Join + Merge Cond: (b.x = c.z) + -> Sort + Sort Key: b.x + -> Seq Scan on b + -> Sort + Sort Key: c.z + -> Seq Scan on c + Filter: (z IS NOT NULL) +(12 rows) + +-- ANTI JOIN, b.y cannot be NULL due to the join condition b.y = c.z +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT b.y FROM b INNER JOIN c ON b.y = c.z); + QUERY PLAN +----------------------------------------- + Merge Anti Join + Merge Cond: (a.id = b.y) + -> Index Only Scan using a_pkey on a + -> Merge Join + Merge Cond: (b.y = c.z) + -> Sort + Sort Key: b.y + -> Seq Scan on b + -> Sort + Sort Key: c.z + -> Seq Scan on c +(11 rows) + +-- No ANTI JOIN when the type's '=' operator is not a member of a btree or +-- hash opfamily. +CREATE TEMP TABLE l1 (a LINE NOT NULL); +CREATE TEMP TABLE l2 (b LINE NOT NULL); +EXPLAIN (COSTS OFF) +SELECT * FROM l1 WHERE a NOT IN(SELECT b FROM l2); + QUERY PLAN +------------------------------ + Seq Scan on l1 + Filter: (NOT (SubPlan 1)) + SubPlan 1 + -> Materialize + -> Seq Scan on l2 +(5 rows) + +ROLLBACK; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 99ca69791e..4f4285cb50 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -744,3 +744,130 @@ select * from (with x as (select 2 as y) select * from x) ss; explain (verbose, costs off) with x as (select * from subselect_tbl) select * from x for update; + +-- +-- Check NOT IN performs an ANTI JOIN when NULLs are not possible +-- on either side of the, would be, join condition. +-- + +BEGIN; + +CREATE TEMP TABLE a (id INT PRIMARY KEY); +CREATE TEMP TABLE b (x INT NOT NULL, y INT); +CREATE TEMP TABLE c (z INT NOT NULL); + +INSERT INTO b VALUES(1,1),(2,2),(3,NULL); + +-- No ANTI JOIN, b.x is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a +LEFT OUTER JOIN b ON a.id = b.x +WHERE b.x NOT IN(SELECT z FROM c); + +-- ANTI JOIN, b.x is from an outer join but b.x > 100 +-- forces the join not to produce NULL on the righthand +-- side. +EXPLAIN (COSTS OFF) +SELECT * FROM a +LEFT OUTER JOIN b ON a.id = b.x +WHERE b.x NOT IN(SELECT z FROM c) + AND b.x > 100; + +-- No ANTI JOIN, b.x is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a +FULL OUTER JOIN b ON a.id = b.x +WHERE b.x NOT IN(SELECT y FROM c); + +-- No ANTI JOIN. y can have NULLs +EXPLAIN (COSTS OFF) +SELECT * FROM b WHERE y NOT IN (SELECT z FROM c); + +-- c is an empty relation so should cause no filtering on b +SELECT * FROM b WHERE y NOT IN (SELECT z FROM c); + +INSERT INTO c VALUES(1); + +-- Records where y is NULL should be filtered out. +SELECT * FROM b WHERE y NOT IN (SELECT z FROM c); + +-- No ANTI JOIN, y can be NULL +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b); + +-- No ANTI JOIN, x is NOT NULL, but we don't know if + 1 will change that. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT x+1 FROM b); + +-- ANTI JOIN 1 is a Const that is not null. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT 1 FROM b); + +-- No ANTI JOIN, results contain a NULL Const +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT NULL::int FROM b); + +-- ANTI JOIN y = 1 means y can't be NULL +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE y = 1); + +-- No ANTI JOIN, OR condition does not ensure y = 1 +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE y = 1 OR x = 1); + +-- No ANTI JOIN, OR condition does not ensure y = 1 or y = 2 +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE (y = 1 OR x = 1) AND (y = 2 OR x = 2)); + +-- ANTI JOIN y must be 2, so can't be NULL +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE (y = 1 OR x = 1) AND y = 2); + +-- ANTI JOIN y can be 1 or 2, but can't be null. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT y FROM b WHERE (y = 1 OR y = 2)); + +-- No ANTI JOIN c.z is from a left outer join so it can have nulls. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b LEFT JOIN c ON b.x = c.z); + +-- ANTI JOIN, c.z is not from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b RIGHT JOIN c ON b.x = c.z); + +-- No ANTI JOIN, b.x is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT b.x FROM b RIGHT JOIN c ON b.x = c.z); + +-- No ANTI JOIN, c.z is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b FULL JOIN c ON b.x = c.z); + +-- No ANTI JOIN, b.x is from an outer join +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT b.x FROM b FULL JOIN c ON b.x = c.z); + +-- ANTI JOIN, c.z is from an inner join and has a NOT NULL constraint. +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b INNER JOIN c ON b.x = c.z); + +-- ANTI JOIN, c.z must be 1 +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b LEFT JOIN c ON b.x = c.z WHERE c.z = 1); + +-- ANTI JOIN, c.z can't be NULL +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT c.z FROM b LEFT JOIN c ON b.x = c.z WHERE c.z IS NOT NULL); + +-- ANTI JOIN, b.y cannot be NULL due to the join condition b.y = c.z +EXPLAIN (COSTS OFF) +SELECT * FROM a WHERE id NOT IN (SELECT b.y FROM b INNER JOIN c ON b.y = c.z); + +-- No ANTI JOIN when the type's '=' operator is not a member of a btree or +-- hash opfamily. +CREATE TEMP TABLE l1 (a LINE NOT NULL); +CREATE TEMP TABLE l2 (b LINE NOT NULL); +EXPLAIN (COSTS OFF) +SELECT * FROM l1 WHERE a NOT IN(SELECT b FROM l2); + +ROLLBACK;