From c0ba98141c183090783653d882d206acb7c38a29 Mon Sep 17 00:00:00 2001 From: Ilia Evdokimov Date: Wed, 18 Mar 2026 17:23:50 +0300 Subject: [PATCH v9] Reduce planning time for large NOT IN lists containing NULL For x <> ALL (...), the presence of a NULL makes the selectivity 0.0. The planner currently still iterates over all elements and computes per-element selectivity, even though the final result is known. Add an early NULL check for constant arrays and immediately return 0.0 under ALL semantics. This reduces planning time for large <> ALL lists without changing semantics. --- src/backend/utils/adt/selfuncs.c | 17 +++++++++++ src/test/regress/expected/planner_est.out | 35 +++++++++++++++++++++++ src/test/regress/sql/planner_est.sql | 16 +++++++++++ 3 files changed, 68 insertions(+) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d4da0e8dea9..073d93f4f3d 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2018,6 +2018,15 @@ scalararraysel(PlannerInfo *root, if (arrayisnull) /* qual can't succeed if null array */ return (Selectivity) 0.0; arrayval = DatumGetArrayTypeP(arraydatum); + + /* + * For ALL semantics, if the array contains NULL, assume operator is + * strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so return + * zero. + */ + if (!useOr && array_contains_nulls(arrayval)) + return (Selectivity) 0.0; + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), &elmlen, &elmbyval, &elmalign); deconstruct_array(arrayval, @@ -2115,6 +2124,14 @@ scalararraysel(PlannerInfo *root, List *args; Selectivity s2; + /* + * For ALL semantics, if the array contains NULL, assume operator + * is strict. The ScalarArrayOpExpr cannot evaluate to TRUE, so + * return zero. + */ + if (!useOr && IsA(elem, Const) && ((Const *) elem)->constisnull) + return (Selectivity) 0.0; + /* * Theoretically, if elem isn't of nominal_element_type we should * insert a RelabelType, but it seems unlikely that any operator diff --git a/src/test/regress/expected/planner_est.out b/src/test/regress/expected/planner_est.out index 3a47061800a..624d5858687 100644 --- a/src/test/regress/expected/planner_est.out +++ b/src/test/regress/expected/planner_est.out @@ -183,4 +183,39 @@ false, true, false, true); Function Scan on generate_series g (cost=N..N rows=1000 width=N) (1 row) +-- +-- Test <> ALL behavior when NULL appears in the array +-- +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[NULL]::integer[]);$$, +true, true, false, true); + explain_mask_costs +-------------------------------------------------------------------------- + Seq Scan on tenk1 (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) + Filter: (unique1 <> ALL ('{NULL}'::integer[])) + Rows Removed by Filter: 10000 +(3 rows) + +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 99, NULL]);$$, +true, true, false, true); + explain_mask_costs +-------------------------------------------------------------------------- + Seq Scan on tenk1 (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) + Filter: (unique1 <> ALL ('{1,2,99,NULL}'::integer[])) + Rows Removed by Filter: 10000 +(3 rows) + +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 98, (SELECT 99), NULL]);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------- + Seq Scan on tenk1 (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) + Filter: (unique1 <> ALL (ARRAY[1, 2, 98, (InitPlan expr_1).col1, NULL::integer])) + Rows Removed by Filter: 10000 + InitPlan expr_1 + -> Result (cost=N..N rows=1 width=N) (actual rows=1.00 loops=1) +(5 rows) + DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); diff --git a/src/test/regress/sql/planner_est.sql b/src/test/regress/sql/planner_est.sql index 47d5ae679c7..61c9f640761 100644 --- a/src/test/regress/sql/planner_est.sql +++ b/src/test/regress/sql/planner_est.sql @@ -131,5 +131,21 @@ SELECT explain_mask_costs($$ SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, false, true, false, true); +-- +-- Test <> ALL behavior when NULL appears in the array +-- + +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[NULL]::integer[]);$$, +true, true, false, true); + +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 99, NULL]);$$, +true, true, false, true); + +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 98, (SELECT 99), NULL]);$$, +true, true, false, true); + DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); -- 2.34.1