From a9dd1d1c9990a30173c5210570efccb034e8b5f4 Mon Sep 17 00:00:00 2001 From: Evdokimov Ilia Date: Wed, 25 Feb 2026 00:08:26 +0300 Subject: [PATCH v4] Reduce planning time for large NOT IN lists containing NULL For x <> ALL (...) / x NOT IN (...), the presence of a NULL element makes the selectivity 0.0: assuming a strict operator, the comparison with NULL never yields true, so the ALL condition can never be satisfied. The planner currently still iterates over all elements and computes per-element selectivity, even though the final result is known. Add an early NULL check, both for constant arrays and for NULL constant elements of array expressions, and immediately return 0.0 under ALL semantics. This reduces planning time for large NOT IN / <> ALL lists without changing semantics. --- src/backend/utils/adt/selfuncs.c | 9 +++++ src/test/regress/expected/expressions.out | 44 +++++++++++++++++++++++ src/test/regress/sql/expressions.sql | 41 +++++++++++++++++++++ 3 files changed, 94 insertions(+) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 29fec655593..eef3f0375a5 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2018,6 +2018,11 @@ scalararraysel(PlannerInfo *root, if (arrayisnull) /* qual can't succeed if null array */ return (Selectivity) 0.0; arrayval = DatumGetArrayTypeP(arraydatum); + + /* Selectivity of "WHERE x NOT IN (NULL, ... )" is always 0 */ + if (!useOr && array_contains_nulls(arrayval)) + return (Selectivity) 0.0; + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), &elmlen, &elmbyval, &elmalign); deconstruct_array(arrayval, @@ -2115,6 +2120,10 @@ scalararraysel(PlannerInfo *root, List *args; Selectivity s2; + /* Selectivity of "WHERE x NOT IN (NULL, ... 
)" is always 0 */ + if (!useOr && IsA(elem, Const) && ((Const *) elem)->constisnull) + return (Selectivity) 0.0; + /* * Theoretically, if elem isn't of nominal_element_type we should * insert a RelabelType, but it seems unlikely that any operator diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out index 9a3c97b15a3..34f14a5775a 100644 --- a/src/test/regress/expected/expressions.out +++ b/src/test/regress/expected/expressions.out @@ -426,3 +426,47 @@ select * from inttest where a not in (0::myint,2::myint,3::myint,4::myint,5::myi (0 rows) rollback; +-- Test <> ALL when array initially contained NULL but no longer does +begin; +create function check_estimated_rows(text) returns table (estimated int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + return query select tmp[1]::int; + end if; + end loop; +end; +$$; +create function replace_elem(arr int[], idx int, val int) +returns int[] AS $$ +begin + arr[idx] := val; + return arr; +end; +$$ language plpgsql immutable; +create table notin_test as select generate_series(1, 1000) as x; +analyze notin_test; +select * from check_estimated_rows('select * from notin_test where x <> all(array[1,99,3])'); + estimated +----------- + 997 +(1 row) + +-- same array, constructed from an array with a NULL +select * from check_estimated_rows('select * from notin_test where x <> all(replace_elem(array[1,null,3], 2, 99))'); + estimated +----------- + 997 +(1 row) + +rollback; diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql index e02c21f3368..ca94859bbf8 100644 --- a/src/test/regress/sql/expressions.sql +++ b/src/test/regress/sql/expressions.sql @@ -209,3 +209,44 @@ select * from inttest where a not in (1::myint,2::myint,3::myint,4::myint,5::myi select * from inttest 
where a not in (0::myint,2::myint,3::myint,4::myint,5::myint, null); rollback; + +-- Test <> ALL when array initially contained NULL but no longer does + +begin; + +create function check_estimated_rows(text) returns table (estimated int) +language plpgsql as +$$ +declare + ln text; + tmp text[]; + first_row bool := true; +begin + for ln in + execute format('explain %s', $1) + loop + if first_row then + first_row := false; + tmp := regexp_match(ln, 'rows=(\d*)'); + return query select tmp[1]::int; + end if; + end loop; +end; +$$; + +create function replace_elem(arr int[], idx int, val int) +returns int[] AS $$ +begin + arr[idx] := val; + return arr; +end; +$$ language plpgsql immutable; + +create table notin_test as select generate_series(1, 1000) as x; +analyze notin_test; + +select * from check_estimated_rows('select * from notin_test where x <> all(array[1,99,3])'); +-- same array, constructed from an array with a NULL +select * from check_estimated_rows('select * from notin_test where x <> all(replace_elem(array[1,null,3], 2, 99))'); + +rollback; -- 2.34.1