From 10c03ef1b41f69b54db761b16f8bb1e0642d815b Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 29 Dec 2022 22:41:36 +0100 Subject: [PATCH] Fix handling of NULLs in BRIN summaries BRIN did not properly distinguish empty (new) and all-NULL ranges. All ranges were initialized with all_nulls=true and opclasses simply reset this to false when adding the first non-NULL value. This fails if the first value in the range is NULL, and there are no other NULLs in the range - we forget the range contains a NULL value. This happens because the "all_nulls" flag is used for two separate purposes - to mark empty ranges (not representing any rows yet) and ranges containing only NULL values. The opclass can't know which of those cases is it. The opclass might also set has_nulls=true when resetting the all_nulls flag - that would make it correct, but the indexes would be useless for IS NULL conditions as all ranges start with all_nulls=true (and so all ranges would have one of those flags set to true). Ideally we'd introduce a new "is_empty" flag marking empty summaries, but that would break ABI and/or on-disk format, depending on where we add the flag. Considering we need to backpatch this, that doesn't seem particularly great. So instead we use an "impossible" combination of both flags (all_nulls and has_nulls) set to true to mark "empty" ranges. It'd be better to have a single flag for the whole index tuple (and not per-summary one), because "range is empty" applies to all ranges in a multi-column index, but this is where the existing flags are. We could skip storing index tuples with this combination, but then we'd have to always read/process this range - even if there are no rows, it would still require reading the pages etc. So we store them, but ignore them when building the bitmap. --- src/backend/access/brin/brin.c | 78 ++++++++++++++++++- src/backend/access/brin/brin_tuple.c | 11 ++- ...summarization-and-inprogress-insertion.out | 8 +- ...ummarization-and-inprogress-insertion.spec | 1 + 4 files changed, 89 insertions(+), 9 deletions(-) diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 7e386250ae..c7bf8963fa 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -591,6 +591,17 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm) bval = &dtup->bt_columns[attno - 1]; + /* + * If the range has both allnulls and hasnulls set, it means + * there are no rows in the range, so we can skip it (we have + * scan keys, and we know there's nothing to match). + */ + if (bval->bv_allnulls && bval->bv_hasnulls) + { + addrange = false; + break; + } + /* * First check if there are any IS [NOT] NULL scan keys, * and if we're violating them. In that case we can @@ -1608,11 +1619,32 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b) if (opcinfo->oi_regular_nulls) { - /* Adjust "hasnulls". */ + /* + * If B is empty (represents no rows), ignore it and just keep + * A as is (might be empty etc.). + */ + if (col_b->bv_allnulls && col_b->bv_hasnulls) + continue; + + /* + * Adjust "hasnulls". + * + * It may happen A has allnulls=true, and we should reset it. We + * need to copy the values from B first, which happens later. + * We know the next condition can't trigger, because B is not + * empty so only one of the flags is true. + */ if (!col_a->bv_hasnulls && col_b->bv_hasnulls) col_a->bv_hasnulls = true; - /* If there are no values in B, there's nothing left to do. */ + /* + * If there are no values in B, there's nothing left to do. + * + * Note this is mutually exclusive with the preceding condition. + * We have skipped "empty" B, so hasnulls and allnulls can't be + * both true. So if we adjusted hasnulls for A, there have to be + * values for B (i.e. we're not terminating here). + */ if (col_b->bv_allnulls) continue; @@ -1626,6 +1658,7 @@ union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b) { int i; + /* This also applies if we adjusted hasnulls=true earlier. */ col_a->bv_allnulls = false; for (i = 0; i < opcinfo->oi_nstored; i++) @@ -1710,24 +1743,53 @@ add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, Datum result; BrinValues *bval; FmgrInfo *addValue; + bool first_row; + bool has_nulls = false; bval = &dtup->bt_columns[keyno]; + /* + * Is this the first tuple we're adding to the range range? We track + * that by setting both bv_hasnulls and bval->bv_allnulls to true + * during initialization. But it's not a valid combination (at most + * one of those flags should be set), so we reset the second flag. + */ + first_row = (bval->bv_hasnulls && bval->bv_allnulls); + + if (bval->bv_hasnulls && bval->bv_allnulls) + { + bval->bv_hasnulls = false; + modified = true; + } + if (bdesc->bd_info[keyno]->oi_regular_nulls && nulls[keyno]) { /* * If the new value is null, we record that we saw it if it's the * first one; otherwise, there's nothing to do. + * + * XXX This used to check "hasnulls" but now that might result in + * having both flags set. That used to be OK, because we just + * ignore hasnulls flag in brin_form_tuple when allnulls=true. + * But now we interpret this combination as "firt row" so it + * would confuse following calls. So make sure to only set one + * of the flags - when allnulls=true we're done, as it already + * marks the range as containing ranges. */ - if (!bval->bv_hasnulls) + if (!bval->bv_allnulls) { bval->bv_hasnulls = true; modified = true; } - continue; } + /* + * Does the range already has NULL values? Either of the flags can + * be set, but we ignore the state before adding first row. + */ + has_nulls = (bval->bv_hasnulls || bval->bv_allnulls) && !first_row; + addValue = index_getprocinfo(idxRel, keyno + 1, BRIN_PROCNUM_ADDVALUE); result = FunctionCall4Coll(addValue, @@ -1736,8 +1798,16 @@ add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, PointerGetDatum(bval), values[keyno], nulls[keyno]); + /* if that returned true, we need to insert the updated tuple */ modified |= DatumGetBool(result); + + /* + * If we had NULLS, and the opclass didn't set allnulls=true, set + * the hasnulls so that we know there are NULL values. + */ + if (has_nulls && !bval->bv_allnulls) + bval->bv_hasnulls = true; } return modified; diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c index c0e2dbd23b..308c12a255 100644 --- a/src/backend/access/brin/brin_tuple.c +++ b/src/backend/access/brin/brin_tuple.c @@ -516,8 +516,10 @@ brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc) for (i = 0; i < brdesc->bd_tupdesc->natts; i++) { dtuple->bt_columns[i].bv_attno = i + 1; + + /* each memtuple starts as if it represents no rows */ dtuple->bt_columns[i].bv_allnulls = true; - dtuple->bt_columns[i].bv_hasnulls = false; + dtuple->bt_columns[i].bv_hasnulls = true; dtuple->bt_columns[i].bv_values = (Datum *) currdatum; dtuple->bt_columns[i].bv_mem_value = PointerGetDatum(NULL); @@ -585,6 +587,13 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple) { int i; + /* + * Make sure to overwrite the hasnulls flag, because it was initialized + * to true by brin_memtuple_initialize and we don't want to skip it if + * allnulls. + */ + dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno]; + if (allnulls[keyno]) { valueno += brdesc->bd_info[keyno]->oi_nstored; diff --git a/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out b/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out index 2a4755d099..584ac2602f 100644 --- a/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out +++ b/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out @@ -4,7 +4,7 @@ starting permutation: s2check s1b s2b s1i s2summ s1c s2c s2check step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+-------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} (1 row) step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ; @@ -26,7 +26,7 @@ step s2c: COMMIT; step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+----------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} 2| 1| 1|f |f |f |{1 .. 1000} (2 rows) @@ -35,7 +35,7 @@ starting permutation: s2check s1b s1i s2vacuum s1c s2check step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+-------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} (1 row) step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ; @@ -45,7 +45,7 @@ step s1c: COMMIT; step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value ----------+------+------+--------+--------+-----------+----------- - 1| 0| 1|f |f |f |{1 .. 1} + 1| 0| 1|f |t |f |{1 .. 1} 2| 1| 1|f |f |f |{1 .. 1000} (2 rows) diff --git a/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec b/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec index 19ac18a2e8..18ba92b7ba 100644 --- a/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec +++ b/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec @@ -9,6 +9,7 @@ setup ) WITH (fillfactor=10); CREATE INDEX brinidx ON brin_iso USING brin (value) WITH (pages_per_range=1); -- this fills the first page + INSERT INTO brin_iso VALUES (NULL); DO $$ DECLARE curtid tid; BEGIN -- 2.38.1