From f65629a86d25f1247c91b1a2fbbba7869c51192a Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 20 Mar 2026 11:01:13 +1300 Subject: [PATCH v2] Introduce bms_offset_members() function Effectively a function to bitshift members by the specified number of bits. We have various fragments of code doing this manually with a bms_next_member() loop. We can do this more efficiently with bitshifting. --- src/backend/nodes/bitmapset.c | 136 ++++++++++++++++++ src/backend/optimizer/plan/setrefs.c | 10 +- src/backend/optimizer/prep/prepjointree.c | 30 ++-- src/backend/rewrite/rewriteManip.c | 25 +--- src/backend/statistics/extended_stats.c | 12 +- src/include/nodes/bitmapset.h | 1 + .../expected/test_bitmapset.out | 50 +++++++ .../test_bitmapset/sql/test_bitmapset.sql | 15 ++ .../test_bitmapset/test_bitmapset--1.0.sql | 8 ++ .../modules/test_bitmapset/test_bitmapset.c | 105 +++++++++++++- 10 files changed, 339 insertions(+), 53 deletions(-) diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index f053d8c4d64..f3f87f11cfd 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -39,6 +39,7 @@ #include "postgres.h" #include "common/hashfn.h" +#include "common/int.h" #include "nodes/bitmapset.h" #include "nodes/pg_list.h" #include "port/pg_bitutils.h" @@ -405,6 +406,140 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) return result; } +/* + * bms_offset_members + * Creates a new Bitmapset with all members of 'a' adjusted to add the + * value of 'offset' to each member. + * + * Members which would become negative as a result of a negative offset will + * be removed from the set, whereas too large an offset, which would result in + * a member going > INT_MAX, will result in an ERROR. + */ +Bitmapset * +bms_offset_members(const Bitmapset *a, int offset) +{ + Bitmapset *result; + int offset_words; + int offset_bits; + int new_nwords; + int old_nwords; + int32 high_bit; + int old_highest; + int new_highest; + + Assert(bms_is_valid_set(a)); + + /* nothing to do for empty sets */ + if (a == NULL) + return NULL; + + old_nwords = a->nwords; + offset_words = WORDNUM(offset); + offset_bits = BITNUM(offset); + high_bit = bmw_leftmost_one_pos(a->words[a->nwords - 1]); + old_highest = (old_nwords - 1) * BITS_PER_BITMAPWORD + high_bit; + + /* don't create a set with a member that doesn't fit into an int32 */ + if (pg_add_s32_overflow(old_highest, offset, &new_highest)) + elog(ERROR, "bitmapset overflow"); + /* return NULL if the new set would be empty */ + else if (new_highest < 0) + return NULL; + + new_nwords = WORDNUM(new_highest) + 1; + result = (Bitmapset *) palloc0(BITMAPSET_SIZE(new_nwords)); + result->type = T_Bitmapset; + result->nwords = new_nwords; + + /* handle zero and positive offsets (bitshift left) */ + if (offset >= 0) + { + /* + * We special-case offsetting only by whole words so we don't have to + * special-case bitshifting by BITS_PER_BITMAPWORD places, which has + * an undefined behavior. + */ + if (offset_bits == 0) + { + int i = 0; + + /* + * The old set is guaranteed to have at least 1 word, so use + * do/while to save the redundant initial loop bounds check. + */ + do + { + Assert(i + offset_words < new_nwords); + result->words[i + offset_words] = a->words[i]; + } while (++i < old_nwords); + } + else + { + int carry_bits = BITS_PER_BITMAPWORD - offset_bits; + bitmapword prev_carry = 0; + int i = 0; + + do + { + bitmapword carry = (a->words[i] >> carry_bits); + + Assert(i + offset_words < new_nwords); + /* shift bits up and carry bits from the previous word */ + result->words[i + offset_words] = (a->words[i] << offset_bits) | prev_carry; + prev_carry = carry; + } while (++i < old_nwords); + result->words[new_nwords - 1] |= prev_carry; + } + } + + /* handle negative offset (bitshift right) */ + else + { + /* make the negative offset_words and offset_bits positive */ + offset_words = 0 - offset_words; + offset_bits = 0 - offset_bits; + + /* as above, special case shifting only by whole words */ + if (offset_bits == 0) + { + int i = 0; + + do + { + Assert(i + offset_words < old_nwords); + result->words[i] = a->words[i + offset_words]; + } while (++i < new_nwords); + } + else + { + int carry_bits = BITS_PER_BITMAPWORD - offset_bits; + bitmapword prev_carry = 0; + int i = new_nwords - 1; + + /* carry bits from any word just above where the loop starts */ + if (old_nwords > new_nwords + offset_words) + prev_carry = (a->words[new_nwords + offset_words] << carry_bits); + + /* + * We loop backward over the array so we correctly carry bits from + * higher words. + */ + do + { + bitmapword carry = (a->words[i + offset_words] << carry_bits); + + Assert(i + offset_words < old_nwords); + + /* shift bits down and carry bits from the previous word */ + result->words[i] = (a->words[i + offset_words] >> offset_bits) | prev_carry; + prev_carry = carry; + } while (--i >= 0); + } + } + + return result; +} + /* * bms_is_subset - is A a subset of B? */ @@ -991,6 +1126,7 @@ bms_replace_members(Bitmapset *a, const Bitmapset *b) return a; } + /* * bms_add_range * Add members in the range of 'lower' to 'upper' to the set. diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index ff0e875f2a2..ebae7a8dea9 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -2057,16 +2057,10 @@ set_hash_references(PlannerInfo *root, Plan *plan, int rtoffset) static Relids offset_relid_set(Relids relids, int rtoffset) { - Relids result = NULL; - int rtindex; - - /* If there's no offset to apply, we needn't recompute the value */ + /* If there's no offset to apply, we needn't make another set */ if (rtoffset == 0) return relids; - rtindex = -1; - while ((rtindex = bms_next_member(relids, rtindex)) >= 0) - result = bms_add_member(result, rtindex + rtoffset); - return result; + return bms_offset_members(relids, rtoffset); } /* diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 95bf51606cc..78281262ba1 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -3731,7 +3731,7 @@ has_notnull_forced_var(PlannerInfo *root, List *forced_null_vars, RangeTblEntry *rte; Bitmapset *notnullattnums; Bitmapset *forcednullattnums = NULL; - int attno; + int lowest_attno; varno++; @@ -3751,22 +3751,22 @@ has_notnull_forced_var(PlannerInfo *root, List *forced_null_vars, if (bms_is_member(varno, right_state->nullable_rels)) continue; - /* - * Iterate over attributes and adjust the bitmap indexes by - * FirstLowInvalidHeapAttributeNumber to get the actual attribute - * numbers. - */ - attno = -1; - while ((attno = bms_next_member(attrs, attno)) >= 0) - { - AttrNumber real_attno = attno + FirstLowInvalidHeapAttributeNumber; + /* find the lowest member to check if system columns are present */ + lowest_attno = bms_next_member(attrs, -1); - /* system columns cannot be NULL */ - if (real_attno < 0) - return true; + /* we checked for an empty set above */ + Assert(lowest_attno >= 0); - forcednullattnums = bms_add_member(forcednullattnums, real_attno); - } + /* system columns cannot be NULL */ + if (lowest_attno + FirstLowInvalidHeapAttributeNumber < 0) + return true; + + /* + * Offset the bitmap members by FirstLowInvalidHeapAttributeNumber to + * get the actual attribute numbers. + */ + forcednullattnums = bms_offset_members(attrs, + FirstLowInvalidHeapAttributeNumber); rte = rt_fetch(varno, root->parse->rtable); diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 4bf4aa0d6d1..6137f4303f2 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -64,7 +64,6 @@ static bool contain_windowfuncs_walker(Node *node, void *context); static bool locate_windowfunc_walker(Node *node, locate_windowfunc_context *context); static bool checkExprHasSubLink_walker(Node *node, void *context); -static Relids offset_relid_set(Relids relids, int offset); static Node *add_nulling_relids_mutator(Node *node, add_nulling_relids_context *context); static Node *remove_nulling_relids_mutator(Node *node, @@ -397,8 +396,8 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context) if (var->varlevelsup == context->sublevels_up) { var->varno += context->offset; - var->varnullingrels = offset_relid_set(var->varnullingrels, - context->offset); + var->varnullingrels = bms_offset_members(var->varnullingrels, + context->offset); if (var->varnosyn > 0) var->varnosyn += context->offset; } @@ -435,10 +434,10 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context) if (phv->phlevelsup == context->sublevels_up) { - phv->phrels = offset_relid_set(phv->phrels, - context->offset); - phv->phnullingrels = offset_relid_set(phv->phnullingrels, - context->offset); + phv->phrels = bms_offset_members(phv->phrels, + context->offset); + phv->phnullingrels = bms_offset_members(phv->phnullingrels, + context->offset); } /* fall through to examine children */ } @@ -524,18 +523,6 @@ OffsetVarNodes(Node *node, int offset, int sublevels_up) OffsetVarNodes_walker(node, &context); } -static Relids -offset_relid_set(Relids relids, int offset) -{ - Relids result = NULL; - int rtindex; - - rtindex = -1; - while ((rtindex = bms_next_member(relids, rtindex)) >= 0) - result = bms_add_member(result, rtindex + offset); - return result; -} - /* * ChangeVarNodes - adjust Var nodes for a specific change of RT index * diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 2b83355d26e..332e7423bd8 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1673,8 +1673,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, */ if (!leakproof) { - Bitmapset *clause_attnums = NULL; - int attnum = -1; + Bitmapset *clause_attnums; /* * We have to check per-column privileges. *attnums has the attnums @@ -1685,13 +1684,8 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, * while attnums within *attnums aren't. Convert *attnums to the * offset style so we can combine the results. */ - while ((attnum = bms_next_member(*attnums, attnum)) >= 0) - { - clause_attnums = - bms_add_member(clause_attnums, - attnum - FirstLowInvalidHeapAttributeNumber); - } - + clause_attnums = bms_offset_members(*attnums, + 0 - FirstLowInvalidHeapAttributeNumber); /* Now merge attnums from *exprs into clause_attnums */ if (*exprs != NIL) pull_varattnos((Node *) *exprs, relid, &clause_attnums); diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 067ec72e99b..997f8a1cd96 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -100,6 +100,7 @@ extern void bms_free(Bitmapset *a); extern Bitmapset *bms_union(const Bitmapset *a, const Bitmapset *b); extern Bitmapset *bms_intersect(const Bitmapset *a, const Bitmapset *b); extern Bitmapset *bms_difference(const Bitmapset *a, const Bitmapset *b); +extern Bitmapset *bms_offset_members(const Bitmapset *a, int offset); extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b); extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b); extern bool bms_is_member(int x, const Bitmapset *a); diff --git a/src/test/modules/test_bitmapset/expected/test_bitmapset.out b/src/test/modules/test_bitmapset/expected/test_bitmapset.out index f75cb46b869..568b97bb635 100644 --- a/src/test/modules/test_bitmapset/expected/test_bitmapset.out +++ b/src/test/modules/test_bitmapset/expected/test_bitmapset.out @@ -528,6 +528,49 @@ SELECT test_bms_difference(NULL, NULL) AS result; <> (1 row) +-- bms_offset_members() +-- Ensure overflow detection works +SELECT test_bms_offset_members('(b 1)', 2147483647); +ERROR: bitmapset overflow +SELECT test_bms_offset_members('(b 2)', 2147483646); +ERROR: bitmapset overflow +-- Ensure members are all offset as expected +SELECT test_bms_offset_members('(b 1 3 5)', 1); + test_bms_offset_members +------------------------- + (b 2 4 6) +(1 row) + +SELECT test_bms_offset_members('(b 1 3 5)', 0); + test_bms_offset_members +------------------------- + (b 1 3 5) +(1 row) + +SELECT test_bms_offset_members('(b 1 3 5)', -1); + test_bms_offset_members +------------------------- + (b 0 2 4) +(1 row) + +SELECT test_bms_offset_members('(b 31 32 63 64)', 1); + test_bms_offset_members +------------------------- + (b 32 33 64 65) +(1 row) + +SELECT test_bms_offset_members('(b 31 32 63 64)', 0); + test_bms_offset_members +------------------------- + (b 31 32 63 64) +(1 row) + +SELECT test_bms_offset_members('(b 31 32 63 64)', -1); + test_bms_offset_members +------------------------- + (b 30 31 62 63) +(1 row) + -- bms_is_member() SELECT test_bms_is_member('(b)', -5); -- error ERROR: negative bitmapset member not allowed @@ -1575,4 +1618,11 @@ SELECT test_random_operations(-1, 10000, 81920, 0) > 0 AS result; t (1 row) +-- perform some random test on bms_offset_members() +SELECT test_random_offset_operations(NULL, 10000, 1024, 0) AS result; + result +-------- + 10000 +(1 row) + DROP EXTENSION test_bitmapset; diff --git a/src/test/modules/test_bitmapset/sql/test_bitmapset.sql b/src/test/modules/test_bitmapset/sql/test_bitmapset.sql index e75ab8f620a..9b852d6d990 100644 --- a/src/test/modules/test_bitmapset/sql/test_bitmapset.sql +++ b/src/test/modules/test_bitmapset/sql/test_bitmapset.sql @@ -145,6 +145,18 @@ SELECT test_bms_difference('(b 5)', NULL) AS result; SELECT test_bms_difference(NULL, '(b 5)') AS result; SELECT test_bms_difference(NULL, NULL) AS result; +-- bms_offset_members() +-- Ensure overflow detection works +SELECT test_bms_offset_members('(b 1)', 2147483647); +SELECT test_bms_offset_members('(b 2)', 2147483646); +-- Ensure members are all offset as expected +SELECT test_bms_offset_members('(b 1 3 5)', 1); +SELECT test_bms_offset_members('(b 1 3 5)', 0); +SELECT test_bms_offset_members('(b 1 3 5)', -1); +SELECT test_bms_offset_members('(b 31 32 63 64)', 1); +SELECT test_bms_offset_members('(b 31 32 63 64)', 0); +SELECT test_bms_offset_members('(b 31 32 63 64)', -1); + -- bms_is_member() SELECT test_bms_is_member('(b)', -5); -- error SELECT test_bms_is_member('(b 1 3 5)', 1) AS result; @@ -403,4 +415,7 @@ SELECT test_bms_nonempty_difference('(b 1 2)', '(b 50 100)') AS result; -- random operations SELECT test_random_operations(-1, 10000, 81920, 0) > 0 AS result; +-- perform some random test on bms_offset_members() +SELECT test_random_offset_operations(NULL, 10000, 1024, 0) AS result; + DROP EXTENSION test_bitmapset; diff --git a/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql b/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql index 227ecb5aa3b..ee58319e4d9 100644 --- a/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql +++ b/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql @@ -56,6 +56,10 @@ CREATE FUNCTION test_bms_difference(text, text) RETURNS text AS 'MODULE_PATHNAME' LANGUAGE C; +CREATE FUNCTION test_bms_offset_members(text, integer) +RETURNS text +AS 'MODULE_PATHNAME' LANGUAGE C; + CREATE FUNCTION test_bms_is_empty(text) RETURNS boolean AS 'MODULE_PATHNAME' LANGUAGE C; @@ -137,4 +141,8 @@ CREATE FUNCTION test_random_operations(integer, integer, integer, integer) RETURNS integer STRICT AS 'MODULE_PATHNAME' LANGUAGE C; +CREATE FUNCTION test_random_offset_operations(bigint, integer, integer, integer) +RETURNS integer +AS 'MODULE_PATHNAME' LANGUAGE C; + COMMENT ON EXTENSION test_bitmapset IS 'Test code for Bitmapset'; diff --git a/src/test/modules/test_bitmapset/test_bitmapset.c b/src/test/modules/test_bitmapset/test_bitmapset.c index 272bed390a6..c134df97cd5 100644 --- a/src/test/modules/test_bitmapset/test_bitmapset.c +++ b/src/test/modules/test_bitmapset/test_bitmapset.c @@ -19,11 +19,12 @@ #include #include "catalog/pg_type.h" #include "common/pg_prng.h" -#include "utils/array.h" #include "fmgr.h" +#include "miscadmin.h" #include "nodes/bitmapset.h" #include "nodes/nodes.h" #include "nodes/pg_list.h" +#include "utils/array.h" #include "utils/builtins.h" #include "utils/timestamp.h" @@ -43,6 +44,7 @@ PG_FUNCTION_INFO_V1(test_bms_subset_compare); PG_FUNCTION_INFO_V1(test_bms_union); PG_FUNCTION_INFO_V1(test_bms_intersect); PG_FUNCTION_INFO_V1(test_bms_difference); +PG_FUNCTION_INFO_V1(test_bms_offset_members); PG_FUNCTION_INFO_V1(test_bms_is_empty); PG_FUNCTION_INFO_V1(test_bms_membership); PG_FUNCTION_INFO_V1(test_bms_singleton_member); @@ -65,6 +67,7 @@ PG_FUNCTION_INFO_V1(test_bitmap_match); /* Test utility functions */ PG_FUNCTION_INFO_V1(test_random_operations); +PG_FUNCTION_INFO_V1(test_random_offset_operations); /* Convenient macros to test results */ #define EXPECT_TRUE(expr) \ @@ -295,6 +298,18 @@ test_bms_difference(PG_FUNCTION_ARGS) PG_RETURN_BITMAPSET_AS_TEXT(result_bms); } +Datum +test_bms_offset_members(PG_FUNCTION_ARGS) +{ + Bitmapset *bms = PG_ARG_GETBITMAPSET(0); + int offset = PG_GETARG_INT32(1); + + /* left input gets recycled */ + bms = bms_offset_members(bms, offset); + + PG_RETURN_BITMAPSET_AS_TEXT(bms); +} + Datum test_bms_compare(PG_FUNCTION_ARGS) { @@ -580,7 +595,7 @@ test_bitmap_match(PG_FUNCTION_ARGS) } /* - * Contrary to all the other functions which are one-one mappings with the + * Contrary to most of the other functions which are one-one mappings with the * equivalent C functions, this stresses Bitmapsets in a random fashion for * various operations. * @@ -742,3 +757,89 @@ test_random_operations(PG_FUNCTION_ARGS) PG_RETURN_INT32(total_ops); } + +/* + * Random testing for bms_offset_members(). Generates a random set and then + * picks a number to offset the members by. We then create another set, which + * is built by looping over the members of the random set and performing + * bms_add_member and adding on the offset to create a known good set to + * compare the result of bms_offset_members() to. + * + * Arguments: + * arg1: optional random seed, or < 0 to use a random seed. + * arg2: the number of operations to perform. + * arg3: the maximum bitmapset member number to use in the random set. + * arg4: the minimum bitmapset member number to use in the random set. + */ +Datum +test_random_offset_operations(PG_FUNCTION_ARGS) +{ + pg_prng_state state; + uint64 seed; + int num_ops; + int max_range; + int min_value; + int member; + + if (PG_ARGISNULL(0)) + seed = GetCurrentTimestamp(); + else + seed = PG_GETARG_INT64(0); + + num_ops = PG_GETARG_INT32(1); + max_range = PG_GETARG_INT32(2); + min_value = PG_GETARG_INT32(3); + + if (PG_ARGISNULL(1) || num_ops <= 0) + elog(ERROR, "invalid number of operations"); + if (PG_ARGISNULL(2) || max_range <= 0) + elog(ERROR, "invalid maximum range"); + if (PG_ARGISNULL(3) || min_value < 0) + elog(ERROR, "invalid minimum value"); + + pg_prng_seed(&state, seed); + + for (int op = 0; op < num_ops; op++) + { + Bitmapset *random_bms = NULL; + Bitmapset *offset_bms1; + Bitmapset *offset_bms2 = NULL; + int offset; + int nmembers; + + CHECK_FOR_INTERRUPTS(); + + /* Figure out a random offset and how many members to add */ + offset = (pg_prng_uint32(&state) % max_range) - (pg_prng_uint32(&state) % max_range); + nmembers = pg_prng_uint32(&state) % max_range + min_value; + + for (int i = 0; i < nmembers; i++) + { + member = pg_prng_uint32(&state) % max_range + min_value; + random_bms = bms_add_member(random_bms, member); + } + + /* create a known-good set the old fashioned way */ + offset_bms2 = NULL; + member = -1; + while ((member = bms_next_member(random_bms, member)) >= 0) + { + if (member + offset >= 0) + offset_bms2 = bms_add_member(offset_bms2, member + offset); + } + + /* do the offsetting */ + offset_bms1 = bms_offset_members(random_bms, offset); + + /* check against the known-good set */ + if (!bms_equal(offset_bms1, offset_bms2)) + elog(ERROR, "bms_offset_members failed with offset %d seed " INT64_FORMAT, offset, seed); + + /* Cleanup before the next loop */ + bms_free(random_bms); + bms_free(offset_bms1); + bms_free(offset_bms2); + } + + PG_RETURN_INT32(num_ops); +} -- 2.51.0