From 056c803c20628e6e7bf0000f217bfcaadcbeae22 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 3 Mar 2023 16:40:29 +1300 Subject: [PATCH v1 1/2] Remove trailing zero words from Bitmapsets --- src/backend/nodes/bitmapset.c | 207 +++++++++++++++------------------- 1 file changed, 89 insertions(+), 118 deletions(-) diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 7ba3cf635b..14fb100087 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -5,8 +5,10 @@ * * A bitmap set can represent any set of nonnegative integers, although * it is mainly intended for sets where the maximum value is not large, - * say at most a few hundred. By convention, we always represent the - * empty set by a NULL pointer. + * say at most a few hundred. By convention, we always represent a set with + * the minimum possible number of words, i.e, there are never any trailing + * zero words. Enforcing this requires that an empty set is represented as + * NULL. * * * Copyright (c) 2003-2023, PostgreSQL Global Development Group @@ -85,18 +87,11 @@ bms_copy(const Bitmapset *a) } /* - * bms_equal - are two bitmapsets equal? - * - * This is logical not physical equality; in particular, a NULL pointer will - * be reported as equal to a palloc'd value containing no members. + * bms_equal - are two bitmapsets equal? or both NULL? */ bool bms_equal(const Bitmapset *a, const Bitmapset *b) { - const Bitmapset *shorter; - const Bitmapset *longer; - int shortlen; - int longlen; int i; /* Handle cases where either input is NULL */ @@ -108,30 +103,18 @@ bms_equal(const Bitmapset *a, const Bitmapset *b) } else if (b == NULL) return false; - /* Identify shorter and longer input */ - if (a->nwords <= b->nwords) - { - shorter = a; - longer = b; - } - else - { - shorter = b; - longer = a; - } - /* And process */ - shortlen = shorter->nwords; - for (i = 0; i < shortlen; i++) - { - if (shorter->words[i] != longer->words[i]) - return false; - } - longlen = longer->nwords; - for (; i < longlen; i++) + + /* can't be equal if the word counts don't match */ + if (a->nwords != b->nwords) + return false; + + /* check each word matches */ + for (i = 0; i < a->nwords; i++) { - if (longer->words[i] != 0) + if (a->words[i] != b->words[i]) return false; } + return true; } @@ -146,29 +129,18 @@ bms_equal(const Bitmapset *a, const Bitmapset *b) int bms_compare(const Bitmapset *a, const Bitmapset *b) { - int shortlen; int i; - /* Handle cases where either input is NULL */ if (a == NULL) return (b == NULL) ? 0 : -1; else if (b == NULL) return +1; - /* Handle cases where one input is longer than the other */ - shortlen = Min(a->nwords, b->nwords); - for (i = shortlen; i < a->nwords; i++) - { - if (a->words[i] != 0) - return +1; - } - for (i = shortlen; i < b->nwords; i++) - { - if (b->words[i] != 0) - return -1; - } - /* Process words in common */ - i = shortlen; - while (--i >= 0) + + /* the set with the most words must be greater */ + if (a->nwords != b->nwords) + return (a->nwords > b->nwords) ? +1 : -1; + + for (i = a->nwords - 1; i >= 0; i--) { bitmapword aw = a->words[i]; bitmapword bw = b->words[i]; @@ -261,6 +233,7 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b) { Bitmapset *result; const Bitmapset *other; + int lastnonzero; int resultlen; int i; @@ -280,14 +253,23 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b) } /* And intersect the longer input with the result */ resultlen = result->nwords; + lastnonzero = -1; for (i = 0; i < resultlen; i++) + { result->words[i] &= other->words[i]; + + if (result->words[i] != 0) + lastnonzero = i; + } /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(result)) + if (lastnonzero == -1) { pfree(result); return NULL; } + + /* get rid of trailing zero words */ + result->nwords = lastnonzero + 1; return result; } @@ -298,6 +280,7 @@ Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b) { Bitmapset *result; + int lastnonzero; int shortlen; int i; @@ -319,8 +302,19 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) result = bms_copy(a); /* And remove b's bits from result */ shortlen = Min(a->nwords, b->nwords); + lastnonzero = -1; for (i = 0; i < shortlen; i++) + { result->words[i] &= ~b->words[i]; + + if (result->words[i] != 0) + lastnonzero = i; + } + + /* get rid of trailing zero words */ + result->nwords = lastnonzero + 1; + Assert(result->nwords != 0); + /* Need not check for empty result, since we handled that case above */ return result; } @@ -331,32 +325,23 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) bool bms_is_subset(const Bitmapset *a, const Bitmapset *b) { - int shortlen; - int longlen; int i; - /* Handle cases where either input is NULL */ if (a == NULL) return true; /* empty set is a subset of anything */ if (b == NULL) return false; - /* Check common words */ - shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + + /* 'a' can't be a subset of 'b' if it contains more words */ + if (a->nwords > b->nwords) + return false; + + /* Check all 'a' members are set in 'b' */ + for (i = 0; i < a->nwords; i++) { if ((a->words[i] & ~b->words[i]) != 0) return false; } - /* Check extra words */ - if (a->nwords > b->nwords) - { - longlen = a->nwords; - for (; i < longlen; i++) - { - if (a->words[i] != 0) - return false; - } - } return true; } @@ -370,7 +355,6 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) { BMS_Comparison result; int shortlen; - int longlen; int i; /* Handle cases where either input is NULL */ @@ -408,31 +392,17 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) /* Check extra words */ if (a->nwords > b->nwords) { - longlen = a->nwords; - for (; i < longlen; i++) - { - if (a->words[i] != 0) - { - /* a is not a subset of b */ - if (result == BMS_SUBSET1) - return BMS_DIFFERENT; - result = BMS_SUBSET2; - } - } + /* if a has more words then a is not a subset of b */ + if (result == BMS_SUBSET1) + return BMS_DIFFERENT; + return BMS_SUBSET2; } else if (a->nwords < b->nwords) { - longlen = b->nwords; - for (; i < longlen; i++) - { - if (b->words[i] != 0) - { - /* b is not a subset of a */ - if (result == BMS_SUBSET2) - return BMS_DIFFERENT; - result = BMS_SUBSET1; - } - } + /* if b has more words then b is not a subset of a */ + if (result == BMS_SUBSET2) + return BMS_DIFFERENT; + return BMS_SUBSET1; } return result; } @@ -563,27 +533,21 @@ bms_overlap_list(const Bitmapset *a, const List *b) bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b) { - int shortlen; int i; - /* Handle cases where either input is NULL */ if (a == NULL) return false; if (b == NULL) return true; - /* Check words in common */ - shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + /* if a has more words then it must contain additional members */ + if (a->nwords > b->nwords) + return true; + /* Check all 'a' members are set in 'b' */ + for (i = 0; i < a->nwords; i++) { if ((a->words[i] & ~b->words[i]) != 0) return true; } - /* Check extra words in a */ - for (; i < a->nwords; i++) - { - if (a->words[i] != 0) - return true; - } return false; } @@ -927,6 +891,7 @@ bms_add_range(Bitmapset *a, int lower, int upper) Bitmapset * bms_int_members(Bitmapset *a, const Bitmapset *b) { + int lastnonzero; int shortlen; int i; @@ -940,16 +905,24 @@ bms_int_members(Bitmapset *a, const Bitmapset *b) } /* Intersect b into a; we need never copy */ shortlen = Min(a->nwords, b->nwords); + lastnonzero = -1; for (i = 0; i < shortlen; i++) + { a->words[i] &= b->words[i]; - for (; i < a->nwords; i++) - a->words[i] = 0; + + if (a->words[i] != 0) + lastnonzero = i; + } + /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(a)) + if (lastnonzero == -1) { pfree(a); return NULL; } + + /* get rid of trailing zero words */ + a->nwords = lastnonzero + 1; return a; } @@ -959,6 +932,7 @@ bms_int_members(Bitmapset *a, const Bitmapset *b) Bitmapset * bms_del_members(Bitmapset *a, const Bitmapset *b) { + int lastnonzero; int shortlen; int i; @@ -969,14 +943,25 @@ bms_del_members(Bitmapset *a, const Bitmapset *b) return a; /* Remove b's bits from a; we need never copy */ shortlen = Min(a->nwords, b->nwords); + lastnonzero = -1; for (i = 0; i < shortlen; i++) + { a->words[i] &= ~b->words[i]; + + if (a->words[i] != 0) + lastnonzero = i; + } + /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(a)) + if (lastnonzero == -1) { pfree(a); return NULL; } + + /* get rid of trailing zero words */ + a->nwords = lastnonzero + 1; + return a; } @@ -1140,28 +1125,14 @@ bms_prev_member(const Bitmapset *a, int prevbit) /* * bms_hash_value - compute a hash key for a Bitmapset - * - * Note: we must ensure that any two bitmapsets that are bms_equal() will - * hash to the same value; in practice this means that trailing all-zero - * words must not affect the result. Hence we strip those before applying - * hash_any(). */ uint32 bms_hash_value(const Bitmapset *a) { - int lastword; - if (a == NULL) return 0; /* All empty sets hash to 0 */ - for (lastword = a->nwords; --lastword >= 0;) - { - if (a->words[lastword] != 0) - break; - } - if (lastword < 0) - return 0; /* All empty sets hash to 0 */ return DatumGetUInt32(hash_any((const unsigned char *) a->words, - (lastword + 1) * sizeof(bitmapword))); + a->nwords * sizeof(bitmapword))); } /* -- 2.37.2