From 2cf5d136f6a80ca2cfc703c54566fe636b41ca28 Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Sat, 11 Feb 2023 20:50:03 +0100
Subject: [PATCH 8/9] Support SK_SEARCHARRAY in BRIN bloom

---
 src/backend/access/brin/brin_bloom.c | 179 ++++++++++++++++++++++-----
 src/include/catalog/pg_amproc.dat    |  60 ++++++++++
 src/include/catalog/pg_proc.dat      |   3 +
 3 files changed, 216 insertions(+), 26 deletions(-)

diff --git a/src/backend/access/brin/brin_bloom.c b/src/backend/access/brin/brin_bloom.c
index 4ff80aeb0c..48b9847bb2 100644
--- a/src/backend/access/brin/brin_bloom.c
+++ b/src/backend/access/brin/brin_bloom.c
@@ -125,9 +125,11 @@
 #include "access/stratnum.h"
 #include "catalog/pg_type.h"
 #include "catalog/pg_amop.h"
+#include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
 #include "utils/lsyscache.h"
+#include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/syscache.h"
 
@@ -151,6 +153,13 @@
  */
 #define		PROCNUM_BASE			11
 
+/*
+ * We use a private sk_flags bit to mark preprocessed scan keys.  We're
+ * allowed to use bits 16-31 (see skey.h).  A key with SK_BRIN_HASHES set
+ * carries a HashCache pointer in sk_argument instead of the original datum.
+ */
+#define SK_BRIN_HASHES	0x00010000	/* deconstructed array, calculated hashes */
+
 /*
  * Storage type for BRIN's reloptions.
  */
@@ -402,21 +411,14 @@ bloom_add_value(BloomFilter *filter, uint32 value, bool *updated)
 	return filter;
 }
 
-
 /*
  * bloom_contains_value
  * 		Check if the bloom filter contains a particular value.
  */
 static bool
-bloom_contains_value(BloomFilter *filter, uint32 value)
+bloom_contains_hashes(BloomFilter *filter, uint64 h1, uint64 h2)
 {
 	int			i;
-	uint64		h1,
-				h2;
-
-	/* calculate the two hashes */
-	h1 = hash_bytes_uint32_extended(value, BLOOM_SEED_1) % filter->nbits;
-	h2 = hash_bytes_uint32_extended(value, BLOOM_SEED_2) % filter->nbits;
 
 	/* compute the requested number of hashes */
 	for (i = 0; i < filter->nhashes; i++)
@@ -590,6 +592,121 @@ brin_bloom_add_value(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(updated);
 }
 
+/*
+ * Precalculated bloom hashes for the value(s) of one scan key.
+ *
+ * h1 and h2 are parallel arrays of nelements entries, holding the two base
+ * hashes of each value, already reduced modulo the filter size in bits.
+ */
+typedef struct HashCache
+{
+	int			nelements;
+	uint64	   *h1;
+	uint64	   *h2;
+} HashCache;
+
+/*
+ * brin_bloom_preprocess
+ *		Precalculate bloom hashes for a scan key, so that the consistent
+ *		function doesn't have to hash the values again for every page range.
+ *
+ * Takes the BrinDesc and the original scan key, and returns a new scan key
+ * marked with SK_BRIN_HASHES whose sk_argument points to a HashCache.  A plain
+ * key produces a single pair of hashes, a SK_SEARCHARRAY key produces one pair
+ * per non-NULL array element.  Keys flagged SK_ISNULL are returned unchanged.
+ */
+Datum
+brin_bloom_preprocess(PG_FUNCTION_ARGS)
+{
+	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
+	ScanKey		key = (ScanKey) PG_GETARG_POINTER(1);
+	BloomOptions *opts = (BloomOptions *) PG_GET_OPCLASS_OPTIONS();
+	ScanKey		newkey;
+	HashCache  *cache;
+	FmgrInfo   *finfo;
+	Datum	   *values;
+	bool	   *nulls = NULL;
+	int			nvalues;
+	int			nbits;
+
+	/* there's no value to hash in IS [NOT] NULL keys, leave them alone */
+	if (key->sk_flags & SK_ISNULL)
+		PG_RETURN_POINTER(key);
+
+	/* we'll need to calculate hashes, so get the proc */
+	finfo = bloom_get_procinfo(bdesc, key->sk_attno, PROCNUM_HASH);
+
+	/*
+	 * We don't have a filter from any range yet, so we just re-calculate
+	 * the size (number of bits) just like bloom_init.
+	 */
+	bloom_filter_size(brin_bloom_get_ndistinct(bdesc, opts),
+					  BloomGetFalsePositiveRate(opts),
+					  NULL, &nbits, NULL);
+
+	/* collect the values to hash: the array elements, or the single datum */
+	if (key->sk_flags & SK_SEARCHARRAY)
+	{
+		ArrayType  *arrayval = DatumGetArrayTypeP(key->sk_argument);
+		int16		elmlen;
+		bool		elmbyval;
+		char		elmalign;
+
+		get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+							 &elmlen, &elmbyval, &elmalign);
+
+		deconstruct_array(arrayval,
+						  ARR_ELEMTYPE(arrayval),
+						  elmlen, elmbyval, elmalign,
+						  &values, &nulls, &nvalues);
+	}
+	else
+	{
+		values = &key->sk_argument;
+		nvalues = 1;
+	}
+
+	/* sized for the case that no value is skipped, nelements starts at 0 */
+	cache = (HashCache *) palloc0(sizeof(HashCache));
+	cache->h1 = (uint64 *) palloc0(sizeof(uint64) * nvalues);
+	cache->h2 = (uint64 *) palloc0(sizeof(uint64) * nvalues);
+
+	for (int i = 0; i < nvalues; i++)
+	{
+		uint32		hashValue;
+
+		/*
+		 * A NULL element can never satisfy the equality operator, and we
+		 * must not pass it to the hash function, so just skip it.
+		 */
+		if (nulls != NULL && nulls[i])
+			continue;
+
+		hashValue = DatumGetUInt32(FunctionCall1Coll(finfo,
+													 key->sk_collation,
+													 values[i]));
+
+		cache->h1[cache->nelements] =
+			hash_bytes_uint32_extended(hashValue, BLOOM_SEED_1) % nbits;
+		cache->h2[cache->nelements] =
+			hash_bytes_uint32_extended(hashValue, BLOOM_SEED_2) % nbits;
+		cache->nelements++;
+	}
+
+	newkey = palloc0(sizeof(ScanKeyData));
+
+	ScanKeyEntryInitializeWithInfo(newkey,
+								   (key->sk_flags | SK_BRIN_HASHES),
+								   key->sk_attno,
+								   key->sk_strategy,
+								   key->sk_subtype,
+								   key->sk_collation,
+								   &key->sk_func,
+								   PointerGetDatum(cache));
+
+	PG_RETURN_POINTER(newkey);
+}
+
 /*
  * Given an index tuple corresponding to a certain page range and a scan key,
  * return whether the scan key is consistent with the index tuple's bloom
@@ -598,16 +715,10 @@
brin_bloom_add_value(PG_FUNCTION_ARGS)
 Datum
 brin_bloom_consistent(PG_FUNCTION_ARGS)
 {
-	BrinDesc   *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
 	BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
 	ScanKey    *keys = (ScanKey *) PG_GETARG_POINTER(2);
 	int			nkeys = PG_GETARG_INT32(3);
-	Oid			colloid = PG_GET_COLLATION();
-	AttrNumber	attno;
-	Datum		value;
 	bool		matches;
-	FmgrInfo   *finfo;
-	uint32		hashValue;
 	BloomFilter *filter;
 	int			keyno;
 
@@ -621,26 +710,42 @@ brin_bloom_consistent(PG_FUNCTION_ARGS)
 	for (keyno = 0; keyno < nkeys; keyno++)
 	{
 		ScanKey		key = keys[keyno];
+		HashCache  *cache = (HashCache *) DatumGetPointer(key->sk_argument);
 
 		/* NULL keys are handled and filtered-out in bringetbitmap */
 		Assert(!(key->sk_flags & SK_ISNULL));
 
-		attno = key->sk_attno;
-		value = key->sk_argument;
+		/*
+		 * Keys should be preprocessed into a hash cache (even single-value
+		 * scan keys, not just SK_SEARCHARRAY ones).
+		 */
+		Assert(key->sk_flags & SK_BRIN_HASHES);
 
 		switch (key->sk_strategy)
 		{
 			case BloomEqualStrategyNumber:
-
-				/*
-				 * We want to return the current page range if the bloom filter
-				 * seems to contain the value.
-				 */
-				finfo = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH);
-
-				hashValue = DatumGetUInt32(FunctionCall1Coll(finfo, colloid, value));
-				matches &= bloom_contains_value(filter, hashValue);
-
+				{
+					bool		found = false;
+
+					/*
+					 * We want to return the current page range if the bloom
+					 * filter seems to contain any of the values (or a single
+					 * value), using the precalculated hashes.
+					 */
+					for (int i = 0; i < cache->nelements; i++)
+					{
+						if (bloom_contains_hashes(filter,
+												  cache->h1[i],
+												  cache->h2[i]))
+						{
+							found = true;
+							break;
+						}
+					}
+
+					/* every scan key has to match, so combine with AND */
+					matches &= found;
+				}
 				break;
 			default:
 				/* shouldn't happen */
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index ed5b21e7f9..d951fcd1a0 100644
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -822,6 +822,9 @@
 { amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea',
   amprocrighttype => 'bytea', amprocnum => '5',
   amproc => 'brin_bloom_options' },
+{ amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea',
+  amprocrighttype => 'bytea', amprocnum => '6',
+  amproc => 'brin_bloom_preprocess' },
 { amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea',
   amprocrighttype => 'bytea', amprocnum => '11',
   amproc => 'hashvarlena' },
@@ -853,6 +856,8 @@
   amprocrighttype => 'char', amprocnum => '4', amproc => 'brin_bloom_union' },
 { amprocfamily => 'brin/char_bloom_ops', amproclefttype => 'char',
   amprocrighttype => 'char', amprocnum => '5', amproc => 'brin_bloom_options' },
+{ amprocfamily => 'brin/char_bloom_ops', amproclefttype => 'char',
+  amprocrighttype => 'char', amprocnum => '6', amproc => 'brin_bloom_preprocess' },
 { amprocfamily => 'brin/char_bloom_ops', amproclefttype => 'char',
   amprocrighttype => 'char', amprocnum => '11', amproc => 'hashchar' },
 
@@ -884,6 +889,8 @@
   amprocrighttype => 'name', amprocnum => '4', amproc => 'brin_bloom_union' },
 { amprocfamily => 'brin/name_bloom_ops', amproclefttype => 'name',
   amprocrighttype => 'name', amprocnum => '5', amproc => 'brin_bloom_options' },
+{ amprocfamily => 'brin/name_bloom_ops', amproclefttype => 'name',
+  amprocrighttype => 'name', amprocnum => '6', amproc => 'brin_bloom_preprocess' },
 { amprocfamily => 'brin/name_bloom_ops', amproclefttype => 'name',
   amprocrighttype
=> 'name', amprocnum => '11', amproc => 'hashname' }, @@ -1010,6 +1017,8 @@ amprocrighttype => 'int8', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int8', amprocrighttype => 'int8', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int8', + amprocrighttype => 'int8', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int8', amprocrighttype => 'int8', amprocnum => '11', amproc => 'hashint8' }, @@ -1025,6 +1034,8 @@ amprocrighttype => 'int2', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int2', amprocrighttype => 'int2', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int2', + amprocrighttype => 'int2', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int2', amprocrighttype => 'int2', amprocnum => '11', amproc => 'hashint2' }, @@ -1040,6 +1051,8 @@ amprocrighttype => 'int4', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int4', amprocrighttype => 'int4', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int4', + amprocrighttype => 'int4', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int4', amprocrighttype => 'int4', amprocnum => '11', amproc => 'hashint4' }, @@ -1071,6 +1084,8 @@ amprocrighttype => 'text', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/text_bloom_ops', amproclefttype => 'text', amprocrighttype => 'text', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/text_bloom_ops', amproclefttype => 'text', + 
amprocrighttype => 'text', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/text_bloom_ops', amproclefttype => 'text', amprocrighttype => 'text', amprocnum => '11', amproc => 'hashtext' }, @@ -1124,6 +1139,8 @@ amprocrighttype => 'oid', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/oid_bloom_ops', amproclefttype => 'oid', amprocrighttype => 'oid', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/oid_bloom_ops', amproclefttype => 'oid', + amprocrighttype => 'oid', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/oid_bloom_ops', amproclefttype => 'oid', amprocrighttype => 'oid', amprocnum => '11', amproc => 'hashoid' }, @@ -1154,6 +1171,8 @@ amprocrighttype => 'tid', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/tid_bloom_ops', amproclefttype => 'tid', amprocrighttype => 'tid', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/tid_bloom_ops', amproclefttype => 'tid', + amprocrighttype => 'tid', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/tid_bloom_ops', amproclefttype => 'tid', amprocrighttype => 'tid', amprocnum => '11', amproc => 'hashtid' }, @@ -1273,6 +1292,9 @@ { amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float4', amprocrighttype => 'float4', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float4', + amprocrighttype => 'float4', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float4', amprocrighttype => 'float4', amprocnum => '11', amproc => 'hashfloat4' }, @@ -1290,6 +1312,9 @@ { amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float8', amprocrighttype => 'float8', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float8', + 
amprocrighttype => 'float8', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float8', amprocrighttype => 'float8', amprocnum => '11', amproc => 'hashfloat8' }, @@ -1349,6 +1374,9 @@ { amprocfamily => 'brin/macaddr_bloom_ops', amproclefttype => 'macaddr', amprocrighttype => 'macaddr', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/macaddr_bloom_ops', amproclefttype => 'macaddr', + amprocrighttype => 'macaddr', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/macaddr_bloom_ops', amproclefttype => 'macaddr', amprocrighttype => 'macaddr', amprocnum => '11', amproc => 'hashmacaddr' }, @@ -1408,6 +1436,9 @@ { amprocfamily => 'brin/macaddr8_bloom_ops', amproclefttype => 'macaddr8', amprocrighttype => 'macaddr8', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/macaddr8_bloom_ops', amproclefttype => 'macaddr8', + amprocrighttype => 'macaddr8', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/macaddr8_bloom_ops', amproclefttype => 'macaddr8', amprocrighttype => 'macaddr8', amprocnum => '11', amproc => 'hashmacaddr8' }, @@ -1462,6 +1493,8 @@ amprocrighttype => 'inet', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/network_bloom_ops', amproclefttype => 'inet', amprocrighttype => 'inet', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/network_bloom_ops', amproclefttype => 'inet', + amprocrighttype => 'inet', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/network_bloom_ops', amproclefttype => 'inet', amprocrighttype => 'inet', amprocnum => '11', amproc => 'hashinet' }, @@ -1520,6 +1553,9 @@ { amprocfamily => 'brin/bpchar_bloom_ops', amproclefttype => 'bpchar', amprocrighttype => 'bpchar', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/bpchar_bloom_ops', amproclefttype => 
'bpchar', + amprocrighttype => 'bpchar', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/bpchar_bloom_ops', amproclefttype => 'bpchar', amprocrighttype => 'bpchar', amprocnum => '11', amproc => 'hashbpchar' }, @@ -1574,6 +1610,8 @@ amprocrighttype => 'time', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/time_bloom_ops', amproclefttype => 'time', amprocrighttype => 'time', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/time_bloom_ops', amproclefttype => 'time', + amprocrighttype => 'time', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/time_bloom_ops', amproclefttype => 'time', amprocrighttype => 'time', amprocnum => '11', amproc => 'time_hash' }, @@ -1707,6 +1745,9 @@ { amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamp', amprocrighttype => 'timestamp', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamp', + amprocrighttype => 'timestamp', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamp', amprocrighttype => 'timestamp', amprocnum => '11', amproc => 'timestamp_hash' }, @@ -1726,6 +1767,9 @@ { amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamptz', amprocrighttype => 'timestamptz', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamptz', + amprocrighttype => 'timestamptz', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamptz', amprocrighttype => 'timestamptz', amprocnum => '11', amproc => 'timestamp_hash' }, @@ -1742,6 +1786,8 @@ amprocrighttype => 'date', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'date', amprocrighttype => 
'date', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'date', + amprocrighttype => 'date', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'date', amprocrighttype => 'date', amprocnum => '11', amproc => 'hashint4' }, @@ -1801,6 +1847,9 @@ { amprocfamily => 'brin/interval_bloom_ops', amproclefttype => 'interval', amprocrighttype => 'interval', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/interval_bloom_ops', amproclefttype => 'interval', + amprocrighttype => 'interval', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/interval_bloom_ops', amproclefttype => 'interval', amprocrighttype => 'interval', amprocnum => '11', amproc => 'interval_hash' }, @@ -1859,6 +1908,9 @@ { amprocfamily => 'brin/timetz_bloom_ops', amproclefttype => 'timetz', amprocrighttype => 'timetz', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/timetz_bloom_ops', amproclefttype => 'timetz', + amprocrighttype => 'timetz', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/timetz_bloom_ops', amproclefttype => 'timetz', amprocrighttype => 'timetz', amprocnum => '11', amproc => 'timetz_hash' }, @@ -1949,6 +2001,9 @@ { amprocfamily => 'brin/numeric_bloom_ops', amproclefttype => 'numeric', amprocrighttype => 'numeric', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/numeric_bloom_ops', amproclefttype => 'numeric', + amprocrighttype => 'numeric', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/numeric_bloom_ops', amproclefttype => 'numeric', amprocrighttype => 'numeric', amprocnum => '11', amproc => 'hash_numeric' }, @@ -2003,6 +2058,8 @@ amprocrighttype => 'uuid', amprocnum => '4', amproc => 'brin_bloom_union' }, { amprocfamily => 'brin/uuid_bloom_ops', amproclefttype => 'uuid', 
amprocrighttype => 'uuid', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/uuid_bloom_ops', amproclefttype => 'uuid', + amprocrighttype => 'uuid', amprocnum => '6', amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/uuid_bloom_ops', amproclefttype => 'uuid', amprocrighttype => 'uuid', amprocnum => '11', amproc => 'uuid_hash' }, @@ -2087,6 +2144,9 @@ { amprocfamily => 'brin/pg_lsn_bloom_ops', amproclefttype => 'pg_lsn', amprocrighttype => 'pg_lsn', amprocnum => '5', amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/pg_lsn_bloom_ops', amproclefttype => 'pg_lsn', + amprocrighttype => 'pg_lsn', amprocnum => '6', + amproc => 'brin_bloom_preprocess' }, { amprocfamily => 'brin/pg_lsn_bloom_ops', amproclefttype => 'pg_lsn', amprocrighttype => 'pg_lsn', amprocnum => '11', amproc => 'pg_lsn_hash' }, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 753c41d5cd..4325229c9d 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -8633,6 +8633,9 @@ { oid => '4595', descr => 'BRIN bloom support', proname => 'brin_bloom_options', proisstrict => 'f', prorettype => 'void', proargtypes => 'internal', prosrc => 'brin_bloom_options' }, +{ oid => '9325', descr => 'BRIN bloom support', + proname => 'brin_bloom_preprocess', proisstrict => 'f', prorettype => 'internal', + proargtypes => 'internal internal', prosrc => 'brin_bloom_preprocess' }, # userlock replacements { oid => '2880', descr => 'obtain exclusive advisory lock', -- 2.39.1