From 3cd625d2d42554111b34d70d0820f68c7ec5db11 Mon Sep 17 00:00:00 2001 From: Aleksander Alekseev Date: Tue, 13 Jan 2026 14:51:21 +0300 Subject: [PATCH v4 1/2] Simplify abbreviated key hashing using murmurhash64() Now that all Datums are 64-bit values, we can simplify the code by using murmurhash64() in the *_abbrev_convert() functions. Also replace hash_uint32() with murmurhash32() in a few other places for consistency. Author: Aleksander Alekseev Suggested-by: John Naylor Reviewed-by: John Naylor Discussion: https://postgr.es/m/CAJ7c6TMPhDRQMmkUHPv8oOK97B1mR8NRS61DgjpdaZUPAwaeZQ%40mail.gmail.com --- src/backend/commands/async.c | 6 +++--- src/backend/utils/adt/bytea.c | 9 ++------- src/backend/utils/adt/mac.c | 11 +++-------- src/backend/utils/adt/network.c | 6 +----- src/backend/utils/adt/numeric.c | 5 +---- src/backend/utils/adt/uuid.c | 6 +----- src/backend/utils/adt/varlena.c | 9 ++------- 7 files changed, 13 insertions(+), 39 deletions(-) diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 657c591618d..315270ddb11 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -651,9 +651,9 @@ globalChannelTableHash(const void *key, size_t size, void *arg) const GlobalChannelKey *k = (const GlobalChannelKey *) key; dshash_hash h; - h = DatumGetUInt32(hash_uint32(k->dboid)); - h ^= DatumGetUInt32(hash_any((const unsigned char *) k->channel, - strnlen(k->channel, NAMEDATALEN))); + h = murmurhash32(k->dboid); + h ^= hash_any((const unsigned char *) k->channel, + strnlen(k->channel, NAMEDATALEN)); return h; } diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c index fd7662d41ee..f32bd23d03c 100644 --- a/src/backend/utils/adt/bytea.c +++ b/src/backend/utils/adt/bytea.c @@ -1110,17 +1110,12 @@ bytea_abbrev_convert(Datum original, SortSupport ssup) Min(len, PG_CACHE_LINE_SIZE))); if (len > PG_CACHE_LINE_SIZE) - hash ^= DatumGetUInt32(hash_uint32((uint32) len)); + hash ^= murmurhash32((uint32) len); 
addHyperLogLog(&bss->full_card, hash); /* Hash abbreviated key */ - { - uint32 tmp; - - tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32); - hash = DatumGetUInt32(hash_uint32(tmp)); - } + hash = (uint32) murmurhash64(DatumGetUInt64(res)); addHyperLogLog(&bss->abbr_card, hash); diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c index f14675dea40..0658846f274 100644 --- a/src/backend/utils/adt/mac.c +++ b/src/backend/utils/adt/mac.c @@ -492,17 +492,12 @@ macaddr_abbrev_convert(Datum original, SortSupport ssup) uss->input_count += 1; /* - * Cardinality estimation. The estimate uses uint32, so XOR the two 32-bit - * halves together to produce slightly more entropy. The two zeroed bytes - * won't have any practical impact on this operation. + * Cardinality estimation. The estimate uses uint32, so we hash the full + * 64-bit value and take the lower 32 bits of the result. */ if (uss->estimating) { - uint32 tmp; - - tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32); - - addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + addHyperLogLog(&uss->abbr_card, (uint32) murmurhash64(DatumGetUInt64(res))); } /* diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c index 3a2002097dd..c226af5ca80 100644 --- a/src/backend/utils/adt/network.c +++ b/src/backend/utils/adt/network.c @@ -739,11 +739,7 @@ network_abbrev_convert(Datum original, SortSupport ssup) /* Hash abbreviated key */ if (uss->estimating) { - uint32 tmp; - - tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32); - - addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + addHyperLogLog(&uss->abbr_card, (uint32) murmurhash64(DatumGetUInt64(res))); } return res; diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index d25b8ad505d..bbe2581f0b7 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -2397,10 +2397,7 @@ numeric_abbrev_convert_var(const 
NumericVar *var, NumericSortSupport *nss) if (nss->estimating) { - uint32 tmp = ((uint32) result - ^ (uint32) ((uint64) result >> 32)); - - addHyperLogLog(&nss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + addHyperLogLog(&nss->abbr_card, (uint32) murmurhash64(result)); } return NumericAbbrevGetDatum(result); diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c index 6ee3752ac78..888802c3012 100644 --- a/src/backend/utils/adt/uuid.c +++ b/src/backend/utils/adt/uuid.c @@ -396,11 +396,7 @@ uuid_abbrev_convert(Datum original, SortSupport ssup) if (uss->estimating) { - uint32 tmp; - - tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32); - - addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + addHyperLogLog(&uss->abbr_card, (uint32) murmurhash64(DatumGetUInt64(res))); } /* diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 7caf700fd61..ea3d4dcf18b 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -2155,17 +2155,12 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) Min(len, PG_CACHE_LINE_SIZE))); if (len > PG_CACHE_LINE_SIZE) - hash ^= DatumGetUInt32(hash_uint32((uint32) len)); + hash ^= murmurhash32((uint32) len); addHyperLogLog(&sss->full_card, hash); /* Hash abbreviated key */ - { - uint32 tmp; - - tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32); - hash = DatumGetUInt32(hash_uint32(tmp)); - } + hash = (uint32) murmurhash64(DatumGetUInt64(res)); addHyperLogLog(&sss->abbr_card, hash); -- 2.43.0