From 185072d5c64f6d252db1d3440f22dcca7ba2b424 Mon Sep 17 00:00:00 2001 From: Aleksander Alekseev Date: Thu, 18 Jul 2024 12:59:40 +0300 Subject: [PATCH v1] Add crc32(text) & crc32(bytea) Per several user requests. Aleksander Alekseev, reviewed by TODO FIXME Discussion: TODO FIXME --- doc/src/sgml/func.sgml | 36 ++++++++++ src/backend/utils/adt/Makefile | 1 + src/backend/utils/adt/hashfuncs.c | 90 ++++++++++++++++++++++++ src/backend/utils/adt/meson.build | 1 + src/include/catalog/pg_proc.dat | 8 +++ src/test/regress/expected/opr_sanity.out | 2 + src/test/regress/expected/strings.out | 27 +++++++ src/test/regress/sql/strings.sql | 10 +++ 8 files changed, 175 insertions(+) create mode 100644 src/backend/utils/adt/hashfuncs.c diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 3f93c61aa3..7943b1ee2a 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -3030,6 +3030,24 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in + + + + crc32 + + crc32 ( text ) + text + + + Computes the CRC32 hash of + the argument, with the result written in hexadecimal. + + + crc32('PostgreSQL') + cb97b83b + + + @@ -4484,6 +4502,24 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); + + + + crc32 + + crc32 ( bytea ) + text + + + Computes the CRC32 hash of + the binary string, with the result written in hexadecimal. 
+ + + crc32('PostgreSQL' :: bytea) + cb97b83b + + + diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index edb09d4e35..be84d68856 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -42,6 +42,7 @@ OBJS = \ geo_ops.o \ geo_selfuncs.o \ geo_spgist.o \ + hashfuncs.o \ hbafuncs.o \ inet_cidr_ntop.o \ inet_net_pton.o \ diff --git a/src/backend/utils/adt/hashfuncs.c b/src/backend/utils/adt/hashfuncs.c new file mode 100644 index 0000000000..13a5fe55fc --- /dev/null +++ b/src/backend/utils/adt/hashfuncs.c @@ -0,0 +1,90 @@ +/*------------------------------------------------------------------------- + * + * hashfuncs.c + * Non-cryptographic hash functions + * + * Portions Copyright (c) 2024, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/hashfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "utils/builtins.h" +#include "utils/pg_crc.h" +#include "varatt.h" + +/* + * Calculate the CRC32 of the first size bytes at buf, one byte per step. + * NOTE(review): size is int, but crc32_text/crc32_bytea pass a size_t. + * Common code for crc32_text() and crc32_bytea(). + */ +static unsigned int +crc32_sz(const char *buf, int size) +{ + pg_crc32 crc; + const char *p = buf; + + INIT_TRADITIONAL_CRC32(crc); + while (size > 0) + { + char c = (char) (*p); + + COMP_TRADITIONAL_CRC32(crc, &c, 1); + size--; + p++; + } + FIN_TRADITIONAL_CRC32(crc); + return (unsigned int) crc; +} + +/* + * Create a CRC32 hash of a text value and return it as a hex string. 
+ */ +Datum +crc32_text(PG_FUNCTION_ARGS) +{ + text *in_text = PG_GETARG_TEXT_PP(0); + size_t len; + unsigned int hashsum; + char result[16]; + + /* calculate the length of the buffer using varlena metadata */ + len = VARSIZE_ANY_EXHDR(in_text); + + /* run the shared CRC32 helper over those len bytes */ + hashsum = crc32_sz(VARDATA_ANY(in_text), len); + + /* format as zero-padded lowercase hex, at least 8 digits */ + snprintf(result, sizeof(result), "%08x", hashsum); + + /* convert to text and return it */ + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +/* + * Create a CRC32 hash of a bytea value and return it as a hex string. + */ +Datum +crc32_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + size_t len; + unsigned int hashsum; + char result[16]; + + /* calculate the length of the buffer using varlena metadata */ + len = VARSIZE_ANY_EXHDR(in); + + /* run the shared CRC32 helper over those len bytes */ + hashsum = crc32_sz(VARDATA_ANY(in), len); + + /* format as zero-padded lowercase hex, at least 8 digits */ + snprintf(result, sizeof(result), "%08x", hashsum); + + /* convert to text and return it */ + PG_RETURN_TEXT_P(cstring_to_text(result)); +} diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index 8c6fc80c37..6239ea7b42 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -31,6 +31,7 @@ backend_sources += files( 'geo_ops.c', 'geo_selfuncs.c', 'geo_spgist.c', + 'hashfuncs.c', 'hbafuncs.c', 'inet_cidr_ntop.c', 'inet_net_pton.c', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 73d9cf8582..0f79998bba 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -7704,6 +7704,14 @@ proname => 'system', provolatile => 'v', prorettype => 'tsm_handler', proargtypes => 'internal', prosrc => 'tsm_system_handler' }, +# non-cryptographic +{ oid => '8571', descr => 'CRC32 hash', + proname => 'crc32', proleakproof => 't', prorettype => 'text', + proargtypes => 'text', prosrc => 'crc32_text' }, +{ oid => '8572', descr => 'CRC32 hash', + proname => 
'crc32', proleakproof => 't', prorettype => 'text', + proargtypes => 'bytea', prosrc => 'crc32_bytea' }, + # cryptographic { oid => '2311', descr => 'MD5 hash', proname => 'md5', proleakproof => 't', prorettype => 'text', diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 9d047b21b8..e170a9bb1b 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -874,6 +874,8 @@ xid8ne(xid8,xid8) xid8cmp(xid8,xid8) uuid_extract_timestamp(uuid) uuid_extract_version(uuid) +crc32(text) +crc32(bytea) -- restore normal output mode \a\t -- List of functions used by libpq's fe-lobj.c diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 52b69a107f..de77070038 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -2203,6 +2203,33 @@ select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff" ffffffff (1 row) +-- +-- CRC32 +-- +SELECT crc32(''); + crc32 +---------- + 00000000 +(1 row) + +SELECT crc32('The quick brown fox jumps over the lazy dog.'); + crc32 +---------- + 519025e9 +(1 row) + +SELECT crc32('' :: bytea); + crc32 +---------- + 00000000 +(1 row) + +SELECT crc32('The quick brown fox jumps over the lazy dog.' :: bytea); + crc32 +---------- + 519025e9 +(1 row) + -- -- SHA-2 -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 3959678992..93149d170a 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -702,6 +702,16 @@ select to_hex(-1234::bigint) AS "fffffffffffffb2e"; select to_hex(256*256*256 - 1) AS "ffffff"; select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff"; +-- +-- CRC32 +-- + +SELECT crc32(''); +SELECT crc32('The quick brown fox jumps over the lazy dog.'); + +SELECT crc32('' :: bytea); +SELECT crc32('The quick brown fox jumps over the lazy dog.' 
:: bytea); + -- -- SHA-2 -- -- 2.45.2