From 3228c640097de8b6817057b750a41d38265a219d Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 17 Aug 2017 11:16:51 -0400 Subject: [PATCH] Add optional second hash proc. --- doc/src/sgml/xindex.sgml | 11 +- src/backend/access/hash/hashfunc.c | 257 +++++++++++++++++++++++++++- src/backend/access/hash/hashpage.c | 2 +- src/backend/access/hash/hashutil.c | 6 +- src/backend/access/hash/hashvalidate.c | 33 +++- src/backend/commands/opclasscmds.c | 34 +++- src/backend/utils/cache/lsyscache.c | 8 +- src/backend/utils/cache/typcache.c | 2 +- src/include/access/hash.h | 17 +- src/include/catalog/pg_amproc.h | 1 + src/include/catalog/pg_proc.h | 2 + src/test/regress/expected/alter_generic.out | 4 +- 12 files changed, 344 insertions(+), 33 deletions(-) diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml index 333a36c456..0f3c46b11f 100644 --- a/doc/src/sgml/xindex.sgml +++ b/doc/src/sgml/xindex.sgml @@ -436,7 +436,8 @@ - Hash indexes require one support function, shown in . @@ -451,9 +452,15 @@ - Compute the hash value for a key + Compute the 32-bit hash value for a key 1 + + + Compute the 64-bit hash value for a key given a 64-bit salt + + 2 + diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index a127f3f8b1..511d079af7 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -59,6 +59,12 @@ hashint4(PG_FUNCTION_ARGS) } Datum +hashint4extended(PG_FUNCTION_ARGS) +{ + return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1)); +} + +Datum hashint8(PG_FUNCTION_ARGS) { /* @@ -502,7 +508,227 @@ hash_any(register const unsigned char *k, register int keylen) } /* - * hash_uint32() -- hash a 32-bit value + * hash_any_extended() -- hash into a 64-bit value, using an optional seed + * k : the key (the unaligned variable-length array of bytes) + * len : the length of the key, counting by bytes + * seed : a 64-bit seed (0 means no seed) + * + * Returns a uint64 value. 
Otherwise similar to hash_any. + */ +Datum +hash_any_extended(register const unsigned char *k, register int keylen, + uint64 seed) +{ + register uint32 a, + b, + c, + len; + + /* Set up the internal state */ + len = keylen; + a = b = c = 0x9e3779b9 + len + 3923095; + + /* If the seed is non-zero, use it to perturb the internal state. */ + if (seed != 0) + { + /* + * In essence, the seed is treated as part of the data being hashed, + * but for simplicity, we pretend that it's padded with four bytes of + * zeroes so that the seed constitutes a 12-byte chunk. + */ + a += (uint32) (seed >> 32); + b += (uint32) seed; + mix(a, b, c); + } + + /* If the source pointer is word-aligned, we use word-wide fetches */ + if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0) + { + /* Code path for aligned source data */ + register const uint32 *ka = (const uint32 *) k; + + /* handle most of the key */ + while (len >= 12) + { + a += ka[0]; + b += ka[1]; + c += ka[2]; + mix(a, b, c); + ka += 3; + len -= 12; + } + + /* handle the last 11 bytes */ + k = (const unsigned char *) ka; +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* the lowest byte of c is reserved for the length */ + /* fall through */ + case 8: + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9:
+ c += ((uint32) k[8] << 8); + /* the lowest byte of c is reserved for the length */ + /* fall through */ + case 8: + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a += ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + else + { + /* Code path for non-aligned source data */ + + /* handle most of the key */ + while (len >= 12) + { +#ifdef WORDS_BIGENDIAN + a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); + b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); + c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); +#else /* !WORDS_BIGENDIAN */ + a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); + b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); + c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); +#endif /* WORDS_BIGENDIAN */ + mix(a, b, c); + k += 12; + len -= 12; + } + + /* handle the last 11 bytes */ +#ifdef WORDS_BIGENDIAN + switch (len) /* all the case statements fall through */ + { + case 11: + c += ((uint32) k[10] << 8); + case 10: + c += ((uint32) k[9] << 16); + case 9: + c += ((uint32) k[8] << 24); + /* the lowest byte of c is reserved for the length */ + case 8: + b += k[7]; + case 7: + b += ((uint32) k[6] << 8); + case 6: + b += ((uint32) k[5] << 16); + case 5: + b += ((uint32) k[4] << 24); + case 4: + a += k[3]; + case 3: + a += ((uint32) k[2] << 8); + case 2: + a += ((uint32) k[1] << 16); + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) 
/* all the case statements fall through */ + { + case 11: + c += ((uint32) k[10] << 24); + case 10: + c += ((uint32) k[9] << 16); + case 9: + c += ((uint32) k[8] << 8); + /* the lowest byte of c is reserved for the length */ + case 8: + b += ((uint32) k[7] << 24); + case 7: + b += ((uint32) k[6] << 16); + case 6: + b += ((uint32) k[5] << 8); + case 5: + b += k[4]; + case 4: + a += ((uint32) k[3] << 24); + case 3: + a += ((uint32) k[2] << 16); + case 2: + a += ((uint32) k[1] << 8); + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + + final(a, b, c); + + /* report the result */ + return UInt64GetDatum(((uint64) b << 32) | c); +} + +/* + * hash_uint32() -- hash a 32-bit value to a 32-bit value * * This has the same result as * hash_any(&k, sizeof(uint32)) @@ -523,3 +749,32 @@ hash_uint32(uint32 k) /* report the result */ return UInt32GetDatum(c); } + +/* + * hash_uint32_extended() -- hash a 32-bit value to a 64-bit value, with a seed + * + * Like hash_uint32, this is a convenience function. 
+ */ +Datum +hash_uint32_extended(uint32 k, uint64 seed) +{ + register uint32 a, + b, + c; + + a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095; + + if (seed != 0) + { + a += (uint32) (seed >> 32); + b += (uint32) seed; + mix(a, b, c); + } + + a += k; + + final(a, b, c); + + /* report the result */ + return UInt64GetDatum(((uint64) b << 32) | c); +} diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 7b2906b0ca..05798419fc 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -373,7 +373,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum) if (ffactor < 10) ffactor = 10; - procid = index_getprocid(rel, 1, HASHPROC); + procid = index_getprocid(rel, 1, HASHSTANDARD_PROC); /* * We initialize the metapage, the first N bucket pages, and the first diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c index 9b803af7c2..869cbc1081 100644 --- a/src/backend/access/hash/hashutil.c +++ b/src/backend/access/hash/hashutil.c @@ -85,7 +85,7 @@ _hash_datum2hashkey(Relation rel, Datum key) Oid collation; /* XXX assumes index has only one attribute */ - procinfo = index_getprocinfo(rel, 1, HASHPROC); + procinfo = index_getprocinfo(rel, 1, HASHSTANDARD_PROC); collation = rel->rd_indcollation[0]; return DatumGetUInt32(FunctionCall1Coll(procinfo, collation, key)); @@ -108,10 +108,10 @@ _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype) hash_proc = get_opfamily_proc(rel->rd_opfamily[0], keytype, keytype, - HASHPROC); + HASHSTANDARD_PROC); if (!RegProcedureIsValid(hash_proc)) elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"", - HASHPROC, keytype, keytype, + HASHSTANDARD_PROC, keytype, keytype, RelationGetRelationName(rel)); collation = rel->rd_indcollation[0]; diff --git a/src/backend/access/hash/hashvalidate.c b/src/backend/access/hash/hashvalidate.c index 30b29cb100..f952bb9d0b 100644 --- 
a/src/backend/access/hash/hashvalidate.c +++ b/src/backend/access/hash/hashvalidate.c @@ -29,7 +29,7 @@ #include "utils/syscache.h" -static bool check_hash_func_signature(Oid funcid, Oid restype, Oid argtype); +static bool check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype); /* @@ -105,8 +105,9 @@ hashvalidate(Oid opclassoid) /* Check procedure numbers and function signatures */ switch (procform->amprocnum) { - case HASHPROC: - if (!check_hash_func_signature(procform->amproc, INT4OID, + case HASHSTANDARD_PROC: + case HASHEXTENDED_PROC: + if (!check_hash_func_signature(procform->amproc, procform->amprocnum, procform->amproclefttype)) { ereport(INFO, @@ -264,19 +265,37 @@ hashvalidate(Oid opclassoid) * hacks in the core hash opclass definitions. */ static bool -check_hash_func_signature(Oid funcid, Oid restype, Oid argtype) +check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype) { bool result = true; + Oid restype; + int16 nargs; HeapTuple tp; Form_pg_proc procform; + switch (amprocnum) + { + case HASHSTANDARD_PROC: + restype = INT4OID; + nargs = 1; + break; + + case HASHEXTENDED_PROC: + restype = INT8OID; + nargs = 2; + break; + + default: + elog(ERROR, "invalid amprocnum"); + } + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for function %u", funcid); procform = (Form_pg_proc) GETSTRUCT(tp); if (procform->prorettype != restype || procform->proretset || - procform->pronargs != 1) + procform->pronargs != nargs) result = false; if (!IsBinaryCoercible(argtype, procform->proargtypes.values[0])) @@ -308,6 +327,10 @@ check_hash_func_signature(Oid funcid, Oid restype, Oid argtype) result = false; } + /* If function takes a second argument, it must be for a 64-bit salt. 
*/ + if (nargs == 2 && procform->proargtypes.values[1] != INT8OID) + result = false; + ReleaseSysCache(tp); return result; } diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c index a31b1acb9c..4a8aaf38ad 100644 --- a/src/backend/commands/opclasscmds.c +++ b/src/backend/commands/opclasscmds.c @@ -18,6 +18,7 @@ #include #include "access/genam.h" +#include "access/hash.h" #include "access/heapam.h" #include "access/nbtree.h" #include "access/htup_details.h" @@ -1129,7 +1130,8 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid) /* * btree comparison procs must be 2-arg procs returning int4, while btree * sortsupport procs must take internal and return void. hash support - * procs must be 1-arg procs returning int4. Otherwise we don't know. + * proc 1 must be a 1-arg proc returning int4, while proc 2 must be a 2-arg + * proc returning int8. Otherwise we don't know. */ if (amoid == BTREE_AM_OID) { @@ -1172,14 +1174,28 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid) } else if (amoid == HASH_AM_OID) { - if (procform->pronargs != 1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("hash procedures must have one argument"))); - if (procform->prorettype != INT4OID) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("hash procedures must return integer"))); + if (member->number == HASHSTANDARD_PROC) + { + if (procform->pronargs != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash procedure 1 must have one argument"))); + if (procform->prorettype != INT4OID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash procedure 1 must return integer"))); + } + else if (member->number == HASHEXTENDED_PROC) + { + if (procform->pronargs != 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash procedure 2 must have two arguments"))); + if (procform->prorettype != INT8OID) + ereport(ERROR, + 
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("hash procedure 2 must return bigint"))); + } /* * If lefttype/righttype isn't specified, use the proc's input type diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 82763f8013..b7a14dc87e 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -490,8 +490,8 @@ get_compatible_hash_operators(Oid opno, /* * get_op_hash_functions - * Get the OID(s) of hash support function(s) compatible with the given - * operator, operating on its LHS and/or RHS datatype as required. + * Get the OID(s) of the standard hash support function(s) compatible with + * the given operator, operating on its LHS and/or RHS datatype as required. * * A function for the LHS type is sought and returned into *lhs_procno if * lhs_procno isn't NULL. Similarly, a function for the RHS type is sought @@ -542,7 +542,7 @@ get_op_hash_functions(Oid opno, *lhs_procno = get_opfamily_proc(aform->amopfamily, aform->amoplefttype, aform->amoplefttype, - HASHPROC); + HASHSTANDARD_PROC); if (!OidIsValid(*lhs_procno)) continue; /* Matching LHS found, done if caller doesn't want RHS */ @@ -564,7 +564,7 @@ get_op_hash_functions(Oid opno, *rhs_procno = get_opfamily_proc(aform->amopfamily, aform->amoprighttype, aform->amoprighttype, - HASHPROC); + HASHSTANDARD_PROC); if (!OidIsValid(*rhs_procno)) { /* Forget any LHS function from this opfamily */ diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index 7ec31eb3e3..96139ed204 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -474,7 +474,7 @@ lookup_type_cache(Oid type_id, int flags) hash_proc = get_opfamily_proc(typentry->hash_opf, typentry->hash_opintype, typentry->hash_opintype, - HASHPROC); + HASHSTANDARD_PROC); /* * As above, make sure hash_array will succeed. 
We don't currently diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 72fce3038c..13505bc580 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -289,12 +289,16 @@ typedef HashMetaPageData *HashMetaPage; #define HTMaxStrategyNumber 1 /* - * When a new operator class is declared, we require that the user supply - * us with an amproc procudure for hashing a key of the new type. - * Since we only have one such proc in amproc, it's number 1. + * When a new operator class is declared, we require that the user supply + * us with an amproc procedure for hashing a key of the new type, returning + * a 32-bit hash value. We call this the "standard" hash procedure. We + * also allow an optional "extended" hash procedure which accepts a salt and + * returns a 64-bit hash value. This is highly recommended but, for reasons + * of backward compatibility, optional. */ -#define HASHPROC 1 -#define HASHNProcs 1 +#define HASHSTANDARD_PROC 1 +#define HASHEXTENDED_PROC 2 +#define HASHNProcs 2 /* public routines */ @@ -322,7 +326,10 @@ extern bytea *hashoptions(Datum reloptions, bool validate); extern bool hashvalidate(Oid opclassoid); extern Datum hash_any(register const unsigned char *k, register int keylen); +extern Datum hash_any_extended(register const unsigned char *k, register int + keylen, uint64 seed); extern Datum hash_uint32(uint32 k); +extern Datum hash_uint32_extended(uint32 k, uint64 seed); /* private routines */ diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index 7d245b1271..79efc2f5dc 100644 --- a/src/include/catalog/pg_amproc.h +++ b/src/include/catalog/pg_amproc.h @@ -161,6 +161,7 @@ DATA(insert ( 1971 701 701 1 452 )); DATA(insert ( 1975 869 869 1 422 )); DATA(insert ( 1977 21 21 1 449 )); DATA(insert ( 1977 23 23 1 450 )); +DATA(insert ( 1977 23 23 2 425 )); DATA(insert ( 1977 20 20 1 949 )); DATA(insert ( 1983 1186 1186 1 1697 )); DATA(insert ( 1985 829 829 1 399 )); diff --git
a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 8b33b4e0ea..f2e9f7a553 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -670,6 +670,8 @@ DATA(insert OID = 449 ( hashint2 PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 DESCR("hash"); DATA(insert OID = 450 ( hashint4 PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 23 "23" _null_ _null_ _null_ _null_ _null_ hashint4 _null_ _null_ _null_ )); DESCR("hash"); +DATA(insert OID = 425 ( hashint4extended PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 20 "23 20" _null_ _null_ _null_ _null_ _null_ hashint4extended _null_ _null_ _null_ )); +DESCR("hash"); DATA(insert OID = 949 ( hashint8 PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 23 "20" _null_ _null_ _null_ _null_ _null_ hashint8 _null_ _null_ _null_ )); DESCR("hash"); DATA(insert OID = 451 ( hashfloat4 PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 23 "700" _null_ _null_ _null_ _null_ _null_ hashfloat4 _null_ _null_ _null_ )); diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out index 9f6ad4de33..767c09bec5 100644 --- a/src/test/regress/expected/alter_generic.out +++ b/src/test/regress/expected/alter_generic.out @@ -421,7 +421,7 @@ BEGIN TRANSACTION; CREATE OPERATOR FAMILY alt_opf13 USING hash; CREATE FUNCTION fn_opf13 (int4) RETURNS BIGINT AS 'SELECT NULL::BIGINT;' LANGUAGE SQL; ALTER OPERATOR FAMILY alt_opf13 USING hash ADD FUNCTION 1 fn_opf13(int4); -ERROR: hash procedures must return integer +ERROR: hash procedure 1 must return integer DROP OPERATOR FAMILY alt_opf13 USING hash; ERROR: current transaction is aborted, commands ignored until end of transaction block ROLLBACK; @@ -439,7 +439,7 @@ BEGIN TRANSACTION; CREATE OPERATOR FAMILY alt_opf15 USING hash; CREATE FUNCTION fn_opf15 (int4, int2) RETURNS BIGINT AS 'SELECT NULL::BIGINT;' LANGUAGE SQL; ALTER OPERATOR FAMILY alt_opf15 USING hash ADD FUNCTION 1 fn_opf15(int4, int2); -ERROR: hash procedures must have one argument +ERROR: hash 
procedure 1 must have one argument DROP OPERATOR FAMILY alt_opf15 USING hash; ERROR: current transaction is aborted, commands ignored until end of transaction block ROLLBACK; -- 2.11.0 (Apple Git-81)