diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c index 302cb2092a9..74cc62839cb 100644 --- a/src/backend/access/gin/ginbulk.c +++ b/src/backend/access/gin/ginbulk.c @@ -76,8 +76,8 @@ cmpEntryAccumulator(const RBTNode *a, const RBTNode *b, void *arg) BuildAccumulator *accum = (BuildAccumulator *) arg; return ginCompareAttEntries(accum->ginstate, - ea->attnum, ea->key, ea->category, - eb->attnum, eb->key, eb->category); + ea->attnum, ea->key, ea->hash, ea->category, + eb->attnum, eb->key, eb->hash, eb->category); } /* Allocator function for rbtree.c */ @@ -147,7 +147,7 @@ getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value) static void ginInsertBAEntry(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, - Datum key, GinNullCategory category) + Datum key, uint32 hash, GinNullCategory category) { GinEntryAccumulator eatmp; GinEntryAccumulator *ea; @@ -159,6 +159,7 @@ ginInsertBAEntry(BuildAccumulator *accum, */ eatmp.attnum = attnum; eatmp.key = key; + eatmp.hash = hash; eatmp.category = category; /* temporarily set up single-entry itempointer list */ eatmp.list = heapptr; @@ -209,7 +210,7 @@ ginInsertBAEntry(BuildAccumulator *accum, void ginInsertBAEntries(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, - Datum *entries, GinNullCategory *categories, + Datum *entries, uint32 *hashes, GinNullCategory *categories, int32 nentries) { uint32 step = nentries; @@ -236,7 +237,7 @@ ginInsertBAEntries(BuildAccumulator *accum, for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ ) ginInsertBAEntry(accum, heapptr, attnum, - entries[i], categories[i]); + entries[i], hashes[i], categories[i]); step >>= 1; /* /2 */ } diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index ba6bbd562df..8b2125bb00c 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -255,8 +255,8 @@ entryIsMoveRight(GinBtree btree, Page page) key = gintuple_get_key(btree->ginstate, itup, &category); if (ginCompareAttEntries(btree->ginstate, - btree->entryAttnum, btree->entryKey, btree->entryCategory, - attnum, key, category) > 0) + btree->entryAttnum, btree->entryKey, 0, btree->entryCategory, + attnum, key, 0, category) > 0) return true; return false; @@ -313,8 +313,9 @@ entryLocateEntry(GinBtree btree, GinBtreeStack *stack) result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, btree->entryKey, + 0, btree->entryCategory, - attnum, key, category); + attnum, key, 0, category); } if (result == 0) @@ -384,8 +385,9 @@ entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, btree->entryKey, + 0, btree->entryCategory, - attnum, key, category); + attnum, key, 0, category); if (result == 0) { stack->off = mid; diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index a6d88572cc2..6010e8091bb 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -746,7 +746,7 @@ processPendingPage(BuildAccumulator *accum, KeyArray *ka, * and reset ka. */ ginInsertBAEntries(accum, &heapptr, attrnum, - ka->keys, ka->categories, ka->nvalues); + ka->keys, NULL, ka->categories, ka->nvalues); ka->nvalues = 0; heapptr = itup->t_tid; attrnum = curattnum; @@ -759,7 +759,7 @@ processPendingPage(BuildAccumulator *accum, KeyArray *ka, /* Dump out all remaining keys */ ginInsertBAEntries(accum, &heapptr, attrnum, - ka->keys, ka->categories, ka->nvalues); + ka->keys, NULL, ka->categories, ka->nvalues); } /* diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index f3b19d280c3..1d38b6aa462 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -283,8 +283,8 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, &newCategory); if (ginCompareEntries(btree->ginstate, attnum, - newDatum, newCategory, - idatum, icategory) == 0) + newDatum, 0, newCategory, + idatum, 0, icategory) == 0) break; /* Found! */ } @@ -1724,8 +1724,10 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos) res = ginCompareEntries(&so->ginstate, entry->attnum, entry->queryKey, + 0, entry->queryCategory, datum[StopMiddle - 1], + 0, category[StopMiddle - 1]); } diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 7286432698e..f54e9e9b26c 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -193,7 +193,7 @@ static void _gin_parallel_scan_and_build(GinBuildState *buildstate, static Datum _gin_parse_tuple(GinTuple *a, ItemPointerData **items); static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category, - Datum key, int16 typlen, bool typbyval, + Datum key, int16 typlen, bool typbyval, uint32 hash, ItemPointerData *items, uint32 nitems, Size *len); @@ -412,8 +412,8 @@ ginEntryInsert(GinState *ginstate, * This function is used only during initial index creation. */ static void -ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, - Datum value, bool isNull, +ginHeapTupleBulkInsert(GinBuildState *buildstate, Relation index, + OffsetNumber attnum, Datum value, bool isNull, ItemPointer heapptr) { Datum *entries; @@ -421,14 +421,41 @@ ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, int32 nentries; MemoryContext oldCtx; + uint32 *hashes; + TypeCacheEntry *typentry; + TupleDesc tdesc = RelationGetDescr(index); + Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1)); + oldCtx = MemoryContextSwitchTo(buildstate->funcCtx); entries = ginExtractEntries(buildstate->accum.ginstate, attnum, value, isNull, &nentries, &categories); + + hashes = palloc0(sizeof(uint32) * nentries); + MemoryContextSwitchTo(oldCtx); + typentry = lookup_type_cache(attr->atttypid, TYPECACHE_HASH_PROC); + if (OidIsValid(typentry->hash_proc)) + { + /* FIXME is it correct to use attr->attcollation, not rd_indcollation? */ + Oid collation = attr->attcollation; + if (!OidIsValid(collation)) + collation = DEFAULT_COLLATION_OID; + + for (int i = 0; i < nentries; i++) + { + if (categories[i] != GIN_CAT_NORM_KEY) + continue; + + hashes[i] = DatumGetUInt32(OidFunctionCall1Coll(typentry->hash_proc, + collation, + entries[i])); + } + } + ginInsertBAEntries(&buildstate->accum, heapptr, attnum, - entries, categories, nentries); + entries, hashes, categories, nentries); buildstate->indtuples += nentries; @@ -446,7 +473,7 @@ ginBuildCallback(Relation index, ItemPointer tid, Datum *values, oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++) - ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), + ginHeapTupleBulkInsert(buildstate, index, (OffsetNumber) (i + 1), values[i], isnull[i], tid); /* If we've maxed out our available memory, dump everything to the index */ @@ -495,6 +522,8 @@ ginFlushBuildState(GinBuildState *buildstate, Relation index) { /* information about the key */ Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1)); + TypeCacheEntry *typentry; + uint32 hash = 0; /* GIN tuple and tuple length */ GinTuple *tup; @@ -503,8 +532,22 @@ ginFlushBuildState(GinBuildState *buildstate, Relation index) /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); + /* calculate hash of the key */ + typentry = lookup_type_cache(attr->atttypid, TYPECACHE_HASH_PROC); + if (OidIsValid(typentry->hash_proc) && (category == GIN_CAT_NORM_KEY)) + { + /* FIXME is it correct to use attr->attcollation, not rd_indcollation? */ + Oid collation = attr->attcollation; + if (!OidIsValid(collation)) + collation = DEFAULT_COLLATION_OID; + + hash = DatumGetUInt32(OidFunctionCall1Coll(typentry->hash_proc, + collation, + key)); + } + tup = _gin_build_tuple(attnum, category, - key, attr->attlen, attr->attbyval, + key, attr->attlen, attr->attbyval, hash, list, nlist, &tuplen); tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen); @@ -562,7 +605,7 @@ ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values, buildstate->tid = *tid; for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++) - ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), + ginHeapTupleBulkInsert(buildstate, index, (OffsetNumber) (i + 1), values[i], isnull[i], tid); /* @@ -1149,6 +1192,7 @@ typedef struct GinBuffer GinNullCategory category; Datum key; /* 0 if no key (and keylen == 0) */ Size keylen; /* number of bytes (not typlen) */ + uint32 hash; /* type info */ int16 typlen; @@ -1315,6 +1359,9 @@ GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup) if (tup->category != buffer->category) return false; + if (tup->hash != buffer->hash) + return false; + /* * For NULL/empty keys, this means equality, for normal keys we need to * compare the actual key value. @@ -1377,6 +1424,7 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup) buffer->typlen = tup->typlen; buffer->typbyval = tup->typbyval; + buffer->hash = tup->hash; if (tup->category == GIN_CAT_NORM_KEY) buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen); @@ -1695,6 +1743,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, ntup = _gin_build_tuple(buffer->attnum, buffer->category, buffer->key, buffer->typlen, buffer->typbyval, + 0, buffer->items, buffer->nitems, &ntuplen); tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen); @@ -1723,6 +1772,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, ntup = _gin_build_tuple(buffer->attnum, buffer->category, buffer->key, buffer->typlen, buffer->typbyval, + 0, buffer->items, buffer->nitems, &ntuplen); tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen); @@ -1971,7 +2021,7 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) */ static GinTuple * _gin_build_tuple(OffsetNumber attrnum, unsigned char category, - Datum key, int16 typlen, bool typbyval, + Datum key, int16 typlen, bool typbyval, uint32 hash, ItemPointerData *items, uint32 nitems, Size *len) { @@ -2030,6 +2080,7 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category, tuple->category = category; tuple->keylen = keylen; tuple->nitems = nitems; + tuple->hash = hash; /* key type info */ tuple->typlen = typlen; @@ -2138,6 +2189,12 @@ _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup) if (a->category > b->category) return 1; + if (a->hash < b->hash) + return -1; + + if (a->hash > b->hash) + return 1; + if (a->category == GIN_CAT_NORM_KEY) { keya = _gin_parse_tuple(a, NULL); diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 63ded6301e2..4bb15d6ccc3 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -82,8 +82,10 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, prevEntry->attnum == attnum && ginCompareEntries(ginstate, attnum, prevEntry->queryKey, + 0, prevEntry->queryCategory, queryKey, + 0, queryCategory) == 0) { /* Successful match */ diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index a61532538c0..f47417ee638 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -388,8 +388,8 @@ GinInitMetabuffer(Buffer b) */ int ginCompareEntries(GinState *ginstate, OffsetNumber attnum, - Datum a, GinNullCategory categorya, - Datum b, GinNullCategory categoryb) + Datum a, uint32 hasha, GinNullCategory categorya, + Datum b, uint32 hashb, GinNullCategory categoryb) { /* if not of same null category, sort by that first */ if (categorya != categoryb) @@ -399,6 +399,9 @@ ginCompareEntries(GinState *ginstate, OffsetNumber attnum, if (categorya != GIN_CAT_NORM_KEY) return 0; + if (hasha != hashb) + return (hasha < hashb) ? -1 : 1; + /* both not null, so safe to call the compareFn */ return DatumGetInt32(FunctionCall2Coll(&ginstate->compareFn[attnum - 1], ginstate->supportCollation[attnum - 1], @@ -410,14 +413,14 @@ ginCompareEntries(GinState *ginstate, OffsetNumber attnum, */ int ginCompareAttEntries(GinState *ginstate, - OffsetNumber attnuma, Datum a, GinNullCategory categorya, - OffsetNumber attnumb, Datum b, GinNullCategory categoryb) + OffsetNumber attnuma, Datum a, uint32 hasha, GinNullCategory categorya, + OffsetNumber attnumb, Datum b, uint32 hashb, GinNullCategory categoryb) { /* attribute number is the first sort key */ if (attnuma != attnumb) return (attnuma < attnumb) ? -1 : 1; - return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb); + return ginCompareEntries(ginstate, attnuma, a, hasha, categorya, b, hashb, categoryb); } diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 95d8805b66f..d213f22d6f3 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -97,11 +97,11 @@ extern void GinInitBuffer(Buffer b, uint32 f); extern void GinInitPage(Page page, uint32 f, Size pageSize); extern void GinInitMetabuffer(Buffer b); extern int ginCompareEntries(GinState *ginstate, OffsetNumber attnum, - Datum a, GinNullCategory categorya, - Datum b, GinNullCategory categoryb); + Datum a, uint32 hasha, GinNullCategory categorya, + Datum b, uint32 hashb, GinNullCategory categoryb); extern int ginCompareAttEntries(GinState *ginstate, - OffsetNumber attnuma, Datum a, GinNullCategory categorya, - OffsetNumber attnumb, Datum b, GinNullCategory categoryb); + OffsetNumber attnuma, Datum a, uint32 hasha, GinNullCategory categorya, + OffsetNumber attnumb, Datum b, uint32 hashb, GinNullCategory categoryb); extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, int32 *nentries, GinNullCategory **categories); @@ -423,6 +423,7 @@ typedef struct GinEntryAccumulator { RBTNode rbtnode; Datum key; + uint32 hash; GinNullCategory category; OffsetNumber attnum; bool shouldSort; @@ -444,7 +445,8 @@ typedef struct extern void ginInitBA(BuildAccumulator *accum); extern void ginInsertBAEntries(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, - Datum *entries, GinNullCategory *categories, + Datum *entries, uint32 *hashes, + GinNullCategory *categories, int32 nentries); extern void ginBeginBAScan(BuildAccumulator *accum); extern ItemPointerData *ginGetBAEntry(BuildAccumulator *accum, diff --git a/src/include/access/gin_tuple.h b/src/include/access/gin_tuple.h index ce555031335..1075aad917d 100644 --- a/src/include/access/gin_tuple.h +++ b/src/include/access/gin_tuple.h @@ -26,6 +26,7 @@ typedef struct GinTuple bool typbyval; /* typbyval for key */ signed char category; /* category: normal or NULL? */ int nitems; /* number of TIDs in the data */ + uint32 hash; /* hash of the value */ char data[FLEXIBLE_ARRAY_MEMBER]; } GinTuple;