From 0c99e0fa2969a14cd156cfc0adc45ba85c48093f Mon Sep 17 00:00:00 2001 From: Hubert Zhang Date: Tue, 28 May 2019 08:47:15 +0000 Subject: [PATCH] Allow batch splitting to continue when tuples become diverse When building the hash table, we need to increase the number of batches once spaceAllowed is reached. Previously, if a split failed (all the tuples fell into one batch and the other was empty), the growEnabled flag was turned off forever. Since later tuples may become diverse, we add new logic to re-enable batch growth when a split would be beneficial. Co-authored-by: Ning Yu --- src/backend/executor/nodeHash.c | 35 +++++++++++++++++++++++++++++------ src/include/executor/hashjoin.h | 2 ++ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 64eec91..fa2a2f8 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -43,7 +43,7 @@ #include "utils/syscache.h" -static void ExecHashIncreaseNumBatches(HashJoinTable hashtable); +static void ExecHashIncreaseNumBatches(HashJoinTable hashtable, uint32 hashvalue, uint32 hashTupleSize); static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable); static void ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable); static void ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable); @@ -494,6 +494,8 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, hashtable->nbatch_original = nbatch; hashtable->nbatch_outstart = nbatch; hashtable->growEnabled = true; + hashtable->growRemainOldBatch = true; + hashtable->splittableSize = 0; hashtable->totalTuples = 0; hashtable->partialTuples = 0; hashtable->skewTuples = 0; @@ -882,7 +884,7 @@ ExecHashTableDestroy(HashJoinTable hashtable) * current memory consumption */ static void -ExecHashIncreaseNumBatches(HashJoinTable hashtable) +ExecHashIncreaseNumBatches(HashJoinTable hashtable, uint32 hashvalue, uint32 hashTupleSize) { int oldnbatch = 
hashtable->nbatch; int curbatch = hashtable->curbatch; @@ -892,9 +894,29 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable) long nfreed; HashMemoryChunk oldchunks; - /* do nothing if we've decided to shut off growth */ + /* + * If we've shut off the growth, check whether the new tuple could benefit + * from the split. When splittable_size reaches the spaceAllowed, re-enable + * growEnabled flag and do the real split further. + */ if (!hashtable->growEnabled) - return; + { + int bucketno; + int batchno; + hashtable->nbatch *= 2; + ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno, &batchno); + hashtable->nbatch = oldnbatch; + if ((hashtable->growRemainOldBatch) && (batchno != curbatch) + || (!hashtable->growRemainOldBatch) && (batchno == curbatch)) + hashtable->splittableSize += hashTupleSize; + if (hashtable->splittableSize >= hashtable->spaceAllowed ) + { + hashtable->growEnabled = true; + hashtable->splittableSize = 0; + } + else + return ; + } /* safety check to avoid overflow */ if (oldnbatch > Min(INT_MAX / 2, MaxAllocSize / (sizeof(void *) * 2))) @@ -1040,6 +1062,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable) if (nfreed == 0 || nfreed == ninmemory) { hashtable->growEnabled = false; + hashtable->growRemainOldBatch = (nfreed == 0) ? 
true : false; #ifdef HJDEBUG printf("Hashjoin %p: disabling further increase of nbatch\n", hashtable); @@ -1656,7 +1679,7 @@ ExecHashTableInsert(HashJoinTable hashtable, if (hashtable->spaceUsed + hashtable->nbuckets_optimal * sizeof(HashJoinTuple) > hashtable->spaceAllowed) - ExecHashIncreaseNumBatches(hashtable); + ExecHashIncreaseNumBatches(hashtable, hashvalue, (uint32)hashTupleSize); } else { @@ -2435,7 +2458,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable, /* Check we are not over the total spaceAllowed, either */ if (hashtable->spaceUsed > hashtable->spaceAllowed) - ExecHashIncreaseNumBatches(hashtable); + ExecHashIncreaseNumBatches(hashtable, hashvalue, (uint32)hashTupleSize); if (shouldFree) heap_free_minimal_tuple(tuple); diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index 2c94b92..e6ce30c 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -314,6 +314,8 @@ typedef struct HashJoinTableData int nbatch_outstart; /* nbatch when we started outer scan */ bool growEnabled; /* flag to shut off nbatch increases */ + bool growRemainOldBatch; /* all the tuple remain in old batch when growing */ + int splittableSize; /* size of splittable tuples in hashtable */ double totalTuples; /* # tuples obtained from inner plan */ double partialTuples; /* # tuples obtained from inner plan by me */ -- 1.8.3.1