commit 7448239668ca4fedd6c1f5cf3d1d434efcc3291f Author: mithun Date: Tue Feb 21 12:08:11 2017 +0530 Feature : expand hash table efficiently. diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index 3334089..4a9a409 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -48,7 +48,7 @@ bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum) * Convert to absolute page number by adding the number of bucket pages * that exist before this split point. */ - return (BlockNumber) ((1 << i) + ovflbitnum); + return (BlockNumber) (_hash_get_tbuckets(i) + ovflbitnum); } /* @@ -66,9 +66,9 @@ _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno) /* Determine the split number containing this page */ for (i = 1; i <= splitnum; i++) { - if (ovflblkno <= (BlockNumber) (1 << i)) + if (ovflblkno <= (BlockNumber) _hash_get_tbuckets(i)) break; /* oops */ - bitnum = ovflblkno - (1 << i); + bitnum = ovflblkno - _hash_get_tbuckets(i); /* * bitnum has to be greater than number of overflow page added in diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 9485978..1afcbd0 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -315,7 +315,7 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum) int32 ffactor; double dnumbuckets; uint32 num_buckets; - uint32 log2_num_buckets; + uint32 spare_index; uint32 i; /* safety check */ @@ -350,11 +350,10 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum) else if (dnumbuckets >= (double) 0x40000000) num_buckets = 0x40000000; else - num_buckets = ((uint32) 1) << _hash_log2((uint32) dnumbuckets); + num_buckets = _hash_get_tbuckets(_hash_spareindex(dnumbuckets)); - log2_num_buckets = _hash_log2(num_buckets); - Assert(num_buckets == (((uint32) 1) << log2_num_buckets)); - Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS); + spare_index = _hash_spareindex(num_buckets); + 
Assert(spare_index < HASH_MAX_SPLITPOINTS); /* * We initialize the metapage, the first N bucket pages, and the first @@ -400,18 +399,20 @@ _hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum) /* * We initialize the index with N buckets, 0 .. N-1, occupying physical - * blocks 1 to N. The first freespace bitmap page is in block N+1. Since - * N is a power of 2, we can set the masks this way: + * blocks 1 to N. The first freespace bitmap page is in block N+1. */ - metap->hashm_maxbucket = metap->hashm_lowmask = num_buckets - 1; - metap->hashm_highmask = (num_buckets << 1) - 1; + metap->hashm_maxbucket = num_buckets - 1; + + /* set highmask, which should be sufficient to cover num_buckets. */ + metap->hashm_highmask = (1 << (_hash_log2(num_buckets + 1))) - 1; + metap->hashm_lowmask = (metap->hashm_highmask >> 1); MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares)); MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp)); /* Set up mapping for one spare page after the initial splitpoints */ - metap->hashm_spares[log2_num_buckets] = 1; - metap->hashm_ovflpoint = log2_num_buckets; + metap->hashm_spares[spare_index] = 1; + metap->hashm_ovflpoint = spare_index; metap->hashm_firstfree = 0; /* @@ -619,8 +620,8 @@ restart_expand: { /* * Copy bucket mapping info now; refer to the comment in code below - * where we copy this information before calling _hash_splitbucket - * to see why this is okay. + * where we copy this information before calling _hash_splitbucket to + * see why this is okay. */ maxbucket = metap->hashm_maxbucket; highmask = metap->hashm_highmask; @@ -649,21 +650,23 @@ restart_expand: start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket); /* - * If the split point is increasing (hashm_maxbucket's log base 2 - * increases), we need to allocate a new batch of bucket pages. + * If the split point is increasing, we need to allocate a new batch of + * bucket pages. 
*/ - spare_ndx = _hash_log2(new_bucket + 1); + spare_ndx = _hash_spareindex(new_bucket + 1); if (spare_ndx > metap->hashm_ovflpoint) { + uint32 buckets_toadd = 0; + Assert(spare_ndx == metap->hashm_ovflpoint + 1); /* * The number of buckets in the new splitpoint is equal to the total - * number already in existence, i.e. new_bucket. Currently this maps - * one-to-one to blocks required, but someday we may need a more - * complicated calculation here. + * number already in existence, i.e. new_bucket. Here we allocate only + * one fourth of the buckets the new split point group will add in + * total, i.e. the buckets belonging to the group's current slot. + * NOTE(review): for the first three groups (spare_ndx < 12) this + * expression evaluates to 0 -- confirm _hash_alloc_buckets handles + * nblocks == 0. */ - if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket)) + buckets_toadd = ((1 << ((spare_ndx >> 2) - 1)) >> 2); + if (!_hash_alloc_buckets(rel, start_nblkno, buckets_toadd)) { /* can't split due to BlockNumber overflow */ _hash_relbuf(rel, buf_oblkno); @@ -847,10 +850,9 @@ _hash_splitbucket(Relation rel, /* * Mark the old bucket to indicate that split is in progress. (At - * operation end, we will clear the split-in-progress flag.) Also, - * for a primary bucket page, hasho_prevblkno stores the number of - * buckets that existed as of the last split, so we must update that - * value here. + * operation end, we will clear the split-in-progress flag.) Also, for a + * primary bucket page, hasho_prevblkno stores the number of buckets that + * existed as of the last split, so we must update that value here. */ oopaque->hasho_flag |= LH_BUCKET_BEING_SPLIT; oopaque->hasho_prevblkno = maxbucket; @@ -1206,11 +1208,11 @@ _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket, * _hash_getcachedmetap() -- Returns cached metapage data. * * If metabuf is not InvalidBuffer, caller must hold a pin, but no lock, on - * the metapage. 
If not set, we'll set it before returning if we have to + * refresh the cache, and return with a pin but no lock on it; caller is + * responsible for releasing the pin. - * We refresh the cache if it's not initialized yet or force_refresh is true. + * We refresh the cache if it's not initialized yet or force_refresh is true. */ HashMetaPage _hash_getcachedmetap(Relation rel, Buffer *metabuf, bool force_refresh) @@ -1220,13 +1222,13 @@ _hash_getcachedmetap(Relation rel, Buffer *metabuf, bool force_refresh) Assert(metabuf); if (force_refresh || rel->rd_amcache == NULL) { - char *cache = NULL; + char *cache = NULL; /* - * It's important that we don't set rd_amcache to an invalid - * value. Either MemoryContextAlloc or _hash_getbuf could fail, - * so don't install a pointer to the newly-allocated storage in the - * actual relcache entry until both have succeeeded. + * It's important that we don't set rd_amcache to an invalid value. + * Either MemoryContextAlloc or _hash_getbuf could fail, so don't + * install a pointer to the newly-allocated storage in the actual + * relcache entry until both have succeeded. */ if (rel->rd_amcache == NULL) cache = MemoryContextAlloc(rel->rd_indexcxt, @@ -1261,7 +1263,7 @@ _hash_getcachedmetap(Relation rel, Buffer *metabuf, bool force_refresh) * us an opportunity to use the previously saved metapage contents to reach * the target bucket buffer, instead of reading from the metapage every time. * This saves one buffer access every time we want to reach the target bucket - * buffer, which is very helpful savings in bufmgr traffic and contention. + * buffer, which is very helpful savings in bufmgr traffic and contention. * * The access type parameter (HASH_READ or HASH_WRITE) indicates whether the * bucket buffer has to be locked for reading or writing. 
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c index c705531..2c0e975 100644 --- a/src/backend/access/hash/hashutil.c +++ b/src/backend/access/hash/hashutil.c @@ -149,6 +149,57 @@ _hash_log2(uint32 num) } /* + * _hash_spareindex -- map a bucket count to its spare-array index (split point). + * + * Split points come in groups of four per doubling of the table: the group + * number is the value returned by _hash_log2(num_bucket), and the slot (0..3) + * says which quarter of that doubling num_bucket falls into, so the index is + * (group << 2) + slot. _hash_get_tbuckets is the intended inverse. + * + * NOTE(review): assumes num_bucket >= 2. For num_bucket <= 1, _hash_log2 + * returns 0 and (i - 1) wraps as uint32, making the shift below undefined + * behavior -- confirm no caller ever passes 1. + */ +uint32 +_hash_spareindex(uint32 num_bucket) +{ + uint32 i, + tbuckets, + bucket_pos; + + i = _hash_log2(num_bucket); + + /* + * tbuckets is the bucket count in place before this doubling began + * (half of 1 << i); the doubling adds tbuckets more buckets in four + * equal slots, so place num_bucket into its slot within the group. + * Slot is 0 when the group is too small to be quartered (tbuckets < 4). + */ + tbuckets = 1 << (i - 1); + bucket_pos = num_bucket - tbuckets; + return ((i << 2) + ((tbuckets >> 2) ? ((bucket_pos - 1) / (tbuckets >> 2)) : 0)); +} + +/* + * _hash_get_tbuckets -- total number of buckets allocated once the given + * split point (spare index) is complete. Intended inverse of + * _hash_spareindex. + */ +uint32 +_hash_get_tbuckets(uint32 split_num) +{ + uint32 tbuckets, + slot; + + /* + * The first three groups (split_num >> 2 == 0, 1, 2) end with 1, 2 and + * 4 buckets respectively -- too few to distribute across the 4 slots, + * so return the group total irrespective of the slot occupied. + */ + if ((split_num >> 2) == 0) + return 1; + if ((split_num >> 2) == 1) + return 2; + if ((split_num >> 2) == 2) + return 4; + /* + * total_buckets = buckets at the end of the previous group, which is + * 1 << (group - 1), plus one quarter of that for each completed slot + * (1..4) of the current group. + */ + tbuckets = (1 << ((split_num >> 2) - 1)); + slot = ((3 & split_num) + 1); + tbuckets = tbuckets + (slot * (tbuckets >> 2)); + return tbuckets; +} + +/* * _hash_checkpage -- sanity checks on the format of all hash pages * * If flags is not zero, it is a bitwise OR of the acceptable values of diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 3bf587b..3407659 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -36,7 +36,7 @@ typedef uint32 Bucket; #define InvalidBucket ((Bucket) 0xFFFFFFFF) #define BUCKET_TO_BLKNO(metap,B) \ - ((BlockNumber) ((B) + ((B) ? 
(metap)->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1) + ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1) /* * Special space for hash index pages. @@ -168,7 +168,7 @@ typedef HashScanOpaqueData *HashScanOpaque; * needing to fit into the metapage. (With 8K block size, 128 bitmaps * limit us to 64 GB of overflow space...) */ -#define HASH_MAX_SPLITPOINTS 32 +#define HASH_MAX_SPLITPOINTS 128 #define HASH_MAX_BITMAPS 128 typedef struct HashMetaPageData @@ -364,6 +364,8 @@ extern uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype); extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket, uint32 highmask, uint32 lowmask); extern uint32 _hash_log2(uint32 num); +extern uint32 _hash_spareindex(uint32 num); +extern uint32 _hash_get_tbuckets(uint32 num); extern void _hash_checkpage(Relation rel, Buffer buf, int flags); extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup); extern bool _hash_convert_tuple(Relation index,