From 99c1c7846b97446c48bb1ca262218d83b5dd5bf1 Mon Sep 17 00:00:00 2001
From: ashu <ashutosh12.1@example.com>
Date: Fri, 11 Aug 2017 17:59:47 +0530
Subject: [PATCH] Improve locking strategy during VACUUM in Hash Index for
 regular tables.

Patch by Ashutosh Sharma <ashu.coek88@gmail.com>
---
 src/backend/access/hash/README     |  2 +-
 src/backend/access/hash/hash.c     | 44 ++++++++++++++++++++++++++------------
 src/backend/access/hash/hashovfl.c |  4 +---
 3 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/src/backend/access/hash/README b/src/backend/access/hash/README
index eef7d66..34a84ce 100644
--- a/src/backend/access/hash/README
+++ b/src/backend/access/hash/README
@@ -396,8 +396,8 @@ The fourth operation is garbage collection (bulk deletion):
 			mark the target page dirty
 			write WAL for deleting tuples from target page
 			if this is the last bucket page, break out of loop
-			pin and x-lock next page
 			release prior lock and pin (except keep pin on primary bucket page)
+			pin and x-lock next page
 		if the page we have locked is not the primary bucket page:
 			release lock and take exclusive lock on primary bucket page
 		if there are no other pins on the primary bucket page:
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 45a3a5a..012e00f 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -660,11 +660,9 @@ hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
  * that the next valid TID will be greater than or equal to the current
  * valid TID.  There can't be any concurrent scans in progress when we first
  * enter this function because of the cleanup lock we hold on the primary
- * bucket page, but as soon as we release that lock, there might be.  We
- * handle that by conspiring to prevent those scans from passing our cleanup
- * scan.  To do that, we lock the next page in the bucket chain before
- * releasing the lock on the previous page.  (This type of lock chaining is
- * not ideal, so we might want to look for a better solution at some point.)
+ * bucket page, but as soon as we release that lock, there might be.  We
+ * do not need to worry about that, because hash index scans work in
+ * page-at-a-time mode.
  *
  * We need to retain a pin on the primary bucket to ensure that no concurrent
  * split can start.
@@ -833,18 +831,36 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		if (!BlockNumberIsValid(blkno))
 			break;
 
-		next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
-											  LH_OVERFLOW_PAGE,
-											  bstrategy);
-
 		/*
-		 * release the lock on previous page after acquiring the lock on next
-		 * page
+		 * As the hash index scan works in page-at-a-time mode, vacuum can
+		 * release the lock on the previous page before acquiring the lock on
+		 * the next page for regular tables.  For unlogged tables, however, we
+		 * avoid this, as we do not want a scan to cross vacuum when both are
+		 * running on the same bucket page.  This ensures that we are safe
+		 * during dead marking of index tuples in _hash_kill_items().
 		 */
-		if (retain_pin)
-			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+		if (RelationNeedsWAL(rel))
+		{
+			if (retain_pin)
+				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+			else
+				_hash_relbuf(rel, buf);
+
+			next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
+												  LH_OVERFLOW_PAGE,
+												  bstrategy);
+		}
 		else
-			_hash_relbuf(rel, buf);
+		{
+			next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
+												  LH_OVERFLOW_PAGE,
+												  bstrategy);
+
+			if (retain_pin)
+				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+			else
+				_hash_relbuf(rel, buf);
+		}
 
 		buf = next_buf;
 	}
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index c206e70..3a7011d 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -790,9 +790,7 @@ _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
  *	Caller must acquire cleanup lock on the primary page of the target
  *	bucket to exclude any scans that are in progress, which could easily
  *	be confused into returning the same tuple more than once or some tuples
- *	not at all by the rearrangement we are performing here.  To prevent
- *	any concurrent scan to cross the squeeze scan we use lock chaining
- *	similar to hasbucketcleanup.  Refer comments atop hashbucketcleanup.
+ *	not at all by the rearrangement we are performing here.
  *
  *	We need to retain a pin on the primary bucket to ensure that no concurrent
  *	split can start.
-- 
1.8.3.1