From bfd42993d2bf9ba88ffd26815565a321eec12440 Mon Sep 17 00:00:00 2001
From: Amit Kapila <akapila@postgresql.org>
Date: Thu, 12 Mar 2020 17:00:05 +0530
Subject: [PATCH] Allow relation extension and page locks to conflict among
 parallel group members.

---
 src/backend/storage/lmgr/README     | 60 ++++++++++++++++++++-----------------
 src/backend/storage/lmgr/deadlock.c |  9 ++++++
 src/backend/storage/lmgr/lock.c     | 12 ++++++++
 src/backend/storage/lmgr/proc.c     |  8 ++++-
 src/include/storage/lock.h          |  1 +
 5 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/src/backend/storage/lmgr/README b/src/backend/storage/lmgr/README
index 56b0a12..13eb1cc 100644
--- a/src/backend/storage/lmgr/README
+++ b/src/backend/storage/lmgr/README
@@ -597,21 +597,22 @@ deadlock detection algorithm very much, but it makes the bookkeeping more
 complicated.
 
 We choose to regard locks held by processes in the same parallel group as
-non-conflicting.  This means that two processes in a parallel group can hold a
-self-exclusive lock on the same relation at the same time, or one process can
-acquire an AccessShareLock while the other already holds AccessExclusiveLock.
-This might seem dangerous and could be in some cases (more on that below), but
-if we didn't do this then parallel query would be extremely prone to
-self-deadlock.  For example, a parallel query against a relation on which the
-leader already had AccessExclusiveLock would hang, because the workers would
-try to lock the same relation and be blocked by the leader; yet the leader
-can't finish until it receives completion indications from all workers.  An
-undetected deadlock results.  This is far from the only scenario where such a
-problem happens.  The same thing will occur if the leader holds only
-AccessShareLock, the worker seeks AccessShareLock, but between the time the
-leader attempts to acquire the lock and the time the worker attempts to
-acquire it, some other process queues up waiting for an AccessExclusiveLock.
-In this case, too, an indefinite hang results.
+non-conflicting with the exception of relation extension and page locks.  This
+means that two processes in a parallel group can hold a self-exclusive lock on
+the same relation at the same time, or one process can acquire an AccessShareLock
+while the other already holds AccessExclusiveLock.  This might seem dangerous and
+could be in some cases (more on that below), but if we didn't do this then
+parallel query would be extremely prone to self-deadlock.  For example, a
+parallel query against a relation on which the leader already had
+AccessExclusiveLock would hang, because the workers would try to lock the same
+relation and be blocked by the leader; yet the leader can't finish until it
+receives completion indications from all workers.  An undetected deadlock
+results.  This is far from the only scenario where such a problem happens.  The
+same thing will occur if the leader holds only AccessShareLock, the worker
+seeks AccessShareLock, but between the time the leader attempts to acquire the
+lock and the time the worker attempts to acquire it, some other process queues
+up waiting for an AccessExclusiveLock.  In this case, too, an indefinite hang
+results.
 
 It might seem that we could predict which locks the workers will attempt to
 acquire and ensure before going parallel that those locks would be acquired
@@ -637,18 +638,23 @@ the other is safe enough.  Problems would occur if the leader initiated
 parallelism from a point in the code at which it had some backend-private
 state that made table access from another process unsafe, for example after
 calling SetReindexProcessing and before calling ResetReindexProcessing,
-catastrophe could ensue, because the worker won't have that state.  Similarly,
-problems could occur with certain kinds of non-relation locks, such as
-relation extension locks.  It's no safer for two related processes to extend
-the same relation at the time than for unrelated processes to do the same.
-However, since parallel mode is strictly read-only at present, neither this
-nor most of the similar cases can arise at present.  To allow parallel writes,
-we'll either need to (1) further enhance the deadlock detector to handle those
-types of locks in a different way than other types; or (2) have parallel
-workers use some other mutual exclusion method for such cases; or (3) revise
-those cases so that they no longer use heavyweight locking in the first place
-(which is not a crazy idea, given that such lock acquisitions are not expected
-to deadlock and that heavyweight lock acquisition is fairly slow anyway).
+catastrophe could ensue, because the worker won't have that state.
+
+To allow parallel inserts and parallel copy, we have ensured that relation
+extension and page locks don't participate in group locking which means such
+locks can conflict among the same group members.  This is required as it is no
+safer for two related processes to extend the same relation or perform clean up
+in gin indexes at a time than for unrelated processes to do the same.  We don't
+acquire a heavyweight lock on any other object after relation extension lock
+which means such a lock can never participate in the deadlock cycle.  After
+acquiring page locks, we can acquire relation extension lock but reverse never
+happens, so those will also not participate in deadlock.  To allow for other
+parallel writes like parallel update or parallel delete, we'll either need to
+(1) further enhance the deadlock detector to handle those tuple locks in a
+different way than other types; or (2) have parallel workers use some other
+mutual exclusion method for such cases.  Currently, the parallel mode is
+strictly read-only, but now we have the infrastructure to allow parallel
+inserts and parallel copy.
 
 Group locking adds three new members to each PGPROC: lockGroupLeader,
 lockGroupMembers, and lockGroupLink. A PGPROC's lockGroupLeader is NULL for
diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
index f8c5df0..80ec88b 100644
--- a/src/backend/storage/lmgr/deadlock.c
+++ b/src/backend/storage/lmgr/deadlock.c
@@ -555,6 +555,15 @@ FindLockCycleRecurseMember(PGPROC *checkProc,
 	int			numLockModes,
 				lm;
 
+	/*
+	 * The relation extension or page lock can never participate in actual
+	 * deadlock cycle.  See Asserts in LockAcquireExtended.  So, there is
+	 * no advantage in checking wait edges from it.
+	 */
+	if ((LOCK_LOCKTAG(*lock) == LOCKTAG_RELATION_EXTEND) ||
+		(LOCK_LOCKTAG(*lock) == LOCKTAG_PAGE))
+		return false;
+        
 	lockMethodTable = GetLocksMethodTable(lock);
 	numLockModes = lockMethodTable->numLockModes;
 	conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 56dba09..6fdfeba 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -1404,6 +1404,18 @@ LockCheckConflicts(LockMethod lockMethodTable,
 	}
 
 	/*
+	 * The relation extension or page lock conflict even between the group
+	 * members.
+	 */
+	if ((LOCK_LOCKTAG(*lock) == LOCKTAG_RELATION_EXTEND) ||
+		(LOCK_LOCKTAG(*lock) == LOCKTAG_PAGE))
+	{
+		PROCLOCK_PRINT("LockCheckConflicts: conflicting (group)",
+				proclock);
+		return true;
+	}
+
+	/*
 	 * Locks held in conflicting modes by members of our own lock group are
 	 * not real conflicts; we can subtract those out and see if we still have
 	 * a conflict.  This is O(N) in the number of processes holding or
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index eb321f7..b18f61b 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1077,7 +1077,13 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 
 	/*
 	 * If group locking is in use, locks held by members of my locking group
-	 * need to be included in myHeldLocks.
+	 * need to be included in myHeldLocks.  This is not required for
+	 * relation extension or page locks which conflict among group members.
+	 * However, including them in myHeldLocks will give group members the
+	 * priority to get those locks as compared to other backends which are
+	 * also trying to acquire those locks.  OTOH, we can avoid giving
+	 * priority to group members for that kind of locks, but there
+	 * doesn't appear to be a clear advantage of the same.
 	 */
 	if (leader != NULL)
 	{
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index bb8e4e6..fac979d 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -301,6 +301,7 @@ typedef struct LOCK
 } LOCK;
 
 #define LOCK_LOCKMETHOD(lock) ((LOCKMETHODID) (lock).tag.locktag_lockmethodid)
+#define LOCK_LOCKTAG(lock) ((LockTagType) (lock).tag.locktag_type)
 
 
 /*
-- 
1.8.3.1