diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 8dc3054..51b24d0 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -105,7 +105,7 @@ typedef struct pgssEntry */ typedef struct pgssSharedState { - LWLockId lock; /* protects hashtable search/modification */ + FlexLockId lock; /* protects hashtable search/modification */ int query_size; /* max query length in bytes */ } pgssSharedState; diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d1e628f..8517b36 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -6199,14 +6199,14 @@ LOG: CleanUpLock: deleting: lock(0xb7acd844) id(24688,24696,0,0,0,1) - trace_lwlocks (boolean) + trace_flexlocks (boolean) - trace_lwlocks configuration parameter + trace_flexlocks configuration parameter - If on, emit information about lightweight lock usage. Lightweight - locks are intended primarily to provide mutual exclusion of access + If on, emit information about FlexLock usage. FlexLocks + are intended primarily to provide mutual exclusion of access to shared-memory data structures. diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index b9dc1d2..98ed0d3 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1724,49 +1724,49 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS procpid, or kilobytes of memory used for an internal sort. - lwlock-acquire - (LWLockId, LWLockMode) - Probe that fires when an LWLock has been acquired. - arg0 is the LWLock's ID. - arg1 is the requested lock mode, either exclusive or shared. + flexlock-acquire + (FlexLockId, FlexLockMode) + Probe that fires when an FlexLock has been acquired. + arg0 is the FlexLock's ID. + arg1 is the requested lock mode. 
- lwlock-release - (LWLockId) - Probe that fires when an LWLock has been released (but note + flexlock-release + (FlexLockId) + Probe that fires when a FlexLock has been released (but note that any released waiters have not yet been awakened). - arg0 is the LWLock's ID. + arg0 is the FlexLock's ID. - lwlock-wait-start - (LWLockId, LWLockMode) - Probe that fires when an LWLock was not immediately available and + flexlock-wait-start + (FlexLockId, FlexLockMode) + Probe that fires when an FlexLock was not immediately available and a server process has begun to wait for the lock to become available. - arg0 is the LWLock's ID. + arg0 is the FlexLock's ID. arg1 is the requested lock mode, either exclusive or shared. - lwlock-wait-done - (LWLockId, LWLockMode) + flexlock-wait-done + (FlexLockId, FlexLockMode) Probe that fires when a server process has been released from its - wait for an LWLock (it does not actually have the lock yet). - arg0 is the LWLock's ID. + wait for an FlexLock (it does not actually have the lock yet). + arg0 is the FlexLock's ID. arg1 is the requested lock mode, either exclusive or shared. - lwlock-condacquire - (LWLockId, LWLockMode) - Probe that fires when an LWLock was successfully acquired when the - caller specified no waiting. - arg0 is the LWLock's ID. + flexlock-condacquire + (FlexLockId, FlexLockMode) + Probe that fires when an FlexLock was successfully acquired when + the caller specified no waiting. + arg0 is the FlexLock's ID. arg1 is the requested lock mode, either exclusive or shared. - lwlock-condacquire-fail - (LWLockId, LWLockMode) - Probe that fires when an LWLock was not successfully acquired when - the caller specified no waiting. - arg0 is the LWLock's ID. + flexlock-condacquire-fail + (FlexLockId, FlexLockMode) + Probe that fires when an FlexLock was not successfully acquired + when the caller specified no waiting. + arg0 is the FlexLock's ID. arg1 is the requested lock mode, either exclusive or shared. 
@@ -1813,11 +1813,11 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS procpid, unsigned int - LWLockId + FlexLockId int - LWLockMode + FlexLockMode int diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index f7caa34..09d5862 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -151,7 +151,7 @@ SimpleLruShmemSize(int nslots, int nlsns) sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */ - sz += MAXALIGN(nslots * sizeof(LWLockId)); /* buffer_locks[] */ + sz += MAXALIGN(nslots * sizeof(FlexLockId)); /* buffer_locks[] */ if (nlsns > 0) sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */ @@ -161,7 +161,7 @@ SimpleLruShmemSize(int nslots, int nlsns) void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, - LWLockId ctllock, const char *subdir) + FlexLockId ctllock, const char *subdir) { SlruShared shared; bool found; @@ -202,8 +202,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, offset += MAXALIGN(nslots * sizeof(int)); shared->page_lru_count = (int *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(int)); - shared->buffer_locks = (LWLockId *) (ptr + offset); - offset += MAXALIGN(nslots * sizeof(LWLockId)); + shared->buffer_locks = (FlexLockId *) (ptr + offset); + offset += MAXALIGN(nslots * sizeof(FlexLockId)); if (nlsns > 0) { diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 477982d..0805f9c 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -113,7 +113,8 @@ int max_prepared_xacts = 0; typedef struct GlobalTransactionData { - PGPROC proc; /* dummy proc */ + GlobalTransaction next; + int pgprocno; /* dummy proc */ BackendId dummyBackendId; /* similar to backend id for backends */ TimestampTz prepared_at; /* time of 
preparation */ XLogRecPtr prepare_lsn; /* XLOG offset of prepare record */ @@ -207,7 +208,8 @@ TwoPhaseShmemInit(void) sizeof(GlobalTransaction) * max_prepared_xacts)); for (i = 0; i < max_prepared_xacts; i++) { - gxacts[i].proc.links.next = (SHM_QUEUE *) TwoPhaseState->freeGXacts; + gxacts[i].pgprocno = PreparedXactProcs[i].pgprocno; + gxacts[i].next = TwoPhaseState->freeGXacts; TwoPhaseState->freeGXacts = &gxacts[i]; /* @@ -243,6 +245,8 @@ MarkAsPreparing(TransactionId xid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid) { GlobalTransaction gxact; + PGPROC *proc; + PGPROC_MINIMAL *proc_minimal; int i; if (strlen(gid) >= GIDSIZE) @@ -274,7 +278,7 @@ MarkAsPreparing(TransactionId xid, const char *gid, TwoPhaseState->numPrepXacts--; TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts]; /* and put it back in the freelist */ - gxact->proc.links.next = (SHM_QUEUE *) TwoPhaseState->freeGXacts; + gxact->next = TwoPhaseState->freeGXacts; TwoPhaseState->freeGXacts = gxact; /* Back up index count too, so we don't miss scanning one */ i--; @@ -302,32 +306,36 @@ MarkAsPreparing(TransactionId xid, const char *gid, errhint("Increase max_prepared_transactions (currently %d).", max_prepared_xacts))); gxact = TwoPhaseState->freeGXacts; - TwoPhaseState->freeGXacts = (GlobalTransaction) gxact->proc.links.next; + TwoPhaseState->freeGXacts = (GlobalTransaction) gxact->next; - /* Initialize it */ - MemSet(&gxact->proc, 0, sizeof(PGPROC)); - SHMQueueElemInit(&(gxact->proc.links)); - gxact->proc.waitStatus = STATUS_OK; + proc = &ProcGlobal->allProcs[gxact->pgprocno]; + proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; + + /* Initialize the PGPROC entry */ + MemSet(proc, 0, sizeof(PGPROC)); + proc->pgprocno = gxact->pgprocno; + SHMQueueElemInit(&(proc->links)); + proc->waitStatus = STATUS_OK; /* We set up the gxact's VXID as InvalidBackendId/XID */ - gxact->proc.lxid = (LocalTransactionId) xid; - gxact->proc.xid = xid; - 
gxact->proc.xmin = InvalidTransactionId; - gxact->proc.pid = 0; - gxact->proc.backendId = InvalidBackendId; - gxact->proc.databaseId = databaseid; - gxact->proc.roleId = owner; - gxact->proc.inCommit = false; - gxact->proc.vacuumFlags = 0; - gxact->proc.lwWaiting = false; - gxact->proc.lwExclusive = false; - gxact->proc.lwWaitLink = NULL; - gxact->proc.waitLock = NULL; - gxact->proc.waitProcLock = NULL; + proc->lxid = (LocalTransactionId) xid; + proc_minimal->xid = xid; + proc_minimal->xmin = InvalidTransactionId; + proc_minimal->inCommit = false; + proc_minimal->vacuumFlags = 0; + proc->pid = 0; + proc->backendId = InvalidBackendId; + proc->databaseId = databaseid; + proc->roleId = owner; + proc->flWaitResult = false; + proc->flWaitMode = false; + proc->flWaitLink = NULL; + proc->waitLock = NULL; + proc->waitProcLock = NULL; for (i = 0; i < NUM_LOCK_PARTITIONS; i++) - SHMQueueInit(&(gxact->proc.myProcLocks[i])); + SHMQueueInit(&(proc->myProcLocks[i])); /* subxid data must be filled later by GXactLoadSubxactData */ - gxact->proc.subxids.overflowed = false; - gxact->proc.subxids.nxids = 0; + proc_minimal->overflowed = false; + proc_minimal->nxids = 0; gxact->prepared_at = prepared_at; /* initialize LSN to 0 (start of WAL) */ @@ -358,17 +366,19 @@ static void GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts, TransactionId *children) { + PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; /* We need no extra lock since the GXACT isn't valid yet */ if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS) { - gxact->proc.subxids.overflowed = true; + proc_minimal->overflowed = true; nsubxacts = PGPROC_MAX_CACHED_SUBXIDS; } if (nsubxacts > 0) { - memcpy(gxact->proc.subxids.xids, children, + memcpy(proc->subxids.xids, children, nsubxacts * sizeof(TransactionId)); - gxact->proc.subxids.nxids = nsubxacts; + proc_minimal->nxids = nsubxacts; } } @@ -389,7 +399,7 @@ MarkAsPrepared(GlobalTransaction 
gxact) * Put it into the global ProcArray so TransactionIdIsInProgress considers * the XID as still running. */ - ProcArrayAdd(&gxact->proc); + ProcArrayAdd(&ProcGlobal->allProcs[gxact->pgprocno]); } /* @@ -406,6 +416,7 @@ LockGXact(const char *gid, Oid user) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno]; /* Ignore not-yet-valid GIDs */ if (!gxact->valid) @@ -436,7 +447,7 @@ LockGXact(const char *gid, Oid user) * there may be some other issues as well. Hence disallow until * someone gets motivated to make it work. */ - if (MyDatabaseId != gxact->proc.databaseId) + if (MyDatabaseId != proc->databaseId) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("prepared transaction belongs to another database"), @@ -483,7 +494,7 @@ RemoveGXact(GlobalTransaction gxact) TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts]; /* and put it back in the freelist */ - gxact->proc.links.next = (SHM_QUEUE *) TwoPhaseState->freeGXacts; + gxact->next = TwoPhaseState->freeGXacts; TwoPhaseState->freeGXacts = gxact; LWLockRelease(TwoPhaseStateLock); @@ -518,8 +529,9 @@ TransactionIdIsPrepared(TransactionId xid) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; - if (gxact->valid && gxact->proc.xid == xid) + if (gxact->valid && proc_minimal->xid == xid) { result = true; break; @@ -642,6 +654,8 @@ pg_prepared_xact(PG_FUNCTION_ARGS) while (status->array != NULL && status->currIdx < status->ngxacts) { GlobalTransaction gxact = &status->array[status->currIdx++]; + PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; Datum values[5]; bool nulls[5]; HeapTuple tuple; @@ -656,11 +670,11 @@ pg_prepared_xact(PG_FUNCTION_ARGS) 
MemSet(values, 0, sizeof(values)); MemSet(nulls, 0, sizeof(nulls)); - values[0] = TransactionIdGetDatum(gxact->proc.xid); + values[0] = TransactionIdGetDatum(proc_minimal->xid); values[1] = CStringGetTextDatum(gxact->gid); values[2] = TimestampTzGetDatum(gxact->prepared_at); values[3] = ObjectIdGetDatum(gxact->owner); - values[4] = ObjectIdGetDatum(gxact->proc.databaseId); + values[4] = ObjectIdGetDatum(proc->databaseId); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); @@ -711,10 +725,11 @@ TwoPhaseGetDummyProc(TransactionId xid) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; - if (gxact->proc.xid == xid) + if (proc_minimal->xid == xid) { - result = &gxact->proc; + result = &ProcGlobal->allProcs[gxact->pgprocno]; break; } } @@ -841,7 +856,9 @@ save_state_data(const void *data, uint32 len) void StartPrepare(GlobalTransaction gxact) { - TransactionId xid = gxact->proc.xid; + PGPROC *proc = &ProcGlobal->allProcs[gxact->pgprocno]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; + TransactionId xid = proc_minimal->xid; TwoPhaseFileHeader hdr; TransactionId *children; RelFileNode *commitrels; @@ -865,7 +882,7 @@ StartPrepare(GlobalTransaction gxact) hdr.magic = TWOPHASE_MAGIC; hdr.total_len = 0; /* EndPrepare will fill this in */ hdr.xid = xid; - hdr.database = gxact->proc.databaseId; + hdr.database = proc->databaseId; hdr.prepared_at = gxact->prepared_at; hdr.owner = gxact->owner; hdr.nsubxacts = xactGetCommittedChildren(&children); @@ -913,7 +930,8 @@ StartPrepare(GlobalTransaction gxact) void EndPrepare(GlobalTransaction gxact) { - TransactionId xid = gxact->proc.xid; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; + TransactionId xid = proc_minimal->xid; TwoPhaseFileHeader *hdr; char path[MAXPGPATH]; XLogRecData 
*record; @@ -1021,7 +1039,7 @@ EndPrepare(GlobalTransaction gxact) */ START_CRIT_SECTION(); - MyProc->inCommit = true; + MyProcMinimal->inCommit = true; gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE, records.head); @@ -1069,7 +1087,7 @@ EndPrepare(GlobalTransaction gxact) * checkpoint starting after this will certainly see the gxact as a * candidate for fsyncing. */ - MyProc->inCommit = false; + MyProcMinimal->inCommit = false; END_CRIT_SECTION(); @@ -1242,6 +1260,8 @@ void FinishPreparedTransaction(const char *gid, bool isCommit) { GlobalTransaction gxact; + PGPROC *proc; + PGPROC_MINIMAL *proc_minimal; TransactionId xid; char *buf; char *bufptr; @@ -1260,7 +1280,9 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * try to commit the same GID at once. */ gxact = LockGXact(gid, GetUserId()); - xid = gxact->proc.xid; + proc = &ProcGlobal->allProcs[gxact->pgprocno]; + proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; + xid = proc_minimal->xid; /* * Read and validate the state file @@ -1309,7 +1331,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) hdr->nsubxacts, children, hdr->nabortrels, abortrels); - ProcArrayRemove(&gxact->proc, latestXid); + ProcArrayRemove(proc, latestXid); /* * In case we fail while running the callbacks, mark the gxact invalid so @@ -1540,10 +1562,11 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[gxact->pgprocno]; if (gxact->valid && XLByteLE(gxact->prepare_lsn, redo_horizon)) - xids[nxids++] = gxact->proc.xid; + xids[nxids++] = proc_minimal->xid; } LWLockRelease(TwoPhaseStateLock); @@ -1972,7 +1995,7 @@ RecordTransactionCommitPrepared(TransactionId xid, START_CRIT_SECTION(); /* See notes in RecordTransactionCommit */ - MyProc->inCommit = true; + MyProcMinimal->inCommit = true; /* Emit the XLOG commit record */ xlrec.xid = 
xid; @@ -2037,7 +2060,7 @@ RecordTransactionCommitPrepared(TransactionId xid, TransactionIdCommitTree(xid, nchildren, children); /* Checkpoint can proceed now */ - MyProc->inCommit = false; + MyProcMinimal->inCommit = false; END_CRIT_SECTION(); diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 61dcfed..7c986aa 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -54,7 +54,7 @@ GetNewTransactionId(bool isSubXact) if (IsBootstrapProcessingMode()) { Assert(!isSubXact); - MyProc->xid = BootstrapTransactionId; + MyProcMinimal->xid = BootstrapTransactionId; return BootstrapTransactionId; } @@ -208,20 +208,21 @@ GetNewTransactionId(bool isSubXact) * TransactionId and int fetch/store are atomic. */ volatile PGPROC *myproc = MyProc; + volatile PGPROC_MINIMAL *myprocminimal = MyProcMinimal; if (!isSubXact) - myproc->xid = xid; + myprocminimal->xid = xid; else { - int nxids = myproc->subxids.nxids; + int nxids = myprocminimal->nxids; if (nxids < PGPROC_MAX_CACHED_SUBXIDS) { myproc->subxids.xids[nxids] = xid; - myproc->subxids.nxids = nxids + 1; + myprocminimal->nxids = nxids + 1; } else - myproc->subxids.overflowed = true; + myprocminimal->overflowed = true; } } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index c151d3b..21eb404 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -981,7 +981,7 @@ RecordTransactionCommit(void) * bit fuzzy, but it doesn't matter. */ START_CRIT_SECTION(); - MyProc->inCommit = true; + MyProcMinimal->inCommit = true; SetCurrentTransactionStopTimestamp(); @@ -1155,7 +1155,7 @@ RecordTransactionCommit(void) */ if (markXidCommitted) { - MyProc->inCommit = false; + MyProcMinimal->inCommit = false; END_CRIT_SECTION(); } @@ -2248,7 +2248,7 @@ AbortTransaction(void) * Releasing LW locks is critical since we might try to grab them again * while cleaning up! 
*/ - LWLockReleaseAll(); + FlexLockReleaseAll(); /* Clean up buffer I/O and buffer context locks, too */ AbortBufferIO(); @@ -4138,7 +4138,7 @@ AbortSubTransaction(void) * FIXME This may be incorrect --- Are there some locks we should keep? * Buffer locks, for example? I don't think so but I'm not sure. */ - LWLockReleaseAll(); + FlexLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 6bf2421..9ceee91 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -562,13 +562,13 @@ bootstrap_signals(void) * Begin shutdown of an auxiliary process. This is approximately the equivalent * of ShutdownPostgres() in postinit.c. We can't run transactions in an * auxiliary process, so most of the work of AbortTransaction() is not needed, - * but we do need to make sure we've released any LWLocks we are holding. + * but we do need to make sure we've released any flex locks we are holding. * (This is only critical during an error exit.) */ static void ShutdownAuxiliaryProcess(int code, Datum arg) { - LWLockReleaseAll(); + FlexLockReleaseAll(); } /* ---------------------------------------------------------------- diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 32985a4..23556fa 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -40,6 +40,7 @@ #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procarraylock.h" #include "utils/acl.h" #include "utils/attoptcache.h" #include "utils/datum.h" @@ -222,9 +223,9 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) /* * OK, let's do it. First let other backends know I'm in ANALYZE. 
*/ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->vacuumFlags |= PROC_IN_ANALYZE; - LWLockRelease(ProcArrayLock); + ProcArrayLockAcquire(PAL_EXCLUSIVE); + MyProcMinimal->vacuumFlags |= PROC_IN_ANALYZE; + ProcArrayLockRelease(); /* * Do the normal non-recursive ANALYZE. @@ -249,9 +250,9 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy) * Reset my PGPROC flag. Note: we need this here, and not in vacuum_rel, * because the vacuum flag is cleared by the end-of-xact code. */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->vacuumFlags &= ~PROC_IN_ANALYZE; - LWLockRelease(ProcArrayLock); + ProcArrayLockAcquire(PAL_EXCLUSIVE); + MyProcMinimal->vacuumFlags &= ~PROC_IN_ANALYZE; + ProcArrayLockRelease(); } /* diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index f42504c..480bf82 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -39,6 +39,7 @@ #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procarraylock.h" #include "utils/acl.h" #include "utils/fmgroids.h" #include "utils/guc.h" @@ -892,11 +893,11 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound) * MyProc->xid/xmin, else OldestXmin might appear to go backwards, * which is probably Not Good. 
*/ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - MyProc->vacuumFlags |= PROC_IN_VACUUM; + ProcArrayLockAcquire(PAL_EXCLUSIVE); + MyProcMinimal->vacuumFlags |= PROC_IN_VACUUM; if (for_wraparound) - MyProc->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND; - LWLockRelease(ProcArrayLock); + MyProcMinimal->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND; + ProcArrayLockRelease(); } /* diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index cacedab..f33f573 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -176,9 +176,10 @@ BackgroundWriterMain(void) /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry - * about in bgwriter, but we do have LWLocks, buffers, and temp files. + * about in bgwriter, but we do have flex locks, buffers, and temp + * files. */ - LWLockReleaseAll(); + FlexLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index e9ae1e8..49f07a7 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -281,9 +281,10 @@ CheckpointerMain(void) /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry - * about in checkpointer, but we do have LWLocks, buffers, and temp files. + * about in checkpointer, but we do have flex locks, buffers, and temp + * files. 
*/ - LWLockReleaseAll(); + FlexLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 6758083..14b4368 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -109,6 +109,7 @@ #include "postmaster/syslogger.h" #include "replication/walsender.h" #include "storage/fd.h" +#include "storage/flexlock_internals.h" #include "storage/ipc.h" #include "storage/pg_shmem.h" #include "storage/pmsignal.h" @@ -404,8 +405,6 @@ typedef struct typedef int InheritableSocket; #endif -typedef struct LWLock LWLock; /* ugly kluge */ - /* * Structure contains all variables passed to exec:ed backends */ @@ -426,7 +425,7 @@ typedef struct slock_t *ShmemLock; VariableCache ShmemVariableCache; Backend *ShmemBackendArray; - LWLock *LWLockArray; + FlexLock *FlexLockArray; slock_t *ProcStructLock; PROC_HDR *ProcGlobal; PGPROC *AuxiliaryProcs; @@ -4675,7 +4674,6 @@ MaxLivePostmasterChildren(void) * functions */ extern slock_t *ShmemLock; -extern LWLock *LWLockArray; extern slock_t *ProcStructLock; extern PGPROC *AuxiliaryProcs; extern PMSignalData *PMSignalState; @@ -4720,7 +4718,7 @@ save_backend_variables(BackendParameters *param, Port *port, param->ShmemVariableCache = ShmemVariableCache; param->ShmemBackendArray = ShmemBackendArray; - param->LWLockArray = LWLockArray; + param->FlexLockArray = FlexLockArray; param->ProcStructLock = ProcStructLock; param->ProcGlobal = ProcGlobal; param->AuxiliaryProcs = AuxiliaryProcs; @@ -4943,7 +4941,7 @@ restore_backend_variables(BackendParameters *param, Port *port) ShmemVariableCache = param->ShmemVariableCache; ShmemBackendArray = param->ShmemBackendArray; - LWLockArray = param->LWLockArray; + FlexLockArray = param->FlexLockArray; ProcStructLock = param->ProcStructLock; ProcGlobal = param->ProcGlobal; AuxiliaryProcs = param->AuxiliaryProcs; diff --git a/src/backend/postmaster/walwriter.c 
b/src/backend/postmaster/walwriter.c index 157728e..587443d 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -167,9 +167,9 @@ WalWriterMain(void) /* * These operations are really just a minimal subset of * AbortTransaction(). We don't have very many resources to worry - * about in walwriter, but we do have LWLocks, and perhaps buffers? + * about in walwriter, but we do have flex locks, and perhaps buffers? */ - LWLockReleaseAll(); + FlexLockReleaseAll(); AbortBufferIO(); UnlockBuffers(); /* buffer pins are released here: */ diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index dd2d6ee..dc93b42 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -702,7 +702,7 @@ ProcessStandbyHSFeedbackMessage(void) * safe, and if we're moving it backwards, well, the data is at risk * already since a VACUUM could have just finished calling GetOldestXmin.) */ - MyProc->xmin = reply.xmin; + MyProcMinimal->xmin = reply.xmin; } /* Main loop of walsender process */ diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index e59af33..07356ec 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -141,7 +141,7 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) { BufferTag newTag; /* identity of requested block */ uint32 newHash; /* hash value for newTag */ - LWLockId newPartitionLock; /* buffer partition lock for it */ + FlexLockId newPartitionLock; /* buffer partition lock for it */ int buf_id; /* create a tag so we can lookup the buffer */ @@ -512,10 +512,10 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, { BufferTag newTag; /* identity of requested block */ uint32 newHash; /* hash value for newTag */ - LWLockId newPartitionLock; /* buffer partition lock for it */ + FlexLockId newPartitionLock; /* buffer partition lock for it */ BufferTag 
oldTag; /* previous identity of selected buffer */ uint32 oldHash; /* hash value for oldTag */ - LWLockId oldPartitionLock; /* buffer partition lock for it */ + FlexLockId oldPartitionLock; /* buffer partition lock for it */ BufFlags oldFlags; int buf_id; volatile BufferDesc *buf; @@ -855,7 +855,7 @@ InvalidateBuffer(volatile BufferDesc *buf) { BufferTag oldTag; uint32 oldHash; /* hash value for oldTag */ - LWLockId oldPartitionLock; /* buffer partition lock for it */ + FlexLockId oldPartitionLock; /* buffer partition lock for it */ BufFlags oldFlags; /* Save the original buffer tag before dropping the spinlock */ diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 56c0bd8..a2c570a 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -113,7 +113,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, SUBTRANSShmemSize()); size = add_size(size, TwoPhaseShmemSize()); size = add_size(size, MultiXactShmemSize()); - size = add_size(size, LWLockShmemSize()); + size = add_size(size, FlexLockShmemSize()); size = add_size(size, ProcArrayShmemSize()); size = add_size(size, BackendStatusShmemSize()); size = add_size(size, SInvalShmemSize()); @@ -179,7 +179,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) * needed for InitShmemIndex. 
*/ if (!IsUnderPostmaster) - CreateLWLocks(); + CreateFlexLocks(); /* * Set up shmem.c index hashtable @@ -192,7 +192,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) XLOGShmemInit(); CLOGShmemInit(); SUBTRANSShmemInit(); - TwoPhaseShmemInit(); MultiXactShmemInit(); InitBufferPool(); @@ -213,6 +212,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) InitProcGlobal(); CreateSharedProcArray(); CreateSharedBackendStatus(); + TwoPhaseShmemInit(); /* * Set up shared-inval messaging diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 1a48485..8b6a9ef 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -52,6 +52,7 @@ #include "access/twophase.h" #include "miscadmin.h" #include "storage/procarray.h" +#include "storage/procarraylock.h" #include "storage/spin.h" #include "utils/builtins.h" #include "utils/snapmgr.h" @@ -82,14 +83,17 @@ typedef struct ProcArrayStruct TransactionId lastOverflowedXid; /* - * We declare procs[] as 1 entry because C wants a fixed-size array, but + * We declare pgprocnos[] as 1 entry because C wants a fixed-size array, but * actually it is maxProcs entries long. 
*/ - PGPROC *procs[1]; /* VARIABLE LENGTH ARRAY */ + int pgprocnos[1]; /* VARIABLE LENGTH ARRAY */ } ProcArrayStruct; static ProcArrayStruct *procArray; +static PGPROC *allProcs; +static PGPROC_MINIMAL *allProcs_Minimal; + /* * Bookkeeping for tracking emulated transactions in recovery */ @@ -169,8 +173,8 @@ ProcArrayShmemSize(void) /* Size of the ProcArray structure itself */ #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts) - size = offsetof(ProcArrayStruct, procs); - size = add_size(size, mul_size(sizeof(PGPROC *), PROCARRAY_MAXPROCS)); + size = offsetof(ProcArrayStruct, pgprocnos); + size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS)); /* * During Hot Standby processing we have a data structure called @@ -211,8 +215,8 @@ CreateSharedProcArray(void) /* Create or attach to the ProcArray shared structure */ procArray = (ProcArrayStruct *) ShmemInitStruct("Proc Array", - add_size(offsetof(ProcArrayStruct, procs), - mul_size(sizeof(PGPROC *), + add_size(offsetof(ProcArrayStruct, pgprocnos), + mul_size(sizeof(int), PROCARRAY_MAXPROCS)), &found); @@ -231,6 +235,9 @@ CreateSharedProcArray(void) procArray->lastOverflowedXid = InvalidTransactionId; } + allProcs = ProcGlobal->allProcs; + allProcs_Minimal = ProcGlobal->allProcs_Minimal; + /* Create or attach to the KnownAssignedXids arrays too, if needed */ if (EnableHotStandby) { @@ -253,8 +260,9 @@ void ProcArrayAdd(PGPROC *proc) { ProcArrayStruct *arrayP = procArray; + int index; - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); if (arrayP->numProcs >= arrayP->maxProcs) { @@ -263,16 +271,37 @@ ProcArrayAdd(PGPROC *proc) * fixed supply of PGPROC structs too, and so we should have failed * earlier.) 
*/ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already"))); } - arrayP->procs[arrayP->numProcs] = proc; + /* + * Keep the procs array sorted by (PGPROC *) so that we can utilize + * locality of references much better. This is useful while traversing the + * ProcArray because there is a increased likelyhood of finding the next + * PGPROC structure in the cache. + * + * Since the occurance of adding/removing a proc is much lower than the + * access to the ProcArray itself, the overhead should be marginal + */ + for (index = 0; index < arrayP->numProcs; index++) + { + /* + * If we are the first PGPROC or if we have found our right position in + * the array, break + */ + if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno)) + break; + } + + memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index], + (arrayP->numProcs - index) * sizeof (int)); + arrayP->pgprocnos[index] = proc->pgprocno; arrayP->numProcs++; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -289,6 +318,7 @@ void ProcArrayRemove(PGPROC *proc, TransactionId latestXid) { ProcArrayStruct *arrayP = procArray; + PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[proc->pgprocno]; int index; #ifdef XIDCACHE_DEBUG @@ -297,11 +327,11 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) DisplayXidCache(); #endif - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); if (TransactionIdIsValid(latestXid)) { - Assert(TransactionIdIsValid(proc->xid)); + Assert(TransactionIdIsValid(proc_minimal->xid)); /* Advance global latestCompletedXid while holding the lock */ if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, @@ -311,23 +341,25 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) else { /* Shouldn't be trying to remove a live transaction here */ - Assert(!TransactionIdIsValid(proc->xid)); + 
Assert(!TransactionIdIsValid(proc_minimal->xid)); } for (index = 0; index < arrayP->numProcs; index++) { - if (arrayP->procs[index] == proc) + if (arrayP->pgprocnos[index] == proc->pgprocno) { - arrayP->procs[index] = arrayP->procs[arrayP->numProcs - 1]; - arrayP->procs[arrayP->numProcs - 1] = NULL; /* for debugging */ + /* Keep the PGPROC array sorted. See notes above */ + memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1], + (arrayP->numProcs - index - 1) * sizeof (int)); + arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */ arrayP->numProcs--; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return; } } /* Ooops */ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); elog(LOG, "failed to find proc %p in ProcArray", proc); } @@ -349,56 +381,19 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) void ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) { + PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[proc->pgprocno]; + if (TransactionIdIsValid(latestXid)) { - /* - * We must lock ProcArrayLock while clearing proc->xid, so that we do - * not exit the set of "running" transactions while someone else is - * taking a snapshot. See discussion in - * src/backend/access/transam/README. 
- */ - Assert(TransactionIdIsValid(proc->xid)); - - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); - - proc->xid = InvalidTransactionId; - proc->lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; - /* must be cleared with xid/xmin: */ - proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; - proc->inCommit = false; /* be sure this is cleared in abort */ - proc->recoveryConflictPending = false; - - /* Clear the subtransaction-XID cache too while holding the lock */ - proc->subxids.nxids = 0; - proc->subxids.overflowed = false; - - /* Also advance global latestCompletedXid while holding the lock */ - if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, - latestXid)) - ShmemVariableCache->latestCompletedXid = latestXid; - - LWLockRelease(ProcArrayLock); + Assert(proc == MyProc); + ProcArrayLockClearTransaction(latestXid); } else - { - /* - * If we have no XID, we don't need to lock, since we won't affect - * anyone else's calculation of a snapshot. We might change their - * estimate of global xmin, but that's OK. 
- */ - Assert(!TransactionIdIsValid(proc->xid)); + proc_minimal->xmin = InvalidTransactionId; - proc->lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; - /* must be cleared with xid/xmin: */ - proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; - proc->inCommit = false; /* be sure this is cleared in abort */ - proc->recoveryConflictPending = false; - - Assert(proc->subxids.nxids == 0); - Assert(proc->subxids.overflowed == false); - } + proc->lxid = InvalidLocalTransactionId; + proc_minimal->inCommit = false; /* be sure this is cleared in abort */ + proc->recoveryConflictPending = false; } @@ -413,24 +408,26 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) void ProcArrayClearTransaction(PGPROC *proc) { + PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[proc->pgprocno]; + /* * We can skip locking ProcArrayLock here, because this action does not * actually change anyone's view of the set of running XIDs: our entry is * duplicate with the gxact that has already been inserted into the * ProcArray. 
*/ - proc->xid = InvalidTransactionId; + proc_minimal->xid = InvalidTransactionId; proc->lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; + proc_minimal->xmin = InvalidTransactionId; proc->recoveryConflictPending = false; /* redundant, but just in case */ - proc->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; - proc->inCommit = false; + proc_minimal->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; + proc_minimal->inCommit = false; /* Clear the subtransaction-XID cache too */ - proc->subxids.nxids = 0; - proc->subxids.overflowed = false; + proc_minimal->nxids = 0; + proc_minimal->overflowed = false; } /* @@ -528,7 +525,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) /* * Nobody else is running yet, but take locks anyhow */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); /* * KnownAssignedXids is sorted so we cannot just add the xids, we have to @@ -635,7 +632,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running) Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); Assert(TransactionIdIsValid(ShmemVariableCache->nextXid)); - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); KnownAssignedXidsDisplay(trace_recovery(DEBUG3)); if (standbyState == STANDBY_SNAPSHOT_READY) @@ -690,7 +687,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid, /* * Uses same locking as transaction commit */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); /* * Remove subxids from known-assigned-xacts. 
@@ -703,7 +700,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid, if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid)) procArray->lastOverflowedXid = max_xid; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -795,7 +792,7 @@ TransactionIdIsInProgress(TransactionId xid) errmsg("out of memory"))); } - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); /* * Now that we have the lock, we can check latestCompletedXid; if the @@ -803,7 +800,7 @@ TransactionIdIsInProgress(TransactionId xid) */ if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xid)) { - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); xc_by_latest_xid_inc(); return true; } @@ -811,7 +808,9 @@ TransactionIdIsInProgress(TransactionId xid) /* No shortcuts, gotta grovel through the array */ for (i = 0; i < arrayP->numProcs; i++) { - volatile PGPROC *proc = arrayP->procs[i]; + int pgprocno = arrayP->pgprocnos[i]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId pxid; /* Ignore my own proc --- dealt with it above */ @@ -819,7 +818,7 @@ TransactionIdIsInProgress(TransactionId xid) continue; /* Fetch xid just once - see GetNewTransactionId */ - pxid = proc->xid; + pxid = proc_minimal->xid; if (!TransactionIdIsValid(pxid)) continue; @@ -829,7 +828,7 @@ TransactionIdIsInProgress(TransactionId xid) */ if (TransactionIdEquals(pxid, xid)) { - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); xc_by_main_xid_inc(); return true; } @@ -844,14 +843,14 @@ TransactionIdIsInProgress(TransactionId xid) /* * Step 2: check the cached child-Xids arrays */ - for (j = proc->subxids.nxids - 1; j >= 0; j--) + for (j = proc_minimal->nxids - 1; j >= 0; j--) { /* Fetch xid just once - see GetNewTransactionId */ TransactionId cxid = proc->subxids.xids[j]; if (TransactionIdEquals(cxid, xid)) { - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); 
xc_by_child_xid_inc(); return true; } @@ -864,7 +863,7 @@ TransactionIdIsInProgress(TransactionId xid) * we hold ProcArrayLock. So we can't miss an Xid that we need to * worry about.) */ - if (proc->subxids.overflowed) + if (proc_minimal->overflowed) xids[nxids++] = pxid; } @@ -879,7 +878,7 @@ TransactionIdIsInProgress(TransactionId xid) if (KnownAssignedXidExists(xid)) { - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); xc_by_known_assigned_inc(); return true; } @@ -895,7 +894,7 @@ TransactionIdIsInProgress(TransactionId xid) nxids = KnownAssignedXidsGet(xids, xid); } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* * If none of the relevant caches overflowed, we know the Xid is not @@ -961,14 +960,17 @@ TransactionIdIsActive(TransactionId xid) if (TransactionIdPrecedes(xid, RecentXmin)) return false; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (i = 0; i < arrayP->numProcs; i++) { - volatile PGPROC *proc = arrayP->procs[i]; + int pgprocno = arrayP->pgprocnos[i]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; + TransactionId pxid; /* Fetch xid just once - see GetNewTransactionId */ - TransactionId pxid = proc->xid; + pxid = proc_minimal->xid; if (!TransactionIdIsValid(pxid)) continue; @@ -983,7 +985,7 @@ TransactionIdIsActive(TransactionId xid) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1046,7 +1048,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) /* Cannot look for individual databases during recovery */ Assert(allDbs || !RecoveryInProgress()); - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); /* * We initialize the MIN() calculation with latestCompletedXid + 1. 
This @@ -1060,9 +1062,11 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; - if (ignoreVacuum && (proc->vacuumFlags & PROC_IN_VACUUM)) + if (ignoreVacuum && (proc_minimal->vacuumFlags & PROC_IN_VACUUM)) continue; if (allDbs || @@ -1070,7 +1074,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) proc->databaseId == 0) /* always include WalSender */ { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId xid = proc->xid; + TransactionId xid = proc_minimal->xid; /* First consider the transaction's own Xid, if any */ if (TransactionIdIsNormal(xid) && @@ -1084,7 +1088,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) * have an Xmin but not (yet) an Xid; conversely, if it has an * Xid, that could determine some not-yet-set Xmin. */ - xid = proc->xmin; /* Fetch just once */ + xid = proc_minimal->xmin; /* Fetch just once */ if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, result)) result = xid; @@ -1099,7 +1103,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) */ TransactionId kaxmin = KnownAssignedXidsGetOldestXmin(); - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); if (TransactionIdIsNormal(kaxmin) && TransactionIdPrecedes(kaxmin, result)) @@ -1110,7 +1114,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) /* * No other information needed, so release the lock immediately. 
*/ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age, @@ -1200,6 +1204,8 @@ GetSnapshotData(Snapshot snapshot) int count = 0; int subcount = 0; bool suboverflowed = false; + static TransactionId *xmins = NULL; + int numProcs; Assert(snapshot != NULL); @@ -1235,11 +1241,20 @@ GetSnapshotData(Snapshot snapshot) errmsg("out of memory"))); } + if (xmins == NULL) + { + xmins = malloc(procArray->maxProcs * sizeof(TransactionId)); + if (xmins == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + } + /* * It is sufficient to get shared lock on ProcArrayLock, even if we are * going to set MyProc->xmin. */ - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); /* xmax is always latestCompletedXid + 1 */ xmax = ShmemVariableCache->latestCompletedXid; @@ -1261,6 +1276,8 @@ GetSnapshotData(Snapshot snapshot) if (!snapshot->takenDuringRecovery) { + int *pgprocnos = arrayP->pgprocnos; + /* * Spin over procArray checking xid, xmin, and subxids. The goal is * to gather all active xids, find the lowest xmin, and try to record @@ -1269,23 +1286,25 @@ GetSnapshotData(Snapshot snapshot) * prepared transaction xids are held in KnownAssignedXids, so these * will be seen without needing to loop through procs here. 
*/ - for (index = 0; index < arrayP->numProcs; index++) + numProcs = arrayP->numProcs; + for (index = 0; index < numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = pgprocnos[index]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId xid; /* Ignore procs running LAZY VACUUM */ - if (proc->vacuumFlags & PROC_IN_VACUUM) + if (proc_minimal->vacuumFlags & PROC_IN_VACUUM) + { + xmins[index] = InvalidTransactionId; continue; + } /* Update globalxmin to be the smallest valid xmin */ - xid = proc->xmin; /* fetch just once */ - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, globalxmin)) - globalxmin = xid; + xmins[index] = proc_minimal->xmin; /* fetch just once */ /* Fetch xid just once - see GetNewTransactionId */ - xid = proc->xid; + xid = proc_minimal->xid; /* * If the transaction has been assigned an xid < xmax we add it to @@ -1300,7 +1319,7 @@ GetSnapshotData(Snapshot snapshot) { if (TransactionIdFollowsOrEquals(xid, xmax)) continue; - if (proc != MyProc) + if (proc_minimal != MyProcMinimal) snapshot->xip[count++] = xid; if (TransactionIdPrecedes(xid, xmin)) xmin = xid; @@ -1321,16 +1340,17 @@ GetSnapshotData(Snapshot snapshot) * * Again, our own XIDs are not included in the snapshot. */ - if (!suboverflowed && proc != MyProc) + if (!suboverflowed && proc_minimal != MyProcMinimal) { - if (proc->subxids.overflowed) + if (proc_minimal->overflowed) suboverflowed = true; else { - int nxids = proc->subxids.nxids; + int nxids = proc_minimal->nxids; if (nxids > 0) { + volatile PGPROC *proc = &allProcs[pgprocno]; memcpy(snapshot->subxip + subcount, (void *) proc->subxids.xids, nxids * sizeof(TransactionId)); @@ -1342,6 +1362,7 @@ GetSnapshotData(Snapshot snapshot) } else { + numProcs = 0; /* * We're in hot standby, so get XIDs from KnownAssignedXids. 
* @@ -1372,16 +1393,23 @@ GetSnapshotData(Snapshot snapshot) suboverflowed = true; } - if (!TransactionIdIsValid(MyProc->xmin)) - MyProc->xmin = TransactionXmin = xmin; - - LWLockRelease(ProcArrayLock); + if (!TransactionIdIsValid(MyProcMinimal->xmin)) + MyProcMinimal->xmin = TransactionXmin = xmin; + ProcArrayLockRelease(); /* * Update globalxmin to include actual process xids. This is a slightly * different way of computing it than GetOldestXmin uses, but should give * the same result. */ + for (index = 0; index < numProcs; index++) + { + TransactionId xid = xmins[index]; + if (TransactionIdIsNormal(xid) && + TransactionIdPrecedes(xid, globalxmin)) + globalxmin = xid; + } + if (TransactionIdPrecedes(xmin, globalxmin)) globalxmin = xmin; @@ -1432,18 +1460,20 @@ ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid) return false; /* Get lock so source xact can't end while we're doing this */ - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId xid; /* Ignore procs running LAZY VACUUM */ - if (proc->vacuumFlags & PROC_IN_VACUUM) + if (proc_minimal->vacuumFlags & PROC_IN_VACUUM) continue; - xid = proc->xid; /* fetch just once */ + xid = proc_minimal->xid; /* fetch just once */ if (xid != sourcexid) continue; @@ -1459,7 +1489,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid) /* * Likewise, let's just make real sure its xmin does cover us. 
*/ - xid = proc->xmin; /* fetch just once */ + xid = proc_minimal->xmin; /* fetch just once */ if (!TransactionIdIsNormal(xid) || !TransactionIdPrecedesOrEquals(xid, xmin)) continue; @@ -1470,13 +1500,13 @@ ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid) * GetSnapshotData first, we'll be overwriting a valid xmin here, * so we don't check that.) */ - MyProc->xmin = TransactionXmin = xmin; + MyProcMinimal->xmin = TransactionXmin = xmin; result = true; break; } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1550,7 +1580,7 @@ GetRunningTransactionData(void) * Ensure that no xids enter or leave the procarray while we obtain * snapshot. */ - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); LWLockAcquire(XidGenLock, LW_SHARED); latestCompletedXid = ShmemVariableCache->latestCompletedXid; @@ -1562,12 +1592,14 @@ GetRunningTransactionData(void) */ for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId xid; int nxids; /* Fetch xid just once - see GetNewTransactionId */ - xid = proc->xid; + xid = proc_minimal->xid; /* * We don't need to store transactions that don't have a TransactionId @@ -1585,7 +1617,7 @@ GetRunningTransactionData(void) * Save subtransaction XIDs. Other backends can't add or remove * entries while we're holding XidGenLock. 
*/ - nxids = proc->subxids.nxids; + nxids = proc_minimal->nxids; if (nxids > 0) { memcpy(&xids[count], (void *) proc->subxids.xids, @@ -1593,7 +1625,7 @@ GetRunningTransactionData(void) count += nxids; subcount += nxids; - if (proc->subxids.overflowed) + if (proc_minimal->overflowed) suboverflowed = true; /* @@ -1611,7 +1643,7 @@ GetRunningTransactionData(void) CurrentRunningXacts->latestCompletedXid = latestCompletedXid; /* We don't release XidGenLock here, the caller is responsible for that */ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid)); Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid)); @@ -1644,7 +1676,7 @@ GetOldestActiveTransactionId(void) Assert(!RecoveryInProgress()); - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); oldestRunningXid = ShmemVariableCache->nextXid; @@ -1653,11 +1685,12 @@ GetOldestActiveTransactionId(void) */ for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = proc->xid; + xid = proc_minimal->xid; if (!TransactionIdIsNormal(xid)) continue; @@ -1672,7 +1705,7 @@ GetOldestActiveTransactionId(void) */ } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return oldestRunningXid; } @@ -1705,20 +1738,22 @@ GetTransactionsInCommit(TransactionId **xids_p) xids = (TransactionId *) palloc(arrayP->maxProcs * sizeof(TransactionId)); nxids = 0; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; + TransactionId pxid; /* Fetch xid just once - see 
GetNewTransactionId */ - TransactionId pxid = proc->xid; + pxid = proc_minimal->xid; - if (proc->inCommit && TransactionIdIsValid(pxid)) + if (proc_minimal->inCommit && TransactionIdIsValid(pxid)) xids[nxids++] = pxid; } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); *xids_p = xids; return nxids; @@ -1740,16 +1775,18 @@ HaveTransactionsInCommit(TransactionId *xids, int nxids) ProcArrayStruct *arrayP = procArray; int index; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; + TransactionId pxid; /* Fetch xid just once - see GetNewTransactionId */ - TransactionId pxid = proc->xid; + pxid = proc_minimal->xid; - if (proc->inCommit && TransactionIdIsValid(pxid)) + if (proc_minimal->inCommit && TransactionIdIsValid(pxid)) { int i; @@ -1766,7 +1803,7 @@ HaveTransactionsInCommit(TransactionId *xids, int nxids) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1788,11 +1825,11 @@ BackendPidGetProc(int pid) if (pid == 0) /* never match dummy PGPROCs */ return NULL; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - PGPROC *proc = arrayP->procs[index]; + PGPROC *proc = &allProcs[arrayP->pgprocnos[index]]; if (proc->pid == pid) { @@ -1801,7 +1838,7 @@ BackendPidGetProc(int pid) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1829,20 +1866,22 @@ BackendXidGetPid(TransactionId xid) if (xid == InvalidTransactionId) /* never match invalid xid */ return 0; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + 
volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; - if (proc->xid == xid) + if (proc_minimal->xid == xid) { result = proc->pid; break; } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return result; } @@ -1897,22 +1936,24 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, vxids = (VirtualTransactionId *) palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs); - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; if (proc == MyProc) continue; - if (excludeVacuum & proc->vacuumFlags) + if (excludeVacuum & proc_minimal->vacuumFlags) continue; if (allDbs || proc->databaseId == MyDatabaseId) { /* Fetch xmin just once - might change on us */ - TransactionId pxmin = proc->xmin; + TransactionId pxmin = proc_minimal->xmin; if (excludeXmin0 && !TransactionIdIsValid(pxmin)) continue; @@ -1933,7 +1974,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); *nvxids = count; return vxids; @@ -1992,11 +2033,13 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) errmsg("out of memory"))); } - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; /* Exclude prepared transactions */ if (proc->pid == 0) @@ -2006,7 +2049,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) proc->databaseId == dbOid) { /* Fetch xmin just once 
- can't change on us, but good coding */ - TransactionId pxmin = proc->xmin; + TransactionId pxmin = proc_minimal->xmin; /* * We ignore an invalid pxmin because this means that backend has @@ -2025,7 +2068,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* add the terminator */ vxids[count].backendId = InvalidBackendId; @@ -2046,12 +2089,13 @@ CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) int index; pid_t pid = 0; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; VirtualTransactionId procvxid; - PGPROC *proc = arrayP->procs[index]; GET_VXID_FROM_PGPROC(procvxid, *proc); @@ -2072,7 +2116,7 @@ CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return pid; } @@ -2104,7 +2148,9 @@ MinimumActiveBackends(int min) */ for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; /* * Since we're not holding a lock, need to check that the pointer is @@ -2122,10 +2168,10 @@ MinimumActiveBackends(int min) if (proc == MyProc) continue; /* do not count myself */ + if (proc_minimal->xid == InvalidTransactionId) + continue; /* do not count if no XID assigned */ if (proc->pid == 0) continue; /* do not count prepared xacts */ - if (proc->xid == InvalidTransactionId) - continue; /* do not count if no XID assigned */ if (proc->waitLock != NULL) continue; /* do not count if blocked on a lock */ count++; @@ -2146,11 +2192,12 @@ CountDBBackends(Oid databaseid) int count = 0; int index; - LWLockAcquire(ProcArrayLock, 
LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; if (proc->pid == 0) continue; /* do not count prepared xacts */ @@ -2159,7 +2206,7 @@ CountDBBackends(Oid databaseid) count++; } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return count; } @@ -2175,11 +2222,12 @@ CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending) pid_t pid = 0; /* tell all backends to die */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; if (databaseid == InvalidOid || proc->databaseId == databaseid) { @@ -2200,7 +2248,7 @@ CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending) } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -2213,11 +2261,12 @@ CountUserBackends(Oid roleid) int count = 0; int index; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; if (proc->pid == 0) continue; /* do not count prepared xacts */ @@ -2225,7 +2274,7 @@ CountUserBackends(Oid roleid) count++; } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); return count; } @@ -2273,11 +2322,13 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) *nbackends = *nprepared = 0; - LWLockAcquire(ProcArrayLock, LW_SHARED); + ProcArrayLockAcquire(PAL_SHARED); for (index = 0; index < arrayP->numProcs; index++) { - volatile PGPROC *proc = arrayP->procs[index]; + int pgprocno = 
arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + volatile PGPROC_MINIMAL *proc_minimal = &allProcs_Minimal[pgprocno]; if (proc->databaseId != databaseId) continue; @@ -2291,13 +2342,13 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) else { (*nbackends)++; - if ((proc->vacuumFlags & PROC_IS_AUTOVACUUM) && + if ((proc_minimal->vacuumFlags & PROC_IS_AUTOVACUUM) && nautovacs < MAXAUTOVACPIDS) autovac_pids[nautovacs++] = proc->pid; } } - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); if (!found) return false; /* no conflicting backends, so done */ @@ -2321,8 +2372,8 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) #define XidCacheRemove(i) \ do { \ - MyProc->subxids.xids[i] = MyProc->subxids.xids[MyProc->subxids.nxids - 1]; \ - MyProc->subxids.nxids--; \ + MyProc->subxids.xids[i] = MyProc->subxids.xids[MyProcMinimal->nxids - 1]; \ + MyProcMinimal->nxids--; \ } while (0) /* @@ -2350,7 +2401,7 @@ XidCacheRemoveRunningXids(TransactionId xid, * to abort subtransactions, but pending closer analysis we'd best be * conservative. */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); /* * Under normal circumstances xid and xids[] will be in increasing order, @@ -2361,7 +2412,7 @@ XidCacheRemoveRunningXids(TransactionId xid, { TransactionId anxid = xids[i]; - for (j = MyProc->subxids.nxids - 1; j >= 0; j--) + for (j = MyProcMinimal->nxids - 1; j >= 0; j--) { if (TransactionIdEquals(MyProc->subxids.xids[j], anxid)) { @@ -2377,11 +2428,11 @@ XidCacheRemoveRunningXids(TransactionId xid, * error during AbortSubTransaction. So instead of Assert, emit a * debug warning. 
*/ - if (j < 0 && !MyProc->subxids.overflowed) + if (j < 0 && !MyProcMinimal->overflowed) elog(WARNING, "did not find subXID %u in MyProc", anxid); } - for (j = MyProc->subxids.nxids - 1; j >= 0; j--) + for (j = MyProcMinimal->nxids - 1; j >= 0; j--) { if (TransactionIdEquals(MyProc->subxids.xids[j], xid)) { @@ -2390,7 +2441,7 @@ XidCacheRemoveRunningXids(TransactionId xid, } } /* Ordinarily we should have found it, unless the cache has overflowed */ - if (j < 0 && !MyProc->subxids.overflowed) + if (j < 0 && !MyProcMinimal->overflowed) elog(WARNING, "did not find subXID %u in MyProc", xid); /* Also advance global latestCompletedXid while holding the lock */ @@ -2398,7 +2449,7 @@ XidCacheRemoveRunningXids(TransactionId xid, latestXid)) ShmemVariableCache->latestCompletedXid = latestXid; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } #ifdef XIDCACHE_DEBUG @@ -2565,7 +2616,7 @@ ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, /* * Uses same locking as transaction commit */ - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); KnownAssignedXidsRemoveTree(xid, nsubxids, subxids); @@ -2574,7 +2625,7 @@ ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, max_xid)) ShmemVariableCache->latestCompletedXid = max_xid; - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -2584,9 +2635,9 @@ ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, void ExpireAllKnownAssignedTransactionIds(void) { - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); KnownAssignedXidsRemovePreceding(InvalidTransactionId); - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } /* @@ -2596,9 +2647,9 @@ ExpireAllKnownAssignedTransactionIds(void) void ExpireOldKnownAssignedTransactionIds(TransactionId xid) { - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); KnownAssignedXidsRemovePreceding(xid); - 
LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); } @@ -2820,7 +2871,7 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, { /* must hold lock to compress */ if (!exclusive_lock) - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); KnownAssignedXidsCompress(true); @@ -2828,7 +2879,7 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, /* note: we no longer care about the tail pointer */ if (!exclusive_lock) - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* * If it still won't fit then we're out of memory diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile index e12a854..27eaa97 100644 --- a/src/backend/storage/lmgr/Makefile +++ b/src/backend/storage/lmgr/Makefile @@ -12,7 +12,8 @@ subdir = src/backend/storage/lmgr top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o +OBJS = flexlock.o lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o \ + procarraylock.o predicate.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c index 7e7f6af..4fd7bd7 100644 --- a/src/backend/storage/lmgr/deadlock.c +++ b/src/backend/storage/lmgr/deadlock.c @@ -450,6 +450,7 @@ FindLockCycleRecurse(PGPROC *checkProc, int *nSoftEdges) /* output argument */ { PGPROC *proc; + PGPROC_MINIMAL *proc_minimal; LOCK *lock; PROCLOCK *proclock; SHM_QUEUE *procLocks; @@ -516,6 +517,7 @@ FindLockCycleRecurse(PGPROC *checkProc, while (proclock) { proc = proclock->tag.myProc; + proc_minimal = &ProcGlobal->allProcs_Minimal[proc->pgprocno]; /* A proc never blocks itself */ if (proc != checkProc) @@ -541,7 +543,7 @@ FindLockCycleRecurse(PGPROC *checkProc, * vacuumFlag bit), but we don't do that here to avoid * grabbing ProcArrayLock. 
*/ - if (proc->vacuumFlags & PROC_IS_AUTOVACUUM) + if (proc_minimal->vacuumFlags & PROC_IS_AUTOVACUUM) blocking_autovacuum_proc = proc; /* This proc hard-blocks checkProc */ diff --git a/src/backend/storage/lmgr/flexlock.c b/src/backend/storage/lmgr/flexlock.c new file mode 100644 index 0000000..c88bd24 --- /dev/null +++ b/src/backend/storage/lmgr/flexlock.c @@ -0,0 +1,366 @@ +/*------------------------------------------------------------------------- + * + * flexlock.c + * Low-level routines for managing flex locks. + * + * Flex locks are intended primarily to provide mutual exclusion of access + * to shared-memory data structures. Most, but not all, flex locks are + * lightweight locks (LWLocks). This file contains support routines that + * are used for all types of flex locks, including lwlocks. User-level + * locking should be done with the full lock manager --- which depends on + * LWLocks to protect its shared state. + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/lmgr/flexlock.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "access/clog.h" +#include "access/multixact.h" +#include "access/subtrans.h" +#include "commands/async.h" +#include "storage/flexlock_internals.h" +#include "storage/lwlock.h" +#include "storage/predicate.h" +#include "storage/proc.h" +#include "storage/procarraylock.h" +#include "storage/spin.h" +#include "utils/elog.h" + +/* + * We use this structure to keep track of flex locks held, for release + * during error recovery. The maximum size could be determined at runtime + * if necessary, but it seems unlikely that more than a few locks could + * ever be held simultaneously. 
+ */ +#define MAX_SIMUL_FLEXLOCKS 100 + +int num_held_flexlocks = 0; +FlexLockId held_flexlocks[MAX_SIMUL_FLEXLOCKS]; + +static int lock_addin_request = 0; +static bool lock_addin_request_allowed = true; + +#ifdef LOCK_DEBUG +bool Trace_flexlocks = false; +#endif + +/* + * This points to the array of FlexLocks in shared memory. Backends inherit + * the pointer by fork from the postmaster (except in the EXEC_BACKEND case, + * where we have special measures to pass it down). + */ +FlexLockPadded *FlexLockArray = NULL; + +/* We use the ShmemLock spinlock to protect LWLockAssign */ +extern slock_t *ShmemLock; + +static void FlexLockInit(FlexLock *flex, char locktype); + +/* + * Compute number of FlexLocks to allocate. + */ +int +NumFlexLocks(void) +{ + int numLocks; + + /* + * Possibly this logic should be spread out among the affected modules, + * the same way that shmem space estimation is done. But for now, there + * are few enough users of FlexLocks that we can get away with just keeping + * the knowledge here. + */ + + /* Predefined FlexLocks */ + numLocks = (int) NumFixedFlexLocks; + + /* bufmgr.c needs two for each shared buffer */ + numLocks += 2 * NBuffers; + + /* proc.c needs one for each backend or auxiliary process */ + numLocks += MaxBackends + NUM_AUXILIARY_PROCS; + + /* clog.c needs one per CLOG buffer */ + numLocks += NUM_CLOG_BUFFERS; + + /* subtrans.c needs one per SubTrans buffer */ + numLocks += NUM_SUBTRANS_BUFFERS; + + /* multixact.c needs two SLRU areas */ + numLocks += NUM_MXACTOFFSET_BUFFERS + NUM_MXACTMEMBER_BUFFERS; + + /* async.c needs one per Async buffer */ + numLocks += NUM_ASYNC_BUFFERS; + + /* predicate.c needs one per old serializable xid buffer */ + numLocks += NUM_OLDSERXID_BUFFERS; + + /* + * Add any requested by loadable modules; for backwards-compatibility + * reasons, allocate at least NUM_USER_DEFINED_FLEXLOCKS of them even if + * there are no explicit requests. 
+ */ + lock_addin_request_allowed = false; + numLocks += Max(lock_addin_request, NUM_USER_DEFINED_FLEXLOCKS); + + return numLocks; +} + + +/* + * RequestAddinFlexLocks + * Request that extra FlexLocks be allocated for use by + * a loadable module. + * + * This is only useful if called from the _PG_init hook of a library that + * is loaded into the postmaster via shared_preload_libraries. Once + * shared memory has been allocated, calls will be ignored. (We could + * raise an error, but it seems better to make it a no-op, so that + * libraries containing such calls can be reloaded if needed.) + */ +void +RequestAddinFlexLocks(int n) +{ + if (IsUnderPostmaster || !lock_addin_request_allowed) + return; /* too late */ + lock_addin_request += n; +} + + +/* + * Compute shmem space needed for FlexLocks. + */ +Size +FlexLockShmemSize(void) +{ + Size size; + int numLocks = NumFlexLocks(); + + /* Space for the FlexLock array. */ + size = mul_size(numLocks, FLEX_LOCK_BYTES); + + /* Space for dynamic allocation counter, plus room for alignment. */ + size = add_size(size, 2 * sizeof(int) + FLEX_LOCK_BYTES); + + return size; +} + +/* + * Allocate shmem space for FlexLocks and initialize the locks. + */ +void +CreateFlexLocks(void) +{ + int numLocks = NumFlexLocks(); + Size spaceLocks = FlexLockShmemSize(); + FlexLockPadded *lock; + int *FlexLockCounter; + char *ptr; + int id; + + /* Allocate and zero space */ + ptr = (char *) ShmemAlloc(spaceLocks); + memset(ptr, 0, spaceLocks); + + /* Leave room for dynamic allocation counter */ + ptr += 2 * sizeof(int); + + /* Ensure desired alignment of FlexLock array */ + ptr += FLEX_LOCK_BYTES - ((uintptr_t) ptr) % FLEX_LOCK_BYTES; + + FlexLockArray = (FlexLockPadded *) ptr; + + /* All of the "fixed" FlexLocks are LWLocks - except ProcArrayLock. 
*/ + for (id = 0, lock = FlexLockArray; id < NumFixedFlexLocks; id++, lock++) + { + if (id == ProcArrayLock) + FlexLockInit(&lock->flex, FLEXLOCK_TYPE_PROCARRAYLOCK); + else + FlexLockInit(&lock->flex, FLEXLOCK_TYPE_LWLOCK); + } + + /* + * Initialize the dynamic-allocation counter, which is stored just before + * the first FlexLock. + */ + FlexLockCounter = (int *) ((char *) FlexLockArray - 2 * sizeof(int)); + FlexLockCounter[0] = (int) NumFixedFlexLocks; + FlexLockCounter[1] = numLocks; +} + +/* + * FlexLockAssign - assign a dynamically-allocated FlexLock number + * + * We interlock this using the same spinlock that is used to protect + * ShmemAlloc(). Interlocking is not really necessary during postmaster + * startup, but it is needed if any user-defined code tries to allocate + * LWLocks after startup. + */ +FlexLockId +FlexLockAssign(char locktype) +{ + FlexLockId result; + + /* use volatile pointer to prevent code rearrangement */ + volatile int *FlexLockCounter; + + FlexLockCounter = (int *) ((char *) FlexLockArray - 2 * sizeof(int)); + SpinLockAcquire(ShmemLock); + if (FlexLockCounter[0] >= FlexLockCounter[1]) + { + SpinLockRelease(ShmemLock); + elog(ERROR, "no more FlexLockIds available"); + } + result = (FlexLockId) (FlexLockCounter[0]++); + SpinLockRelease(ShmemLock); + + FlexLockInit(&FlexLockArray[result].flex, locktype); + + return result; +} + +/* + * Initialize a FlexLock. + */ +static void +FlexLockInit(FlexLock *flex, char locktype) +{ + SpinLockInit(&flex->mutex); + flex->releaseOK = true; + flex->locktype = locktype; + /* + * We might need to think a little harder about what should happen here + * if some future type of FlexLock requires more initialization than this. + * For now, this will suffice. + */ +} + +/* + * Remove lock from list of locks held. Usually, but not always, it will + * be the latest-acquired lock; so search array backwards. 
+ */ +void +FlexLockRemember(FlexLockId id) +{ + if (num_held_flexlocks >= MAX_SIMUL_FLEXLOCKS) + elog(PANIC, "too many FlexLocks taken"); + held_flexlocks[num_held_flexlocks++] = id; +} + +/* + * Remove lock from list of locks held. Usually, but not always, it will + * be the latest-acquired lock; so search array backwards. + */ +void +FlexLockForget(FlexLockId id) +{ + int i; + + for (i = num_held_flexlocks; --i >= 0;) + { + if (id == held_flexlocks[i]) + break; + } + if (i < 0) + elog(ERROR, "lock %d is not held", (int) id); + num_held_flexlocks--; + for (; i < num_held_flexlocks; i++) + held_flexlocks[i] = held_flexlocks[i + 1]; +} + +/* + * FlexLockWait - wait until awakened + * + * Since we share the process wait semaphore with the regular lock manager + * and ProcWaitForSignal, and we may need to acquire a FlexLock while one of + * those is pending, it is possible that we get awakened for a reason other + * than being signaled by a FlexLock release. If so, loop back and wait again. + * + * Returns the number of "extra" waits absorbed so that, once we've gotten the + * FlexLock, we can re-increment the sema by the number of additional signals + * received, so that the lock manager or signal manager will see the received + * signal when it next waits. + */ +int +FlexLockWait(FlexLockId id, int mode) +{ + int extraWaits = 0; + + FlexLockDebug("LWLockAcquire", id, "waiting"); + TRACE_POSTGRESQL_FLEXLOCK_WAIT_START(id, mode); + + for (;;) + { + /* "false" means cannot accept cancel/die interrupt here. */ + PGSemaphoreLock(&MyProc->sem, false); + /* + * FLEXTODO: I think we should return this, instead of ignoring it. + * Any non-zero value means "wake up". + */ + if (MyProc->flWaitResult) + break; + extraWaits++; + } + + TRACE_POSTGRESQL_FLEXLOCK_WAIT_DONE(id, mode); + FlexLockDebug("LWLockAcquire", id, "awakened"); + + return extraWaits; +} + +/* + * FlexLockReleaseAll - release all currently-held locks + * + * Used to clean up after ereport(ERROR). 
An important difference between this + * function and retail LWLockRelease calls is that InterruptHoldoffCount is + * unchanged by this operation. This is necessary since InterruptHoldoffCount + * has been set to an appropriate level earlier in error recovery. We could + * decrement it below zero if we allow it to drop for each released lock! + */ +void +FlexLockReleaseAll(void) +{ + while (num_held_flexlocks > 0) + { + FlexLockId id; + FlexLock *flex; + + HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */ + + id = held_flexlocks[num_held_flexlocks - 1]; + flex = &FlexLockArray[id].flex; + if (flex->locktype == FLEXLOCK_TYPE_LWLOCK) + LWLockRelease(id); + else + { + Assert(id == ProcArrayLock); + ProcArrayLockRelease(); + } + } +} + +/* + * FlexLockHeldByMe - test whether my process currently holds a lock + * + * This is meant as debug support only. We do not consider the lock mode. + */ +bool +FlexLockHeldByMe(FlexLockId id) +{ + int i; + + for (i = 0; i < num_held_flexlocks; i++) + { + if (held_flexlocks[i] == id) + return true; + } + return false; +} diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 905502f..edaff09 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -591,7 +591,7 @@ LockAcquireExtended(const LOCKTAG *locktag, bool found; ResourceOwner owner; uint32 hashcode; - LWLockId partitionLock; + FlexLockId partitionLock; int status; bool log_lock = false; @@ -1546,7 +1546,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) LOCALLOCK *locallock; LOCK *lock; PROCLOCK *proclock; - LWLockId partitionLock; + FlexLockId partitionLock; bool wakeupNeeded; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) @@ -1912,7 +1912,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) */ for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++) { - LWLockId partitionLock = FirstLockMgrLock + partition; + FlexLockId partitionLock = 
FirstLockMgrLock + partition; SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]); proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, @@ -2197,7 +2197,7 @@ static bool FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag, uint32 hashcode) { - LWLockId partitionLock = LockHashPartitionLock(hashcode); + FlexLockId partitionLock = LockHashPartitionLock(hashcode); Oid relid = locktag->locktag_field2; uint32 i; @@ -2281,7 +2281,7 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock) LockMethod lockMethodTable = LockMethods[DEFAULT_LOCKMETHOD]; LOCKTAG *locktag = &locallock->tag.lock; PROCLOCK *proclock = NULL; - LWLockId partitionLock = LockHashPartitionLock(locallock->hashcode); + FlexLockId partitionLock = LockHashPartitionLock(locallock->hashcode); Oid relid = locktag->locktag_field2; uint32 f; @@ -2382,7 +2382,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) SHM_QUEUE *procLocks; PROCLOCK *proclock; uint32 hashcode; - LWLockId partitionLock; + FlexLockId partitionLock; int count = 0; int fast_count = 0; @@ -2593,7 +2593,7 @@ LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc, PROCLOCKTAG proclocktag; uint32 hashcode; uint32 proclock_hashcode; - LWLockId partitionLock; + FlexLockId partitionLock; bool wakeupNeeded; hashcode = LockTagHashCode(locktag); @@ -2827,7 +2827,7 @@ PostPrepare_Locks(TransactionId xid) */ for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++) { - LWLockId partitionLock = FirstLockMgrLock + partition; + FlexLockId partitionLock = FirstLockMgrLock + partition; SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]); proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, @@ -3188,9 +3188,10 @@ GetRunningTransactionLocks(int *nlocks) proclock->tag.myLock->tag.locktag_type == LOCKTAG_RELATION) { PGPROC *proc = proclock->tag.myProc; + PGPROC_MINIMAL *proc_minimal = &ProcGlobal->allProcs_Minimal[proc->pgprocno]; LOCK *lock = proclock->tag.myLock; - 
accessExclusiveLocks[index].xid = proc->xid; + accessExclusiveLocks[index].xid = proc_minimal->xid; accessExclusiveLocks[index].dbOid = lock->tag.locktag_field1; accessExclusiveLocks[index].relOid = lock->tag.locktag_field2; @@ -3342,7 +3343,7 @@ lock_twophase_recover(TransactionId xid, uint16 info, uint32 hashcode; uint32 proclock_hashcode; int partition; - LWLockId partitionLock; + FlexLockId partitionLock; LockMethod lockMethodTable; Assert(len == sizeof(TwoPhaseLockRecord)); diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 079eb29..ce6c931 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -21,74 +21,23 @@ */ #include "postgres.h" -#include "access/clog.h" -#include "access/multixact.h" -#include "access/subtrans.h" -#include "commands/async.h" #include "miscadmin.h" #include "pg_trace.h" +#include "storage/flexlock_internals.h" #include "storage/ipc.h" -#include "storage/predicate.h" #include "storage/proc.h" #include "storage/spin.h" - -/* We use the ShmemLock spinlock to protect LWLockAssign */ -extern slock_t *ShmemLock; - - typedef struct LWLock { - slock_t mutex; /* Protects LWLock and queue of PGPROCs */ - bool releaseOK; /* T if ok to release waiters */ + FlexLock flex; /* common FlexLock infrastructure */ char exclusive; /* # of exclusive holders (0 or 1) */ int shared; /* # of shared holders (0..MaxBackends) */ - PGPROC *head; /* head of list of waiting PGPROCs */ - PGPROC *tail; /* tail of list of waiting PGPROCs */ - /* tail is undefined when head is NULL */ } LWLock; -/* - * All the LWLock structs are allocated as an array in shared memory. - * (LWLockIds are indexes into the array.) We force the array stride to - * be a power of 2, which saves a few cycles in indexing, but more - * importantly also ensures that individual LWLocks don't cross cache line - * boundaries. This reduces cache contention problems, especially on AMD - * Opterons. 
(Of course, we have to also ensure that the array start - * address is suitably aligned.) - * - * LWLock is between 16 and 32 bytes on all known platforms, so these two - * cases are sufficient. - */ -#define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 16 ? 16 : 32) - -typedef union LWLockPadded -{ - LWLock lock; - char pad[LWLOCK_PADDED_SIZE]; -} LWLockPadded; - -/* - * This points to the array of LWLocks in shared memory. Backends inherit - * the pointer by fork from the postmaster (except in the EXEC_BACKEND case, - * where we have special measures to pass it down). - */ -NON_EXEC_STATIC LWLockPadded *LWLockArray = NULL; - - -/* - * We use this structure to keep track of locked LWLocks for release - * during error recovery. The maximum size could be determined at runtime - * if necessary, but it seems unlikely that more than a few locks could - * ever be held simultaneously. - */ -#define MAX_SIMUL_LWLOCKS 100 - -static int num_held_lwlocks = 0; -static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS]; - -static int lock_addin_request = 0; -static bool lock_addin_request_allowed = true; +#define LWLockPointer(lockid) \ + (AssertMacro(FlexLockArray[lockid].flex.locktype == FLEXLOCK_TYPE_LWLOCK), \ + (volatile LWLock *) &FlexLockArray[lockid]) #ifdef LWLOCK_STATS static int counts_for_pid = 0; @@ -98,27 +47,17 @@ static int *block_counts; #endif #ifdef LOCK_DEBUG -bool Trace_lwlocks = false; - inline static void -PRINT_LWDEBUG(const char *where, LWLockId lockid, const volatile LWLock *lock) +PRINT_LWDEBUG(const char *where, FlexLockId lockid, const volatile LWLock *lock) { - if (Trace_lwlocks) + if (Trace_flexlocks) elog(LOG, "%s(%d): excl %d shared %d head %p rOK %d", where, (int) lockid, - (int) lock->exclusive, lock->shared, lock->head, - (int) lock->releaseOK); -} - -inline static void -LOG_LWDEBUG(const char *where, LWLockId lockid, const char *msg) -{ - if (Trace_lwlocks) - elog(LOG, "%s(%d): %s", where, (int) lockid, msg); + (int) lock->exclusive, lock->shared, 
lock->flex.head, + (int) lock->flex.releaseOK); } #else /* not LOCK_DEBUG */ #define PRINT_LWDEBUG(a,b,c) -#define LOG_LWDEBUG(a,b,c) #endif /* LOCK_DEBUG */ #ifdef LWLOCK_STATS @@ -127,8 +66,8 @@ static void print_lwlock_stats(int code, Datum arg) { int i; - int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - int numLocks = LWLockCounter[1]; + int *FlexLockCounter = (int *) ((char *) FlexLockArray - 2 * sizeof(int)); + int numLocks = FlexLockCounter[1]; /* Grab an LWLock to keep different backends from mixing reports */ LWLockAcquire(0, LW_EXCLUSIVE); @@ -145,173 +84,15 @@ print_lwlock_stats(int code, Datum arg) } #endif /* LWLOCK_STATS */ - /* - * Compute number of LWLocks to allocate. + * LWLockAssign - initialize a new lwlock and return its ID */ -int -NumLWLocks(void) -{ - int numLocks; - - /* - * Possibly this logic should be spread out among the affected modules, - * the same way that shmem space estimation is done. But for now, there - * are few enough users of LWLocks that we can get away with just keeping - * the knowledge here. - */ - - /* Predefined LWLocks */ - numLocks = (int) NumFixedLWLocks; - - /* bufmgr.c needs two for each shared buffer */ - numLocks += 2 * NBuffers; - - /* proc.c needs one for each backend or auxiliary process */ - numLocks += MaxBackends + NUM_AUXILIARY_PROCS; - - /* clog.c needs one per CLOG buffer */ - numLocks += NUM_CLOG_BUFFERS; - - /* subtrans.c needs one per SubTrans buffer */ - numLocks += NUM_SUBTRANS_BUFFERS; - - /* multixact.c needs two SLRU areas */ - numLocks += NUM_MXACTOFFSET_BUFFERS + NUM_MXACTMEMBER_BUFFERS; - - /* async.c needs one per Async buffer */ - numLocks += NUM_ASYNC_BUFFERS; - - /* predicate.c needs one per old serializable xid buffer */ - numLocks += NUM_OLDSERXID_BUFFERS; - - /* - * Add any requested by loadable modules; for backwards-compatibility - * reasons, allocate at least NUM_USER_DEFINED_LWLOCKS of them even if - * there are no explicit requests. 
- */ - lock_addin_request_allowed = false; - numLocks += Max(lock_addin_request, NUM_USER_DEFINED_LWLOCKS); - - return numLocks; -} - - -/* - * RequestAddinLWLocks - * Request that extra LWLocks be allocated for use by - * a loadable module. - * - * This is only useful if called from the _PG_init hook of a library that - * is loaded into the postmaster via shared_preload_libraries. Once - * shared memory has been allocated, calls will be ignored. (We could - * raise an error, but it seems better to make it a no-op, so that - * libraries containing such calls can be reloaded if needed.) - */ -void -RequestAddinLWLocks(int n) -{ - if (IsUnderPostmaster || !lock_addin_request_allowed) - return; /* too late */ - lock_addin_request += n; -} - - -/* - * Compute shmem space needed for LWLocks. - */ -Size -LWLockShmemSize(void) -{ - Size size; - int numLocks = NumLWLocks(); - - /* Space for the LWLock array. */ - size = mul_size(numLocks, sizeof(LWLockPadded)); - - /* Space for dynamic allocation counter, plus room for alignment. */ - size = add_size(size, 2 * sizeof(int) + LWLOCK_PADDED_SIZE); - - return size; -} - - -/* - * Allocate shmem space for LWLocks and initialize the locks. 
- */ -void -CreateLWLocks(void) -{ - int numLocks = NumLWLocks(); - Size spaceLocks = LWLockShmemSize(); - LWLockPadded *lock; - int *LWLockCounter; - char *ptr; - int id; - - /* Allocate space */ - ptr = (char *) ShmemAlloc(spaceLocks); - - /* Leave room for dynamic allocation counter */ - ptr += 2 * sizeof(int); - - /* Ensure desired alignment of LWLock array */ - ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE; - - LWLockArray = (LWLockPadded *) ptr; - - /* - * Initialize all LWLocks to "unlocked" state - */ - for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++) - { - SpinLockInit(&lock->lock.mutex); - lock->lock.releaseOK = true; - lock->lock.exclusive = 0; - lock->lock.shared = 0; - lock->lock.head = NULL; - lock->lock.tail = NULL; - } - - /* - * Initialize the dynamic-allocation counter, which is stored just before - * the first LWLock. - */ - LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - LWLockCounter[0] = (int) NumFixedLWLocks; - LWLockCounter[1] = numLocks; -} - - -/* - * LWLockAssign - assign a dynamically-allocated LWLock number - * - * We interlock this using the same spinlock that is used to protect - * ShmemAlloc(). Interlocking is not really necessary during postmaster - * startup, but it is needed if any user-defined code tries to allocate - * LWLocks after startup. 
- */ -LWLockId +FlexLockId LWLockAssign(void) { - LWLockId result; - - /* use volatile pointer to prevent code rearrangement */ - volatile int *LWLockCounter; - - LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - SpinLockAcquire(ShmemLock); - if (LWLockCounter[0] >= LWLockCounter[1]) - { - SpinLockRelease(ShmemLock); - elog(ERROR, "no more LWLockIds available"); - } - result = (LWLockId) (LWLockCounter[0]++); - SpinLockRelease(ShmemLock); - return result; + return FlexLockAssign(FLEXLOCK_TYPE_LWLOCK); } - /* * LWLockAcquire - acquire a lightweight lock in the specified mode * @@ -320,9 +101,9 @@ LWLockAssign(void) * Side effect: cancel/die interrupts are held off until lock release. */ void -LWLockAcquire(LWLockId lockid, LWLockMode mode) +LWLockAcquire(FlexLockId lockid, LWLockMode mode) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = LWLockPointer(lockid); PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; @@ -333,8 +114,8 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) /* Set up local count state first time through in a given process */ if (counts_for_pid != MyProcPid) { - int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - int numLocks = LWLockCounter[1]; + int *FlexLockCounter = (int *) ((char *) FlexLockArray - 2 * sizeof(int)); + int numLocks = FlexLockCounter[1]; sh_acquire_counts = calloc(numLocks, sizeof(int)); ex_acquire_counts = calloc(numLocks, sizeof(int)); @@ -356,10 +137,6 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) */ Assert(!(proc == NULL && IsUnderPostmaster)); - /* Ensure we will have room to remember the lock */ - if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS) - elog(ERROR, "too many LWLocks taken"); - /* * Lock out cancel/die interrupts until we exit the code section protected * by the LWLock. This ensures that interrupts will not interfere with @@ -388,11 +165,11 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) bool mustwait; /* Acquire mutex. 
Time spent holding mutex should be short! */ - SpinLockAcquire(&lock->mutex); + SpinLockAcquire(&lock->flex.mutex); /* If retrying, allow LWLockRelease to release waiters again */ if (retry) - lock->releaseOK = true; + lock->flex.releaseOK = true; /* If I can get the lock, do so quickly. */ if (mode == LW_EXCLUSIVE) @@ -419,72 +196,30 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) if (!mustwait) break; /* got the lock */ - /* - * Add myself to wait queue. - * - * If we don't have a PGPROC structure, there's no way to wait. This - * should never occur, since MyProc should only be null during shared - * memory initialization. - */ - if (proc == NULL) - elog(PANIC, "cannot wait without a PGPROC structure"); - - proc->lwWaiting = true; - proc->lwExclusive = (mode == LW_EXCLUSIVE); - proc->lwWaitLink = NULL; - if (lock->head == NULL) - lock->head = proc; - else - lock->tail->lwWaitLink = proc; - lock->tail = proc; + /* Add myself to wait queue. */ + FlexLockJoinWaitQueue(lock, (int) mode); /* Can release the mutex now */ - SpinLockRelease(&lock->mutex); - - /* - * Wait until awakened. - * - * Since we share the process wait semaphore with the regular lock - * manager and ProcWaitForSignal, and we may need to acquire an LWLock - * while one of those is pending, it is possible that we get awakened - * for a reason other than being signaled by LWLockRelease. If so, - * loop back and wait again. Once we've gotten the LWLock, - * re-increment the sema by the number of additional signals received, - * so that the lock manager or signal manager will see the received - * signal when it next waits. - */ - LOG_LWDEBUG("LWLockAcquire", lockid, "waiting"); + SpinLockRelease(&lock->flex.mutex); + + /* Wait until awakened. */ + extraWaits += FlexLockWait(lockid, mode); #ifdef LWLOCK_STATS block_counts[lockid]++; #endif - TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode); - - for (;;) - { - /* "false" means cannot accept cancel/die interrupt here. 
*/ - PGSemaphoreLock(&proc->sem, false); - if (!proc->lwWaiting) - break; - extraWaits++; - } - - TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode); - - LOG_LWDEBUG("LWLockAcquire", lockid, "awakened"); - /* Now loop back and try to acquire lock again. */ retry = true; } /* We are done updating shared state of the lock itself. */ - SpinLockRelease(&lock->mutex); + SpinLockRelease(&lock->flex.mutex); - TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode); + TRACE_POSTGRESQL_FLEXLOCK_ACQUIRE(lockid, mode); /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = lockid; + FlexLockRemember(lockid); /* * Fix the process wait semaphore's count for any absorbed wakeups. @@ -501,17 +236,13 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode) * If successful, cancel/die interrupts are held off until lock release. */ bool -LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) +LWLockConditionalAcquire(FlexLockId lockid, LWLockMode mode) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = LWLockPointer(lockid); bool mustwait; PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock); - /* Ensure we will have room to remember the lock */ - if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS) - elog(ERROR, "too many LWLocks taken"); - /* * Lock out cancel/die interrupts until we exit the code section protected * by the LWLock. This ensures that interrupts will not interfere with @@ -520,7 +251,7 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) HOLD_INTERRUPTS(); /* Acquire mutex. Time spent holding mutex should be short! */ - SpinLockAcquire(&lock->mutex); + SpinLockAcquire(&lock->flex.mutex); /* If I can get the lock, do so quickly. */ if (mode == LW_EXCLUSIVE) @@ -545,20 +276,20 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) } /* We are done updating shared state of the lock itself. 
*/ - SpinLockRelease(&lock->mutex); + SpinLockRelease(&lock->flex.mutex); if (mustwait) { /* Failed to get lock, so release interrupt holdoff */ RESUME_INTERRUPTS(); - LOG_LWDEBUG("LWLockConditionalAcquire", lockid, "failed"); - TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(lockid, mode); + FlexLockDebug("LWLockConditionalAcquire", lockid, "failed"); + TRACE_POSTGRESQL_FLEXLOCK_CONDACQUIRE_FAIL(lockid, mode); } else { /* Add lock to list of locks held by this backend */ - held_lwlocks[num_held_lwlocks++] = lockid; - TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(lockid, mode); + FlexLockRemember(lockid); + TRACE_POSTGRESQL_FLEXLOCK_CONDACQUIRE(lockid, mode); } return !mustwait; @@ -568,32 +299,18 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) * LWLockRelease - release a previously acquired lock */ void -LWLockRelease(LWLockId lockid) +LWLockRelease(FlexLockId lockid) { - volatile LWLock *lock = &(LWLockArray[lockid].lock); + volatile LWLock *lock = LWLockPointer(lockid); PGPROC *head; PGPROC *proc; - int i; PRINT_LWDEBUG("LWLockRelease", lockid, lock); - /* - * Remove lock from list of locks held. Usually, but not always, it will - * be the latest-acquired lock; so search array backwards. - */ - for (i = num_held_lwlocks; --i >= 0;) - { - if (lockid == held_lwlocks[i]) - break; - } - if (i < 0) - elog(ERROR, "lock %d is not held", (int) lockid); - num_held_lwlocks--; - for (; i < num_held_lwlocks; i++) - held_lwlocks[i] = held_lwlocks[i + 1]; + FlexLockForget(lockid); /* Acquire mutex. Time spent holding mutex should be short! */ - SpinLockAcquire(&lock->mutex); + SpinLockAcquire(&lock->flex.mutex); /* Release my hold on lock */ if (lock->exclusive > 0) @@ -610,10 +327,10 @@ LWLockRelease(LWLockId lockid) * if someone has already awakened waiters that haven't yet acquired the * lock. 
*/ - head = lock->head; + head = lock->flex.head; if (head != NULL) { - if (lock->exclusive == 0 && lock->shared == 0 && lock->releaseOK) + if (lock->exclusive == 0 && lock->shared == 0 && lock->flex.releaseOK) { /* * Remove the to-be-awakened PGPROCs from the queue. If the front @@ -621,17 +338,17 @@ LWLockRelease(LWLockId lockid) * as many waiters as want shared access. */ proc = head; - if (!proc->lwExclusive) + if (proc->flWaitMode != LW_EXCLUSIVE) { - while (proc->lwWaitLink != NULL && - !proc->lwWaitLink->lwExclusive) - proc = proc->lwWaitLink; + while (proc->flWaitLink != NULL && + proc->flWaitLink->flWaitMode != LW_EXCLUSIVE) + proc = proc->flWaitLink; } /* proc is now the last PGPROC to be released */ - lock->head = proc->lwWaitLink; - proc->lwWaitLink = NULL; + lock->flex.head = proc->flWaitLink; + proc->flWaitLink = NULL; /* prevent additional wakeups until retryer gets to run */ - lock->releaseOK = false; + lock->flex.releaseOK = false; } else { @@ -641,20 +358,20 @@ LWLockRelease(LWLockId lockid) } /* We are done updating shared state of the lock itself. */ - SpinLockRelease(&lock->mutex); + SpinLockRelease(&lock->flex.mutex); - TRACE_POSTGRESQL_LWLOCK_RELEASE(lockid); + TRACE_POSTGRESQL_FLEXLOCK_RELEASE(lockid); /* * Awaken any waiters I removed from the queue. */ while (head != NULL) { - LOG_LWDEBUG("LWLockRelease", lockid, "release waiter"); + FlexLockDebug("LWLockRelease", lockid, "release waiter"); proc = head; - head = proc->lwWaitLink; - proc->lwWaitLink = NULL; - proc->lwWaiting = false; + head = proc->flWaitLink; + proc->flWaitLink = NULL; + proc->flWaitResult = 1; /* any non-zero value will do */ PGSemaphoreUnlock(&proc->sem); } @@ -664,43 +381,17 @@ LWLockRelease(LWLockId lockid) RESUME_INTERRUPTS(); } - -/* - * LWLockReleaseAll - release all currently-held locks - * - * Used to clean up after ereport(ERROR). 
An important difference between this - * function and retail LWLockRelease calls is that InterruptHoldoffCount is - * unchanged by this operation. This is necessary since InterruptHoldoffCount - * has been set to an appropriate level earlier in error recovery. We could - * decrement it below zero if we allow it to drop for each released lock! - */ -void -LWLockReleaseAll(void) -{ - while (num_held_lwlocks > 0) - { - HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */ - - LWLockRelease(held_lwlocks[num_held_lwlocks - 1]); - } -} - - /* * LWLockHeldByMe - test whether my process currently holds a lock * - * This is meant as debug support only. We do not distinguish whether the - * lock is held shared or exclusive. + * The following convenience routine might not be worthwhile but for the fact + * that we've had a function by this name since long before FlexLocks existed. + * Callers who want to check whether an arbitrary FlexLock (that may or may not + * be an LWLock) is held can use FlexLockHeldByMe directly. 
*/ bool -LWLockHeldByMe(LWLockId lockid) +LWLockHeldByMe(FlexLockId lockid) { - int i; - - for (i = 0; i < num_held_lwlocks; i++) - { - if (held_lwlocks[i] == lockid) - return true; - } - return false; + AssertMacro(FlexLockArray[lockid].flex.locktype == FLEXLOCK_TYPE_LWLOCK); + return FlexLockHeldByMe(lockid); } diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 345f6f5..15978a4 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -239,7 +239,7 @@ #define PredicateLockHashPartition(hashcode) \ ((hashcode) % NUM_PREDICATELOCK_PARTITIONS) #define PredicateLockHashPartitionLock(hashcode) \ - ((LWLockId) (FirstPredicateLockMgrLock + PredicateLockHashPartition(hashcode))) + ((FlexLockId) (FirstPredicateLockMgrLock + PredicateLockHashPartition(hashcode))) #define NPREDICATELOCKTARGETENTS() \ mul_size(max_predicate_locks_per_xact, add_size(MaxBackends, max_prepared_xacts)) @@ -1840,7 +1840,7 @@ PageIsPredicateLocked(Relation relation, BlockNumber blkno) { PREDICATELOCKTARGETTAG targettag; uint32 targettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; PREDICATELOCKTARGET *target; SET_PREDICATELOCKTARGETTAG_PAGE(targettag, @@ -2073,7 +2073,7 @@ DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag) if (TargetTagIsCoveredBy(oldtargettag, *newtargettag)) { uint32 oldtargettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; PREDICATELOCK *rmpredlock; oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag); @@ -2285,7 +2285,7 @@ CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag, PREDICATELOCKTARGET *target; PREDICATELOCKTAG locktag; PREDICATELOCK *lock; - LWLockId partitionLock; + FlexLockId partitionLock; bool found; partitionLock = PredicateLockHashPartitionLock(targettaghash); @@ -2586,10 +2586,10 @@ TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag, bool removeOld) { uint32 oldtargettaghash; - LWLockId 
oldpartitionLock; + FlexLockId oldpartitionLock; PREDICATELOCKTARGET *oldtarget; uint32 newtargettaghash; - LWLockId newpartitionLock; + FlexLockId newpartitionLock; bool found; bool outOfShmem = false; @@ -3578,7 +3578,7 @@ ClearOldPredicateLocks(void) PREDICATELOCKTARGET *target; PREDICATELOCKTARGETTAG targettag; uint32 targettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; tag = predlock->tag; target = tag.myTarget; @@ -3656,7 +3656,7 @@ ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial, PREDICATELOCKTARGET *target; PREDICATELOCKTARGETTAG targettag; uint32 targettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; nextpredlock = (PREDICATELOCK *) SHMQueueNext(&(sxact->predicateLocks), @@ -4034,7 +4034,7 @@ static void CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag) { uint32 targettaghash; - LWLockId partitionLock; + FlexLockId partitionLock; PREDICATELOCKTARGET *target; PREDICATELOCK *predlock; PREDICATELOCK *mypredlock = NULL; diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index eda3a98..edb225a 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -36,6 +36,7 @@ #include #include "access/transam.h" +#include "access/twophase.h" #include "access/xact.h" #include "miscadmin.h" #include "postmaster/autovacuum.h" @@ -45,6 +46,7 @@ #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procarraylock.h" #include "storage/procsignal.h" #include "storage/spin.h" #include "utils/timestamp.h" @@ -57,6 +59,7 @@ bool log_lock_waits = false; /* Pointer to this process's PGPROC struct, if any */ PGPROC *MyProc = NULL; +PGPROC_MINIMAL *MyProcMinimal = NULL; /* * This spinlock protects the freelist of recycled PGPROC structures. 
@@ -70,6 +73,7 @@ NON_EXEC_STATIC slock_t *ProcStructLock = NULL; /* Pointers to shared-memory structures */ PROC_HDR *ProcGlobal = NULL; NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL; +PGPROC *PreparedXactProcs = NULL; /* If we are waiting for a lock, this points to the associated LOCALLOCK */ static LOCALLOCK *lockAwaited = NULL; @@ -106,13 +110,19 @@ ProcGlobalShmemSize(void) /* ProcGlobal */ size = add_size(size, sizeof(PROC_HDR)); - /* AuxiliaryProcs */ - size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC))); /* MyProcs, including autovacuum workers and launcher */ size = add_size(size, mul_size(MaxBackends, sizeof(PGPROC))); + /* AuxiliaryProcs */ + size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC))); + /* Prepared xacts */ + size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGPROC))); /* ProcStructLock */ size = add_size(size, sizeof(slock_t)); + size = add_size(size, mul_size(MaxBackends, sizeof(PGPROC_MINIMAL))); + size = add_size(size, mul_size(NUM_AUXILIARY_PROCS, sizeof(PGPROC_MINIMAL))); + size = add_size(size, mul_size(max_prepared_xacts, sizeof(PGPROC_MINIMAL))); + return size; } @@ -157,10 +167,11 @@ void InitProcGlobal(void) { PGPROC *procs; + PGPROC_MINIMAL *procs_minimal; int i, j; bool found; - uint32 TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS; + uint32 TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts; /* Create the ProcGlobal shared structure */ ProcGlobal = (PROC_HDR *) @@ -195,14 +206,38 @@ InitProcGlobal(void) (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); MemSet(procs, 0, TotalProcs * sizeof(PGPROC)); + + /* + * Also allocate a separate array of PROC_MINIMAL structures. We keep this + * out of band of the main PGPROC array to ensure the very heavily accessed + * members of the PGPROC structure are stored contiguously in the memory. + * This provides significant performance benefits, especially on a + * multiprocessor system by improving cache hit ratio. 
+ * + * Note: We separate the members needed by GetSnapshotData since that's the + * most frequently accessed code path. There is one PROC_MINIMAL structure + * for every PGPROC structure. + */ + procs_minimal = (PGPROC_MINIMAL *) ShmemAlloc(TotalProcs * sizeof(PGPROC_MINIMAL)); + MemSet(procs_minimal, 0, TotalProcs * sizeof(PGPROC_MINIMAL)); + ProcGlobal->allProcs_Minimal = procs_minimal; + for (i = 0; i < TotalProcs; i++) { /* Common initialization for all PGPROCs, regardless of type. */ - /* Set up per-PGPROC semaphore, latch, and backendLock */ - PGSemaphoreCreate(&(procs[i].sem)); - InitSharedLatch(&(procs[i].procLatch)); - procs[i].backendLock = LWLockAssign(); + /* + * Set up per-PGPROC semaphore, latch, and backendLock. Prepared + * xact dummy PGPROCs don't need these though - they're never + * associated with a real process + */ + if (i < MaxBackends + NUM_AUXILIARY_PROCS) + { + PGSemaphoreCreate(&(procs[i].sem)); + InitSharedLatch(&(procs[i].procLatch)); + procs[i].backendLock = LWLockAssign(); + } + procs[i].pgprocno = i; /* * Newly created PGPROCs for normal backends or for autovacuum must @@ -234,6 +269,7 @@ InitProcGlobal(void) * auxiliary proceses. 
*/ AuxiliaryProcs = &procs[MaxBackends]; + PreparedXactProcs = &procs[MaxBackends + NUM_AUXILIARY_PROCS]; /* Create ProcStructLock spinlock, too */ ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t)); @@ -296,6 +332,7 @@ InitProcess(void) (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already"))); } + MyProcMinimal = &ProcGlobal->allProcs_Minimal[MyProc->pgprocno]; /* * Now that we have a PGPROC, mark ourselves as an active postmaster @@ -313,21 +350,21 @@ InitProcess(void) SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; MyProc->lxid = InvalidLocalTransactionId; - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; + MyProcMinimal->xid = InvalidTransactionId; + MyProcMinimal->xmin = InvalidTransactionId; MyProc->pid = MyProcPid; /* backendId, databaseId and roleId will be filled in later */ MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; - MyProc->inCommit = false; - MyProc->vacuumFlags = 0; + MyProcMinimal->inCommit = false; + MyProcMinimal->vacuumFlags = 0; /* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */ if (IsAutoVacuumWorkerProcess()) - MyProc->vacuumFlags |= PROC_IS_AUTOVACUUM; - MyProc->lwWaiting = false; - MyProc->lwExclusive = false; - MyProc->lwWaitLink = NULL; + MyProcMinimal->vacuumFlags |= PROC_IS_AUTOVACUUM; + MyProc->flWaitResult = 0; + MyProc->flWaitMode = 0; + MyProc->flWaitLink = NULL; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; #ifdef USE_ASSERT_CHECKING @@ -462,6 +499,7 @@ InitAuxiliaryProcess(void) ((volatile PGPROC *) auxproc)->pid = MyProcPid; MyProc = auxproc; + MyProcMinimal = &ProcGlobal->allProcs_Minimal[auxproc->pgprocno]; SpinLockRelease(ProcStructLock); @@ -472,16 +510,16 @@ InitAuxiliaryProcess(void) SHMQueueElemInit(&(MyProc->links)); MyProc->waitStatus = STATUS_OK; MyProc->lxid = InvalidLocalTransactionId; - MyProc->xid = InvalidTransactionId; - MyProc->xmin = 
InvalidTransactionId; + MyProcMinimal->xid = InvalidTransactionId; + MyProcMinimal->xmin = InvalidTransactionId; MyProc->backendId = InvalidBackendId; MyProc->databaseId = InvalidOid; MyProc->roleId = InvalidOid; - MyProc->inCommit = false; - MyProc->vacuumFlags = 0; - MyProc->lwWaiting = false; - MyProc->lwExclusive = false; - MyProc->lwWaitLink = NULL; + MyProcMinimal->inCommit = false; + MyProcMinimal->vacuumFlags = 0; + MyProc->flWaitMode = 0; + MyProc->flWaitResult = 0; + MyProc->flWaitLink = NULL; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; #ifdef USE_ASSERT_CHECKING @@ -607,7 +645,7 @@ IsWaitingForLock(void) void LockWaitCancel(void) { - LWLockId partitionLock; + FlexLockId partitionLock; /* Nothing to do if we weren't waiting for a lock */ if (lockAwaited == NULL) @@ -718,11 +756,11 @@ ProcKill(int code, Datum arg) #endif /* - * Release any LW locks I am holding. There really shouldn't be any, but - * it's cheap to check again before we cut the knees off the LWLock + * Release any flex locks I am holding. There really shouldn't be any, but + * it's cheap to check again before we cut the knees off the flex lock * facility by releasing our PGPROC ... 
*/ - LWLockReleaseAll(); + FlexLockReleaseAll(); /* Release ownership of the process's latch, too */ DisownLatch(&MyProc->procLatch); @@ -779,8 +817,8 @@ AuxiliaryProcKill(int code, Datum arg) Assert(MyProc == auxproc); - /* Release any LW locks I am holding (see notes above) */ - LWLockReleaseAll(); + /* Release any flex locks I am holding (see notes above) */ + FlexLockReleaseAll(); /* Release ownership of the process's latch, too */ DisownLatch(&MyProc->procLatch); @@ -865,7 +903,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) LOCK *lock = locallock->lock; PROCLOCK *proclock = locallock->proclock; uint32 hashcode = locallock->hashcode; - LWLockId partitionLock = LockHashPartitionLock(hashcode); + FlexLockId partitionLock = LockHashPartitionLock(hashcode); PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; @@ -1045,16 +1083,17 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel) { PGPROC *autovac = GetBlockingAutoVacuumPgproc(); + PGPROC_MINIMAL *autovac_minimal = &ProcGlobal->allProcs_Minimal[autovac->pgprocno]; - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + ProcArrayLockAcquire(PAL_EXCLUSIVE); /* * Only do it if the worker is not working to protect against Xid * wraparound. 
*/ if ((autovac != NULL) && - (autovac->vacuumFlags & PROC_IS_AUTOVACUUM) && - !(autovac->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND)) + (autovac_minimal->vacuumFlags & PROC_IS_AUTOVACUUM) && + !(autovac_minimal->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND)) { int pid = autovac->pid; @@ -1062,7 +1101,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) pid); /* don't hold the lock across the kill() syscall */ - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* send the autovacuum worker Back to Old Kent Road */ if (kill(pid, SIGINT) < 0) @@ -1074,7 +1113,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) } } else - LWLockRelease(ProcArrayLock); + ProcArrayLockRelease(); /* prevent signal from being resent more than once */ allow_autovacuum_cancel = false; diff --git a/src/backend/storage/lmgr/procarraylock.c b/src/backend/storage/lmgr/procarraylock.c new file mode 100644 index 0000000..6aa51f2 --- /dev/null +++ b/src/backend/storage/lmgr/procarraylock.c @@ -0,0 +1,343 @@ +/*------------------------------------------------------------------------- + * + * procarraylock.c + * Lock management for the ProcArray + * + * Because the ProcArray data structure is highly trafficked, it is + * critical that mutual exclusion for ProcArray options be as efficient + * as possible. A particular problem is transaction end (commit or abort) + * which cannot be done in parallel with snapshot acquisition. We + * therefore include some special hacks to deal with this case efficiently. 
+ * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/lmgr/procarraylock.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "miscadmin.h" +#include "pg_trace.h" +#include "access/transam.h" +#include "storage/flexlock_internals.h" +#include "storage/ipc.h" +#include "storage/procarraylock.h" +#include "storage/proc.h" +#include "storage/spin.h" + +typedef struct ProcArrayLockStruct +{ + FlexLock flex; /* common FlexLock infrastructure */ + char exclusive; /* # of exclusive holders (0 or 1) */ + int shared; /* # of shared holders (0..MaxBackends) */ + PGPROC *ending; /* transactions wishing to clear state */ + TransactionId latest_ending_xid; /* latest ending XID */ +} ProcArrayLockStruct; + +/* There is only one ProcArrayLock. */ +#define ProcArrayLockPointer() \ + (AssertMacro(FlexLockArray[ProcArrayLock].flex.locktype == \ + FLEXLOCK_TYPE_PROCARRAYLOCK), \ + (volatile ProcArrayLockStruct *) &FlexLockArray[ProcArrayLock]) + +/* + * ProcArrayLockAcquire - acquire a lightweight lock in the specified mode + * + * If the lock is not available, sleep until it is. + * + * Side effect: cancel/die interrupts are held off until lock release. + */ +void +ProcArrayLockAcquire(ProcArrayLockMode mode) +{ + volatile ProcArrayLockStruct *lock = ProcArrayLockPointer(); + PGPROC *proc = MyProc; + bool retry = false; + int extraWaits = 0; + + /* + * We can't wait if we haven't got a PGPROC. This should only occur + * during bootstrap or shared memory initialization. Put an Assert here + * to catch unsafe coding practices. + */ + Assert(!(proc == NULL && IsUnderPostmaster)); + + /* + * Lock out cancel/die interrupts until we exit the code section protected + * by the ProcArrayLock. 
This ensures that interrupts will not interfere + * with manipulations of data structures in shared memory. + */ + HOLD_INTERRUPTS(); + + /* + * Loop here to try to acquire lock after each time we are signaled by + * ProcArrayLockRelease. See comments in LWLockAcquire for an explanation + * of why do we not attempt to hand off the lock directly. + */ + for (;;) + { + bool mustwait; + + /* Acquire mutex. Time spent holding mutex should be short! */ + SpinLockAcquire(&lock->flex.mutex); + + /* If retrying, allow LWLockRelease to release waiters again */ + if (retry) + lock->flex.releaseOK = true; + + /* If I can get the lock, do so quickly. */ + if (mode == PAL_EXCLUSIVE) + { + if (lock->exclusive == 0 && lock->shared == 0) + { + lock->exclusive++; + mustwait = false; + } + else + mustwait = true; + } + else + { + if (lock->exclusive == 0) + { + lock->shared++; + mustwait = false; + } + else + mustwait = true; + } + + if (!mustwait) + break; /* got the lock */ + + /* Add myself to wait queue. */ + FlexLockJoinWaitQueue(lock, (int) mode); + + /* Can release the mutex now */ + SpinLockRelease(&lock->flex.mutex); + + /* Wait until awakened. */ + extraWaits += FlexLockWait(ProcArrayLock, mode); + + /* Now loop back and try to acquire lock again. */ + retry = true; + } + + /* We are done updating shared state of the lock itself. */ + SpinLockRelease(&lock->flex.mutex); + + TRACE_POSTGRESQL_FLEXLOCK_ACQUIRE(lockid, mode); + + /* Add lock to list of locks held by this backend */ + FlexLockRemember(ProcArrayLock); + + /* + * Fix the process wait semaphore's count for any absorbed wakeups. + */ + while (extraWaits-- > 0) + PGSemaphoreUnlock(&proc->sem); +} + +/* + * ProcArrayLockClearTransaction - safely clear transaction details + * + * This can't be done while ProcArrayLock is held, but it's so fast that + * we can afford to do it while holding the spinlock, rather than acquiring + * and releasing the lock. 
+ */ +void +ProcArrayLockClearTransaction(TransactionId latestXid) +{ + volatile ProcArrayLockStruct *lock = ProcArrayLockPointer(); + PGPROC *proc = MyProc; + int extraWaits = 0; + bool mustwait; + + HOLD_INTERRUPTS(); + + /* Acquire mutex. Time spent holding mutex should be short! */ + SpinLockAcquire(&lock->flex.mutex); + + if (lock->exclusive == 0 && lock->shared == 0) + { + { + volatile PGPROC_MINIMAL *vproc_minimal = &ProcGlobal->allProcs_Minimal[proc->pgprocno]; + /* If there are no lockers, clear the critical PGPROC fields. */ + vproc_minimal->xid = InvalidTransactionId; + vproc_minimal->xmin = InvalidTransactionId; + /* must be cleared with xid/xmin: */ + vproc_minimal->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; + vproc_minimal->nxids = 0; + vproc_minimal->overflowed = false; + } + mustwait = false; + + /* Also advance global latestCompletedXid while holding the lock */ + if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, + latestXid)) + ShmemVariableCache->latestCompletedXid = latestXid; + } + else + { + /* Rats, must wait. */ + proc->flWaitLink = lock->ending; + lock->ending = proc; + if (!TransactionIdIsValid(lock->latest_ending_xid) || + TransactionIdPrecedes(lock->latest_ending_xid, latestXid)) + lock->latest_ending_xid = latestXid; + mustwait = true; + } + + /* Can release the mutex now */ + SpinLockRelease(&lock->flex.mutex); + + /* + * If we were not able to perform the operation immediately, we must wait. + * But we need not retry after being awoken, because the last lock holder + * to release the lock will do the work first, on our behalf. 
+ */ + if (mustwait) + { + extraWaits += FlexLockWait(ProcArrayLock, 2); + while (extraWaits-- > 0) + PGSemaphoreUnlock(&proc->sem); + } + + RESUME_INTERRUPTS(); +} + +/* + * ProcArrayLockRelease - release a previously acquired lock + */ +void +ProcArrayLockRelease(void) +{ + volatile ProcArrayLockStruct *lock = ProcArrayLockPointer(); + PGPROC *head; + PGPROC *ending = NULL; + PGPROC *proc; + + FlexLockForget(ProcArrayLock); + + /* Acquire mutex. Time spent holding mutex should be short! */ + SpinLockAcquire(&lock->flex.mutex); + + /* Release my hold on lock */ + if (lock->exclusive > 0) + lock->exclusive--; + else + { + Assert(lock->shared > 0); + lock->shared--; + } + + /* + * If the lock is now free, but there are some transactions trying to + * end, we must clear the critical PGPROC fields for them, and save a + * list of them so we can wake them up. + */ + if (lock->exclusive == 0 && lock->shared == 0 && lock->ending != NULL) + { + volatile PGPROC *vproc; + + ending = lock->ending; + vproc = ending; + + while (vproc != NULL) + { + volatile PGPROC_MINIMAL *vproc_minimal = &ProcGlobal->allProcs_Minimal[vproc->pgprocno]; + /* If there are no lockers, clear the critical PGPROC fields. */ + vproc_minimal->xid = InvalidTransactionId; + vproc_minimal->xmin = InvalidTransactionId; + /* must be cleared with xid/xmin: */ + vproc_minimal->vacuumFlags &= ~PROC_VACUUM_STATE_MASK; + vproc_minimal->nxids = 0; + vproc_minimal->overflowed = false; + vproc = vproc->flWaitLink; + } + + /* Also advance global latestCompletedXid */ + if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, + lock->latest_ending_xid)) + ShmemVariableCache->latestCompletedXid = lock->latest_ending_xid; + + /* Reset lock state. */ + lock->ending = NULL; + lock->latest_ending_xid = InvalidTransactionId; + } + + /* + * See if I need to awaken any waiters. If I released a non-last shared + * hold, there cannot be anything to do. 
Also, do not awaken any waiters + * if someone has already awakened waiters that haven't yet acquired the + * lock. + */ + head = lock->flex.head; + if (head != NULL) + { + if (lock->exclusive == 0 && lock->shared == 0 && lock->flex.releaseOK) + { + /* + * Remove the to-be-awakened PGPROCs from the queue. If the front + * waiter wants exclusive lock, awaken him only. Otherwise awaken + * as many waiters as want shared access. + */ + proc = head; + if (proc->flWaitMode != LW_EXCLUSIVE) + { + while (proc->flWaitLink != NULL && + proc->flWaitLink->flWaitMode != LW_EXCLUSIVE) + proc = proc->flWaitLink; + } + /* proc is now the last PGPROC to be released */ + lock->flex.head = proc->flWaitLink; + proc->flWaitLink = NULL; + /* prevent additional wakeups until retryer gets to run */ + lock->flex.releaseOK = false; + } + else + { + /* lock is still held, can't awaken anything */ + head = NULL; + } + } + + /* We are done updating shared state of the lock itself. */ + SpinLockRelease(&lock->flex.mutex); + + TRACE_POSTGRESQL_FLEXLOCK_RELEASE(lockid); + + /* + * Awaken any waiters I removed from the queue. + */ + while (head != NULL) + { + FlexLockDebug("LWLockRelease", lockid, "release waiter"); + proc = head; + head = proc->flWaitLink; + proc->flWaitLink = NULL; + proc->flWaitResult = 1; /* any non-zero value will do */ + PGSemaphoreUnlock(&proc->sem); + } + + /* + * Also awaken any processes whose critical PGPROC fields I cleared + */ + while (ending != NULL) + { + FlexLockDebug("LWLockRelease", lockid, "release ending"); + proc = ending; + ending = proc->flWaitLink; + proc->flWaitLink = NULL; + proc->flWaitResult = 1; /* any non-zero value will do */ + PGSemaphoreUnlock(&proc->sem); + } + + /* + * Now okay to allow cancel/die interrupts. 
+ */ + RESUME_INTERRUPTS(); +} diff --git a/src/backend/utils/misc/check_guc b/src/backend/utils/misc/check_guc index 293fb03..1a19e36 100755 --- a/src/backend/utils/misc/check_guc +++ b/src/backend/utils/misc/check_guc @@ -19,7 +19,7 @@ INTENTIONALLY_NOT_INCLUDED="autocommit debug_deadlocks \ is_superuser lc_collate lc_ctype lc_messages lc_monetary lc_numeric lc_time \ pre_auth_delay role seed server_encoding server_version server_version_int \ -session_authorization trace_lock_oidmin trace_lock_table trace_locks trace_lwlocks \ +session_authorization trace_lock_oidmin trace_lock_table trace_locks trace_flexlocks \ trace_notify trace_userlocks transaction_isolation transaction_read_only \ zero_damaged_pages" diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index da7b6d4..52de233 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -59,6 +59,7 @@ #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/bufmgr.h" +#include "storage/flexlock_internals.h" #include "storage/standby.h" #include "storage/fd.h" #include "storage/predicate.h" @@ -1071,12 +1072,12 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, { - {"trace_lwlocks", PGC_SUSET, DEVELOPER_OPTIONS, + {"trace_flexlocks", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("No description available."), NULL, GUC_NOT_IN_SAMPLE }, - &Trace_lwlocks, + &Trace_flexlocks, false, NULL, NULL, NULL }, diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d index 71c5ab0..5b9cfe6 100644 --- a/src/backend/utils/probes.d +++ b/src/backend/utils/probes.d @@ -15,8 +15,8 @@ * in probe definitions, as they cause compilation errors on Mac OS X 10.5. 
*/ #define LocalTransactionId unsigned int -#define LWLockId int -#define LWLockMode int +#define FlexLockId int +#define FlexLockMode int #define LOCKMODE int #define BlockNumber unsigned int #define Oid unsigned int @@ -29,12 +29,12 @@ provider postgresql { probe transaction__commit(LocalTransactionId); probe transaction__abort(LocalTransactionId); - probe lwlock__acquire(LWLockId, LWLockMode); - probe lwlock__release(LWLockId); - probe lwlock__wait__start(LWLockId, LWLockMode); - probe lwlock__wait__done(LWLockId, LWLockMode); - probe lwlock__condacquire(LWLockId, LWLockMode); - probe lwlock__condacquire__fail(LWLockId, LWLockMode); + probe flexlock__acquire(FlexLockId, FlexLockMode); + probe flexlock__release(FlexLockId); + probe flexlock__wait__start(FlexLockId, FlexLockMode); + probe flexlock__wait__done(FlexLockId, FlexLockMode); + probe flexlock__condacquire(FlexLockId, FlexLockMode); + probe flexlock__condacquire__fail(FlexLockId, FlexLockMode); probe lock__wait__start(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE); probe lock__wait__done(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE); diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 50fb780..1f4f5b4 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -577,7 +577,7 @@ static void SnapshotResetXmin(void) { if (RegisteredSnapshots == 0 && ActiveSnapshot == NULL) - MyProc->xmin = InvalidTransactionId; + MyProcMinimal->xmin = InvalidTransactionId; } /* diff --git a/src/include/access/slru.h b/src/include/access/slru.h index e48743f..680a87f 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -55,7 +55,7 @@ typedef enum */ typedef struct SlruSharedData { - LWLockId ControlLock; + FlexLockId ControlLock; /* Number of buffers managed by this SLRU structure */ int num_slots; @@ -69,7 +69,7 @@ typedef struct SlruSharedData bool *page_dirty; int 
*page_number; int *page_lru_count; - LWLockId *buffer_locks; + FlexLockId *buffer_locks; /* * Optional array of WAL flush LSNs associated with entries in the SLRU @@ -136,7 +136,7 @@ typedef SlruCtlData *SlruCtl; extern Size SimpleLruShmemSize(int nslots, int nlsns); extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, - LWLockId ctllock, const char *subdir); + FlexLockId ctllock, const char *subdir); extern int SimpleLruZeroPage(SlruCtl ctl, int pageno); extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid); diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 6c8e312..d3b74db 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -49,9 +49,9 @@ #define SEQ_MINVALUE (-SEQ_MAXVALUE) /* - * Number of spare LWLocks to allocate for user-defined add-on code. + * Number of spare FlexLocks to allocate for user-defined add-on code. */ -#define NUM_USER_DEFINED_LWLOCKS 4 +#define NUM_USER_DEFINED_FLEXLOCKS 4 /* * Define this if you want to allow the lo_import and lo_export SQL diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index b7d4ea5..ac7f665 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -103,7 +103,7 @@ typedef struct buftag #define BufTableHashPartition(hashcode) \ ((hashcode) % NUM_BUFFER_PARTITIONS) #define BufMappingPartitionLock(hashcode) \ - ((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode))) + ((FlexLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode))) /* * BufferDesc -- shared descriptor/state data for a single shared buffer. 
@@ -143,8 +143,8 @@ typedef struct sbufdesc int buf_id; /* buffer's index number (from 0) */ int freeNext; /* link in freelist chain */ - LWLockId io_in_progress_lock; /* to wait for I/O to complete */ - LWLockId content_lock; /* to lock access to buffer contents */ + FlexLockId io_in_progress_lock; /* to wait for I/O to complete */ + FlexLockId content_lock; /* to lock access to buffer contents */ } BufferDesc; #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) diff --git a/src/include/storage/flexlock.h b/src/include/storage/flexlock.h new file mode 100644 index 0000000..612c21a --- /dev/null +++ b/src/include/storage/flexlock.h @@ -0,0 +1,102 @@ +/*------------------------------------------------------------------------- + * + * flexlock.h + * Flex lock manager + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/flexlock.h + * + *------------------------------------------------------------------------- + */ +#ifndef FLEXLOCK_H +#define FLEXLOCK_H + +/* + * It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS + * here, but we need them to set up enum FlexLockId correctly, and having + * this file include lock.h or bufmgr.h would be backwards. + */ + +/* Number of partitions of the shared buffer mapping hashtable */ +#define NUM_BUFFER_PARTITIONS 16 + +/* Number of partitions the shared lock tables are divided into */ +#define LOG2_NUM_LOCK_PARTITIONS 4 +#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS) + +/* Number of partitions the shared predicate lock tables are divided into */ +#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4 +#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS) + +/* + * We have a number of predefined FlexLocks, plus a bunch of locks that are + * dynamically assigned (e.g., for shared buffers). 
The FlexLock structures + * live in shared memory (since they contain shared data) and are identified + * by values of this enumerated type. We abuse the notion of an enum somewhat + * by allowing values not listed in the enum declaration to be assigned. + * The extra value MaxDynamicFlexLock is there to keep the compiler from + * deciding that the enum can be represented as char or short ... + * + * If you remove a lock, please replace it with a placeholder. This retains + * the lock numbering, which is helpful for DTrace and other external + * debugging scripts. + */ +typedef enum FlexLockId +{ + BufFreelistLock, + ShmemIndexLock, + OidGenLock, + XidGenLock, + ProcArrayLock, + SInvalReadLock, + SInvalWriteLock, + WALInsertLock, + WALWriteLock, + ControlFileLock, + CheckpointLock, + CLogControlLock, + SubtransControlLock, + MultiXactGenLock, + MultiXactOffsetControlLock, + MultiXactMemberControlLock, + RelCacheInitLock, + BgWriterCommLock, + TwoPhaseStateLock, + TablespaceCreateLock, + BtreeVacuumLock, + AddinShmemInitLock, + AutovacuumLock, + AutovacuumScheduleLock, + SyncScanLock, + RelationMappingLock, + AsyncCtlLock, + AsyncQueueLock, + SerializableXactHashLock, + SerializableFinishedListLock, + SerializablePredicateLockListLock, + OldSerXidLock, + SyncRepLock, + /* Individual lock IDs end here */ + FirstBufMappingLock, + FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS, + FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS, + + /* must be last except for MaxDynamicFlexLock: */ + NumFixedFlexLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS, + + MaxDynamicFlexLock = 1000000000 +} FlexLockId; + +/* Shared memory setup. */ +extern int NumFlexLocks(void); +extern Size FlexLockShmemSize(void); +extern void RequestAddinFlexLocks(int n); +extern void CreateFlexLocks(void); + +/* Error recovery and debugging support functions. 
*/ +extern void FlexLockReleaseAll(void); +extern bool FlexLockHeldByMe(FlexLockId id); + +#endif /* FLEXLOCK_H */ diff --git a/src/include/storage/flexlock_internals.h b/src/include/storage/flexlock_internals.h new file mode 100644 index 0000000..d1bca45 --- /dev/null +++ b/src/include/storage/flexlock_internals.h @@ -0,0 +1,89 @@ +/*------------------------------------------------------------------------- + * + * flexlock_internals.h + * Flex lock internals. Only files which implement a FlexLock + * type should need to include this. Merging this with flexlock.h + * creates a circular header dependency, but even if it didn't, this + * is cleaner. + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/flexlock_internals.h + * + *------------------------------------------------------------------------- + */ +#ifndef FLEXLOCK_INTERNALS_H +#define FLEXLOCK_INTERNALS_H + +#include "pg_trace.h" +#include "storage/flexlock.h" +#include "storage/proc.h" +#include "storage/s_lock.h" + +/* + * Individual FlexLock implementations each get this many bytes to store + * its state; of course, a given implementation could also allocate additional + * shmem elsewhere, but we provide this many bytes within the array. The + * header fields common to all FlexLock types are included in this number. + * A power of two should probably be chosen, to avoid alignment issues and + * cache line splitting. It might be useful to increase this on systems where + * a cache line is more than 64 bytes in size. 
+ */ +#define FLEX_LOCK_BYTES 64 + +typedef struct FlexLock +{ + char locktype; /* see FLEXLOCK_TYPE_* constants */ + slock_t mutex; /* Protects FlexLock state and wait queues */ + bool releaseOK; /* T if ok to release waiters */ + PGPROC *head; /* head of list of waiting PGPROCs */ + PGPROC *tail; /* tail of list of waiting PGPROCs */ + /* tail is undefined when head is NULL */ +} FlexLock; + +#define FLEXLOCK_TYPE_LWLOCK 'l' +#define FLEXLOCK_TYPE_PROCARRAYLOCK 'p' + +typedef union FlexLockPadded +{ + FlexLock flex; + char pad[FLEX_LOCK_BYTES]; +} FlexLockPadded; + +extern FlexLockPadded *FlexLockArray; + +extern FlexLockId FlexLockAssign(char locktype); +extern void FlexLockRemember(FlexLockId id); +extern void FlexLockForget(FlexLockId id); +extern int FlexLockWait(FlexLockId id, int mode); + +/* + * We must join the wait queue while holding the spinlock, so we define this + * as a macro, for speed. + */ +#define FlexLockJoinWaitQueue(lock, mode) \ + do { \ + Assert(MyProc != NULL); \ + MyProc->flWaitResult = 0; \ + MyProc->flWaitMode = mode; \ + MyProc->flWaitLink = NULL; \ + if (lock->flex.head == NULL) \ + lock->flex.head = MyProc; \ + else \ + lock->flex.tail->flWaitLink = MyProc; \ + lock->flex.tail = MyProc; \ + } while (0) + +#ifdef LOCK_DEBUG +extern bool Trace_flexlocks; +#define FlexLockDebug(where, id, msg) \ + do { \ + if (Trace_flexlocks) \ + elog(LOG, "%s(%d): %s", where, (int) id, msg); \ + } while (0) +#else +#define FlexLockDebug(where, id, msg) +#endif + +#endif /* FLEXLOCK_INTERNALS_H */ diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index e106ad5..ba87db2 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -471,7 +471,7 @@ typedef enum #define LockHashPartition(hashcode) \ ((hashcode) % NUM_LOCK_PARTITIONS) #define LockHashPartitionLock(hashcode) \ - ((LWLockId) (FirstLockMgrLock + LockHashPartition(hashcode))) + ((FlexLockId) (FirstLockMgrLock + LockHashPartition(hashcode))) /* diff --git 
a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 438a48d..f68cddc 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -14,82 +14,7 @@ #ifndef LWLOCK_H #define LWLOCK_H -/* - * It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS - * here, but we need them to set up enum LWLockId correctly, and having - * this file include lock.h or bufmgr.h would be backwards. - */ - -/* Number of partitions of the shared buffer mapping hashtable */ -#define NUM_BUFFER_PARTITIONS 16 - -/* Number of partitions the shared lock tables are divided into */ -#define LOG2_NUM_LOCK_PARTITIONS 4 -#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS) - -/* Number of partitions the shared predicate lock tables are divided into */ -#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4 -#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS) - -/* - * We have a number of predefined LWLocks, plus a bunch of LWLocks that are - * dynamically assigned (e.g., for shared buffers). The LWLock structures - * live in shared memory (since they contain shared data) and are identified - * by values of this enumerated type. We abuse the notion of an enum somewhat - * by allowing values not listed in the enum declaration to be assigned. - * The extra value MaxDynamicLWLock is there to keep the compiler from - * deciding that the enum can be represented as char or short ... - * - * If you remove a lock, please replace it with a placeholder. This retains - * the lock numbering, which is helpful for DTrace and other external - * debugging scripts. 
- */ -typedef enum LWLockId -{ - BufFreelistLock, - ShmemIndexLock, - OidGenLock, - XidGenLock, - ProcArrayLock, - SInvalReadLock, - SInvalWriteLock, - WALInsertLock, - WALWriteLock, - ControlFileLock, - CheckpointLock, - CLogControlLock, - SubtransControlLock, - MultiXactGenLock, - MultiXactOffsetControlLock, - MultiXactMemberControlLock, - RelCacheInitLock, - BgWriterCommLock, - TwoPhaseStateLock, - TablespaceCreateLock, - BtreeVacuumLock, - AddinShmemInitLock, - AutovacuumLock, - AutovacuumScheduleLock, - SyncScanLock, - RelationMappingLock, - AsyncCtlLock, - AsyncQueueLock, - SerializableXactHashLock, - SerializableFinishedListLock, - SerializablePredicateLockListLock, - OldSerXidLock, - SyncRepLock, - /* Individual lock IDs end here */ - FirstBufMappingLock, - FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS, - FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS, - - /* must be last except for MaxDynamicLWLock: */ - NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS, - - MaxDynamicLWLock = 1000000000 -} LWLockId; - +#include "storage/flexlock.h" typedef enum LWLockMode { @@ -97,22 +22,10 @@ typedef enum LWLockMode LW_SHARED } LWLockMode; - -#ifdef LOCK_DEBUG -extern bool Trace_lwlocks; -#endif - -extern LWLockId LWLockAssign(void); -extern void LWLockAcquire(LWLockId lockid, LWLockMode mode); -extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode); -extern void LWLockRelease(LWLockId lockid); -extern void LWLockReleaseAll(void); -extern bool LWLockHeldByMe(LWLockId lockid); - -extern int NumLWLocks(void); -extern Size LWLockShmemSize(void); -extern void CreateLWLocks(void); - -extern void RequestAddinLWLocks(int n); +extern FlexLockId LWLockAssign(void); +extern void LWLockAcquire(FlexLockId lockid, LWLockMode mode); +extern bool LWLockConditionalAcquire(FlexLockId lockid, LWLockMode mode); +extern void LWLockRelease(FlexLockId lockid); +extern bool LWLockHeldByMe(FlexLockId lockid); #endif 
/* LWLOCK_H */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 6e798b1..9f377a8 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -35,8 +35,6 @@ struct XidCache { - bool overflowed; - int nxids; TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; }; @@ -86,27 +84,14 @@ struct PGPROC LocalTransactionId lxid; /* local id of top-level transaction currently * being executed by this proc, if running; * else InvalidLocalTransactionId */ - - TransactionId xid; /* id of top-level transaction currently being - * executed by this proc, if running and XID - * is assigned; else InvalidTransactionId */ - - TransactionId xmin; /* minimal running XID as it was when we were - * starting our xact, excluding LAZY VACUUM: - * vacuum must not remove tuples deleted by - * xid >= xmin ! */ - int pid; /* Backend's process ID; 0 if prepared xact */ + int pgprocno; /* These fields are zero while a backend is still starting up: */ BackendId backendId; /* This backend's backend ID (if assigned) */ Oid databaseId; /* OID of database this backend is using */ Oid roleId; /* OID of role using this backend */ - bool inCommit; /* true if within commit critical section */ - - uint8 vacuumFlags; /* vacuum-related flags, see above */ - /* * While in hot standby mode, shows that a conflict signal has been sent * for the current transaction. Set/cleared while holding ProcArrayLock, @@ -114,10 +99,10 @@ struct PGPROC */ bool recoveryConflictPending; - /* Info about LWLock the process is currently waiting for, if any. */ - bool lwWaiting; /* true if waiting for an LW lock */ - bool lwExclusive; /* true if waiting for exclusive access */ - struct PGPROC *lwWaitLink; /* next waiter for same LW lock */ + /* Info about FlexLock the process is currently waiting for, if any. 
*/ + int flWaitResult; /* result of wait, or 0 if still waiting */ + int flWaitMode; /* lock mode sought */ + struct PGPROC *flWaitLink; /* next waiter for same FlexLock */ /* Info about lock the process is currently waiting for, if any. */ /* waitLock and waitProcLock are NULL if not currently waiting. */ @@ -147,7 +132,7 @@ struct PGPROC struct XidCache subxids; /* cache for subtransaction XIDs */ /* Per-backend LWLock. Protects fields below. */ - LWLockId backendLock; /* protects the fields below */ + FlexLockId backendLock; /* protects the fields below */ /* Lock manager data, recording fast-path locks taken by this backend. */ uint64 fpLockBits; /* lock modes held for each fast-path slot */ @@ -160,7 +145,35 @@ struct PGPROC extern PGDLLIMPORT PGPROC *MyProc; +extern PGDLLIMPORT struct PGPROC_MINIMAL *MyProcMinimal; + +/* + * A minimal part of the PGPROC. We store these members out of the main PGPROC + * structure since they are very heavily accessed members and usually in a loop + * for all active PGPROCs. Storing them in a separate array ensures that these + * members can be very efficiently accessed with minimum cache misses. On a + * large multiprocessor system, this can show a significant performance + * improvement. + */ +struct PGPROC_MINIMAL +{ + TransactionId xid; /* id of top-level transaction currently being + * executed by this proc, if running and XID + * is assigned; else InvalidTransactionId */ + TransactionId xmin; /* minimal running XID as it was when we were + * starting our xact, excluding LAZY VACUUM: + * vacuum must not remove tuples deleted by + * xid >= xmin ! */ + + uint8 vacuumFlags; /* vacuum-related flags, see above */ + bool overflowed; + bool inCommit; /* true if within commit critical section */ + + uint8 nxids; +}; + +typedef struct PGPROC_MINIMAL PGPROC_MINIMAL; /* * There is one ProcGlobal struct for the whole database cluster. 
@@ -169,6 +182,8 @@ typedef struct PROC_HDR { /* Array of PGPROC structures (not including dummies for prepared txns) */ PGPROC *allProcs; + /* Array of PGPROC_MINIMAL structures (not including dummies for prepared txns) */ + PGPROC_MINIMAL *allProcs_Minimal; /* Length of allProcs array */ uint32 allProcCount; /* Head of list of free PGPROC structures */ @@ -186,6 +201,8 @@ typedef struct PROC_HDR extern PROC_HDR *ProcGlobal; +extern PGPROC *PreparedXactProcs; + /* * We set aside some extra PGPROC structures for auxiliary processes, * ie things that aren't full-fledged backends but need shmem access. diff --git a/src/include/storage/procarraylock.h b/src/include/storage/procarraylock.h new file mode 100644 index 0000000..678ca6f --- /dev/null +++ b/src/include/storage/procarraylock.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * procarraylock.h + * Lock management for the ProcArray + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/procarraylock.h + * + *------------------------------------------------------------------------- + */ +#ifndef PROCARRAYLOCK_H +#define PROCARRAYLOCK_H + +#include "storage/flexlock.h" + +typedef enum ProcArrayLockMode +{ + PAL_EXCLUSIVE, + PAL_SHARED +} ProcArrayLockMode; + +extern void ProcArrayLockAcquire(ProcArrayLockMode mode); +extern void ProcArrayLockClearTransaction(TransactionId latestXid); +extern void ProcArrayLockRelease(void); + +#endif /* PROCARRAYLOCK_H */