From e9d588ba1e504d74a5904f44c4c985dc9f1b4e54 Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Fri, 14 Aug 2015 16:32:38 -0400
Subject: [PATCH 1/3] Allow multiple sessions to wait.

---
 src/test/isolation/README            |   5 +-
 src/test/isolation/isolationtester.c | 200 ++++++++++++++++++++++-------------
 2 files changed, 125 insertions(+), 80 deletions(-)

diff --git a/src/test/isolation/README b/src/test/isolation/README
index 490be8e..75d10ff 100644
--- a/src/test/isolation/README
+++ b/src/test/isolation/README
@@ -107,10 +107,7 @@ Each step may contain commands that block until further action has been taken
 (most likely, some other session runs a step that unblocks it or causes a
 deadlock).  A test that uses this ability must manually specify valid
 permutations, i.e. those that would not expect a blocked session to execute a
-command.  If the test fails to follow that rule, the test is aborted.
-
-Currently, at most one step can be waiting at a time.  As long as one
-step is waiting, subsequent steps are run to completion synchronously.
+command.  If a test fails to follow that rule, it will hang.
 
 Note that isolationtester recognizes that a command has blocked by looking
 to see if it is shown as waiting in the pg_locks view; therefore, only
diff --git a/src/test/isolation/isolationtester.c b/src/test/isolation/isolationtester.c
index ab41501..64290aa 100644
--- a/src/test/isolation/isolationtester.c
+++ b/src/test/isolation/isolationtester.c
@@ -467,34 +467,48 @@ report_error_message(Step *step)
 }
 
 /*
- * As above, but reports messages possibly emitted by two steps.  This is
+ * As above, but reports messages possibly emitted by multiple steps.  This is
  * useful when we have a blocked command awakened by another one; we want to
- * report both messages identically, for the case where we don't care which
+ * report all messages identically, for the case where we don't care which
  * one fails due to a timeout such as deadlock timeout.
  */
 static void
-report_two_error_messages(Step *step1, Step *step2)
+report_multiple_error_messages(Step *step, int nextra, Step **extrastep)
 {
-	char	   *prefix;
+	PQExpBufferData buffer;
+	int		n;
 
-	prefix = psprintf("%s %s", step1->name, step2->name);
-
-	if (step1->errormsg)
+	if (nextra == 0)
 	{
-		fprintf(stdout, "error in steps %s: %s\n", prefix,
-				step1->errormsg);
-		free(step1->errormsg);
-		step1->errormsg = NULL;
+		report_error_message(step);
+		return;
 	}
-	if (step2->errormsg)
+
+	initPQExpBuffer(&buffer);
+	appendPQExpBufferStr(&buffer, step->name);
+ 
+	for (n = 0; n < nextra; ++n)
+		appendPQExpBuffer(&buffer, " %s", extrastep[n]->name);
+ 
+	if (step->errormsg)
 	{
-		fprintf(stdout, "error in steps %s: %s\n", prefix,
-				step2->errormsg);
-		free(step2->errormsg);
-		step2->errormsg = NULL;
+		fprintf(stdout, "error in steps %s: %s\n", buffer.data,
+				step->errormsg);
+		free(step->errormsg);
+		step->errormsg = NULL;
 	}
 
-	free(prefix);
+	for (n = 0; n < nextra; ++n)
+	{
+		if (extrastep[n]->errormsg == NULL)
+			continue;
+		fprintf(stdout, "error in steps %s: %s\n",
+				buffer.data, extrastep[n]->errormsg);
+		free(extrastep[n]->errormsg);
+		extrastep[n]->errormsg = NULL;
+	}
+ 
+	termPQExpBuffer(&buffer);
 }
 
 /*
@@ -505,7 +519,14 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
 {
 	PGresult   *res;
 	int			i;
-	Step	   *waiting = NULL;
+	int			w;
+	int			nwaiting = 0;
+	int			nerrorstep = 0;
+	Step      **xwaiting;
+	Step      **errorstep;
+
+	xwaiting = malloc(sizeof(Step *) * testspec->nsessions);
+	errorstep = malloc(sizeof(Step *) * testspec->nsessions);
 
 	/*
 	 * In dry run mode, just display the permutation in the same format used
@@ -566,54 +587,73 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
 	for (i = 0; i < nsteps; i++)
 	{
 		Step	   *step = steps[i];
+		Step       *oldstep = NULL;
 		PGconn	   *conn = conns[1 + step->session];
+		bool		mustwait;
 
-		if (waiting != NULL && step->session == waiting->session)
+		/*
+		 * Check whether the session that needs to perform the next step
+		 * is still blocked on an earlier step.  If so, wait for it to finish.
+		 *
+		 * In older versions of this tool, when allowed precisely one session
+		 * to be waiting at a time.  If we reached a step which required that
+		 * session to execute the next command, we would declare the whole
+		 * permutation invalid, cancel everything, and move on to the next one.
+		 * Unfortunately, that made it impossible to test the deadlock detector
+		 * using this framework unless the numebr of processes involved in the
+		 * deadlock was precisely two.  We now assume that if we reach a step
+		 * that is still blocked, we need to wait for it to unblock itself.
+		 */
+		for (w = 0; w < nwaiting; ++w)
 		{
-			PGcancel   *cancel;
-			PGresult   *res;
-			int			j;
-
-			/*
-			 * This permutation is invalid: it can never happen in real life.
-			 *
-			 * A session is blocked on an earlier step (waiting) and no
-			 * further steps from this session can run until it is unblocked,
-			 * but it can only be unblocked by running steps from other
-			 * sessions.
-			 */
-			fprintf(stderr, "invalid permutation detected\n");
-
-			/* Cancel the waiting statement from this session. */
-			cancel = PQgetCancel(conn);
-			if (cancel != NULL)
+			if (step->session == xwaiting[w]->session)
 			{
-				char		buf[256];
-
-				if (!PQcancel(cancel, buf, sizeof(buf)))
-					fprintf(stderr, "PQcancel failed: %s\n", buf);
-
-				/* Be sure to consume the error message. */
-				while ((res = PQgetResult(conn)) != NULL)
-					PQclear(res);
-
-				PQfreeCancel(cancel);
+				oldstep = xwaiting[w];
+				break;
 			}
+		}
+		if (oldstep != NULL)
+		{
+			/* Wait for previous step on this connection. */
+			try_complete_step(oldstep, STEP_RETRY);
 
 			/*
-			 * Now we really have to complete all the running transactions to
-			 * make sure teardown doesn't block.
+			 * Attempt to complete any steps that were previously waiting.
+			 * Remove any that have completed, and also remove the one for
+			 * which we just waited.
 			 */
-			for (j = 1; j < nconns; j++)
+			w = 0;
+			nerrorstep = 0;
+			while (w < nwaiting)
 			{
-				res = PQexec(conns[j], "ROLLBACK");
-				if (res != NULL)
-					PQclear(res);
+				if (xwaiting[w] == oldstep)
+				{
+					/* We just waited for this; remove it. */
+					if (w + 1 < nwaiting)
+						xwaiting[w] = xwaiting[w + 1];
+					nwaiting--;
+				}
+
+				if (try_complete_step(xwaiting[w], STEP_NONBLOCK | STEP_RETRY))
+				{
+					/* Still blocked on a lock, leave it alone. */
+					w++;
+				}
+				else
+				{
+					/* This one finished, too! */
+					errorstep[nerrorstep++] = xwaiting[w];
+					if (w + 1 < nwaiting)
+						xwaiting[w] = xwaiting[w + 1];
+					nwaiting--;
+				}
 			}
 
-			goto teardown;
+			/* Report all errors together. */
+			report_multiple_error_messages(oldstep, nerrorstep, errorstep);
 		}
 
+		/* Send the query for this step. */
 		if (!PQsendQuery(conn, step->sql))
 		{
 			fprintf(stdout, "failed to send query for step %s: %s\n",
@@ -621,39 +661,47 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps)
 			exit_nicely();
 		}
 
-		if (waiting != NULL)
-		{
-			/* Some other step is already waiting: just block. */
-			try_complete_step(step, 0);
+		/*
+		 * Try to complete this step without blocking, unless all other
+		 * sessions are already blocked.  In that case, just wait for this
+		 * step to complete.
+		 */
+		if (nwaiting >= testspec->nsessions - 1)
+			mustwait = try_complete_step(step, 0);
+		else
+			mustwait = try_complete_step(step, STEP_NONBLOCK);
 
-			/*
-			 * See if this step unblocked the waiting step; report both error
-			 * messages together if so.
-			 */
-			if (!try_complete_step(waiting, STEP_NONBLOCK | STEP_RETRY))
+		/* Attempt to complete any steps that were previously waiting. */
+		w = 0;
+		nerrorstep = 0;
+		while (w < nwaiting)
+		{
+			if (try_complete_step(xwaiting[w], STEP_NONBLOCK | STEP_RETRY))
+				w++;
+			else
 			{
-				report_two_error_messages(step, waiting);
-				waiting = NULL;
+				errorstep[nerrorstep++] = xwaiting[w];
+				if (w + 1 < nwaiting)
+					xwaiting[w] = xwaiting[w + 1];
+				nwaiting--;
 			}
-			else
-				report_error_message(step);
-		}
-		else
-		{
-			if (try_complete_step(step, STEP_NONBLOCK))
-				waiting = step;
-			report_error_message(step);
 		}
+
+		/* Report any error from this step, and any steps that it unblocked. */
+		report_multiple_error_messages(step, nerrorstep, errorstep);
+
+		/* If this step is waiting, add it to the array of waiters. */
+		if (mustwait)
+			xwaiting[nwaiting++] = step;
 	}
 
 	/* Finish any waiting query. */
-	if (waiting != NULL)
+	for (w = 0; w < nwaiting; ++w)
 	{
-		try_complete_step(waiting, STEP_RETRY);
-		report_error_message(waiting);
+		try_complete_step(xwaiting[w], STEP_RETRY);
+		report_error_message(xwaiting[w]);
 	}
 
-teardown:
 	/* Perform per-session teardown */
 	for (i = 0; i < testspec->nsessions; i++)
 	{
-- 
2.3.2 (Apple Git-55)