Re: Server error and deadlocks - Mailing list pgsql-general
| From | Tom Lane |
|---|---|
| Subject | Re: Server error and deadlocks |
| Date | |
| Msg-id | 27559.1042751176@sss.pgh.pa.us Whole thread Raw |
| In response to | Re: Server error and deadlocks ("Orr, Steve" <sorr@rightnow.com>) |
| List | pgsql-general |
"Orr, Steve" <sorr@rightnow.com> writes:
> Agreed but what I need rightnow is to find a "smoking gun" to beat the
> duhvelopers on the head with in order to first get their attention. The
> smoking gun would be an example of their poor SQL technique so I can ask
> them, "What were you smoking when you wrote this junk?" In this context, is
> there any way to create some sort of trace log of all the SQL submitted to
> the server for a given time period, database, user, etc.?
See the logging options in postgresql.conf.
If you are interested, attached is the patch I just applied to CVS HEAD
to print out information about deadlocks. It should apply cleanly to
7.3.*. Sample output is
regression=# lock table tenk1;
NOTICE: Proc 27417 waits for AccessExclusiveLock on relation 18987 database 17054; blocked by 27415
NOTICE: Proc 27415 waits for ShareLock on transaction 6446; blocked by 27417
ERROR: deadlock detected
regression=#
regards, tom lane
*** src/backend/storage/lmgr/deadlock.c.orig Thu Oct 31 19:40:23 2002
--- src/backend/storage/lmgr/deadlock.c Thu Jan 16 15:45:38 2003
***************
*** 17,22 ****
--- 17,24 ----
* Interface:
*
* DeadLockCheck()
+ * DeadLockReport()
+ * RememberSimpleDeadLock()
* InitDeadLockChecking()
*
*-------------------------------------------------------------------------
***************
*** 45,56 ****
int nProcs;
} WAIT_ORDER;
static bool DeadLockCheckRecurse(PGPROC *proc);
static bool TestConfiguration(PGPROC *startProc);
static bool FindLockCycle(PGPROC *checkProc,
EDGE *softEdges, int *nSoftEdges);
! static bool FindLockCycleRecurse(PGPROC *checkProc,
EDGE *softEdges, int *nSoftEdges);
static bool ExpandConstraints(EDGE *constraints, int nConstraints);
static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
--- 47,73 ----
int nProcs;
} WAIT_ORDER;
+ /*
+ * Information saved about each edge in a detected deadlock cycle. This
+ * is used to print a diagnostic message upon failure.
+ *
+ * Note: because we want to examine this info after releasing the LockMgrLock,
+ * we can't just store LOCK and PGPROC pointers; we must extract out all the
+ * info we want to be able to print.
+ */
+ typedef struct
+ {
+ LOCKTAG locktag; /* ID of awaited lock object */
+ LOCKMODE lockmode; /* type of lock we're waiting for */
+ int pid; /* PID of blocked backend */
+ } DEADLOCK_INFO;
+
static bool DeadLockCheckRecurse(PGPROC *proc);
static bool TestConfiguration(PGPROC *startProc);
static bool FindLockCycle(PGPROC *checkProc,
EDGE *softEdges, int *nSoftEdges);
! static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
EDGE *softEdges, int *nSoftEdges);
static bool ExpandConstraints(EDGE *constraints, int nConstraints);
static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
***************
*** 88,93 ****
--- 105,112 ----
static EDGE *possibleConstraints;
static int nPossibleConstraints;
static int maxPossibleConstraints;
+ static DEADLOCK_INFO *deadlockDetails;
+ static int nDeadlockDetails;
/*
***************
*** 110,117 ****
--- 129,138 ----
/*
* FindLockCycle needs at most MaxBackends entries in visitedProcs[]
+ * and deadlockDetails[].
*/
visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
+ deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO));
/*
* TopoSort needs to consider at most MaxBackends wait-queue entries,
***************
*** 176,181 ****
--- 197,206 ----
* table to have a different masterLock, all locks that can block had
* better use the same LWLock, else this code will not be adequately
* interlocked!
+ *
+ * On failure, deadlock details are recorded in deadlockDetails[] for
+ * subsequent printing by DeadLockReport(). That activity is separate
+ * because we don't want to do it while holding the master lock.
*/
bool
DeadLockCheck(PGPROC *proc)
***************
*** 190,196 ****
--- 215,233 ----
/* Search for deadlocks and possible fixes */
if (DeadLockCheckRecurse(proc))
+ {
+ /*
+ * Call FindLockCycle one more time, to record the correct
+ * deadlockDetails[] for the basic state with no rearrangements.
+ */
+ int nSoftEdges;
+
+ nWaitOrders = 0;
+ if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
+ elog(FATAL, "DeadLockCheck: deadlock seems to have disappeared");
+
return true; /* cannot find a non-deadlocked state */
+ }
/* Apply any needed rearrangements of wait queues */
for (i = 0; i < nWaitOrders; i++)
***************
*** 357,365 ****
*
* Scan outward from the given proc to see if there is a cycle in the
* waits-for graph that includes this proc. Return TRUE if a cycle
! * is found, else FALSE. If a cycle is found, we also return a list of
* the "soft edges", if any, included in the cycle. These edges could
! * potentially be eliminated by rearranging wait queues.
*
* Since we need to be able to check hypothetical configurations that would
* exist after wait queue rearrangement, the routine pays attention to the
--- 394,405 ----
*
* Scan outward from the given proc to see if there is a cycle in the
* waits-for graph that includes this proc. Return TRUE if a cycle
! * is found, else FALSE. If a cycle is found, we return a list of
* the "soft edges", if any, included in the cycle. These edges could
! * potentially be eliminated by rearranging wait queues. We also fill
! * deadlockDetails[] with information about the detected cycle; this info
! * is not used by the deadlock algorithm itself, only to print a useful
! * message after failing.
*
* Since we need to be able to check hypothetical configurations that would
* exist after wait queue rearrangement, the routine pays attention to the
***************
*** 372,383 ****
int *nSoftEdges) /* output argument */
{
nVisitedProcs = 0;
*nSoftEdges = 0;
! return FindLockCycleRecurse(checkProc, softEdges, nSoftEdges);
}
static bool
FindLockCycleRecurse(PGPROC *checkProc,
EDGE *softEdges, /* output argument */
int *nSoftEdges) /* output argument */
{
--- 412,425 ----
int *nSoftEdges) /* output argument */
{
nVisitedProcs = 0;
+ nDeadlockDetails = 0;
*nSoftEdges = 0;
! return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
}
static bool
FindLockCycleRecurse(PGPROC *checkProc,
+ int depth,
EDGE *softEdges, /* output argument */
int *nSoftEdges) /* output argument */
{
***************
*** 402,408 ****
--- 444,459 ----
{
/* If we return to starting point, we have a deadlock cycle */
if (i == 0)
+ {
+ /*
+ * record total length of cycle --- outer levels will now
+ * fill deadlockDetails[]
+ */
+ Assert(depth <= MaxBackends);
+ nDeadlockDetails = depth;
+
return true;
+ }
/*
* Otherwise, we have a cycle but it does not include the
***************
*** 449,456 ****
((1 << lm) & conflictMask) != 0)
{
/* This proc hard-blocks checkProc */
! if (FindLockCycleRecurse(proc, softEdges, nSoftEdges))
return true;
/* If no deadlock, we're done looking at this holder */
break;
}
--- 500,517 ----
((1 << lm) & conflictMask) != 0)
{
/* This proc hard-blocks checkProc */
! if (FindLockCycleRecurse(proc, depth+1,
! softEdges, nSoftEdges))
! {
! /* fill deadlockDetails[] */
! DEADLOCK_INFO *info = &deadlockDetails[depth];
!
! info->locktag = lock->tag;
! info->lockmode = checkProc->waitLockMode;
! info->pid = checkProc->pid;
!
return true;
+ }
/* If no deadlock, we're done looking at this holder */
break;
}
***************
*** 496,503 ****
if (((1 << proc->waitLockMode) & conflictMask) != 0)
{
/* This proc soft-blocks checkProc */
! if (FindLockCycleRecurse(proc, softEdges, nSoftEdges))
{
/*
* Add this edge to the list of soft edges in the
* cycle
--- 557,572 ----
if (((1 << proc->waitLockMode) & conflictMask) != 0)
{
/* This proc soft-blocks checkProc */
! if (FindLockCycleRecurse(proc, depth+1,
! softEdges, nSoftEdges))
{
+ /* fill deadlockDetails[] */
+ DEADLOCK_INFO *info = &deadlockDetails[depth];
+
+ info->locktag = lock->tag;
+ info->lockmode = checkProc->waitLockMode;
+ info->pid = checkProc->pid;
+
/*
* Add this edge to the list of soft edges in the
* cycle
***************
*** 529,536 ****
if (((1 << proc->waitLockMode) & conflictMask) != 0)
{
/* This proc soft-blocks checkProc */
! if (FindLockCycleRecurse(proc, softEdges, nSoftEdges))
{
/*
* Add this edge to the list of soft edges in the
* cycle
--- 598,613 ----
if (((1 << proc->waitLockMode) & conflictMask) != 0)
{
/* This proc soft-blocks checkProc */
! if (FindLockCycleRecurse(proc, depth+1,
! softEdges, nSoftEdges))
{
+ /* fill deadlockDetails[] */
+ DEADLOCK_INFO *info = &deadlockDetails[depth];
+
+ info->locktag = lock->tag;
+ info->lockmode = checkProc->waitLockMode;
+ info->pid = checkProc->pid;
+
/*
* Add this edge to the list of soft edges in the
* cycle
***************
*** 758,760 ****
--- 835,901 ----
}
#endif
+
+ /*
+ * Report details about a detected deadlock.
+ */
+ void
+ DeadLockReport(void)
+ {
+ int i;
+
+ for (i = 0; i < nDeadlockDetails; i++)
+ {
+ DEADLOCK_INFO *info = &deadlockDetails[i];
+ int nextpid;
+
+ /* The last proc waits for the first one... */
+ if (i < nDeadlockDetails-1)
+ nextpid = info[1].pid;
+ else
+ nextpid = deadlockDetails[0].pid;
+
+ if (info->locktag.relId == XactLockTableId && info->locktag.dbId == 0)
+ {
+ /* Lock is for transaction ID */
+ elog(NOTICE, "Proc %d waits for %s on transaction %u; blocked by %d",
+ info->pid,
+ GetLockmodeName(info->lockmode),
+ info->locktag.objId.xid,
+ nextpid);
+ }
+ else
+ {
+ /* Lock is for a relation */
+ elog(NOTICE, "Proc %d waits for %s on relation %u database %u; blocked by %d",
+ info->pid,
+ GetLockmodeName(info->lockmode),
+ info->locktag.relId,
+ info->locktag.dbId,
+ nextpid);
+ }
+ }
+ }
+
+ /*
+ * RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep
+ * detects a trivial (two-way) deadlock. proc1 wants to block for lockmode
+ * on lock, but proc2 is already waiting and would be blocked by proc1.
+ */
+ void
+ RememberSimpleDeadLock(PGPROC *proc1,
+ LOCKMODE lockmode,
+ LOCK *lock,
+ PGPROC *proc2)
+ {
+ DEADLOCK_INFO *info = &deadlockDetails[0];
+
+ info->locktag = lock->tag;
+ info->lockmode = lockmode;
+ info->pid = proc1->pid;
+ info++;
+ info->locktag = proc2->waitLock->tag;
+ info->lockmode = proc2->waitLockMode;
+ info->pid = proc2->pid;
+ nDeadlockDetails = 2;
+ }
*** src/backend/storage/lmgr/lock.c.orig Thu Oct 31 19:40:23 2002
--- src/backend/storage/lmgr/lock.c Thu Jan 16 15:33:29 2003
***************
*** 905,910 ****
--- 905,917 ----
*/
LOCK_PRINT("WaitOnLock: aborting on lock", lock, lockmode);
LWLockRelease(lockMethodTable->masterLock);
+ /*
+ * Now that we aren't holding the LockMgrLock, print details about
+ * the detected deadlock. We didn't want to do this before because
+ * sending elog messages to the client while holding the shared lock
+ * is bad for concurrency.
+ */
+ DeadLockReport();
elog(ERROR, "deadlock detected");
/* not reached */
}
*** src/backend/storage/lmgr/proc.c.orig Thu Oct 31 16:34:16 2002
--- src/backend/storage/lmgr/proc.c Thu Jan 16 15:34:28 2003
***************
*** 566,573 ****
* up correctly is to call RemoveFromWaitQueue(), but
* we can't do that until we are *on* the wait queue.
* So, set a flag to check below, and break out of
! * loop.
*/
early_deadlock = true;
break;
}
--- 566,574 ----
* up correctly is to call RemoveFromWaitQueue(), but
* we can't do that until we are *on* the wait queue.
* So, set a flag to check below, and break out of
! * loop. Also, record deadlock info for later message.
*/
+ RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
early_deadlock = true;
break;
}
*** src/include/storage/lock.h.orig Wed Sep 4 17:31:14 2002
--- src/include/storage/lock.h Thu Jan 16 15:34:23 2003
***************
*** 243,248 ****
--- 243,253 ----
extern void RemoveFromWaitQueue(PGPROC *proc);
extern int LockShmemSize(int maxBackends);
extern bool DeadLockCheck(PGPROC *proc);
+ extern void DeadLockReport(void);
+ extern void RememberSimpleDeadLock(PGPROC *proc1,
+ LOCKMODE lockmode,
+ LOCK *lock,
+ PGPROC *proc2);
extern void InitDeadLockChecking(void);
extern LockData *GetLockStatusData(void);
extern const char *GetLockmodeName(LOCKMODE mode);
pgsql-general by date: