Re: update i386 spinlock for hyperthreading - Mailing list pgsql-patches

From Manfred Spraul
Subject Re: update i386 spinlock for hyperthreading
Date
Msg-id 3FF0AA78.9050803@colorfullife.com
Whole thread Raw
In response to Re: update i386 spinlock for hyperthreading  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: update i386 spinlock for hyperthreading
List pgsql-patches
Tom Lane wrote:

>>Is there an easy way find out which LWLock is contended?
>>
>>
>
>Not from oprofile output, as far as I can think.  I've suspected for
>some time that the BufMgrLock is a major bottleneck, but have no proof.
>
>
Mark ran a DBT-2 test run with the attached statistics patch applied. It
collects stats about all lightweight locks and dumps them during
shutdown. The hottest locks are

Lock                 Acquire      %contention   sleep calls
8(WALInsertLock)     8679205      0.030410      263934
1(LockMgrLock)       64089418     0.079783      5113215
5(SInvalLock)        68396470     0.001298      88812
0(BufMgrLock)        246307425    0.120293      29629089

(%contention is sleep calls divided by acquires, given as a fraction.)

The lock numbers are from 7.4, i.e. without the patch that removes
ShmemIndexLock. I've checked that 8 is really WALInsertLock in the
assembly output.

The scary part from the system perspective is the 35 million context
switches that were generated by the BufMgrLock and the LockMgrLock. I
remember there were patches that tried other algorithms instead of the
simple LRU for the buffer manager. Has anyone tried to change the
locking of the buffer manager?

The effect of padding the lightweight locks to a full cacheline appears
to be negligible: With the padding, there were around 4 million
performance monitor hits on the 'lock xchg' instructions. Without it
(test run 300), there were 4.2 million hits.

The complete data is at

http://developer.osdl.org/markw/dbt2-pgsql/303/

The db log with the lock stats is at
http://developer.osdl.org/markw/dbt2-pgsql/303/db/log

(Warning: 6.9 MB)

--
    Manfred
Index: src/backend/storage/lmgr/lwlock.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/lmgr/lwlock.c,v
retrieving revision 1.19
diff -u -r1.19 lwlock.c
--- src/backend/storage/lmgr/lwlock.c    20 Dec 2003 17:31:21 -0000    1.19
+++ src/backend/storage/lmgr/lwlock.c    27 Dec 2003 22:51:36 -0000
@@ -36,6 +36,11 @@
     PGPROC       *head;            /* head of list of waiting PGPROCs */
     PGPROC       *tail;            /* tail of list of waiting PGPROCs */
     /* tail is undefined when head is NULL */
+    unsigned long long stat_acquire_total;
+    unsigned long long stat_acquire_fail;
+    unsigned long long stat_release_total;
+    unsigned long long stat_release_wakeup;
+    int        fill[20];
 } LWLock;

 /*
@@ -159,6 +164,10 @@
         lock->shared = 0;
         lock->head = NULL;
         lock->tail = NULL;
+        lock->stat_acquire_total = 0;
+        lock->stat_acquire_fail = 0;
+        lock->stat_release_total = 0;
+        lock->stat_release_wakeup = 0;
     }

     /*
@@ -245,6 +254,10 @@
         if (retry)
             lock->releaseOK = true;

+        lock->stat_acquire_total++;
+        if (retry)
+            lock->stat_acquire_fail++;
+
         /* If I can get the lock, do so quickly. */
         if (mode == LW_EXCLUSIVE)
         {
@@ -440,6 +453,7 @@
         Assert(lock->shared > 0);
         lock->shared--;
     }
+    lock->stat_release_total++;

     /*
      * See if I need to awaken any waiters.  If I released a non-last
@@ -477,6 +491,8 @@
         }
     }

+    if (head)
+        lock->stat_release_wakeup++;
     /* We are done updating shared state of the lock itself. */
     SpinLockRelease_NoHoldoff(&lock->mutex);

@@ -517,5 +533,19 @@
         HOLD_INTERRUPTS();        /* match the upcoming RESUME_INTERRUPTS */

         LWLockRelease(held_lwlocks[num_held_lwlocks - 1]);
+    }
+}
+
+void LWLockPrintStats(void);
+void
+LWLockPrintStats(void)
+{
+    int i;
+    for (i=0;i<LWLockCounter[0];i++) {
+        volatile LWLock *lock = LWLockArray + i;
+        elog(LOG, "Lock %d): acquire_total %Ld acquire_fail %Ld release_total %Ld release_wakeup %Ld\n",
+             i,
+             lock->stat_acquire_total, lock->stat_acquire_fail,
+             lock->stat_release_total, lock->stat_release_wakeup);
     }
 }
Index: src/backend/postmaster/postmaster.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/postmaster/postmaster.c,v
retrieving revision 1.353
diff -u -r1.353 postmaster.c
--- src/backend/postmaster/postmaster.c    25 Dec 2003 03:52:51 -0000    1.353
+++ src/backend/postmaster/postmaster.c    27 Dec 2003 22:51:38 -0000
@@ -1701,7 +1701,7 @@
     errno = save_errno;
 }

-
+void LWLockPrintStats(void);

 /*
  * pmdie -- signal handler for processing various postmaster signals.
@@ -1733,6 +1733,7 @@
             Shutdown = SmartShutdown;
             ereport(LOG,
                     (errmsg("received smart shutdown request")));
+            LWLockPrintStats();
             if (DLGetHead(BackendList)) /* let reaper() handle this */
                 break;

@@ -1766,6 +1767,7 @@
                 break;
             ereport(LOG,
                     (errmsg("received fast shutdown request")));
+            LWLockPrintStats();
             if (DLGetHead(BackendList)) /* let reaper() handle this */
             {
                 Shutdown = FastShutdown;
@@ -1812,6 +1814,7 @@
                 kill(BgWriterPID, SIGQUIT);
             ereport(LOG,
                     (errmsg("received immediate shutdown request")));
+            LWLockPrintStats();
             if (ShutdownPID > 0)
                 kill(ShutdownPID, SIGQUIT);
             if (StartupPID > 0)

pgsql-patches by date:

Previous
From: Tom Lane
Date:
Subject: Re: libpq endless loop if client_min_messages=debug1
Next
From: Bruce Momjian
Date:
Subject: Re: update i386 spinlock for hyperthreading