Re: futex results with dbt-3 - Mailing list pgsql-performance
From | Manfred Spraul |
---|---|
Subject | Re: futex results with dbt-3 |
Date | |
Msg-id | 417221B5.1050704@colorfullife.com Whole thread Raw |
In response to | futex results with dbt-3 (Mark Wong <markw@osdl.org>) |
Responses |
Re: futex results with dbt-3
Re: futex results with dbt-3 |
List | pgsql-performance |
Neil wrote: >. In any case, the "futex patch" >uses the Linux 2.6 futex API to implement PostgreSQL spinlocks. > Has anyone tried to replace the whole lwlock implementation with pthread_rwlock? At least for Linux with recent glibcs, pthread_rwlock is implemented with futexes, i.e. we would get fast lock handling without OS-specific hacks. Perhaps other OSes have user-space pthread locks, too. Attached is an old patch. I tested it on a uniprocessor system a year ago and it didn't make much difference, but perhaps the scalability is better. You'll have to add -lpthread to the library list for linking. Regarding Neil's patch: >! /* >! * XXX: is there a more efficient way to write this? Perhaps using >! * decl...? >! */ >! static __inline__ slock_t >! atomic_dec(volatile slock_t *ptr) >! { >! slock_t prev = -1; >! >! __asm__ __volatile__( >! " lock \n" >! " xadd %0,%1 \n" >! :"=q"(prev) >! :"m"(*ptr), "0"(prev) >! :"memory", "cc"); >! >! return prev; >! } > xadd is not supported by the original 80386 CPUs; it was added with the 80486. There is no instruction on the 80386 that can atomically decrement an integer and retrieve its old value. The only options are atomic_dec_test_zero or atomic_dec_test_negative - those can be implemented by looking at the sign/zero flag. Depending on what you want, this may be enough. Alternatively, make the futex code conditional on CPUs newer than the 80386. 
-- Manfred --- p7.3.3.orig/src/backend/storage/lmgr/lwlock.c 2002-09-25 22:31:40.000000000 +0200 +++ postgresql-7.3.3/src/backend/storage/lmgr/lwlock.c 2003-09-06 14:15:01.000000000 +0200 @@ -26,6 +26,28 @@ #include "storage/proc.h" #include "storage/spin.h" +#define USE_PTHREAD_LOCKS + +#ifdef USE_PTHREAD_LOCKS + +#include <pthread.h> +#include <errno.h> +typedef pthread_rwlock_t LWLock; + +inline static void +InitLWLock(LWLock *p) +{ + pthread_rwlockattr_t rwattr; + int i; + + pthread_rwlockattr_init(&rwattr); + pthread_rwlockattr_setpshared(&rwattr, PTHREAD_PROCESS_SHARED); + i=pthread_rwlock_init(p, &rwattr); + pthread_rwlockattr_destroy(&rwattr); + if (i) + elog(FATAL, "pthread_rwlock_init failed"); +} +#else typedef struct LWLock { @@ -38,6 +60,17 @@ /* tail is undefined when head is NULL */ } LWLock; +inline static void +InitLWLock(LWLock *lock) +{ + SpinLockInit(&lock->mutex); + lock->releaseOK = true; + lock->exclusive = 0; + lock->shared = 0; + lock->head = NULL; + lock->tail = NULL; +} +#endif /* * This points to the array of LWLocks in shared memory. Backends inherit * the pointer by fork from the postmaster. LWLockIds are indexes into @@ -61,7 +94,7 @@ static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS]; -#ifdef LOCK_DEBUG +#if defined(LOCK_DEBUG) && !defined(USE_PTHREAD_LOCKS) bool Trace_lwlocks = false; inline static void @@ -153,12 +186,7 @@ */ for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++) { - SpinLockInit(&lock->mutex); - lock->releaseOK = true; - lock->exclusive = 0; - lock->shared = 0; - lock->head = NULL; - lock->tail = NULL; + InitLWLock(lock); } /* @@ -185,7 +213,116 @@ return (LWLockId) (LWLockCounter[0]++); } +#ifdef USE_PTHREAD_LOCKS +/* + * LWLockAcquire - acquire a lightweight lock in the specified mode + * + * If the lock is not available, sleep until it is. + * + * Side effect: cancel/die interrupts are held off until lock release. 
+ */ +void +LWLockAcquire(LWLockId lockid, LWLockMode mode) +{ + int i; + PRINT_LWDEBUG("LWLockAcquire", lockid, &LWLockArray[lockid]); + + /* + * We can't wait if we haven't got a PGPROC. This should only occur + * during bootstrap or shared memory initialization. Put an Assert + * here to catch unsafe coding practices. + */ + Assert(!(proc == NULL && IsUnderPostmaster)); + + /* + * Lock out cancel/die interrupts until we exit the code section + * protected by the LWLock. This ensures that interrupts will not + * interfere with manipulations of data structures in shared memory. + */ + HOLD_INTERRUPTS(); + + if (mode == LW_EXCLUSIVE) { + i = pthread_rwlock_wrlock(&LWLockArray[lockid]); + } else { + i = pthread_rwlock_rdlock(&LWLockArray[lockid]); + } + if (i) + elog(FATAL, "Unexpected error from pthread_rwlock."); + + /* Add lock to list of locks held by this backend */ + Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS); + held_lwlocks[num_held_lwlocks++] = lockid; +} + +/* + * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode + * + * If the lock is not available, return FALSE with no side-effects. + * + * If successful, cancel/die interrupts are held off until lock release. 
+ */ +bool +LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode) +{ + int i; + PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, &LWLockArray[lockid]); + + HOLD_INTERRUPTS(); + + if (mode == LW_EXCLUSIVE) { + i = pthread_rwlock_trywrlock(&LWLockArray[lockid]); + } else { + i = pthread_rwlock_tryrdlock(&LWLockArray[lockid]); + } + switch(i) { + case 0: + /* Add lock to list of locks held by this backend */ + Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS); + held_lwlocks[num_held_lwlocks++] = lockid; + return true; + case EBUSY: + RESUME_INTERRUPTS(); + return false; + default: + elog(FATAL, "Unexpected error from pthread_rwlock_try."); + return false; + } +} + +/* + * LWLockRelease - release a previously acquired lock + */ +void +LWLockRelease(LWLockId lockid) +{ + int i; + + /* + * Remove lock from list of locks held. Usually, but not always, it + * will be the latest-acquired lock; so search array backwards. + */ + for (i = num_held_lwlocks; --i >= 0;) + { + if (lockid == held_lwlocks[i]) + break; + } + if (i < 0) + elog(ERROR, "LWLockRelease: lock %d is not held", (int) lockid); + num_held_lwlocks--; + for (; i < num_held_lwlocks; i++) + held_lwlocks[i] = held_lwlocks[i + 1]; + + i = pthread_rwlock_unlock(&LWLockArray[lockid]); + if (i) + elog(FATAL, "Unexpected error from pthread_rwlock_unlock."); + + /* + * Now okay to allow cancel/die interrupts. + */ + RESUME_INTERRUPTS(); +} +#else /* * LWLockAcquire - acquire a lightweight lock in the specified mode * @@ -499,6 +636,7 @@ RESUME_INTERRUPTS(); } +#endif /* * LWLockReleaseAll - release all currently-held locks
pgsql-performance by date: