Date:      Tue, 2 Nov 2010 06:47:18 +0000 (UTC)
From:      David Xu <davidxu@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r214658 - user/davidxu/libthr/lib/libthr/thread
Message-ID:  <201011020647.oA26lIJA024650@svn.freebsd.org>

Author: davidxu
Date: Tue Nov  2 06:47:18 2010
New Revision: 214658
URL: http://svn.freebsd.org/changeset/base/214658

Log:
  Implement a userland sleep queue. The queue is used to eliminate extra
  context switches and system calls when condition variables are used. The
  problem is that pthread_cond_broadcast can cause a thundering herd in the
  thread scheduler, and because a thread calls pthread_cond_signal or
  pthread_cond_broadcast with the mutex locked, each resumed thread tries
  to lock the mutex and blocks again, which costs an extra context switch.
  The change moves sleeping threads from the condition variable's wait
  queue to the mutex's wait queue; a sleeping thread is woken only when
  the mutex is unlocked, so in the ideal case only one system call is
  needed.

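  For illustration only, here is a minimal sketch of the requeue idea
  using hypothetical types (this is not the committed code, which uses
  the _sleepq_* primitives in the diff below):

	struct waiter { struct waiter *next; };	/* a blocked thread */
	struct cv { struct waiter *head; };	/* CV wait queue */
	struct mtx { int locked; struct waiter *head; };	/* mutex + wait queue */

	/*
	 * Signal, conceptually: instead of waking a waiter while the
	 * mutex may still be held, move it to the mutex wait queue;
	 * the eventual mutex unlock performs the single wakeup.
	 */
	static void
	cv_signal_requeue(struct cv *cv, struct mtx *m)
	{
		struct waiter *w = cv->head;

		if (w == NULL)
			return;
		cv->head = w->next;	/* dequeue from the CV... */
		w->next = m->head;	/* ...requeue on the mutex: */
		m->head = w;		/* no wakeup, no system call */
	}
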
Modified:
  user/davidxu/libthr/lib/libthr/thread/Makefile.inc
  user/davidxu/libthr/lib/libthr/thread/thr_cond.c
  user/davidxu/libthr/lib/libthr/thread/thr_init.c
  user/davidxu/libthr/lib/libthr/thread/thr_kern.c
  user/davidxu/libthr/lib/libthr/thread/thr_list.c
  user/davidxu/libthr/lib/libthr/thread/thr_mutex.c
  user/davidxu/libthr/lib/libthr/thread/thr_private.h
  user/davidxu/libthr/lib/libthr/thread/thr_umtx.c
  user/davidxu/libthr/lib/libthr/thread/thr_umtx.h

Modified: user/davidxu/libthr/lib/libthr/thread/Makefile.inc
==============================================================================
--- user/davidxu/libthr/lib/libthr/thread/Makefile.inc	Tue Nov  2 06:13:21 2010	(r214657)
+++ user/davidxu/libthr/lib/libthr/thread/Makefile.inc	Tue Nov  2 06:47:18 2010	(r214658)
@@ -45,6 +45,7 @@ SRCS+= \
 	thr_setschedparam.c \
 	thr_sig.c \
 	thr_single_np.c \
+	thr_sleepq.c \
 	thr_spec.c \
 	thr_spinlock.c \
 	thr_stack.c \

Modified: user/davidxu/libthr/lib/libthr/thread/thr_cond.c
==============================================================================
--- user/davidxu/libthr/lib/libthr/thread/thr_cond.c	Tue Nov  2 06:13:21 2010	(r214657)
+++ user/davidxu/libthr/lib/libthr/thread/thr_cond.c	Tue Nov  2 06:47:18 2010	(r214658)
@@ -45,7 +45,10 @@ int	__pthread_cond_timedwait(pthread_con
 static int cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr);
 static int cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex,
 		    const struct timespec *abstime, int cancel);
-static int cond_signal_common(pthread_cond_t *cond, int broadcast);
+static int cond_signal_common(pthread_cond_t *cond);
+static int cond_broadcast_common(pthread_cond_t *cond);
+
+#define CV_PSHARED(cv)	(((cv)->c_kerncv.c_flags & USYNC_PROCESS_SHARED) != 0)
 
 /*
  * Double underscore versions are cancellation points.  Single underscore
@@ -74,13 +77,12 @@ cond_init(pthread_cond_t *cond, const pt
 		 * Initialise the condition variable structure:
 		 */
 		if (cond_attr == NULL || *cond_attr == NULL) {
-			pcond->c_pshared = 0;
 			pcond->c_clockid = CLOCK_REALTIME;
 		} else {
-			pcond->c_pshared = (*cond_attr)->c_pshared;
+			if ((*cond_attr)->c_pshared)
+				pcond->c_kerncv.c_flags |= USYNC_PROCESS_SHARED;
 			pcond->c_clockid = (*cond_attr)->c_clockid;
 		}
-		_thr_umutex_init(&pcond->c_lock);
 		*cond = pcond;
 	}
 	/* Return the completion status: */
@@ -128,7 +130,6 @@ _pthread_cond_init(pthread_cond_t *cond,
 int
 _pthread_cond_destroy(pthread_cond_t *cond)
 {
-	struct pthread		*curthread = _get_curthread();
 	struct pthread_cond	*cv;
 	int			rval = 0;
 
@@ -138,10 +139,10 @@ _pthread_cond_destroy(pthread_cond_t *co
 		rval = EINVAL;
 	else {
 		cv = *cond;
-		THR_UMUTEX_LOCK(curthread, &cv->c_lock);
+		if (cv->c_mutex != NULL)
+			return (EBUSY);
+		_thr_ucond_broadcast(&cv->c_kerncv);
 		*cond = THR_COND_DESTROYED;
-		THR_UMUTEX_UNLOCK(curthread, &cv->c_lock);
-
 		/*
 		 * Free the memory allocated for the condition
 		 * variable structure:
@@ -155,54 +156,36 @@ struct cond_cancel_info
 {
 	pthread_mutex_t	*mutex;
 	pthread_cond_t	*cond;
-	int		count;
+	int		recurse;
 };
 
 static void
 cond_cancel_handler(void *arg)
 {
-	struct pthread *curthread = _get_curthread();
 	struct cond_cancel_info *info = (struct cond_cancel_info *)arg;
-	pthread_cond_t  cv;
-
-	if (info->cond != NULL) {
-		cv = *(info->cond);
-		THR_UMUTEX_UNLOCK(curthread, &cv->c_lock);
-	}
-	_mutex_cv_lock(info->mutex, info->count);
+
+	_mutex_cv_lock(info->mutex, info->recurse, 1);
 }
 
 /*
- * Cancellation behaivor:
- *   Thread may be canceled at start, if thread is canceled, it means it
- *   did not get a wakeup from pthread_cond_signal(), otherwise, it is
- *   not canceled.
- *   Thread cancellation never cause wakeup from pthread_cond_signal()
- *   to be lost.
+ * Wait on kernel based condition variable.
  */
 static int
-cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex,
+cond_wait_kernel(pthread_cond_t *cond, pthread_mutex_t *mutex,
 	const struct timespec *abstime, int cancel)
 {
 	struct pthread	*curthread = _get_curthread();
 	struct timespec ts, ts2, *tsp;
+	struct pthread_mutex *m;
 	struct cond_cancel_info info;
 	pthread_cond_t  cv;
-	int		ret;
-
-	/*
-	 * If the condition variable is statically initialized,
-	 * perform the dynamic initialization:
-	 */
-	CHECK_AND_INIT_COND
+	int		error, error2;
 
 	cv = *cond;
-	THR_UMUTEX_LOCK(curthread, &cv->c_lock);
-	ret = _mutex_cv_unlock(mutex, &info.count);
-	if (__predict_false(ret != 0)) {
-		THR_UMUTEX_UNLOCK(curthread, &cv->c_lock);
-		return (ret);
-	}
+	m = *mutex;
+	error = _mutex_cv_detach(mutex, &info.recurse);
+	if (__predict_false(error != 0))
+		return (error);
 
 	info.mutex = mutex;
 	info.cond  = cond;
@@ -217,17 +200,129 @@ cond_wait_common(pthread_cond_t *cond, p
 	if (cancel) {
 		THR_CLEANUP_PUSH(curthread, cond_cancel_handler, &info);
 		_thr_cancel_enter2(curthread, 0);
-		ret = _thr_ucond_wait(&cv->c_kerncv, &cv->c_lock, tsp, 1);
+		error = _thr_ucond_wait(&cv->c_kerncv, &m->m_lock, tsp, 1);
 		info.cond = NULL;
-		_thr_cancel_leave(curthread, (ret != 0));
+		_thr_cancel_leave(curthread, (error != 0));
 		THR_CLEANUP_POP(curthread, 0);
 	} else {
-		ret = _thr_ucond_wait(&cv->c_kerncv, &cv->c_lock, tsp, 0);
+		error = _thr_ucond_wait(&cv->c_kerncv, &m->m_lock, tsp, 0);
+	}
+	if (error == EINTR)
+		error = 0;
+	error2 = _mutex_cv_lock(mutex, info.recurse, 1);
+	return (error ? error : error2);
+}
+
+/*
+ * Cancellation behavior:
+ *   A thread may be canceled at the start; if the thread is canceled,
+ *   it means it did not get a wakeup from pthread_cond_signal();
+ *   otherwise, it is not canceled.
+ *   Thread cancellation never causes a wakeup from pthread_cond_signal()
+ *   to be lost.
+ */
+static int
+cond_wait_queue(pthread_cond_t *cond, pthread_mutex_t *mutex,
+	const struct timespec *abstime, int cancel)
+{
+	struct pthread	*curthread = _get_curthread();
+	struct pthread_mutex *m;
+	struct sleepqueue *sq;
+	pthread_cond_t	cv;
+	int		recurse;
+	int		error;
+
+	cv = *cond;
+	/*
+	 * Enqueue the thread before unlocking the mutex, so we can avoid
+	 * taking the sleep-queue lock in pthread_cond_signal whenever possible.
+	 */
+	if ((error = _mutex_owned(curthread, mutex)) != 0)
+		return (error);
+	sq = _sleepq_lock(cv, CV);
+	if (cv->c_mutex != NULL && cv->c_mutex != mutex) {
+		_sleepq_unlock(sq);
+		return (EINVAL);
+	}
+	cv->c_mutex = mutex;
+	_sleepq_add(sq, curthread);
+	_thr_clear_wake(curthread);
+	_sleepq_unlock(sq);
+	(void)_mutex_cv_unlock(mutex, &recurse);
+	m = *mutex;
+	for (;;) {
+		if (cancel) {
+			_thr_cancel_enter2(curthread, 0);
+			error = _thr_sleep(curthread, abstime, cv->c_clockid);
+			_thr_cancel_leave(curthread, 0);
+		} else {
+			error = _thr_sleep(curthread, abstime, cv->c_clockid);
+		}
+		_thr_clear_wake(curthread);
+
+		sq = _sleepq_lock(cv, CV);
+		if (curthread->wchan == NULL) {
+			/*
+			 * We must have been woken by a mutex unlock,
+			 * which removed us from the mutex queue.
+			 */
+			_sleepq_unlock(sq);
+			error = 0;
+			break;
+		} else if (curthread->wchan == m) {
+			_sleepq_unlock(sq);
+			/*
+			 * We must have been woken by cond_signal, and
+			 * the mutex has no owner.
+			 */
+			sq = _sleepq_lock(m, MX);
+			if (curthread->wchan == m)
+				_sleepq_remove(sq, curthread);
+			_sleepq_unlock(sq);
+			error = 0;
+			break;
+		} else if (abstime != NULL && error == ETIMEDOUT) {
+			_sleepq_remove(sq, curthread);
+			if (_sleepq_empty(sq))
+				cv->c_mutex = NULL;
+			_sleepq_unlock(sq);
+			break;
+		} else if (SHOULD_CANCEL(curthread)) {
+			_sleepq_remove(sq, curthread);
+			if (_sleepq_empty(sq))
+				cv->c_mutex = NULL;
+			_sleepq_unlock(sq);
+			(void)_mutex_cv_lock(mutex, recurse, 0);
+			_pthread_exit(PTHREAD_CANCELED);
+		}
+		_sleepq_unlock(sq);
+	}
+	_mutex_cv_lock(mutex, recurse, 0);
+	return (error);
+}
+
+static int
+cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex,
+	const struct timespec *abstime, int cancel)
+{
+	pthread_cond_t	cv;
+	struct pthread_mutex	*m;
+
+	/*
+	 * If the condition variable is statically initialized,
+	 * perform the dynamic initialization:
+	 */
+	CHECK_AND_INIT_COND
+	if ((m = *mutex) == NULL || m < THR_MUTEX_DESTROYED)
+		return (EINVAL);
+	if (IS_SIMPLE_MUTEX(m)) {
+		if (!CV_PSHARED(cv))
+			return (cond_wait_queue(cond, mutex, abstime, cancel));
+		else
+			return (EINVAL);
+	} else {
+		return (cond_wait_kernel(cond, mutex, abstime, cancel));
 	}
-	if (ret == EINTR)
-		ret = 0;
-	_mutex_cv_lock(mutex, info.count);
-	return (ret);
 }
 
 int
@@ -269,11 +364,14 @@ __pthread_cond_timedwait(pthread_cond_t 
 }
 
 static int
-cond_signal_common(pthread_cond_t *cond, int broadcast)
+cond_signal_common(pthread_cond_t *cond)
 {
-	struct pthread	*curthread = _get_curthread();
-	pthread_cond_t	cv;
-	int		ret = 0;
+	pthread_mutex_t *mutex;
+	struct pthread_mutex *m;
+	struct pthread  *td;
+	struct pthread_cond *cv;
+	struct sleepqueue *cv_sq, *mx_sq;
+	unsigned	*waddr = NULL;
 
 	/*
 	 * If the condition variable is statically initialized, perform dynamic
@@ -281,25 +379,118 @@ cond_signal_common(pthread_cond_t *cond,
 	 */
 	CHECK_AND_INIT_COND
 
-	THR_UMUTEX_LOCK(curthread, &cv->c_lock);
-	if (!broadcast)
-		ret = _thr_ucond_signal(&cv->c_kerncv);
-	else
-		ret = _thr_ucond_broadcast(&cv->c_kerncv);
-	THR_UMUTEX_UNLOCK(curthread, &cv->c_lock);
-	return (ret);
+	_thr_ucond_signal(&cv->c_kerncv);
+
+	if (CV_PSHARED(cv))
+		return (0);
+
+	/* There is no waiter. */
+	if (cv->c_mutex == NULL)
+		return (0);
+
+	cv_sq = _sleepq_lock(cv, CV);
+	if (_sleepq_empty(cv_sq)) {
+		_sleepq_unlock(cv_sq);
+		return (0);
+	} 
+	/*
+	 * Check the mutex temporarily bound to the condition
+	 * variable; if it is owned, we can migrate the thread to
+	 * the mutex wait queue without waking it up.
+	 */
+	if ((mutex = cv->c_mutex) != NULL)
+		m = *mutex;
+	else {
+		_sleepq_unlock(cv_sq);
+		PANIC("mutex == NULL");
+	}
+
+	td = _sleepq_first(cv_sq);
+	if (m->m_owner == NULL)
+		waddr = WAKE_ADDR(td);
+	_sleepq_remove(cv_sq, td);
+	mx_sq = _sleepq_lock(m, MX);
+	_sleepq_add(mx_sq, td);
+	_mutex_set_contested(m);
+	_sleepq_unlock(mx_sq);
+	if (_sleepq_empty(cv_sq))
+		cv->c_mutex = NULL;
+	_sleepq_unlock(cv_sq);
+	if (waddr != NULL) {
+		_thr_set_wake(waddr);
+		_thr_umtx_wake(waddr, INT_MAX, 0);
+	}
+	return (0);
+}
+
+static int
+cond_broadcast_common(pthread_cond_t *cond)
+{
+	pthread_mutex_t *mutex;
+	struct pthread_mutex *m;
+	struct pthread  *td;
+	struct pthread_cond *cv;
+	struct sleepqueue *cv_sq, *mx_sq;
+	unsigned	*waddr = NULL;
+
+	/*
+	 * If the condition variable is statically initialized, perform dynamic
+	 * initialization.
+	 */
+	CHECK_AND_INIT_COND
+
+	_thr_ucond_broadcast(&cv->c_kerncv);
+
+	if (CV_PSHARED(cv))
+		return (0);
+
+	/* There is no waiter. */
+	if (cv->c_mutex == NULL)
+		return (0);
+
+	cv_sq = _sleepq_lock(cv, CV);
+	if (_sleepq_empty(cv_sq)) {
+		_sleepq_unlock(cv_sq);
+		return (0);
+	} 
+	/*
+	 * Check the mutex temporarily bound to the condition
+	 * variable; if it is owned, we can migrate the threads to
+	 * the mutex wait queue without waking them up.
+	 */
+	if ((mutex = cv->c_mutex) != NULL)
+		m = *mutex;
+	else {
+		_sleepq_unlock(cv_sq);
+		PANIC("mutex == NULL");
+	}
+
+	td = _sleepq_first(cv_sq);
+	if (m->m_owner == NULL)
+		waddr = WAKE_ADDR(td);
+	mx_sq = _sleepq_lock(m, MX);
+	_sleepq_concat(mx_sq, cv_sq);
+	_mutex_set_contested(m);
+	_sleepq_unlock(mx_sq);
+	cv->c_mutex = NULL;
+	_sleepq_unlock(cv_sq);
+	if (waddr != NULL) {
+		_thr_set_wake(waddr);
+		_thr_umtx_wake(waddr, INT_MAX, 0);
+	}
+	return (0);
 }
 
 int
 _pthread_cond_signal(pthread_cond_t * cond)
 {
 
-	return (cond_signal_common(cond, 0));
+	return (cond_signal_common(cond));
 }
 
 int
 _pthread_cond_broadcast(pthread_cond_t * cond)
 {
 
-	return (cond_signal_common(cond, 1));
+	return (cond_broadcast_common(cond));
 }

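For context, the pattern this file optimizes is the standard POSIX wait
loop below (a usage sketch, not part of the diff). The signaler usually
holds the mutex, so without requeueing every woken waiter immediately
blocks on it again:

	#include <pthread.h>

	static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
	static int ready;

	void
	produce(void)
	{
		pthread_mutex_lock(&m);
		ready = 1;
		/* With this change, waiters migrate to m's sleep queue here. */
		pthread_cond_broadcast(&cv);
		pthread_mutex_unlock(&m);	/* One waiter is woken here. */
	}

	void
	consume(void)
	{
		pthread_mutex_lock(&m);
		while (!ready)
			pthread_cond_wait(&cv, &m);
		pthread_mutex_unlock(&m);
	}
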
Modified: user/davidxu/libthr/lib/libthr/thread/thr_init.c
==============================================================================
--- user/davidxu/libthr/lib/libthr/thread/thr_init.c	Tue Nov  2 06:13:21 2010	(r214657)
+++ user/davidxu/libthr/lib/libthr/thread/thr_init.c	Tue Nov  2 06:47:18 2010	(r214658)
@@ -444,6 +444,7 @@ init_private(void)
 	_thr_once_init();
 	_thr_spinlock_init();
 	_thr_list_init();
+	_sleepq_init();
 
 	/*
 	 * Avoid reinitializing some things if they don't need to be,

Modified: user/davidxu/libthr/lib/libthr/thread/thr_kern.c
==============================================================================
--- user/davidxu/libthr/lib/libthr/thread/thr_kern.c	Tue Nov  2 06:13:21 2010	(r214657)
+++ user/davidxu/libthr/lib/libthr/thread/thr_kern.c	Tue Nov  2 06:47:18 2010	(r214658)
@@ -31,6 +31,7 @@
 #include <sys/signalvar.h>
 #include <sys/rtprio.h>
 #include <pthread.h>
+#include <sys/mman.h>
 
 #include "thr_private.h"
 
@@ -41,6 +42,53 @@
 #define DBG_MSG(x...)
 #endif
 
+static struct umutex addr_lock;
+static struct wake_addr *wake_addr_head;
+static struct wake_addr default_wake_addr;
+
+struct wake_addr *
+_thr_alloc_wake_addr(void)
+{
+	struct pthread *curthread;
+	struct wake_addr *p;
+
+	if (_thr_initial == NULL) {
+		return &default_wake_addr;
+	}
+
+	curthread = _get_curthread();
+
+	THR_UMUTEX_LOCK(curthread, &addr_lock);
+	if (wake_addr_head == NULL) {
+		unsigned i;
+		unsigned pagesize = getpagesize();
+		struct wake_addr *pp = (struct wake_addr *)mmap(NULL, pagesize, PROT_READ|PROT_WRITE, MAP_ANON, -1, 0);
+		for (i = 1; i < pagesize/sizeof(struct wake_addr); ++i)
+			pp[i].link = &pp[i+1];
+		pp[i-1].link = NULL;	
+		wake_addr_head = &pp[1];
+		p = &pp[0];
+	} else {
+		p = wake_addr_head;
+		wake_addr_head = p->link;
+	}
+	THR_UMUTEX_UNLOCK(curthread, &addr_lock);
+	return (p);
+}
+
+void
+_thr_release_wake_addr(struct wake_addr *wa)
+{
+	struct pthread *curthread = _get_curthread();
+
+	if (wa == &default_wake_addr)
+		return;
+	THR_UMUTEX_LOCK(curthread, &addr_lock);
+	wa->link = wake_addr_head;
+	wake_addr_head = wa;
+	THR_UMUTEX_UNLOCK(curthread, &addr_lock);
+}
+
 /*
  * This is called when the first thread (other than the initial
  * thread) is created.
@@ -130,3 +178,29 @@ _thr_setscheduler(lwpid_t lwpid, int pol
 	_schedparam_to_rtp(policy, param, &rtp);
 	return (rtprio_thread(RTP_SET, lwpid, &rtp));
 }
+
+/* Sleep on thread wakeup address */
+int
+_thr_sleep(struct pthread *curthread, const struct timespec *abstime, int clockid)
+{
+	struct timespec *tsp, ts, ts2;
+	int error;
+
+	if (abstime != NULL) {
+		if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
+		    abstime->tv_nsec >= 1000000000) {
+			return (EINVAL);
+		}
+		clock_gettime(clockid, &ts);
+		TIMESPEC_SUB(&ts2, abstime, &ts);
+		if (ts2.tv_sec < 0 || ts2.tv_nsec <= 0)
+			return (ETIMEDOUT);
+		tsp = &ts2;
+	} else {
+		tsp = NULL;
+	}
+
+	error = _thr_umtx_wait_uint(&curthread->wake_addr->value,
+		 0, tsp, 0);
+	return (error);
+}

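The wake-address handshake added above pairs up as follows (a sketch
using this commit's primitives, assuming _thr_clear_wake() resets
curthread->wake_addr->value and _thr_set_wake() sets it nonzero):

	/* Sleeper: arm the address, then block until it is flipped. */
	_thr_clear_wake(curthread);
	error = _thr_sleep(curthread, abstime, CLOCK_REALTIME);

	/* Waker: flip the flag, then wake any thread sleeping on it. */
	_thr_set_wake(waddr);
	_thr_umtx_wake(waddr, INT_MAX, 0);
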
Modified: user/davidxu/libthr/lib/libthr/thread/thr_list.c
==============================================================================
--- user/davidxu/libthr/lib/libthr/thread/thr_list.c	Tue Nov  2 06:13:21 2010	(r214657)
+++ user/davidxu/libthr/lib/libthr/thread/thr_list.c	Tue Nov  2 06:47:18 2010	(r214658)
@@ -165,6 +165,8 @@ _thr_alloc(struct pthread *curthread)
 	if (tcb != NULL) {
 		memset(thread, 0, sizeof(*thread));
 		thread->tcb = tcb;
+		thread->wake_addr = _thr_alloc_wake_addr();
+		thread->sleepqueue = _sleepq_alloc();
 	} else {
 		thr_destroy(curthread, thread);
 		atomic_fetchadd_int(&total_threads, -1);
@@ -193,6 +195,8 @@ _thr_free(struct pthread *curthread, str
 	thread->tcb = NULL;
 	if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) {
+		_thr_release_wake_addr(thread->wake_addr);
+		_sleepq_free(thread->sleepqueue);
 		thr_destroy(curthread, thread);
 		atomic_fetchadd_int(&total_threads, -1);
 	} else {
 		/*

Modified: user/davidxu/libthr/lib/libthr/thread/thr_mutex.c
==============================================================================
--- user/davidxu/libthr/lib/libthr/thread/thr_mutex.c	Tue Nov  2 06:13:21 2010	(r214657)
+++ user/davidxu/libthr/lib/libthr/thread/thr_mutex.c	Tue Nov  2 06:47:18 2010	(r214658)
@@ -45,6 +45,10 @@
 
 #include "thr_private.h"
 
+#ifndef UMUTEX_TIDMASK
+#define UMUTEX_TIDMASK	(~UMUTEX_CONTESTED)
+#endif
+
 #if defined(_PTHREADS_INVARIANTS)
 #define MUTEX_INIT_LINK(m) 		do {		\
 	(m)->m_qe.tqe_prev = NULL;			\
@@ -55,8 +59,8 @@
 		PANIC("mutex is not on list");		\
 } while (0)
 #define MUTEX_ASSERT_NOT_OWNED(m)	do {		\
-	if (__predict_false((m)->m_qe.tqe_prev != NULL ||	\
-	    (m)->m_qe.tqe_next != NULL))	\
+	if (__predict_false((m)->m_qe.tqe_prev != NULL ||\
+	    (m)->m_qe.tqe_next != NULL))		\
 		PANIC("mutex is on list");		\
 } while (0)
 #else
@@ -93,8 +97,6 @@ static int	mutex_self_trylock(pthread_mu
 static int	mutex_self_lock(pthread_mutex_t,
 				const struct timespec *abstime);
 static int	mutex_unlock_common(pthread_mutex_t *);
-static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
-				const struct timespec *);
 
 __weak_reference(__pthread_mutex_init, pthread_mutex_init);
 __strong_reference(__pthread_mutex_init, _pthread_mutex_init);
@@ -147,7 +149,7 @@ mutex_init(pthread_mutex_t *mutex,
 
 	pmutex->m_type = attr->m_type;
 	pmutex->m_owner = NULL;
-	pmutex->m_count = 0;
+	pmutex->m_recurse = 0;
 	pmutex->m_refcount = 0;
 	pmutex->m_spinloops = 0;
 	pmutex->m_yieldloops = 0;
@@ -173,7 +175,6 @@ mutex_init(pthread_mutex_t *mutex,
 		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
 		pmutex->m_yieldloops = _thr_yieldloops;
 	}
-
 	*mutex = pmutex;
 	return (0);
 }
@@ -181,19 +182,19 @@ mutex_init(pthread_mutex_t *mutex,
 static int
 init_static(struct pthread *thread, pthread_mutex_t *mutex)
 {
-	int ret;
+	int error;
 
 	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
 
 	if (*mutex == THR_MUTEX_INITIALIZER)
-		ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc);
+		error = mutex_init(mutex, &_pthread_mutexattr_default, calloc);
 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
-		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default, calloc);
+		error = mutex_init(mutex, &_pthread_mutexattr_adaptive_default, calloc);
 	else
-		ret = 0;
+		error = 0;
 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
 
-	return (ret);
+	return (error);
 }
 
 static void
@@ -225,12 +226,12 @@ _pthread_mutex_init_calloc_cb(pthread_mu
 		.m_protocol = PTHREAD_PRIO_NONE,
 		.m_ceiling = 0
 	};
-	int ret;
+	int error;
 
-	ret = mutex_init(mutex, &attr, calloc_cb);
-	if (ret == 0)
+	error = mutex_init(mutex, &attr, calloc_cb);
+	if (error == 0)
 		(*mutex)->m_private = 1;
-	return (ret);
+	return (error);
 }
 
 void
@@ -247,9 +248,9 @@ _mutex_fork(struct pthread *curthread)
 	 * process shared mutex is not supported, so I
 	 * am not worried.
 	 */
-
-	TAILQ_FOREACH(m, &curthread->mutexq, m_qe)
+	TAILQ_FOREACH(m, &curthread->mutexq, m_qe) {
 		m->m_lock.m_owner = TID(curthread);
+	}
 	TAILQ_FOREACH(m, &curthread->pp_mutexq, m_qe)
 		m->m_lock.m_owner = TID(curthread) | UMUTEX_CONTESTED;
 }
@@ -284,20 +285,32 @@ _pthread_mutex_destroy(pthread_mutex_t *
 		(m)->m_owner = curthread;				\
 		/* Add to the list of owned mutexes: */			\
 		MUTEX_ASSERT_NOT_OWNED((m));				\
-		if (((m)->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)	\
+		if (__predict_true(((m)->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0))	\
 			TAILQ_INSERT_TAIL(&curthread->mutexq, (m), m_qe);\
 		else							\
 			TAILQ_INSERT_TAIL(&curthread->pp_mutexq, (m), m_qe);\
 	} while (0)
 
+#define DEQUEUE_MUTEX(curthread, m)					\
+	(m)->m_owner = NULL;						\
+	/* Remove the mutex from the threads queue. */			\
+	MUTEX_ASSERT_IS_OWNED(m);					\
+	if (__predict_true(((m)->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0))	\
+		TAILQ_REMOVE(&curthread->mutexq, (m), m_qe);		\
+	else {								\
+		TAILQ_REMOVE(&curthread->pp_mutexq, (m), m_qe);		\
+		set_inherited_priority(curthread, (m));			\
+	}								\
+	MUTEX_INIT_LINK(m);
+
 #define CHECK_AND_INIT_MUTEX						\
 	if (__predict_false((m = *mutex) <= THR_MUTEX_DESTROYED)) {	\
 		if (m == THR_MUTEX_DESTROYED)				\
 			return (EINVAL);				\
-		int ret;						\
-		ret = init_static(_get_curthread(), mutex);		\
-		if (ret)						\
-			return (ret);					\
+		int error;						\
+		error = init_static(_get_curthread(), mutex);		\
+		if (error)						\
+			return (error);					\
 		m = *mutex;						\
 	}
 
@@ -307,20 +320,20 @@ mutex_trylock_common(pthread_mutex_t *mu
 	struct pthread *curthread = _get_curthread();
 	struct pthread_mutex *m = *mutex;
 	uint32_t id;
-	int ret;
-
+	int error;
+
 	id = TID(curthread);
 	if (m->m_private)
 		THR_CRITICAL_ENTER(curthread);
-	ret = _thr_umutex_trylock(&m->m_lock, id);
-	if (__predict_true(ret == 0)) {
+	error = _thr_umutex_trylock(&m->m_lock, id);
+	if (__predict_true(error == 0)) {
 		ENQUEUE_MUTEX(curthread, m);
 	} else if (m->m_owner == curthread) {
-		ret = mutex_self_trylock(m);
+		error = mutex_self_trylock(m);
 	} /* else {} */
-	if (ret && m->m_private)
+	if (error != 0 && m->m_private)
 		THR_CRITICAL_LEAVE(curthread);
-	return (ret);
+	return (error);
 }
 
 int
@@ -333,92 +346,149 @@ __pthread_mutex_trylock(pthread_mutex_t 
 	return (mutex_trylock_common(mutex));
 }
 
+/* Lock user-mode queue based mutex. */
 static int
-mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
+mutex_lock_queued(struct pthread_mutex *m,
 	const struct timespec *abstime)
 {
-	uint32_t	id, owner;
-	int	count;
-	int	ret;
-
-	if (m->m_owner == curthread)
-		return mutex_self_lock(m, abstime);
+	struct pthread *curthread  = _get_curthread();
+	struct sleepqueue *sq;
+	uint32_t owner, tid;
+	int error = 0;
+	int spin;
+
+	spin = m->m_spinloops;
+	tid = TID(curthread);
+	for (;;) {
+		if (!_thr_is_smp)
+			goto sleep;
 
-	id = TID(curthread);
-	/*
-	 * For adaptive mutexes, spin for a bit in the expectation
-	 * that if the application requests this mutex type then
-	 * the lock is likely to be released quickly and it is
-	 * faster than entering the kernel
-	 */
-	if (__predict_false(
-		(m->m_lock.m_flags & 
-		 (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) != 0))
-			goto sleep_in_kernel;
-
-	if (!_thr_is_smp)
-		goto yield_loop;
-
-	count = m->m_spinloops;
-	while (count--) {
-		owner = m->m_lock.m_owner;
-		if ((owner & ~UMUTEX_CONTESTED) == 0) {
-			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) {
-				ret = 0;
-				goto done;
+		while (spin-- > 0) {
+			/*
+			 * For adaptive mutexes, spin for a bit in the expectation
+			 * that if the application requests this mutex type then
+			 * the lock is likely to be released quickly and it is
+			 * faster than entering the kernel
+			 */
+			owner = m->m_lockword;
+			if ((owner & UMUTEX_TIDMASK) == 0) {
+				if (atomic_cmpset_acq_32(&m->m_lockword, owner, owner|tid)) {
+					ENQUEUE_MUTEX(curthread, m);
+					error = 0;
+					goto out;
+				}
 			}
+			CPU_SPINWAIT;
 		}
-		CPU_SPINWAIT;
-	}
+sleep:
+		_thr_clear_wake(curthread);
 
-yield_loop:
-	count = m->m_yieldloops;
-	while (count--) {
-		_sched_yield();
-		owner = m->m_lock.m_owner;
-		if ((owner & ~UMUTEX_CONTESTED) == 0) {
-			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) {
-				ret = 0;
-				goto done;
+		sq = _sleepq_lock(m, MX);
+		if (curthread->wchan == NULL)
+			_sleepq_add(sq, curthread);
+		_sleepq_unlock(sq);
+		owner = m->m_lockword;
+		/* Set contested bit. */
+		while ((owner & UMUTEX_TIDMASK) != 0 && (owner & UMUTEX_CONTESTED) == 0) {
+			if (atomic_cmpset_acq_32(&m->m_lockword,
+			    owner, owner|UMUTEX_CONTESTED))
+				break;
+			owner = m->m_lockword;
+		}
+		if ((owner & UMUTEX_TIDMASK) != 0) {
+			error = _thr_sleep(curthread, abstime, CLOCK_REALTIME);
+			if (error != EINTR) {
+				if (curthread->wchan != NULL) {
+					sq = _sleepq_lock(m, MX);
+					if (curthread->wchan != NULL)
+						_sleepq_remove(sq, curthread);
+					_sleepq_unlock(sq);
+				}
+			} else
+				error = 0;
+			owner = m->m_lockword;
+		}
+		if ((owner & UMUTEX_TIDMASK) == 0) {
+			if (atomic_cmpset_acq_32(&m->m_lockword, owner, owner|tid)) {
+				ENQUEUE_MUTEX(curthread, m);
+				error = 0;
+				break;
 			}
 		}
+		if (error != 0)
+			break;
+		spin = m->m_spinloops;
 	}
+out:
+	if (curthread->wchan != NULL) {
+		sq = _sleepq_lock(m, MX);
+		if (curthread->wchan != NULL)
+			_sleepq_remove(sq, curthread);
+		_sleepq_unlock(sq);
+	}
+	return (error);
+}
 
-sleep_in_kernel:
+/* Enter kernel and lock mutex */
+static int
+mutex_lock_kernel(struct pthread_mutex *m,
+	const struct timespec *abstime)
+{
+	struct pthread	*curthread = _get_curthread();
+	uint32_t	id;
+	int		error;
+
+	id = TID(curthread);
 	if (abstime == NULL) {
-		ret = __thr_umutex_lock(&m->m_lock, id);
-	} else if (__predict_false(
-		   abstime->tv_nsec < 0 ||
-		   abstime->tv_nsec >= 1000000000)) {
-		ret = EINVAL;
+		error = __thr_umutex_lock(&m->m_lock, id);
 	} else {
-		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
+		error = __thr_umutex_timedlock(&m->m_lock, id, abstime);
 	}
-done:
-	if (ret == 0)
+	if (error == 0)
 		ENQUEUE_MUTEX(curthread, m);
-
-	return (ret);
+	return (error);
 }
 
 static inline int
-mutex_lock_common(struct pthread_mutex *m,
-	const struct timespec *abstime)
+_mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime)
 {
-	struct pthread *curthread  = _get_curthread();
-	int ret;
+	struct pthread	*curthread  = _get_curthread();
+	int	error;
 
-	if (m->m_private)
-		THR_CRITICAL_ENTER(curthread);
 	if (_thr_umutex_trylock2(&m->m_lock, TID(curthread)) == 0) {
 		ENQUEUE_MUTEX(curthread, m);
-		ret = 0;
+		error = 0;
+	} else if (m->m_owner == curthread) {
+		error = mutex_self_lock(m, abstime);
 	} else {
-		ret = mutex_lock_sleep(curthread, m, abstime);
+	 	if (__predict_false(abstime != NULL &&
+		   (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
+		    abstime->tv_nsec >= 1000000000)))
+			error = EINVAL;
+		else if (__predict_true(IS_SIMPLE_MUTEX(m)))
+			error = mutex_lock_queued(m, abstime);
+		else
+			error = mutex_lock_kernel(m, abstime);
 	}
-	if (ret && m->m_private)
-		THR_CRITICAL_LEAVE(curthread);
-	return (ret);
+	return (error);
+}
+
+static inline int
+mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime)
+{
+	int	error;
+
+	if (!m->m_private) {
+		error = _mutex_lock_common(m, abstime);
+	} else {
+		struct pthread *curthread = _get_curthread();
+
+		THR_CRITICAL_ENTER(curthread);
+		error = _mutex_lock_common(m, abstime);
+		if (error != 0)
+			THR_CRITICAL_LEAVE(curthread);
+	}
+	return (error);
 }
 
 int
@@ -451,54 +521,39 @@ _pthread_mutex_unlock(pthread_mutex_t *m
 	return (mutex_unlock_common(m));
 }
 
-int
-_mutex_cv_lock(pthread_mutex_t *mutex, int count)
-{
-	struct pthread_mutex	*m;
-	int	ret;
-
-	m = *mutex;
-	ret = mutex_lock_common(m, NULL);
-	if (ret == 0) {
-		m->m_refcount--;
-		m->m_count += count;
-	}
-	return (ret);
-}
-
 static int
 mutex_self_trylock(struct pthread_mutex *m)
 {
-	int	ret;
+	int	error;
 
 	switch (m->m_type) {
 	case PTHREAD_MUTEX_ERRORCHECK:
 	case PTHREAD_MUTEX_NORMAL:
-		ret = EBUSY; 
+		error = EBUSY; 
 		break;
 
 	case PTHREAD_MUTEX_RECURSIVE:
 		/* Increment the lock count: */
-		if (m->m_count + 1 > 0) {
-			m->m_count++;
-			ret = 0;
+		if (m->m_recurse + 1 > 0) {
+			m->m_recurse++;
+			error = 0;
 		} else
-			ret = EAGAIN;
+			error = EAGAIN;
 		break;
 
 	default:
 		/* Trap invalid mutex types; */
-		ret = EINVAL;
+		error = EINVAL;
 	}
 
-	return (ret);
+	return (error);
 }
 
 static int
 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
 {
 	struct timespec	ts1, ts2;
-	int	ret;
+	int	error;
 
 	switch (m->m_type) {
 	case PTHREAD_MUTEX_ERRORCHECK:
@@ -506,19 +561,19 @@ mutex_self_lock(struct pthread_mutex *m,
 		if (abstime) {
 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
 			    abstime->tv_nsec >= 1000000000) {
-				ret = EINVAL;
+				error = EINVAL;
 			} else {
 				clock_gettime(CLOCK_REALTIME, &ts1);
 				TIMESPEC_SUB(&ts2, abstime, &ts1);
 				__sys_nanosleep(&ts2, NULL);
-				ret = ETIMEDOUT;
+				error = ETIMEDOUT;
 			}
 		} else {
 			/*
 			 * POSIX specifies that mutexes should return
 			 * EDEADLK if a recursive lock is detected.
 			 */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


