Date:      Tue, 22 May 2018 07:20:23 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r334024 - in head/sys: kern sys
Message-ID:  <201805220720.w4M7KNMD017041@repo.freebsd.org>

Author: mjg
Date: Tue May 22 07:20:22 2018
New Revision: 334024
URL: https://svnweb.freebsd.org/changeset/base/334024

Log:
  sx: port over writer starvation prevention measures from rwlock
  
  A constant stream of readers could completely starve writers, and this is
  not a hypothetical scenario.
  
  The 'poll2_threads' test from the will-it-scale suite reliably starves
  writers even with fewer than 10 concurrent threads.
  
  The problem was encountered and diagnosed by dillon@backplane.com.
  
  There was next to no change in the lock contention profile during a -j 128
  pkg build, despite an sx lock sitting at the top of that profile.
  
  Tested by:	pho
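
For context, the failure mode the log describes can be reproduced with a
naive reader/writer lock that has no way to signal arriving readers that a
writer is waiting.  The program below is a userland analogue using pthreads
and C11 atomics, not the sx(9) code itself; thread and iteration counts are
arbitrary.  With enough overlapping readers the count rarely drops to zero,
so the writer's compare-and-swap keeps failing.

/*
 * Userland sketch of reader-induced writer starvation (illustration only,
 * not sx(9)): the lock word has no "writer waiting" bit, so readers keep
 * piling on and the writer can only acquire when the count happens to hit
 * zero.  Build with e.g. cc -O2 -pthread.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define	NREADERS	8
#define	ITERS		1000000L

static atomic_long lockword;	/* >= 0: number of readers, -1: writer */

static void *
reader(void *arg)
{
	long i, x;

	for (i = 0; i < ITERS; i++) {
		/* "slock": bump the count unless a writer holds the lock. */
		do {
			x = atomic_load(&lockword);
		} while (x < 0 ||
		    !atomic_compare_exchange_weak(&lockword, &x, x + 1));
		/* "sunlock" */
		atomic_fetch_sub(&lockword, 1);
	}
	return (NULL);
}

static void *
writer(void *arg)
{
	unsigned long spins;
	long expected;

	/* "xlock": only possible when the reader count drops to zero. */
	for (spins = 0;; spins++) {
		expected = 0;
		if (atomic_compare_exchange_weak(&lockword, &expected, -1))
			break;
	}
	atomic_store(&lockword, 0);
	printf("writer needed %lu failed attempts\n", spins);
	return (NULL);
}

int
main(void)
{
	pthread_t rd[NREADERS], wr;
	int i;

	for (i = 0; i < NREADERS; i++)
		pthread_create(&rd[i], NULL, reader, NULL);
	pthread_create(&wr, NULL, writer, NULL);
	for (i = 0; i < NREADERS; i++)
		pthread_join(rd[i], NULL);
	pthread_join(wr, NULL);
	return (0);
}

The committed change takes the opposite approach: once a writer is queued
or spinning for the lock, new shared requests are refused (see the
__sx_can_read() check in the kern_sx.c diff below), so the sharer count is
able to drain.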

Modified:
  head/sys/kern/kern_sx.c
  head/sys/kern/subr_trap.c
  head/sys/sys/proc.h
  head/sys/sys/sx.h

Modified: head/sys/kern/kern_sx.c
==============================================================================
--- head/sys/kern/kern_sx.c	Tue May 22 07:16:39 2018	(r334023)
+++ head/sys/kern/kern_sx.c	Tue May 22 07:20:22 2018	(r334024)
@@ -292,6 +292,7 @@ sx_try_slock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire,
 			    sx, 0, 0, file, line, LOCKSTAT_READER);
 			TD_LOCKS_INC(curthread);
+			curthread->td_sx_slocks++;
 			return (1);
 		}
 	}
@@ -441,7 +442,7 @@ sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DE
 	for (;;) {
 		if (SX_SHARERS(x) > 1)
 			break;
-		waiters = (x & SX_LOCK_EXCLUSIVE_WAITERS);
+		waiters = (x & SX_LOCK_WAITERS);
 		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x,
 		    (uintptr_t)curthread | waiters)) {
 			success = 1;
@@ -450,6 +451,7 @@ sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DE
 	}
 	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
 	if (success) {
+		curthread->td_sx_slocks--;
 		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 		LOCKSTAT_RECORD0(sx__upgrade, sx);
@@ -526,6 +528,7 @@ sx_downgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 		kick_proc0();
 
 out:
+	curthread->td_sx_slocks++;
 	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
 	LOCKSTAT_RECORD0(sx__downgrade, sx);
 }
@@ -537,6 +540,23 @@ sx_downgrade_(struct sx *sx, const char *file, int lin
 	sx_downgrade_int(sx LOCK_FILE_LINE_ARG);
 }
 
+#ifdef	ADAPTIVE_SX
+static inline void
+sx_drop_critical(uintptr_t x, bool *in_critical, int *extra_work)
+{
+
+	if (x & SX_LOCK_WRITE_SPINNER)
+		return;
+	if (*in_critical) {
+		critical_exit();
+		*in_critical = false;
+		(*extra_work)--;
+	}
+}
+#else
+#define sx_drop_critical(x, in_critical, extra_work) do { } while(0)
+#endif
+
 /*
  * This function represents the so-called 'hard case' for sx_xlock
  * operation.  All 'easy case' failures are redirected to this.  Note
@@ -547,12 +567,13 @@ int
 _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
 {
 	GIANT_DECLARE;
-	uintptr_t tid;
+	uintptr_t tid, setx;
 #ifdef ADAPTIVE_SX
 	volatile struct thread *owner;
 	u_int i, n, spintries = 0;
 	enum { READERS, WRITER } sleep_reason = READERS;
 	bool adaptive;
+	bool in_critical = false;
 #endif
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
@@ -569,6 +590,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LO
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state = 0;
+	int doing_lockprof = 0;
 #endif
 	int extra_work = 0;
 
@@ -581,12 +603,14 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LO
 				goto out_lockstat;
 		}
 		extra_work = 1;
+		doing_lockprof = 1;
 		all_time -= lockstat_nsecs(&sx->lock_object);
 		state = x;
 	}
 #endif
 #ifdef LOCK_PROFILING
 	extra_work = 1;
+	doing_lockprof = 1;
 	state = x;
 #endif
 
@@ -653,6 +677,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LO
 		 * running or the state of the lock changes.
 		 */
 		if ((x & SX_LOCK_SHARED) == 0) {
+			sx_drop_critical(x, &in_critical, &extra_work);
 			sleep_reason = WRITER;
 			owner = lv_sx_owner(x);
 			if (!TD_IS_RUNNING(owner))
@@ -675,17 +700,35 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LO
 			sleep_reason = READERS;
 			if (spintries == asx_retries)
 				goto sleepq;
+			if (!(x & SX_LOCK_WRITE_SPINNER)) {
+				if (!in_critical) {
+					critical_enter();
+					in_critical = true;
+					extra_work++;
+				}
+				if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
+				    x | SX_LOCK_WRITE_SPINNER)) {
+					critical_exit();
+					in_critical = false;
+					extra_work--;
+					continue;
+				}
+			}
 			spintries++;
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    sx->lock_object.lo_name);
+			n = SX_SHARERS(x);
 			for (i = 0; i < asx_loops; i += n) {
-				n = SX_SHARERS(x);
 				lock_delay_spin(n);
 				x = SX_READ_VALUE(sx);
-				if ((x & SX_LOCK_SHARED) == 0 ||
-				    SX_SHARERS(x) == 0)
+				if (!(x & SX_LOCK_WRITE_SPINNER))
 					break;
+				if (!(x & SX_LOCK_SHARED))
+					break;
+				n = SX_SHARERS(x);
+				if (n == 0)
+					break;
 			}
 #ifdef KDTRACE_HOOKS
 			lda.spin_cnt += i;
@@ -707,6 +750,7 @@ retry_sleepq:
 		 */
 		if (x == SX_LOCK_UNLOCKED) {
 			sleepq_release(&sx->lock_object);
+			sx_drop_critical(x, &in_critical, &extra_work);
 			continue;
 		}
 
@@ -723,10 +767,13 @@ retry_sleepq:
 				owner = (struct thread *)SX_OWNER(x);
 				if (TD_IS_RUNNING(owner)) {
 					sleepq_release(&sx->lock_object);
+					sx_drop_critical(x, &in_critical,
+					    &extra_work);
 					continue;
 				}
 			} else if (SX_SHARERS(x) > 0 && sleep_reason == WRITER) {
 				sleepq_release(&sx->lock_object);
+				sx_drop_critical(x, &in_critical, &extra_work);
 				continue;
 			}
 		}
@@ -742,9 +789,10 @@ retry_sleepq:
 		 * as there are other exclusive waiters still.  If we
 		 * fail, restart the loop.
 		 */
-		if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
-			if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x,
-			    tid | SX_LOCK_EXCLUSIVE_WAITERS))
+		setx = x & (SX_LOCK_WAITERS | SX_LOCK_WRITE_SPINNER);
+		if ((x & ~setx) == SX_LOCK_SHARED) {
+			setx &= ~SX_LOCK_WRITE_SPINNER;
+			if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid | setx))
 				goto retry_sleepq;
 			sleepq_release(&sx->lock_object);
 			CTR2(KTR_LOCK, "%s: %p claimed by new writer",
@@ -752,19 +800,43 @@ retry_sleepq:
 			break;
 		}
 
+#ifdef ADAPTIVE_SX
 		/*
-		 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS.  If we fail,
-		 * than loop back and retry.
+		 * It is possible we set the SX_LOCK_WRITE_SPINNER bit.
+		 * It is an invariant that when the bit is set, there is
+		 * a writer ready to grab the lock. Thus clear the bit since
+		 * we are going to sleep.
 		 */
-		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
-			if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
-			    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
-				goto retry_sleepq;
+		if (in_critical) {
+			if ((x & SX_LOCK_WRITE_SPINNER) ||
+			    !((x & SX_LOCK_EXCLUSIVE_WAITERS))) {
+				setx = x & ~SX_LOCK_WRITE_SPINNER;
+				setx |= SX_LOCK_EXCLUSIVE_WAITERS;
+				if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
+				    setx)) {
+					goto retry_sleepq;
+				}
 			}
-			if (LOCK_LOG_TEST(&sx->lock_object, 0))
-				CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
-				    __func__, sx);
+			critical_exit();
+			in_critical = false;
+		} else {
+#endif
+			/*
+			 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS.  If we fail,
+			 * than loop back and retry.
+			 */
+			if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
+				if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
+				    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
+					goto retry_sleepq;
+				}
+				if (LOCK_LOG_TEST(&sx->lock_object, 0))
+					CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
+					    __func__, sx);
+			}
+#ifdef ADAPTIVE_SX
 		}
+#endif
 
 		/*
 		 * Since we have been unable to acquire the exclusive
@@ -801,10 +873,16 @@ retry_sleepq:
 			    __func__, sx);
 		x = SX_READ_VALUE(sx);
 	}
-#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!extra_work))
 		return (error);
+#ifdef ADAPTIVE_SX
+	if (in_critical)
+		critical_exit();
 #endif
+#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
+	if (__predict_true(!doing_lockprof))
+		return (error);
+#endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&sx->lock_object);
 	if (sleep_time)
@@ -877,11 +955,11 @@ _sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_
 	 * them precedence and cleaning up the shared waiters bit anyway.
 	 */
 	setx = SX_LOCK_UNLOCKED;
-	queue = SQ_EXCLUSIVE_QUEUE;
-	if ((x & SX_LOCK_SHARED_WAITERS) != 0 &&
-	    sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) {
-		queue = SQ_SHARED_QUEUE;
-		setx |= (x & SX_LOCK_EXCLUSIVE_WAITERS);
+	queue = SQ_SHARED_QUEUE;
+	if ((x & SX_LOCK_EXCLUSIVE_WAITERS) != 0 &&
+	    sleepq_sleepcnt(&sx->lock_object, SQ_EXCLUSIVE_QUEUE) != 0) {
+		queue = SQ_EXCLUSIVE_QUEUE;
+		setx |= (x & SX_LOCK_SHARED_WAITERS);
 	}
 	atomic_store_rel_ptr(&sx->sx_lock, setx);
 
@@ -899,23 +977,36 @@ _sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_
 }
 
 static bool __always_inline
-__sx_slock_try(struct sx *sx, uintptr_t *xp LOCK_FILE_LINE_ARG_DEF)
+__sx_can_read(struct thread *td, uintptr_t x, bool fp)
 {
 
+	if ((x & (SX_LOCK_SHARED | SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_WRITE_SPINNER))
+			== SX_LOCK_SHARED)
+		return (true);
+	if (!fp && td->td_sx_slocks && (x & SX_LOCK_SHARED))
+		return (true);
+	return (false);
+}
+
+static bool __always_inline
+__sx_slock_try(struct sx *sx, struct thread *td, uintptr_t *xp, bool fp
+    LOCK_FILE_LINE_ARG_DEF)
+{
+
 	/*
 	 * If no other thread has an exclusive lock then try to bump up
 	 * the count of sharers.  Since we have to preserve the state
 	 * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
 	 * shared lock loop back and retry.
 	 */
-	while (*xp & SX_LOCK_SHARED) {
-		MPASS(!(*xp & SX_LOCK_SHARED_WAITERS));
+	while (__sx_can_read(td, *xp, fp)) {
 		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, xp,
 		    *xp + SX_ONE_SHARER)) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
 				CTR4(KTR_LOCK, "%s: %p succeed %p -> %p",
 				    __func__, sx, (void *)*xp,
 				    (void *)(*xp + SX_ONE_SHARER));
+			td->td_sx_slocks++;
 			return (true);
 		}
 	}
@@ -926,8 +1017,10 @@ static int __noinline
 _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 {
 	GIANT_DECLARE;
+	struct thread *td;
 #ifdef ADAPTIVE_SX
 	volatile struct thread *owner;
+	u_int i, n, spintries = 0;
 	bool adaptive;
 #endif
 #ifdef LOCK_PROFILING
@@ -948,9 +1041,11 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LO
 #endif
 	int extra_work = 0;
 
+	td = curthread;
+
 #ifdef KDTRACE_HOOKS
 	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
-		if (__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG))
+		if (__sx_slock_try(sx, td, &x, false LOCK_FILE_LINE_ARG))
 			goto out_lockstat;
 		extra_work = 1;
 		all_time -= lockstat_nsecs(&sx->lock_object);
@@ -990,7 +1085,7 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LO
 	 * shared locks once there is an exclusive waiter.
 	 */
 	for (;;) {
-		if (__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG))
+		if (__sx_slock_try(sx, td, &x, false LOCK_FILE_LINE_ARG))
 			break;
 #ifdef INVARIANTS
 		GIANT_SAVE(extra_work);
@@ -1002,28 +1097,62 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LO
 #ifdef ADAPTIVE_SX
 		if (__predict_false(!adaptive))
 			goto sleepq;
+
 		/*
 		 * If the owner is running on another CPU, spin until
 		 * the owner stops running or the state of the lock
 		 * changes.
 		 */
-		owner = lv_sx_owner(x);
-		if (TD_IS_RUNNING(owner)) {
-			if (LOCK_LOG_TEST(&sx->lock_object, 0))
-				CTR3(KTR_LOCK,
-				    "%s: spinning on %p held by %p",
-				    __func__, sx, owner);
-			KTR_STATE1(KTR_SCHED, "thread",
-			    sched_tdname(curthread), "spinning",
-			    "lockname:\"%s\"", sx->lock_object.lo_name);
-			do {
-				lock_delay(&lda);
+		if ((x & SX_LOCK_SHARED) == 0) {
+			owner = lv_sx_owner(x);
+			if (TD_IS_RUNNING(owner)) {
+				if (LOCK_LOG_TEST(&sx->lock_object, 0))
+					CTR3(KTR_LOCK,
+					    "%s: spinning on %p held by %p",
+					    __func__, sx, owner);
+				KTR_STATE1(KTR_SCHED, "thread",
+				    sched_tdname(curthread), "spinning",
+				    "lockname:\"%s\"", sx->lock_object.lo_name);
+				do {
+					lock_delay(&lda);
+					x = SX_READ_VALUE(sx);
+					owner = lv_sx_owner(x);
+				} while (owner != NULL && TD_IS_RUNNING(owner));
+				KTR_STATE0(KTR_SCHED, "thread",
+				    sched_tdname(curthread), "running");
+				continue;
+			}
+		} else {
+			if ((x & SX_LOCK_WRITE_SPINNER) && SX_SHARERS(x) == 0) {
+				MPASS(!__sx_can_read(td, x, false));
+				lock_delay_spin(2);
 				x = SX_READ_VALUE(sx);
-				owner = lv_sx_owner(x);
-			} while (owner != NULL && TD_IS_RUNNING(owner));
-			KTR_STATE0(KTR_SCHED, "thread",
-			    sched_tdname(curthread), "running");
-			continue;
+				continue;
+			}
+			if (spintries < asx_retries) {
+				KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+				    "spinning", "lockname:\"%s\"",
+				    sx->lock_object.lo_name);
+				n = SX_SHARERS(x);
+				for (i = 0; i < asx_loops; i += n) {
+					lock_delay_spin(n);
+					x = SX_READ_VALUE(sx);
+					if (!(x & SX_LOCK_SHARED))
+						break;
+					n = SX_SHARERS(x);
+					if (n == 0)
+						break;
+					if (__sx_can_read(td, x, false))
+						break;
+				}
+#ifdef KDTRACE_HOOKS
+				lda.spin_cnt += i;
+#endif
+				KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+				    "running");
+				if (i < asx_loops)
+					continue;
+			}
 		}
 sleepq:
 #endif
@@ -1035,11 +1164,8 @@ sleepq:
 		sleepq_lock(&sx->lock_object);
 		x = SX_READ_VALUE(sx);
 retry_sleepq:
-		/*
-		 * The lock could have been released while we spun.
-		 * In this case loop back and retry.
-		 */
-		if (x & SX_LOCK_SHARED) {
+		if (((x & SX_LOCK_WRITE_SPINNER) && SX_SHARERS(x) == 0) ||
+		    __sx_can_read(td, x, false)) {
 			sleepq_release(&sx->lock_object);
 			continue;
 		}
@@ -1135,6 +1261,7 @@ out_lockstat:
 int
 _sx_slock_int(struct sx *sx, int opts LOCK_FILE_LINE_ARG_DEF)
 {
+	struct thread *td;
 	uintptr_t x;
 	int error;
 
@@ -1147,9 +1274,10 @@ _sx_slock_int(struct sx *sx, int opts LOCK_FILE_LINE_A
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
 
 	error = 0;
+	td = curthread;
 	x = SX_READ_VALUE(sx);
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) ||
-	    !__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG)))
+	    !__sx_slock_try(sx, td, &x, true LOCK_FILE_LINE_ARG)))
 		error = _sx_slock_hard(sx, opts, x LOCK_FILE_LINE_ARG);
 	else
 		lock_profile_obtain_lock_success(&sx->lock_object, 0, 0,
@@ -1170,22 +1298,11 @@ _sx_slock(struct sx *sx, int opts, const char *file, i
 }
 
 static bool __always_inline
-_sx_sunlock_try(struct sx *sx, uintptr_t *xp)
+_sx_sunlock_try(struct sx *sx, struct thread *td, uintptr_t *xp)
 {
 
 	for (;;) {
-		/*
-		 * We should never have sharers while at least one thread
-		 * holds a shared lock.
-		 */
-		KASSERT(!(*xp & SX_LOCK_SHARED_WAITERS),
-		    ("%s: waiting sharers", __func__));
-
-		/*
-		 * See if there is more than one shared lock held.  If
-		 * so, just drop one and return.
-		 */
-		if (SX_SHARERS(*xp) > 1) {
+		if (SX_SHARERS(*xp) > 1 || !(*xp & SX_LOCK_WAITERS)) {
 			if (atomic_fcmpset_rel_ptr(&sx->sx_lock, xp,
 			    *xp - SX_ONE_SHARER)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
@@ -1193,56 +1310,33 @@ _sx_sunlock_try(struct sx *sx, uintptr_t *xp)
 					    "%s: %p succeeded %p -> %p",
 					    __func__, sx, (void *)*xp,
 					    (void *)(*xp - SX_ONE_SHARER));
+				td->td_sx_slocks--;
 				return (true);
 			}
 			continue;
 		}
-
-		/*
-		 * If there aren't any waiters for an exclusive lock,
-		 * then try to drop it quickly.
-		 */
-		if (!(*xp & SX_LOCK_EXCLUSIVE_WAITERS)) {
-			MPASS(*xp == SX_SHARERS_LOCK(1));
-			*xp = SX_SHARERS_LOCK(1);
-			if (atomic_fcmpset_rel_ptr(&sx->sx_lock,
-			    xp, SX_LOCK_UNLOCKED)) {
-				if (LOCK_LOG_TEST(&sx->lock_object, 0))
-					CTR2(KTR_LOCK, "%s: %p last succeeded",
-					    __func__, sx);
-				return (true);
-			}
-			continue;
-		}
 		break;
 	}
 	return (false);
 }
 
 static void __noinline
-_sx_sunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
+_sx_sunlock_hard(struct sx *sx, struct thread *td, uintptr_t x
+    LOCK_FILE_LINE_ARG_DEF)
 {
 	int wakeup_swapper = 0;
-	uintptr_t setx;
+	uintptr_t setx, queue;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
-	if (_sx_sunlock_try(sx, &x))
+	if (_sx_sunlock_try(sx, td, &x))
 		goto out_lockstat;
 
-	/*
-	 * At this point, there should just be one sharer with
-	 * exclusive waiters.
-	 */
-	MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));
-
 	sleepq_lock(&sx->lock_object);
 	x = SX_READ_VALUE(sx);
 	for (;;) {
-		MPASS(x & SX_LOCK_EXCLUSIVE_WAITERS);
-		MPASS(!(x & SX_LOCK_SHARED_WAITERS));
-		if (_sx_sunlock_try(sx, &x))
+		if (_sx_sunlock_try(sx, td, &x))
 			break;
 
 		/*
@@ -1251,15 +1345,21 @@ _sx_sunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_
 		 * Note that the state of the lock could have changed,
 		 * so if it fails loop back and retry.
 		 */
-		setx = x - SX_ONE_SHARER;
-		setx &= ~SX_LOCK_EXCLUSIVE_WAITERS;
+		setx = SX_LOCK_UNLOCKED;
+		queue = SQ_SHARED_QUEUE;
+		if (x & SX_LOCK_EXCLUSIVE_WAITERS) {
+			setx |= (x & SX_LOCK_SHARED_WAITERS);
+			queue = SQ_EXCLUSIVE_QUEUE;
+		}
+		setx |= (x & SX_LOCK_WRITE_SPINNER);
 		if (!atomic_fcmpset_rel_ptr(&sx->sx_lock, &x, setx))
 			continue;
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p waking up all thread on"
 			    "exclusive queue", __func__, sx);
 		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
-		    0, SQ_EXCLUSIVE_QUEUE);
+		    0, queue);
+		td->td_sx_slocks--;
 		break;
 	}
 	sleepq_release(&sx->lock_object);
@@ -1272,6 +1372,7 @@ out_lockstat:
 void
 _sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 {
+	struct thread *td;
 	uintptr_t x;
 
 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
@@ -1280,10 +1381,11 @@ _sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
 	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
 	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
 
+	td = curthread;
 	x = SX_READ_VALUE(sx);
 	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) ||
-	    !_sx_sunlock_try(sx, &x)))
-		_sx_sunlock_hard(sx, x LOCK_FILE_LINE_ARG);
+	    !_sx_sunlock_try(sx, td, &x)))
+		_sx_sunlock_hard(sx, td, x LOCK_FILE_LINE_ARG);
 	else
 		lock_profile_release_lock(&sx->lock_object);
 

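The heart of the ported policy is the new __sx_can_read() check above: a
shared request is granted only while no writer is waiting or spinning,
except that a thread which already holds shared sx locks is still admitted
so that existing read-side nesting cannot deadlock against the blocked
writer.  A standalone paraphrase follows, using the flag values from the
sys/sx.h hunk below; SX_LOCK_SHARED == 0x01 is assumed from the unmodified
part of the header, and this is an illustrative sketch rather than the
kernel function itself.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define	SX_LOCK_SHARED			0x01	/* assumed; not in the hunks */
#define	SX_LOCK_EXCLUSIVE_WAITERS	0x04
#define	SX_LOCK_WRITE_SPINNER		0x10	/* new in this commit */

/* Mirrors the shape of __sx_can_read(); held_slocks plays td_sx_slocks. */
static bool
can_read(uintptr_t x, short held_slocks, bool fast_path)
{
	/* No writer waiting or spinning: always admit a new reader. */
	if ((x & (SX_LOCK_SHARED | SX_LOCK_EXCLUSIVE_WAITERS |
	    SX_LOCK_WRITE_SPINNER)) == SX_LOCK_SHARED)
		return (true);
	/* Slow path only: nested readers may pass a waiting writer. */
	if (!fast_path && held_slocks != 0 && (x & SX_LOCK_SHARED) != 0)
		return (true);
	return (false);
}

int
main(void)
{
	uintptr_t contested = SX_LOCK_SHARED | SX_LOCK_EXCLUSIVE_WAITERS;

	/* A fresh reader is refused while a writer waits (prints 0). */
	printf("fresh reader admitted: %d\n", can_read(contested, 0, true));
	/* A reader already holding shared locks gets through (prints 1). */
	printf("nested reader admitted: %d\n", can_read(contested, 1, false));
	return (0);
}

The per-thread td_sx_slocks counter added in sys/proc.h is what feeds the
second test, and the userret() assertion added in subr_trap.c verifies that
the counter is balanced whenever a thread returns to userspace.
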
Modified: head/sys/kern/subr_trap.c
==============================================================================
--- head/sys/kern/subr_trap.c	Tue May 22 07:16:39 2018	(r334023)
+++ head/sys/kern/subr_trap.c	Tue May 22 07:20:22 2018	(r334024)
@@ -168,6 +168,9 @@ userret(struct thread *td, struct trapframe *frame)
 	KASSERT(td->td_rw_rlocks == 0,
 	    ("userret: Returning with %d rwlocks held in read mode",
 	    td->td_rw_rlocks));
+	KASSERT(td->td_sx_slocks == 0,
+	    ("userret: Returning with %d sx locks held in shared mode",
+	    td->td_sx_slocks));
 	KASSERT((td->td_pflags & TDP_NOFAULTING) == 0,
 	    ("userret: Returning with pagefaults disabled"));
 	KASSERT(td->td_no_sleeping == 0,

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h	Tue May 22 07:16:39 2018	(r334023)
+++ head/sys/sys/proc.h	Tue May 22 07:20:22 2018	(r334024)
@@ -267,6 +267,7 @@ struct thread {
 	u_char		td_tsqueue;	/* (t) Turnstile queue blocked on. */
 	short		td_locks;	/* (k) Debug: count of non-spin locks */
 	short		td_rw_rlocks;	/* (k) Count of rwlock read locks. */
+	short		td_sx_slocks;	/* (k) Count of sx shared locks. */
 	short		td_lk_slocks;	/* (k) Count of lockmgr shared locks. */
 	short		td_stopsched;	/* (k) Scheduler stopped. */
 	struct turnstile *td_blocked;	/* (t) Lock thread is blocked on. */

Modified: head/sys/sys/sx.h
==============================================================================
--- head/sys/sys/sx.h	Tue May 22 07:16:39 2018	(r334023)
+++ head/sys/sys/sx.h	Tue May 22 07:20:22 2018	(r334024)
@@ -71,12 +71,14 @@
 #define	SX_LOCK_SHARED_WAITERS		0x02
 #define	SX_LOCK_EXCLUSIVE_WAITERS	0x04
 #define	SX_LOCK_RECURSED		0x08
+#define	SX_LOCK_WRITE_SPINNER		0x10
 #define	SX_LOCK_FLAGMASK						\
 	(SX_LOCK_SHARED | SX_LOCK_SHARED_WAITERS |			\
-	SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_RECURSED)
+	SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_RECURSED | SX_LOCK_WRITE_SPINNER)
+#define	SX_LOCK_WAITERS			(SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)
 
 #define	SX_OWNER(x)			((x) & ~SX_LOCK_FLAGMASK)
-#define	SX_SHARERS_SHIFT		4
+#define	SX_SHARERS_SHIFT		5
 #define	SX_SHARERS(x)			(SX_OWNER(x) >> SX_SHARERS_SHIFT)
 #define	SX_SHARERS_LOCK(x)						\
 	((x) << SX_SHARERS_SHIFT | SX_LOCK_SHARED)
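
Because the new SX_LOCK_WRITE_SPINNER flag occupies bit 0x10, the sharer
count has to start one bit higher, which is why SX_SHARERS_SHIFT moves from
4 to 5.  A quick decode check (flag values as above; SX_LOCK_SHARED == 0x01
is assumed from the unmodified part of the header, and SX_SHARERS() is
simplified here by dropping the SX_OWNER() masking, which the right shift
already makes redundant for the low flag bits):

#include <stdio.h>

#define	SX_LOCK_SHARED		0x01UL	/* assumed; not shown in the hunk */
#define	SX_LOCK_WRITE_SPINNER	0x10UL	/* new in this commit */
#define	SX_SHARERS_SHIFT	5	/* was 4 before this commit */
#define	SX_SHARERS(x)		((x) >> SX_SHARERS_SHIFT)	/* simplified */

int
main(void)
{
	/* Three sharers with a writer spinning for the lock. */
	unsigned long x = (3UL << SX_SHARERS_SHIFT) | SX_LOCK_SHARED |
	    SX_LOCK_WRITE_SPINNER;

	/* Prints: lock word 0x71: 3 sharer(s), write spinner set */
	printf("lock word 0x%lx: %lu sharer(s), write spinner set\n",
	    x, SX_SHARERS(x));
	return (0);
}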


