Date:      Thu, 16 Mar 2017 06:45:36 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r315378 - in stable/11/sys: kern sys
Message-ID:  <201703160645.v2G6jaHA086561@repo.freebsd.org>

Author: mjg
Date: Thu Mar 16 06:45:36 2017
New Revision: 315378
URL: https://svnweb.freebsd.org/changeset/base/315378

Log:
  MFC r313275,r313280,r313282,r313335:
  
  mtx: move lockstat handling out of inline primitives
  
  Lockstat requires checking whether it is enabled and, if so, calling a
  6-argument function. Further, determining whether to call it on unlock
  requires pre-reading the lock value.
  
  This is problematic in at least 3 ways:
  - more branches in the hot path than necessary
  - additional cacheline ping pong under contention
  - bigger code
  
  Instead, first check whether lockstat handling is necessary and, if so, fall
  back to the regular locking routines. A new macro, LOCKSTAT_PROFILE_ENABLED,
  is introduced for this purpose.
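  
  A minimal standalone sketch of the resulting fast-path shape, using C11
  atomics instead of the kernel's atomic_fcmpset_acq_ptr(); probe_enabled,
  lock_fast() and lock_slow() are illustrative names only, not kernel API.
  The real change is in the __mtx_lock/__rw_wlock/__sx_xlock macros in the
  diff below:
  
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdint.h>
  
  #define UNOWNED ((uintptr_t)0)
  
  /* Stands in for LOCKSTAT_PROFILE_ENABLED()/SDT_PROBE_ENABLED(). */
  static _Atomic bool probe_enabled;
  
  struct lock {
      _Atomic uintptr_t word;     /* UNOWNED or owning thread id */
  };
  
  /* Slow path: handles contention and, when profiling, records the event. */
  static void
  lock_slow(struct lock *l, uintptr_t v, uintptr_t tid)
  {
      /* 'v' is the value observed by the failed fast-path attempt. */
      while (!atomic_compare_exchange_weak(&l->word, &v, tid))
          v = UNOWNED;            /* a real kernel would block and profile here */
  }
  
  /* Fast path: one CAS and no lockstat branches; punt when a probe is on. */
  static void
  lock_fast(struct lock *l, uintptr_t tid)
  {
      uintptr_t v = UNOWNED;
  
      if (atomic_load_explicit(&probe_enabled, memory_order_relaxed) ||
          !atomic_compare_exchange_strong(&l->word, &v, tid))
          lock_slow(l, v, tid);
  }
  
  int
  main(void)
  {
      struct lock l = { UNOWNED };
  
      lock_fast(&l, 1);           /* uncontended: a single CAS, no probe cost */
      return (0);
  }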
  
  LOCK_PROFILING uninlines all primitives. Fold the current inline lock
  variant into _mtx_lock_flags to retain that support. With this change the
  inline variants are not used when LOCK_PROFILING is defined and thus can
  ignore its existence.
  
  This results in:
  text        data     bss     dec     hex filename
  22259667    1303208 4994976 28557851        1b3c21b kernel.orig
  21797315    1303208 4994976 28095499        1acb40b kernel.patched
  
  i.e. about a 2% reduction in text size (462352 bytes).
  
  A remaining action is to remove spurious arguments for internal kernel
  consumers.
  
  ==
  
  sx: move lockstat handling out of inline primitives
  
  See r313275 for details.
  
  ==
  
  rwlock: move lockstat handling out of inline primitives
  
  See r313275 for details.
  
  One difference here is that recursion handling was removed from the fallback
  routine, as it was never supposed to see a recursed lock in the first place.
  Future changes will move recursion handling out of the inline variants, but
  right now there is no easy way to test whether the lock is recursed without
  reading additional words.
  
  ==
  
  locks: fix recursion support after recent changes
  
  When a relevant lockstat probe is enabled, the fallback primitive is called
  with a constant signifying a free lock. This works fine for typical cases,
  but breaks with recursion, since the routine checks whether the passed value
  is that of the executing thread.
  
  Read the real lock value if necessary.
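  
  A simplified, single-threaded view of why the constant broke recursion and
  how the re-read fixes it; 'lock_sleep' mirrors the _mtx_lock_sleep() hunk in
  kern_mutex.c below, with plain loads standing in for MTX_READ_VALUE() and
  lv_mtx_owner():
  
  #include <stdint.h>
  #include <stdio.h>
  
  #define UNOWNED ((uintptr_t)0)
  
  struct lock {
      uintptr_t word;             /* UNOWNED or owning thread id */
      unsigned recurse;
  };
  
  static void
  lock_sleep(struct lock *l, uintptr_t v, uintptr_t tid)
  {
      /*
       * With a probe enabled, the caller passes v == UNOWNED without trying
       * the fast path; without this re-read a recursive acquire would fail
       * the owner check below.
       */
      if (v == UNOWNED)
          v = l->word;
  
      if (v == tid) {
          l->recurse++;           /* recursion detected correctly */
          return;
      }
      /* ... contended-path handling elided ... */
  }
  
  int
  main(void)
  {
      struct lock l = { .word = 1, .recurse = 0 };
  
      lock_sleep(&l, UNOWNED, 1); /* thread 1 re-acquiring its own lock */
      printf("recurse = %u\n", l.recurse);    /* prints 1 with the fix */
      return (0);
  }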

Modified:
  stable/11/sys/kern/kern_mutex.c
  stable/11/sys/kern/kern_rwlock.c
  stable/11/sys/kern/kern_sx.c
  stable/11/sys/sys/lockstat.h
  stable/11/sys/sys/mutex.h
  stable/11/sys/sys/rwlock.h
  stable/11/sys/sys/sdt.h
  stable/11/sys/sys/sx.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/kern/kern_mutex.c
==============================================================================
--- stable/11/sys/kern/kern_mutex.c	Thu Mar 16 06:35:53 2017	(r315377)
+++ stable/11/sys/kern/kern_mutex.c	Thu Mar 16 06:45:36 2017	(r315378)
@@ -226,6 +226,7 @@ void
 __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 {
 	struct mtx *m;
+	uintptr_t tid, v;
 
 	if (SCHEDULER_STOPPED())
 		return;
@@ -243,7 +244,13 @@ __mtx_lock_flags(volatile uintptr_t *c, 
 	WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
 	    LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
 
-	__mtx_lock(m, curthread, opts, file, line);
+	tid = (uintptr_t)curthread;
+	v = MTX_UNOWNED;
+	if (!_mtx_obtain_lock_fetch(m, &v, tid))
+		_mtx_lock_sleep(m, v, tid, opts, file, line);
+	else
+		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,
+		    m, 0, 0, file, line);
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
@@ -271,7 +278,7 @@ __mtx_unlock_flags(volatile uintptr_t *c
 	    line);
 	mtx_assert(m, MA_OWNED);
 
-	__mtx_unlock(m, curthread, opts, file, line);
+	__mtx_unlock_sleep(c, opts, file, line);
 	TD_LOCKS_DEC(curthread);
 }
 
@@ -449,6 +456,8 @@ __mtx_lock_sleep(volatile uintptr_t *c, 
 	lock_delay_arg_init(&lda, NULL);
 #endif
 	m = mtxlock2mtx(c);
+	if (__predict_false(v == MTX_UNOWNED))
+		v = MTX_READ_VALUE(m);
 
 	if (__predict_false(lv_mtx_owner(v) == (struct thread *)tid)) {
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
@@ -857,20 +866,17 @@ __mtx_unlock_sleep(volatile uintptr_t *c
 {
 	struct mtx *m;
 	struct turnstile *ts;
-	uintptr_t v;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
-	v = MTX_READ_VALUE(m);
 
-	if (v == (uintptr_t)curthread) {
+	if (!mtx_recursed(m)) {
+		LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, m);
 		if (_mtx_release_lock(m, (uintptr_t)curthread))
 			return;
-	}
-
-	if (mtx_recursed(m)) {
+	} else {
 		if (--(m->mtx_recurse) == 0)
 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))

Modified: stable/11/sys/kern/kern_rwlock.c
==============================================================================
--- stable/11/sys/kern/kern_rwlock.c	Thu Mar 16 06:35:53 2017	(r315377)
+++ stable/11/sys/kern/kern_rwlock.c	Thu Mar 16 06:45:36 2017	(r315378)
@@ -265,6 +265,7 @@ void
 _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
 {
 	struct rwlock *rw;
+	uintptr_t tid, v;
 
 	if (SCHEDULER_STOPPED())
 		return;
@@ -278,7 +279,14 @@ _rw_wlock_cookie(volatile uintptr_t *c, 
 	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
 	    line, NULL);
-	__rw_wlock(rw, curthread, file, line);
+	tid = (uintptr_t)curthread;
+	v = RW_UNLOCKED;
+	if (!_rw_write_lock_fetch(rw, &v, tid))
+		_rw_wlock_hard(rw, v, tid, file, line);
+	else
+		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
+		    0, 0, file, line, LOCKSTAT_WRITER);
+
 	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
 	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
 	TD_LOCKS_INC(curthread);
@@ -337,7 +345,11 @@ _rw_wunlock_cookie(volatile uintptr_t *c
 	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
 	    line);
-	__rw_wunlock(rw, curthread, file, line);
+	if (rw->rw_recurse)
+		rw->rw_recurse--;
+	else
+		_rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
+
 	TD_LOCKS_DEC(curthread);
 }
 
@@ -782,6 +794,8 @@ __rw_wlock_hard(volatile uintptr_t *c, u
 	lock_delay_arg_init(&lda, NULL);
 #endif
 	rw = rwlock2rw(c);
+	if (__predict_false(v == RW_UNLOCKED))
+		v = RW_READ_VALUE(rw);
 
 	if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
 		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
@@ -980,13 +994,12 @@ __rw_wunlock_hard(volatile uintptr_t *c,
 		return;
 
 	rw = rwlock2rw(c);
+	MPASS(!rw_recursed(rw));
 
-	if (rw_wlocked(rw) && rw_recursed(rw)) {
-		rw->rw_recurse--;
-		if (LOCK_LOG_TEST(&rw->lock_object, 0))
-			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
+	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw,
+	    LOCKSTAT_WRITER);
+	if (_rw_write_unlock(rw, tid))
 		return;
-	}
 
 	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
 	    ("%s: neither of the waiter flags are set", __func__));

Modified: stable/11/sys/kern/kern_sx.c
==============================================================================
--- stable/11/sys/kern/kern_sx.c	Thu Mar 16 06:35:53 2017	(r315377)
+++ stable/11/sys/kern/kern_sx.c	Thu Mar 16 06:45:36 2017	(r315378)
@@ -290,6 +290,7 @@ sx_try_slock_(struct sx *sx, const char 
 int
 _sx_xlock(struct sx *sx, int opts, const char *file, int line)
 {
+	uintptr_t tid, x;
 	int error = 0;
 
 	if (SCHEDULER_STOPPED())
@@ -301,7 +302,13 @@ _sx_xlock(struct sx *sx, int opts, const
 	    ("sx_xlock() of destroyed sx @ %s:%d", file, line));
 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
 	    line, NULL);
-	error = __sx_xlock(sx, curthread, opts, file, line);
+	tid = (uintptr_t)curthread;
+	x = SX_LOCK_UNLOCKED;
+	if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
+		error = _sx_xlock_hard(sx, x, tid, opts, file, line);
+	else
+		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
+		    0, 0, file, line, LOCKSTAT_WRITER);
 	if (!error) {
 		LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
 		    file, line);
@@ -359,7 +366,7 @@ _sx_xunlock(struct sx *sx, const char *f
 	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
 	    line);
-	__sx_xunlock(sx, curthread, file, line);
+	_sx_xunlock_hard(sx, (uintptr_t)curthread, file, line);
 	TD_LOCKS_DEC(curthread);
 }
 
@@ -504,6 +511,9 @@ _sx_xlock_hard(struct sx *sx, uintptr_t 
 	lock_delay_arg_init(&lda, NULL);
 #endif
 
+	if (__predict_false(x == SX_LOCK_UNLOCKED))
+		x = SX_READ_VALUE(sx);
+
 	/* If we already hold an exclusive lock, then recurse. */
 	if (__predict_false(lv_sx_owner(x) == (struct thread *)tid)) {
 		KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
@@ -737,8 +747,13 @@ _sx_xunlock_hard(struct sx *sx, uintptr_
 
 	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
 
-	/* If the lock is recursed, then unrecurse one level. */
-	if (sx_xlocked(sx) && sx_recursed(sx)) {
+	if (!sx_recursed(sx)) {
+		LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx,
+		    LOCKSTAT_WRITER);
+		if (atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
+			return;
+	} else {
+		/* The lock is recursed, unrecurse one level. */
 		if ((--sx->sx_recurse) == 0)
 			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))

Modified: stable/11/sys/sys/lockstat.h
==============================================================================
--- stable/11/sys/sys/lockstat.h	Thu Mar 16 06:35:53 2017	(r315377)
+++ stable/11/sys/sys/lockstat.h	Thu Mar 16 06:45:36 2017	(r315378)
@@ -107,6 +107,10 @@ extern int lockstat_enabled;
 	LOCKSTAT_RECORD1(probe, lp, a);					\
 } while (0)
 
+#ifndef LOCK_PROFILING
+#define	LOCKSTAT_PROFILE_ENABLED(probe)	SDT_PROBE_ENABLED(lockstat, , , probe)
+#endif
+
 struct lock_object;
 uint64_t lockstat_nsecs(struct lock_object *);
 
@@ -130,6 +134,10 @@ uint64_t lockstat_nsecs(struct lock_obje
 #define	LOCKSTAT_PROFILE_RELEASE_RWLOCK(probe, lp, a)  			\
 	LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp)
 
+#ifndef LOCK_PROFILING
+#define	LOCKSTAT_PROFILE_ENABLED(probe)	0
+#endif
+
 #endif /* !KDTRACE_HOOKS */
 #endif /* _KERNEL */
 #endif /* _SYS_LOCKSTAT_H */

Modified: stable/11/sys/sys/mutex.h
==============================================================================
--- stable/11/sys/sys/mutex.h	Thu Mar 16 06:35:53 2017	(r315377)
+++ stable/11/sys/sys/mutex.h	Thu Mar 16 06:45:36 2017	(r315378)
@@ -171,10 +171,8 @@ void	thread_lock_flags_(struct thread *,
 #define _mtx_obtain_lock(mp, tid)					\
 	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid))
 
-#define _mtx_obtain_lock_fetch(mp, vp, tid) ({				\
-	*vp = MTX_UNOWNED;						\
-	atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid));		\
-})
+#define _mtx_obtain_lock_fetch(mp, vp, tid)				\
+	atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid))
 
 /* Try to release mtx_lock if it is unrecursed and uncontested. */
 #define _mtx_release_lock(mp, tid)					\
@@ -193,13 +191,11 @@ void	thread_lock_flags_(struct thread *,
 /* Lock a normal mutex. */
 #define __mtx_lock(mp, tid, opts, file, line) do {			\
 	uintptr_t _tid = (uintptr_t)(tid);				\
-	uintptr_t _v;							\
+	uintptr_t _v = MTX_UNOWNED;					\
 									\
-	if (!_mtx_obtain_lock_fetch((mp), &_v, _tid))			\
+	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__acquire) ||\
+	    !_mtx_obtain_lock_fetch((mp), &_v, _tid)))			\
 		_mtx_lock_sleep((mp), _v, _tid, (opts), (file), (line));\
-	else								\
-		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,	\
-		    mp, 0, 0, file, line);				\
 } while (0)
 
 /*
@@ -211,7 +207,7 @@ void	thread_lock_flags_(struct thread *,
 #ifdef SMP
 #define __mtx_lock_spin(mp, tid, opts, file, line) do {			\
 	uintptr_t _tid = (uintptr_t)(tid);				\
-	uintptr_t _v;							\
+	uintptr_t _v = MTX_UNOWNED;					\
 									\
 	spinlock_enter();						\
 	if (!_mtx_obtain_lock_fetch((mp), &_v, _tid)) 			\
@@ -267,9 +263,8 @@ void	thread_lock_flags_(struct thread *,
 #define __mtx_unlock(mp, tid, opts, file, line) do {			\
 	uintptr_t _tid = (uintptr_t)(tid);				\
 									\
-	if ((mp)->mtx_recurse == 0)					\
-		LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, mp);	\
-	if (!_mtx_release_lock((mp), _tid))				\
+	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__release) ||\
+	    !_mtx_release_lock((mp), _tid)))				\
 		_mtx_unlock_sleep((mp), (opts), (file), (line));	\
 } while (0)
 

Modified: stable/11/sys/sys/rwlock.h
==============================================================================
--- stable/11/sys/sys/rwlock.h	Thu Mar 16 06:35:53 2017	(r315377)
+++ stable/11/sys/sys/rwlock.h	Thu Mar 16 06:45:36 2017	(r315378)
@@ -84,10 +84,8 @@
 #define	_rw_write_lock(rw, tid)						\
 	atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid))
 
-#define	_rw_write_lock_fetch(rw, vp, tid) ({				\
-	*vp = RW_UNLOCKED;						\
-	atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid));		\
-})
+#define	_rw_write_lock_fetch(rw, vp, tid)				\
+	atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid))
 
 /* Release a write lock quickly if there are no waiters. */
 #define	_rw_write_unlock(rw, tid)					\
@@ -102,13 +100,11 @@
 /* Acquire a write lock. */
 #define	__rw_wlock(rw, tid, file, line) do {				\
 	uintptr_t _tid = (uintptr_t)(tid);				\
-	uintptr_t _v;							\
+	uintptr_t _v = RW_UNLOCKED;					\
 									\
-	if (!_rw_write_lock_fetch((rw), &_v, _tid))			\
+	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||	\
+	    !_rw_write_lock_fetch((rw), &_v, _tid)))			\
 		_rw_wlock_hard((rw), _v, _tid, (file), (line));		\
-	else 								\
-		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,	\
-		    0, 0, file, line, LOCKSTAT_WRITER);			\
 } while (0)
 
 /* Release a write lock. */
@@ -118,9 +114,8 @@
 	if ((rw)->rw_recurse)						\
 		(rw)->rw_recurse--;					\
 	else {								\
-		LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw,	\
-		    LOCKSTAT_WRITER);					\
-		if (!_rw_write_unlock((rw), _tid))			\
+		if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||\
+		    !_rw_write_unlock((rw), _tid)))			\
 			_rw_wunlock_hard((rw), _tid, (file), (line));	\
 	}								\
 } while (0)

Modified: stable/11/sys/sys/sdt.h
==============================================================================
--- stable/11/sys/sys/sdt.h	Thu Mar 16 06:35:53 2017	(r315377)
+++ stable/11/sys/sys/sdt.h	Thu Mar 16 06:45:36 2017	(r315378)
@@ -160,6 +160,9 @@ SET_DECLARE(sdt_argtypes_set, struct sdt
 #define SDT_PROBE_DECLARE(prov, mod, func, name)				\
 	extern struct sdt_probe sdt_##prov##_##mod##_##func##_##name[1]
 
+#define SDT_PROBE_ENABLED(prov, mod, func, name)				\
+	__predict_false((sdt_##prov##_##mod##_##func##_##name->id))
+
 #define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4)	do {	\
 	if (__predict_false(sdt_##prov##_##mod##_##func##_##name->id))		\
 		(*sdt_probe_func)(sdt_##prov##_##mod##_##func##_##name->id,	\

Modified: stable/11/sys/sys/sx.h
==============================================================================
--- stable/11/sys/sys/sx.h	Thu Mar 16 06:35:53 2017	(r315377)
+++ stable/11/sys/sys/sx.h	Thu Mar 16 06:45:36 2017	(r315378)
@@ -145,21 +145,19 @@ struct sx_args {
  * deferred to 'tougher' functions.
  */
 
+#if	(LOCK_DEBUG == 0) && !defined(SX_NOINLINE)
 /* Acquire an exclusive lock. */
 static __inline int
 __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file,
     int line)
 {
 	uintptr_t tid = (uintptr_t)td;
-	uintptr_t v;
+	uintptr_t v = SX_LOCK_UNLOCKED;
 	int error = 0;
 
-	v = SX_LOCK_UNLOCKED;
-	if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &v, tid))
+	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) ||
+	    !atomic_fcmpset_acq_ptr(&sx->sx_lock, &v, tid)))
 		error = _sx_xlock_hard(sx, v, tid, opts, file, line);
-	else 
-		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
-		    0, 0, file, line, LOCKSTAT_WRITER);
 
 	return (error);
 }
@@ -170,12 +168,11 @@ __sx_xunlock(struct sx *sx, struct threa
 {
 	uintptr_t tid = (uintptr_t)td;
 
-	if (sx->sx_recurse == 0)
-		LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx,
-		    LOCKSTAT_WRITER);
-	if (!atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
+	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) ||
+	    !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED)))
 		_sx_xunlock_hard(sx, tid, file, line);
 }
+#endif
 
 /*
  * Public interface for lock operations.


