Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 17 Jul 2015 19:43:43 -0700
From:      Mark Johnston <markj@FreeBSD.org>
To:        Mateusz Guzik <mjguzik@gmail.com>
Cc:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r285664 - in head/sys: kern sys
Message-ID:  <20150718024343.GA2808@raichu>
In-Reply-To: <20150718013511.GA5792@dft-labs.eu>
References:  <201507180057.t6I0vVhS076519@repo.freebsd.org> <20150718013511.GA5792@dft-labs.eu>

next in thread | previous in thread | raw e-mail | index | archive | help
On Sat, Jul 18, 2015 at 03:35:11AM +0200, Mateusz Guzik wrote:
> On Sat, Jul 18, 2015 at 12:57:31AM +0000, Mark Johnston wrote:
> > Author: markj
> > Date: Sat Jul 18 00:57:30 2015
> > New Revision: 285664
> > URL: https://svnweb.freebsd.org/changeset/base/285664
> > 
> > Log:
> >   Pass the lock object to lockstat_nsecs() and return immediately if
> >   LO_NOPROFILE is set. Some timecounter handlers acquire a spin mutex, and
> >   we don't want to recurse if lockstat probes are enabled.
> >   
> 
> Why do we even make the call?

Note that lockstat_nsecs() is not called in uncontended lock
acquisitions (with the exception of rwlock read locks). So I don't think
there's a lot to gain by inlining the new checks, since we're already
attempting to perform an operation that's much more expensive than a
single function call. I had also assumed that keeping the checks in one
location would be friendlier to the branch predictor.

The case of rwlock read locks is special. I considered trying to
modify the implementation to attempt to acquire the lock and fall back
to __rw_rlock() only if the attempt fails, but that's risky for a
change that I want to include in 10.2. What I'd like to do long-term
is generalize some parts of DTrace FBT so that the calls could
be entirely removed until needed, with hot-patching.

> Have you tried replacing this with an
> inline with __predict_false on both conditions?

I assume you mean a __predict_true on the first condition? I haven't.
The testing I did that led me to look at this was simple timing of an
uncontended lock acquire -> lock release loop, but I don't see how a
branch prediction hint would make a difference there. (And it doesn't.)
Could you suggest a microbenchmark that might expose a difference?

I'm not sure it makes sense it makes sense to add a branch hint to the
LO_NOPROFILE check, as we only hit that when probes are enabled, at
which point performance is less of a concern.

-Mark

> 
> >   PR:		201642
> >   Reviewed by:	avg
> >   MFC after:	3 days
> > 
> > Modified:
> >   head/sys/kern/kern_lockstat.c
> >   head/sys/kern/kern_mutex.c
> >   head/sys/kern/kern_rwlock.c
> >   head/sys/kern/kern_sx.c
> >   head/sys/sys/lockstat.h
> > 
> > Modified: head/sys/kern/kern_lockstat.c
> > ==============================================================================
> > --- head/sys/kern/kern_lockstat.c	Sat Jul 18 00:22:00 2015	(r285663)
> > +++ head/sys/kern/kern_lockstat.c	Sat Jul 18 00:57:30 2015	(r285664)
> > @@ -34,9 +34,10 @@
> >  
> >  #ifdef KDTRACE_HOOKS
> >  
> > -#include <sys/time.h>
> >  #include <sys/types.h>
> > +#include <sys/lock.h>
> >  #include <sys/lockstat.h>
> > +#include <sys/time.h>
> >  
> >  /*
> >   * The following must match the type definition of dtrace_probe.  It is  
> > @@ -48,13 +49,15 @@ void (*lockstat_probe_func)(uint32_t, ui
> >  int lockstat_enabled = 0;
> >  
> >  uint64_t 
> > -lockstat_nsecs(void)
> > +lockstat_nsecs(struct lock_object *lo)
> >  {
> >  	struct bintime bt;
> >  	uint64_t ns;
> >  
> >  	if (!lockstat_enabled)
> >  		return (0);
> > +	if ((lo->lo_flags & LO_NOPROFILE) != 0)
> > +		return (0);
> >  
> >  	binuptime(&bt);
> >  	ns = bt.sec * (uint64_t)1000000000;
> > 
> > Modified: head/sys/kern/kern_mutex.c
> > ==============================================================================
> > --- head/sys/kern/kern_mutex.c	Sat Jul 18 00:22:00 2015	(r285663)
> > +++ head/sys/kern/kern_mutex.c	Sat Jul 18 00:57:30 2015	(r285664)
> > @@ -416,7 +416,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, 
> >  		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
> >  		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
> >  #ifdef KDTRACE_HOOKS
> > -	all_time -= lockstat_nsecs();
> > +	all_time -= lockstat_nsecs(&m->lock_object);
> >  #endif
> >  
> >  	while (!_mtx_obtain_lock(m, tid)) {
> > @@ -513,16 +513,16 @@ __mtx_lock_sleep(volatile uintptr_t *c, 
> >  		 * Block on the turnstile.
> >  		 */
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time -= lockstat_nsecs();
> > +		sleep_time -= lockstat_nsecs(&m->lock_object);
> >  #endif
> >  		turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE);
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time += lockstat_nsecs();
> > +		sleep_time += lockstat_nsecs(&m->lock_object);
> >  		sleep_cnt++;
> >  #endif
> >  	}
> >  #ifdef KDTRACE_HOOKS
> > -	all_time += lockstat_nsecs();
> > +	all_time += lockstat_nsecs(&m->lock_object);
> >  #endif
> >  #ifdef KTR
> >  	if (cont_logged) {
> > @@ -600,7 +600,7 @@ _mtx_lock_spin_cookie(volatile uintptr_t
> >  #endif
> >  	lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
> >  #ifdef KDTRACE_HOOKS
> > -	spin_time -= lockstat_nsecs();
> > +	spin_time -= lockstat_nsecs(&m->lock_object);
> >  #endif
> >  	while (!_mtx_obtain_lock(m, tid)) {
> >  
> > @@ -620,7 +620,7 @@ _mtx_lock_spin_cookie(volatile uintptr_t
> >  		spinlock_enter();
> >  	}
> >  #ifdef KDTRACE_HOOKS
> > -	spin_time += lockstat_nsecs();
> > +	spin_time += lockstat_nsecs(&m->lock_object);
> >  #endif
> >  
> >  	if (LOCK_LOG_TEST(&m->lock_object, opts))
> > @@ -630,7 +630,8 @@ _mtx_lock_spin_cookie(volatile uintptr_t
> >  
> >  	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m,
> >  	    contested, waittime, (file), (line));
> > -	LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, spin_time);
> > +	if (spin_time != 0)
> > +		LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, spin_time);
> >  }
> >  #endif /* SMP */
> >  
> > @@ -655,7 +656,7 @@ thread_lock_flags_(struct thread *td, in
> >  		return;
> >  
> >  #ifdef KDTRACE_HOOKS
> > -	spin_time -= lockstat_nsecs();
> > +	spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
> >  #endif
> >  	for (;;) {
> >  retry:
> > @@ -703,7 +704,7 @@ retry:
> >  		__mtx_unlock_spin(m);	/* does spinlock_exit() */
> >  	}
> >  #ifdef KDTRACE_HOOKS
> > -	spin_time += lockstat_nsecs();
> > +	spin_time += lockstat_nsecs(&m->lock_object);
> >  #endif
> >  	if (m->mtx_recurse == 0)
> >  		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE,
> > 
> > Modified: head/sys/kern/kern_rwlock.c
> > ==============================================================================
> > --- head/sys/kern/kern_rwlock.c	Sat Jul 18 00:22:00 2015	(r285663)
> > +++ head/sys/kern/kern_rwlock.c	Sat Jul 18 00:57:30 2015	(r285664)
> > @@ -378,7 +378,7 @@ __rw_rlock(volatile uintptr_t *c, const 
> >  	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
> >  
> >  #ifdef KDTRACE_HOOKS
> > -	all_time -= lockstat_nsecs();
> > +	all_time -= lockstat_nsecs(&rw->lock_object);
> >  	state = rw->rw_lock;
> >  #endif
> >  	for (;;) {
> > @@ -532,11 +532,11 @@ __rw_rlock(volatile uintptr_t *c, const 
> >  			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
> >  			    rw);
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time -= lockstat_nsecs();
> > +		sleep_time -= lockstat_nsecs(&rw->lock_object);
> >  #endif
> >  		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time += lockstat_nsecs();
> > +		sleep_time += lockstat_nsecs(&rw->lock_object);
> >  		sleep_cnt++;
> >  #endif
> >  		if (LOCK_LOG_TEST(&rw->lock_object, 0))
> > @@ -544,7 +544,7 @@ __rw_rlock(volatile uintptr_t *c, const 
> >  			    __func__, rw);
> >  	}
> >  #ifdef KDTRACE_HOOKS
> > -	all_time += lockstat_nsecs();
> > +	all_time += lockstat_nsecs(&rw->lock_object);
> >  	if (sleep_time)
> >  		LOCKSTAT_RECORD4(LS_RW_RLOCK_BLOCK, rw, sleep_time,
> >  		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
> > @@ -767,7 +767,7 @@ __rw_wlock_hard(volatile uintptr_t *c, u
> >  		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
> >  
> >  #ifdef KDTRACE_HOOKS
> > -	all_time -= lockstat_nsecs();
> > +	all_time -= lockstat_nsecs(&rw->lock_object);
> >  	state = rw->rw_lock;
> >  #endif
> >  	while (!_rw_write_lock(rw, tid)) {
> > @@ -893,11 +893,11 @@ __rw_wlock_hard(volatile uintptr_t *c, u
> >  			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
> >  			    rw);
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time -= lockstat_nsecs();
> > +		sleep_time -= lockstat_nsecs(&rw->lock_object);
> >  #endif
> >  		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time += lockstat_nsecs();
> > +		sleep_time += lockstat_nsecs(&rw->lock_object);
> >  		sleep_cnt++;
> >  #endif
> >  		if (LOCK_LOG_TEST(&rw->lock_object, 0))
> > @@ -908,7 +908,7 @@ __rw_wlock_hard(volatile uintptr_t *c, u
> >  #endif
> >  	}
> >  #ifdef KDTRACE_HOOKS
> > -	all_time += lockstat_nsecs();
> > +	all_time += lockstat_nsecs(&rw->lock_object);
> >  	if (sleep_time)
> >  		LOCKSTAT_RECORD4(LS_RW_WLOCK_BLOCK, rw, sleep_time,
> >  		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
> > 
> > Modified: head/sys/kern/kern_sx.c
> > ==============================================================================
> > --- head/sys/kern/kern_sx.c	Sat Jul 18 00:22:00 2015	(r285663)
> > +++ head/sys/kern/kern_sx.c	Sat Jul 18 00:57:30 2015	(r285664)
> > @@ -541,7 +541,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t 
> >  		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
> >  
> >  #ifdef KDTRACE_HOOKS
> > -	all_time -= lockstat_nsecs();
> > +	all_time -= lockstat_nsecs(&sx->lock_object);
> >  	state = sx->sx_lock;
> >  #endif
> >  	while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
> > @@ -691,7 +691,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t 
> >  			    __func__, sx);
> >  
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time -= lockstat_nsecs();
> > +		sleep_time -= lockstat_nsecs(&sx->lock_object);
> >  #endif
> >  		GIANT_SAVE();
> >  		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
> > @@ -702,7 +702,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t 
> >  		else
> >  			error = sleepq_wait_sig(&sx->lock_object, 0);
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time += lockstat_nsecs();
> > +		sleep_time += lockstat_nsecs(&sx->lock_object);
> >  		sleep_cnt++;
> >  #endif
> >  		if (error) {
> > @@ -717,7 +717,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t 
> >  			    __func__, sx);
> >  	}
> >  #ifdef KDTRACE_HOOKS
> > -	all_time += lockstat_nsecs();
> > +	all_time += lockstat_nsecs(&sx->lock_object);
> >  	if (sleep_time)
> >  		LOCKSTAT_RECORD4(LS_SX_XLOCK_BLOCK, sx, sleep_time,
> >  		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
> > @@ -828,7 +828,7 @@ _sx_slock_hard(struct sx *sx, int opts, 
> >  
> >  #ifdef KDTRACE_HOOKS
> >  	state = sx->sx_lock;
> > -	all_time -= lockstat_nsecs();
> > +	all_time -= lockstat_nsecs(&sx->lock_object);
> >  #endif
> >  
> >  	/*
> > @@ -955,7 +955,7 @@ _sx_slock_hard(struct sx *sx, int opts, 
> >  			    __func__, sx);
> >  
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time -= lockstat_nsecs();
> > +		sleep_time -= lockstat_nsecs(&sx->lock_object);
> >  #endif
> >  		GIANT_SAVE();
> >  		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
> > @@ -966,7 +966,7 @@ _sx_slock_hard(struct sx *sx, int opts, 
> >  		else
> >  			error = sleepq_wait_sig(&sx->lock_object, 0);
> >  #ifdef KDTRACE_HOOKS
> > -		sleep_time += lockstat_nsecs();
> > +		sleep_time += lockstat_nsecs(&sx->lock_object);
> >  		sleep_cnt++;
> >  #endif
> >  		if (error) {
> > @@ -981,7 +981,7 @@ _sx_slock_hard(struct sx *sx, int opts, 
> >  			    __func__, sx);
> >  	}
> >  #ifdef KDTRACE_HOOKS
> > -	all_time += lockstat_nsecs();
> > +	all_time += lockstat_nsecs(&sx->lock_object);
> >  	if (sleep_time)
> >  		LOCKSTAT_RECORD4(LS_SX_SLOCK_BLOCK, sx, sleep_time,
> >  		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
> > 
> > Modified: head/sys/sys/lockstat.h
> > ==============================================================================
> > --- head/sys/sys/lockstat.h	Sat Jul 18 00:22:00 2015	(r285663)
> > +++ head/sys/sys/lockstat.h	Sat Jul 18 00:57:30 2015	(r285664)
> > @@ -149,11 +149,12 @@
> >   * The following must match the type definition of dtrace_probe.  It is
> >   * defined this way to avoid having to rely on CDDL code.
> >   */
> > +struct lock_object;
> >  extern uint32_t lockstat_probemap[LS_NPROBES];
> >  typedef void (*lockstat_probe_func_t)(uint32_t, uintptr_t arg0, uintptr_t arg1,
> >      uintptr_t arg2, uintptr_t arg3, uintptr_t arg4);
> >  extern lockstat_probe_func_t lockstat_probe_func;
> > -extern uint64_t lockstat_nsecs(void);
> > +extern uint64_t lockstat_nsecs(struct lock_object *);
> >  extern int lockstat_enabled;
> >  
> >  #ifdef	KDTRACE_HOOKS
> > 
> 
> -- 
> Mateusz Guzik <mjguzik gmail.com>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20150718024343.GA2808>