Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 25 Jan 2010 12:05:51 +0000 (UTC)
From:      Attilio Rao <attilio@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r202966 - in stable/8: . share/man/man9 sys/conf sys/kern sys/sys
Message-ID:  <201001251205.o0PC5pnC056848@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: attilio
Date: Mon Jan 25 12:05:51 2010
New Revision: 202966
URL: http://svn.freebsd.org/changeset/base/202966

Log:
  MFC r201879:
  Introduce the new kernel thread called "deadlock resolver".
  It is used in order to seek within the threads state and heuristically
  understand if there is any deadlock happening.
  
  In order to implement it, the sq_type in sleepqueues is mandatory and not
  only compiled along with INVARIANTS option. Additively, a new sleepqueue
  function, sleepq_type() is added, returning the type of the sleepqueue
  linked to a wchan.
  Three new sysctls are added in order to configure the thread:
  debug.deadlkres.slptime_threshold
  debug.deadlkres.blktime_threshold
  debug.deadlkres.sleepfreq
  
  rappresenting the thresholds for sleep and block time that will lead to
  a deadlock matching (when exceeded), while the sleepfreq rappresents the
  number of seconds between 2 consecutive thread runnings.
  In order to enable the deadlock resolver thread recompile your kernel
  with the option DEADLKRES.
  
  Sponsored by:	Sandvine Incorporated

Modified:
  stable/8/UPDATING   (contents, props changed)
  stable/8/share/man/man9/sleepqueue.9
  stable/8/sys/conf/NOTES
  stable/8/sys/conf/options
  stable/8/sys/kern/kern_clock.c
  stable/8/sys/kern/subr_sleepqueue.c
  stable/8/sys/kern/subr_turnstile.c
  stable/8/sys/sys/proc.h
  stable/8/sys/sys/sleepqueue.h
Directory Properties:
  stable/8/share/man/man9/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/UPDATING
==============================================================================
--- stable/8/UPDATING	Mon Jan 25 11:56:53 2010	(r202965)
+++ stable/8/UPDATING	Mon Jan 25 12:05:51 2010	(r202966)
@@ -15,6 +15,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.
 	debugging tools present in HEAD were left in place because
 	sun4v support still needs work to become production ready.
 
+20100125:
+	Introduce the kernel thread "deadlock resolver" (which can be enabled
+	via the DEADLKRES option, see NOTES for more details) and the
+	sleepq_type() function for sleepqueues.
+
 20090929:
 	802.11s D3.03 support was committed. This is incompatible with
 	the previous code, which was based on D3.0.

Modified: stable/8/share/man/man9/sleepqueue.9
==============================================================================
--- stable/8/share/man/man9/sleepqueue.9	Mon Jan 25 11:56:53 2010	(r202965)
+++ stable/8/share/man/man9/sleepqueue.9	Mon Jan 25 12:05:51 2010	(r202966)
@@ -23,7 +23,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 18, 2010
+.Dd January 25, 2010
 .Dt SLEEPQUEUE 9
 .Os
 .Sh NAME
@@ -44,6 +44,7 @@
 .Nm sleepq_sleepcnt ,
 .Nm sleepq_timedwait ,
 .Nm sleepq_timedwait_sig ,
+.Nm sleepq_type ,
 .Nm sleepq_wait ,
 .Nm sleepq_wait_sig
 .Nd manage the queues of sleeping threads
@@ -84,6 +85,8 @@
 .Fn sleepq_timedwait "void *wchan"
 .Ft int
 .Fn sleepq_timedwait_sig "void *wchan" "int signal_caught"
+.Ft int
+.Fn sleepq_type "void *wchan"
 .Ft void
 .Fn sleepq_wait "void *wchan"
 .Ft int
@@ -366,6 +369,12 @@ given a
 .Fa wchan .
 .Pp
 The
+.Fn sleepq_type
+function returns the type of
+.Fa wchan
+associated to a sleepqueue.
+.Pp
+The
 .Fn sleepq_abort ,
 .Fn sleepq_broadcast ,
 and

Modified: stable/8/sys/conf/NOTES
==============================================================================
--- stable/8/sys/conf/NOTES	Mon Jan 25 11:56:53 2010	(r202965)
+++ stable/8/sys/conf/NOTES	Mon Jan 25 12:05:51 2010	(r202966)
@@ -2473,6 +2473,11 @@ options 	BOOTP_BLOCKSIZE=8192 # Override
 options 	SW_WATCHDOG
 
 #
+# Add the software deadlock resolver thread.
+#
+options		DEADLKRES
+
+#
 # Disable swapping of stack pages.  This option removes all
 # code which actually performs swapping, so it's not possible to turn
 # it back on at run-time.

Modified: stable/8/sys/conf/options
==============================================================================
--- stable/8/sys/conf/options	Mon Jan 25 11:56:53 2010	(r202965)
+++ stable/8/sys/conf/options	Mon Jan 25 12:05:51 2010	(r202966)
@@ -72,6 +72,7 @@ COMPAT_FREEBSD6	opt_compat.h
 COMPAT_FREEBSD7	opt_compat.h
 COMPILING_LINT	opt_global.h
 CY_PCI_FASTINTR
+DEADLKRES	opt_watchdog.h
 DIRECTIO
 FULL_PREEMPTION	opt_sched.h
 IPI_PREEMPTION	opt_sched.h

Modified: stable/8/sys/kern/kern_clock.c
==============================================================================
--- stable/8/sys/kern/kern_clock.c	Mon Jan 25 11:56:53 2010	(r202965)
+++ stable/8/sys/kern/kern_clock.c	Mon Jan 25 12:05:51 2010	(r202966)
@@ -48,14 +48,16 @@ __FBSDID("$FreeBSD$");
 #include <sys/callout.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
-#include <sys/lock.h>
+#include <sys/kthread.h>
 #include <sys/ktr.h>
+#include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
+#include <sys/sleepqueue.h>
 #include <sys/smp.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
@@ -159,6 +161,124 @@ sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS
 SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
     0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
 
+#ifdef DEADLKRES
+static int slptime_threshold = 1800;
+static int blktime_threshold = 900;
+static int sleepfreq = 3;
+
+static void
+deadlkres(void)
+{
+	struct proc *p;
+	struct thread *td;
+	void *wchan;
+	int blkticks, slpticks, slptype, tryl, tticks;
+
+	tryl = 0;
+	for (;;) {
+		blkticks = blktime_threshold * hz;
+		slpticks = slptime_threshold * hz;
+
+		/*
+		 * Avoid to sleep on the sx_lock in order to avoid a possible
+		 * priority inversion problem leading to starvation.
+		 * If the lock can't be held after 100 tries, panic.
+		 */
+		if (!sx_try_slock(&allproc_lock)) {
+			if (tryl > 100)
+		panic("%s: possible deadlock detected on allproc_lock\n",
+				    __func__);
+			tryl++;
+			pause("allproc_lock deadlkres", sleepfreq * hz);
+			continue;
+		}
+		tryl = 0;
+		FOREACH_PROC_IN_SYSTEM(p) {
+			PROC_LOCK(p);
+			FOREACH_THREAD_IN_PROC(p, td) {
+				thread_lock(td);
+				if (TD_ON_LOCK(td)) {
+
+					/*
+					 * The thread should be blocked on a
+					 * turnstile, simply check if the
+					 * turnstile channel is in good state.
+					 */
+					MPASS(td->td_blocked != NULL);
+					tticks = ticks - td->td_blktick;
+					thread_unlock(td);
+					if (tticks > blkticks) {
+
+						/*
+						 * Accordingly with provided
+						 * thresholds, this thread is
+						 * stuck for too long on a
+						 * turnstile.
+						 */
+						PROC_UNLOCK(p);
+						sx_sunlock(&allproc_lock);
+	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
+						    __func__, td, tticks);
+					}
+				} else if (TD_IS_SLEEPING(td)) {
+
+					/*
+					 * Check if the thread is sleeping on a
+					 * lock, otherwise skip the check.
+					 * Drop the thread lock in order to
+					 * avoid a LOR with the sleepqueue
+					 * spinlock.
+					 */
+					wchan = td->td_wchan;
+					tticks = ticks - td->td_slptick;
+					thread_unlock(td);
+					slptype = sleepq_type(wchan);
+					if ((slptype == SLEEPQ_SX ||
+					    slptype == SLEEPQ_LK) &&
+					    tticks > slpticks) {
+
+						/*
+						 * Accordingly with provided
+						 * thresholds, this thread is
+						 * stuck for too long on a
+						 * sleepqueue.
+						 */
+						PROC_UNLOCK(p);
+						sx_sunlock(&allproc_lock);
+	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
+						    __func__, td, tticks);
+					}
+				} else
+					thread_unlock(td);
+			}
+			PROC_UNLOCK(p);
+		}
+		sx_sunlock(&allproc_lock);
+
+		/* Sleep for sleepfreq seconds. */
+		pause("deadlkres", sleepfreq * hz);
+	}
+}
+
+static struct kthread_desc deadlkres_kd = {
+	"deadlkres",
+	deadlkres,
+	(struct thread **)NULL
+};
+
+SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);
+
+SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
+    &slptime_threshold, 0,
+    "Number of seconds within is valid to sleep on a sleepqueue");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
+    &blktime_threshold, 0,
+    "Number of seconds within is valid to block on a turnstile");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
+    "Number of seconds between any deadlock resolver thread run");
+#endif	/* DEADLKRES */
+
 void
 read_cpu_time(long *cp_time)
 {

Modified: stable/8/sys/kern/subr_sleepqueue.c
==============================================================================
--- stable/8/sys/kern/subr_sleepqueue.c	Mon Jan 25 11:56:53 2010	(r202965)
+++ stable/8/sys/kern/subr_sleepqueue.c	Mon Jan 25 12:05:51 2010	(r202966)
@@ -122,8 +122,8 @@ struct sleepqueue {
 	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
 	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
 	void	*sq_wchan;			/* (c) Wait channel. */
-#ifdef INVARIANTS
 	int	sq_type;			/* (c) Queue type. */
+#ifdef INVARIANTS
 	struct lock_object *sq_lock;		/* (c) Associated lock. */
 #endif
 };
@@ -317,7 +317,6 @@ sleepq_add(void *wchan, struct lock_obje
 		    ("thread's sleep queue has a non-empty free list"));
 		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
 		sq->sq_lock = lock;
-		sq->sq_type = flags & SLEEPQ_TYPE;
 #endif
 #ifdef SLEEPQUEUE_PROFILING
 		sc->sc_depth++;
@@ -330,6 +329,7 @@ sleepq_add(void *wchan, struct lock_obje
 		sq = td->td_sleepqueue;
 		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
 		sq->sq_wchan = wchan;
+		sq->sq_type = flags & SLEEPQ_TYPE;
 	} else {
 		MPASS(wchan == sq->sq_wchan);
 		MPASS(lock == sq->sq_lock);
@@ -669,6 +669,28 @@ sleepq_timedwait_sig(void *wchan, int pr
 }
 
 /*
+ * Returns the type of sleepqueue given a waitchannel.
+ */
+int
+sleepq_type(void *wchan)
+{
+	struct sleepqueue *sq;
+	int type;
+
+	MPASS(wchan != NULL);
+
+	sleepq_lock(wchan);
+	sq = sleepq_lookup(wchan);
+	if (sq == NULL) {
+		sleepq_release(wchan);
+		return (-1);
+	}
+	type = sq->sq_type;
+	sleepq_release(wchan);
+	return (type);
+}
+
+/*
  * Removes a thread from a sleep queue and makes it
  * runnable.
  */
@@ -1176,8 +1198,8 @@ DB_SHOW_COMMAND(sleepq, db_show_sleepque
 	return;
 found:
 	db_printf("Wait channel: %p\n", sq->sq_wchan);
-#ifdef INVARIANTS
 	db_printf("Queue type: %d\n", sq->sq_type);
+#ifdef INVARIANTS
 	if (sq->sq_lock) {
 		lock = sq->sq_lock;
 		db_printf("Associated Interlock: %p - (%s) %s\n", lock,

Modified: stable/8/sys/kern/subr_turnstile.c
==============================================================================
--- stable/8/sys/kern/subr_turnstile.c	Mon Jan 25 11:56:53 2010	(r202965)
+++ stable/8/sys/kern/subr_turnstile.c	Mon Jan 25 12:05:51 2010	(r202966)
@@ -733,6 +733,7 @@ turnstile_wait(struct turnstile *ts, str
 	td->td_tsqueue = queue;
 	td->td_blocked = ts;
 	td->td_lockname = lock->lo_name;
+	td->td_blktick = ticks;
 	TD_SET_LOCK(td);
 	mtx_unlock_spin(&tc->tc_lock);
 	propagate_priority(td);
@@ -925,6 +926,7 @@ turnstile_unpend(struct turnstile *ts, i
 		MPASS(TD_CAN_RUN(td));
 		td->td_blocked = NULL;
 		td->td_lockname = NULL;
+		td->td_blktick = 0;
 #ifdef INVARIANTS
 		td->td_tsqueue = 0xff;
 #endif

Modified: stable/8/sys/sys/proc.h
==============================================================================
--- stable/8/sys/sys/proc.h	Mon Jan 25 11:56:53 2010	(r202965)
+++ stable/8/sys/sys/proc.h	Mon Jan 25 12:05:51 2010	(r202966)
@@ -217,6 +217,7 @@ struct thread {
 	struct ucred	*td_ucred;	/* (k) Reference to credentials. */
 	u_int		td_estcpu;	/* (t) estimated cpu utilization */
 	int		td_slptick;	/* (t) Time at sleep. */
+	int		td_blktick;	/* (t) Time spent blocked. */
 	struct rusage	td_ru;		/* (t) rusage information */
 	uint64_t	td_incruntime;	/* (t) Cpu ticks to transfer to proc. */
 	uint64_t	td_runtime;	/* (t) How many cpu ticks we've run. */

Modified: stable/8/sys/sys/sleepqueue.h
==============================================================================
--- stable/8/sys/sys/sleepqueue.h	Mon Jan 25 11:56:53 2010	(r202965)
+++ stable/8/sys/sys/sleepqueue.h	Mon Jan 25 12:05:51 2010	(r202966)
@@ -112,6 +112,7 @@ void	sleepq_set_timeout(void *wchan, int
 u_int	sleepq_sleepcnt(void *wchan, int queue);
 int	sleepq_timedwait(void *wchan, int pri);
 int	sleepq_timedwait_sig(void *wchan, int pri);
+int	sleepq_type(void *wchan);
 void	sleepq_wait(void *wchan, int pri);
 int	sleepq_wait_sig(void *wchan, int pri);
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201001251205.o0PC5pnC056848>