Date:      Mon, 07 Mar 2005 11:37:08 -0500
From:      Stephan Uphoff <ups@tree.com>
To:        Kris Kennaway <kris@obsecurity.org>
Cc:        Julian Elischer <julian@elischer.org>
Subject:   Re: HEADSUP: Turn off cpu_idle_hlt on SMP for now on x86
Message-ID:  <1110213428.29804.4517.camel@palm>
In-Reply-To: <20050222225932.GA90362@xor.obsecurity.org>
References:  <200502111148.45976.jhb@FreeBSD.org> <200502221620.27992.jhb@FreeBSD.org> <20050222225932.GA90362@xor.obsecurity.org>

On Tue, 2005-02-22 at 17:59, Kris Kennaway wrote:
> On Tue, Feb 22, 2005 at 04:20:27PM -0500, John Baldwin wrote:
> > On Friday 11 February 2005 11:48 am, John Baldwin wrote:
> > > Thus, my theory is that when the pinned thread was preempted and put back
> > > on the run queue, the scheduler didn't IPI the CPU it was pinned to, to
> > > wake it up in case it was idle.  The IPI is only needed if the CPUs are
> > > halted, which is why I think turning the idle halt off might work as a
> > > workaround.  I don't know if ULE has this same issue, but I've cc'd Jeff
> > > and hopefully he can look into it.
> > 
> > Never mind, I don't think cpu_idle_hlt will help (though, oddly enough, it
> > has seemed to help locally).  Presumably the CPU that the preempted thread
> > owning the vm page queues lock is pinned to would have run the pinned
> > thread before going idle.  In this case, that means the thread must be
> > pinned to CPU 0, which is running a make process that is just spinning.
> > Unfortunately we currently don't have a good way of looking at the stack
> > of a thread on another CPU.
> 
> I'm running into this with deadlocks I'm seeing on a quad-cpu RELENG_5
> sparc machine.
> 
> Unfortunately, I can't even dump because of yet more locking assertion
> failures in the dump path (CAM, elsewhere).
> 
> Kris

The attached patch (hopefully ;-) fixes a few scheduler problems with
both SMP and UP.

The patch also adds an IPI (IPI_PREEMPT) on i386 to preempt threads that are
running on other CPUs.
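
In rough terms, the new i386 path looks like this (a simplified sketch of the
attached hunks, not a drop-in piece of code; the exact conditions and the
NEEDRESCHED/AST fallback are in the patch below):

	/*
	 * Waking side (kern_switch.c / sched_4bsd.c): a thread with better
	 * priority than what a remote CPU is running went onto a run queue
	 * that only that CPU services, so poke that CPU directly.
	 */
	if (td->td_priority <= PRI_MAX_ITHD)	/* always taken with FULL_PREEMPTION */
		ipi_selected(best_pcpu->pc_cpumask, IPI_PREEMPT);
	else {
		best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(best_pcpu->pc_cpumask, IPI_AST);
	}

	/*
	 * Receiving side (ipi_bitmap_handler() in mp_machdep.c): switch away
	 * from whatever is running there, unless it is the idle thread.
	 */
	if (ipi_bitmap & IPI_PREEMPT) {
		mtx_lock_spin(&sched_lock);
		if (curthread->td_priority < PRI_MIN_IDLE)
			mi_switch(SW_INVOL | SW_PREEMPT, NULL);
		mtx_unlock_spin(&sched_lock);
	}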

While I compiled every valid combination of SMP, PREEMPTION, and
FULL_PREEMPTION, I did not have time to test all of them.
(This needs a lot more testing, but I ran out of time.)
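
For reference, these are the stock kernel config options (see sys/conf/NOTES
on your branch for the exact wording); a test kernel only needs the usual
lines toggled on or off:

	options 	SMP
	options 	PREEMPTION		# kernel preemption
	options 	FULL_PREEMPTION		# preempt for more than interrupt threads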

Please test and provide feedback. 

Thanks
Stephan


[Attachment: idle.patch]

Index: i386/i386/mp_machdep.c
===================================================================
RCS file: /cvsroot/src/sys/i386/i386/mp_machdep.c,v
retrieving revision 1.244
diff -u -r1.244 mp_machdep.c
--- i386/i386/mp_machdep.c	8 Feb 2005 20:25:06 -0000	1.244
+++ i386/i386/mp_machdep.c	7 Mar 2005 16:25:44 -0000
@@ -1096,6 +1096,15 @@
 
 	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
 
+	if (ipi_bitmap & IPI_PREEMPT) {
+		mtx_lock_spin(&sched_lock);
+		/* Don't preempt the idle thread. */
+		if (curthread->td_priority < PRI_MIN_IDLE) {
+			mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+		}
+		mtx_unlock_spin(&sched_lock);
+	}
+
 	/* Nothing to do for AST */
 }
 
Index: i386/include/apicvar.h
===================================================================
RCS file: /cvsroot/src/sys/i386/include/apicvar.h,v
retrieving revision 1.11
diff -u -r1.11 apicvar.h
--- i386/include/apicvar.h	8 Feb 2005 20:25:06 -0000	1.11
+++ i386/include/apicvar.h	7 Mar 2005 16:25:44 -0000
@@ -122,7 +122,8 @@
 
 /* IPIs handled by IPI_BITMAPED_VECTOR  (XXX ups is there a better place?) */
 #define	IPI_AST		0 	/* Generate software trap. */
-#define IPI_BITMAP_LAST IPI_AST
+#define IPI_PREEMPT     1
+#define IPI_BITMAP_LAST IPI_PREEMPT
 #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
 
 #define	IPI_STOP	(APIC_IPI_INTS + 6)	/* Stop CPU until restarted. */
Index: kern/kern_switch.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_switch.c,v
retrieving revision 1.107
diff -u -r1.107 kern_switch.c
--- kern/kern_switch.c	6 Jan 2005 23:35:39 -0000	1.107
+++ kern/kern_switch.c	7 Mar 2005 16:25:45 -0000
@@ -329,29 +329,30 @@
 {
 	struct thread *running_thread;
 
-#ifndef FULL_PREEMPTION
-	int pri;
-	pri = td->td_priority;
-	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD))
-		return;
-#endif
 	mtx_assert(&sched_lock, MA_OWNED);
 	running_thread = curthread;
 
 	if (running_thread->td_ksegrp != td->td_ksegrp)
 		return;
 
-	if (td->td_priority > running_thread->td_priority)
+	if (td->td_priority >= running_thread->td_priority)
 		return;
 #ifdef PREEMPTION
+#ifndef FULL_PREEMPTION
+	if (td->td_priority > PRI_MAX_ITHD) {
+		running_thread->td_flags |= TDF_NEEDRESCHED;
+		return;
+	}
+#endif /* ! FULL_PREEMPTION */
+
 	if (running_thread->td_critnest > 1) 
 		running_thread->td_pflags |= TDP_OWEPREEMPT;
 	 else 		
 		 mi_switch(SW_INVOL, NULL);
 	
-#else
+#else /* PREEMPTION */
 	running_thread->td_flags |= TDF_NEEDRESCHED;
-#endif
+#endif /* PREEMPTION */
 	return;
 }
 
@@ -365,13 +366,6 @@
 	struct pcpu *best_pcpu;
 	struct thread *cputhread;
 
-#ifndef FULL_PREEMPTION
-	int pri;
-	pri = td->td_priority;
-	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD))
-		return;
-#endif
-
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	running_thread = curthread;
@@ -421,7 +415,19 @@
 		if (best_pcpu == NULL) 
 			return;
 
+#if defined(IPI_PREEMPT) && defined(PREEMPTION)
+#if !defined(FULL_PREEMPTION)
+		/* Without FULL_PREEMPTION only preempt for interrupt threads. */
+		if (td->td_priority <= PRI_MAX_ITHD)
+#endif /* ! FULL_PREEMPTION */
+		{
+			ipi_selected(best_pcpu->pc_cpumask, IPI_PREEMPT);
+			return;
+		}
+#endif /* defined(IPI_PREEMPT) && defined(PREEMPTION) */
+
 		if (PCPU_GET(cpuid) != best_pcpu->pc_cpuid) {
+
 			best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
 			ipi_selected(best_pcpu->pc_cpumask, IPI_AST);
 			return;
@@ -430,17 +436,24 @@
 	}	
 #endif
 
-	if (td->td_priority > running_thread->td_priority)
+	if (td->td_priority >= running_thread->td_priority)
 		return;
 #ifdef PREEMPTION
+
+#if !defined(FULL_PREEMPTION)
+	if (td->td_priority > PRI_MAX_ITHD) {
+		running_thread->td_flags |= TDF_NEEDRESCHED;
+		return;
+	}
+#endif /* ! FULL_PREEMPTION */
 	if (running_thread->td_critnest > 1) 
 		running_thread->td_pflags |= TDP_OWEPREEMPT;
 	 else 		
 		 mi_switch(SW_INVOL, NULL);
 	
-#else
+#else /* PREEMPTION */
 	running_thread->td_flags |= TDF_NEEDRESCHED;
-#endif
+#endif /* PREEMPTION */
 	return;
 }
 #endif /* !SMP */
@@ -655,7 +668,7 @@
 	    td->td_kse->ke_state != KES_THREAD)
 		return (0);
 #ifndef FULL_PREEMPTION
-	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
+	if ((pri > PRI_MAX_ITHD) &&
 	    !(cpri >= PRI_MIN_IDLE))
 		return (0);
 #endif
Index: kern/sched_4bsd.c
===================================================================
RCS file: /cvsroot/src/sys/kern/sched_4bsd.c,v
retrieving revision 1.71
diff -u -r1.71 sched_4bsd.c
--- kern/sched_4bsd.c	30 Dec 2004 20:52:44 -0000	1.71
+++ kern/sched_4bsd.c	7 Mar 2005 16:25:45 -0000
@@ -1069,14 +1069,53 @@
 }
 #endif
 
+#ifdef SMP
+/*
+ * Let CPU "cpuid" know that a thread with priority "pri" became runnable:
+ * preempt it via IPI_PREEMPT when possible, otherwise request an AST.
+ */
+static void
+kick_other_cpu(int pri, int cpuid)
+{
+	struct pcpu *pcpu = pcpu_find(cpuid);
+	int cpri = pcpu->pc_curthread->td_priority;
+
+	if (idle_cpus_mask & pcpu->pc_cpumask) {
+		forward_wakeups_delivered++;
+		ipi_selected(pcpu->pc_cpumask, IPI_AST);
+		return;
+	}
+
+	if (pri >= cpri)
+		return;
+
+#if defined(IPI_PREEMPT) && defined(PREEMPTION)
+
+#if !defined(FULL_PREEMPTION)
+	if (pri <= PRI_MAX_ITHD)
+#endif /* ! FULL_PREEMPTION */
+	{
+		ipi_selected(pcpu->pc_cpumask, IPI_PREEMPT);
+		return;
+	}
+#endif /* defined(IPI_PREEMPT) && defined(PREEMPTION) */
+
+	pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
+	ipi_selected(pcpu->pc_cpumask, IPI_AST);
+	return;
+}
+
+#endif /* SMP */
+
 void
 sched_add(struct thread *td, int flags)
+#ifdef SMP
 {
+	
 	struct kse *ke;
-#ifdef SMP
 	int forwarded = 0;
 	int cpu;
-#endif
+	int single_cpu = 0;
 
 	ke = td->td_kse;
 	mtx_assert(&sched_lock, MA_OWNED);
@@ -1089,25 +1128,76 @@
 	    td, td->td_proc->p_comm, td->td_priority, curthread,
 	    curthread->td_proc->p_comm);
 
-#ifdef SMP
-	if (KSE_CAN_MIGRATE(ke)) {
+
+	if (td->td_pinned != 0) {
+		cpu = td->td_lastcpu;
+		ke->ke_runq = &runq_pcpu[cpu];
+		single_cpu = 1;
+		CTR3(KTR_RUNQ,
+		    "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
+	} else if (ke->ke_flags & KEF_BOUND) {
+		/* Find the CPU from the bound run queue. */
+		KASSERT(SKE_RUNQ_PCPU(ke), ("sched_add: bound kse not on cpu runq"));
+		cpu = ke->ke_runq - &runq_pcpu[0];
+		single_cpu = 1;
+		CTR3(KTR_RUNQ,
+		    "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
+	} else {	
 		CTR2(KTR_RUNQ,
 		    "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
 		cpu = NOCPU;
 		ke->ke_runq = &runq;
+	}
+	
+	if (single_cpu && (cpu != PCPU_GET(cpuid))) {
+		kick_other_cpu(td->td_priority, cpu);
 	} else {
-		if (!SKE_RUNQ_PCPU(ke))
-			ke->ke_runq = &runq_pcpu[(cpu = PCPU_GET(cpuid))];
-		else
-			cpu = td->td_lastcpu;
-		CTR3(KTR_RUNQ,
-		    "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
+		
+		if (!single_cpu) {
+			cpumask_t me = PCPU_GET(cpumask);
+			int idle = idle_cpus_mask & me;
+
+			if (!idle && ((flags & SRQ_INTR) == 0) &&
+			    (idle_cpus_mask & ~(hlt_cpus_mask | me)))
+				forwarded = forward_wakeup(cpu);
+	
+		}
+
+		if (!forwarded) {
+			if (((flags & SRQ_YIELDING) == 0) && maybe_preempt(td))
+				return;
+			else
+				maybe_resched(td);
+		}
 	}
-#else
+	
+	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
+		sched_load_add();
+	SLOT_USE(td->td_ksegrp);
+	runq_add(ke->ke_runq, ke, flags);
+	ke->ke_state = KES_ONRUNQ;
+}
+
+
+#else /* SMP */
+
+{
+	struct kse *ke;
+	ke = td->td_kse;
+	mtx_assert(&sched_lock, MA_OWNED);
+	KASSERT(ke->ke_state != KES_ONRUNQ,
+	    ("sched_add: kse %p (%s) already in run queue", ke,
+	    ke->ke_proc->p_comm));
+	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
+	    ("sched_add: process swapped out"));
+	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
+	    td, td->td_proc->p_comm, td->td_priority, curthread,
+	    curthread->td_proc->p_comm);
+
+
 	CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td);
 	ke->ke_runq = &runq;
 
-#endif
 	/* 
 	 * If we are yielding (on the way out anyhow) 
 	 * or the thread being saved is US,
@@ -1120,41 +1210,9 @@
 	 * which also only happens when we are about to yield.
 	 */
 	if((flags & SRQ_YIELDING) == 0) {
-#ifdef SMP
-		cpumask_t me = PCPU_GET(cpumask);
-		int idle = idle_cpus_mask & me;
-		/*
-		 * Only try to kick off another CPU if
-		 * the thread is unpinned
-		 * or pinned to another cpu,
-		 * and there are other available and idle CPUs.
-		 * if we are idle, or it's an interrupt,
-		 * then skip straight to preemption.
-		 */
-		if ( (! idle) && ((flags & SRQ_INTR) == 0) &&
-		    (idle_cpus_mask & ~(hlt_cpus_mask | me)) &&
-		    ( KSE_CAN_MIGRATE(ke) ||
-		      ke->ke_runq != &runq_pcpu[PCPU_GET(cpuid)])) {
-			forwarded = forward_wakeup(cpu);
-		}
-		/*
-		 * If we failed to kick off another cpu, then look to 
-		 * see if we should preempt this CPU. Only allow this
-		 * if it is not pinned or IS pinned to this CPU.
-		 * If we are the idle thread, we also try do preempt.
-		 * as it will be quicker and being idle, we won't 
-		 * lose in doing so.. 
-		 */
-		if ((!forwarded) &&
-		    (ke->ke_runq == &runq ||
-		     ke->ke_runq == &runq_pcpu[PCPU_GET(cpuid)]))
-#endif
-
-		{
-			if (maybe_preempt(td))
-				return;
-		}
-	}
+		if (maybe_preempt(td))
+			return;
+	}	
 	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
 		sched_load_add();
 	SLOT_USE(td->td_ksegrp);
@@ -1162,6 +1220,9 @@
 	ke->ke_state = KES_ONRUNQ;
 	maybe_resched(td);
 }
+
+#endif /* SMP */
+
 
 void
 sched_rem(struct thread *td)
Index: kern/subr_turnstile.c
===================================================================
RCS file: /cvsroot/src/sys/kern/subr_turnstile.c,v
retrieving revision 1.152
diff -u -r1.152 subr_turnstile.c
--- kern/subr_turnstile.c	10 Feb 2005 12:02:37 -0000	1.152
+++ kern/subr_turnstile.c	7 Mar 2005 16:25:45 -0000
@@ -400,7 +400,10 @@
 	 */
 	if (td == TAILQ_FIRST(&ts->ts_blocked) && td->td_priority < oldpri) {
 		mtx_unlock_spin(&tc->tc_lock);
+		critical_enter();
 		propagate_priority(td);
+		critical_exit();
+
 	} else
 		mtx_unlock_spin(&tc->tc_lock);
 }
@@ -626,7 +629,11 @@
 	td->td_blocked = ts;
 	td->td_lockname = lock->lo_name;
 	TD_SET_LOCK(td);
+
+	critical_enter();
 	propagate_priority(td);
+	critical_exit();
+
 
 	if (LOCK_LOG_TEST(lock, 0))
 		CTR4(KTR_LOCK, "%s: td %d blocked on [%p] %s", __func__,
