From owner-p4-projects@FreeBSD.ORG Sat Sep 11 12:34:48 2004 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 84B7A16A4D0; Sat, 11 Sep 2004 12:34:48 +0000 (GMT) Delivered-To: perforce@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 4306116A4CE for ; Sat, 11 Sep 2004 12:34:48 +0000 (GMT) Received: from repoman.freebsd.org (repoman.freebsd.org [216.136.204.115]) by mx1.FreeBSD.org (Postfix) with ESMTP id 26C2F43D1F for ; Sat, 11 Sep 2004 12:34:48 +0000 (GMT) (envelope-from julian@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.12.11/8.12.11) with ESMTP id i8BCYmqZ094322 for ; Sat, 11 Sep 2004 12:34:48 GMT (envelope-from julian@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.12.11/8.12.11/Submit) id i8BCYl29094319 for perforce@freebsd.org; Sat, 11 Sep 2004 12:34:47 GMT (envelope-from julian@freebsd.org) Date: Sat, 11 Sep 2004 12:34:47 GMT Message-Id: <200409111234.i8BCYl29094319@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to julian@freebsd.org using -f From: Julian Elischer To: Perforce Change Reviews Subject: PERFORCE change 61351 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 11 Sep 2004 12:34:49 -0000 http://perforce.freebsd.org/chv.cgi?CH=61351 Change 61351 by julian@julian_ref on 2004/09/11 12:33:50 Add debugging to try to find the runq corruption problem Affected files ... .. //depot/projects/nsched/sys/kern/kern_fork.c#13 edit .. //depot/projects/nsched/sys/kern/kern_switch.c#15 edit Differences ... 
==== //depot/projects/nsched/sys/kern/kern_fork.c#13 (text+ko) ==== @@ -256,7 +256,7 @@ * other side with the expectation that the process is about to * exec. */ - if (p1->p_flag & P_SA) { + if (p1->p_flag & P_HADTHREADS) { /* * Idle the other threads for a second. * Since the user space is copied, it must remain stable. @@ -727,7 +727,7 @@ /* * If other threads are waiting, let them continue now. */ - if (p1->p_flag & P_SA) { + if (p1->p_flag & P_HADTHREADS) { PROC_LOCK(p1); thread_single_end(); PROC_UNLOCK(p1); @@ -748,7 +748,7 @@ mac_destroy_proc(newproc); #endif uma_zfree(proc_zone, newproc); - if (p1->p_flag & P_SA) { + if (p1->p_flag & P_HADTHREADS) { PROC_LOCK(p1); thread_single_end(); PROC_UNLOCK(p1); ==== //depot/projects/nsched/sys/kern/kern_switch.c#15 (text+ko) ==== @@ -118,6 +118,35 @@ CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); +#ifdef INVARIANTS +#define CHECKRUNQ(kg, td) checkrunq((kg), (td)); + +static void +checkrunq(struct ksegrp *kg, struct thread *td) +{ + struct thread *td2; + int count = 0; + TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) { + /* XXX Debugging hack */ + if (!TD_ON_RUNQ(td2)) { + printf("thread in wrong state, td2= %p\n", td2); + panic("corruption in runqueue"); + } + if (td == td2) { + printf("thread alreadyin runq, td2= %p\n", td); + panic("confusion in runqueue"); + } + if (++count > 10000) { + printf("corrupt kg_runq, td= %p\n", td); + panic("deadlock in runqueue"); + } + } +} + +#else +#define CHECKRUNQ(kg, td) +#endif + #define td_kse td_sched /************************************************************************ @@ -153,6 +182,7 @@ td = ke->ke_thread; KASSERT((td->td_kse == ke), ("kse/thread mismatch")); kg = ke->ke_ksegrp; + CHECKRUNQ(kg, NULL) if (td->td_proc->p_flag & P_HADTHREADS) { if (kg->kg_last_assigned == td) { kg->kg_last_assigned = TAILQ_PREV(td, @@ -160,6 +190,7 @@ } TAILQ_REMOVE(&kg->kg_runq, td, td_runq); kg->kg_runnable--; + CHECKRUNQ(kg, td) } CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d", td, td->td_priority); 
@@ -257,6 +288,7 @@ td3 = TAILQ_PREV(td, threadqueue, td_runq); TAILQ_REMOVE(&kg->kg_runq, td, td_runq); kg->kg_runnable--; + CHECKRUNQ(kg, td) if (ke->ke_state == KES_ONRUNQ) { /* * This thread has been assigned to the system run queue. @@ -273,6 +305,7 @@ kg->kg_last_assigned = td3; /* slot_fill(kg); */ /* will replace it with another */ } + CHECKRUNQ(kg, NULL) } #endif @@ -305,7 +338,7 @@ /* It is a threaded process */ kg = td->td_ksegrp; - TD_SET_CAN_RUN(td); + CHECKRUNQ(kg, NULL) if (ke->ke_state == KES_ONRUNQ) { if (kg->kg_last_assigned == td) { kg->kg_last_assigned = @@ -315,9 +348,11 @@ kg->kg_avail_opennings++; } TAILQ_REMOVE(&kg->kg_runq, td, td_runq); + CHECKRUNQ(kg, td) kg->kg_runnable--; td->td_priority = newpri; setrunqueue(td, SRQ_BORING); + CHECKRUNQ(kg, NULL) } int limitcount; void @@ -355,6 +390,7 @@ return; } + CHECKRUNQ(kg, td) tda = kg->kg_last_assigned; if ((kg->kg_avail_opennings <= 0) && (tda && (tda->td_priority > td->td_priority))) { @@ -380,18 +416,13 @@ TAILQ_INSERT_BEFORE(td2, td, td_runq); break; } - /* XXX Debugging hack */ - if (++count > 10000) { - printf("setrunqueue(): corrupt kq_runq, td= %p\n", td); - panic("deadlock in setrunqueue"); - } } if (td2 == NULL) { /* We ran off the end of the TAILQ or it was empty. */ kg->kg_runnable++; TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq); } - + CHECKRUNQ(kg, NULL) /* * If we have a slot to use, then put the thread on the system * run queue and if needed, readjust the last_assigned pointer.