From owner-freebsd-hackers@FreeBSD.ORG Sun Oct 2 08:57:53 2005 Return-Path: X-Original-To: freebsd-hackers@FreeBSD.org Delivered-To: freebsd-hackers@FreeBSD.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id A91AC16A41F; Sun, 2 Oct 2005 08:57:53 +0000 (GMT) (envelope-from truckman@FreeBSD.org) Received: from gw.catspoiler.org (217-ip-163.nccn.net [209.79.217.163]) by mx1.FreeBSD.org (Postfix) with ESMTP id 5131843D5F; Sun, 2 Oct 2005 08:57:50 +0000 (GMT) (envelope-from truckman@FreeBSD.org) Received: from FreeBSD.org (mousie.catspoiler.org [192.168.101.2]) by gw.catspoiler.org (8.13.3/8.13.3) with ESMTP id j928veaV014700; Sun, 2 Oct 2005 01:57:44 -0700 (PDT) (envelope-from truckman@FreeBSD.org) Message-Id: <200510020857.j928veaV014700@gw.catspoiler.org> Date: Sun, 2 Oct 2005 01:57:40 -0700 (PDT) From: Don Lewis To: jhb@FreeBSD.org In-Reply-To: <200510012148.j91LlwvB013891@gw.catspoiler.org> MIME-Version: 1.0 Content-Type: TEXT/plain; charset=us-ascii Cc: freebsd-hackers@FreeBSD.org, rwatson@FreeBSD.org, apelisse@gmail.com Subject: Re: freebsd-5.4-stable panics X-BeenThere: freebsd-hackers@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Technical Discussions relating to FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 02 Oct 2005 08:57:53 -0000 On 1 Oct, Don Lewis wrote: > On 30 Sep, John Baldwin wrote: >> It turns out that the sysctl buffer is already wired in one of the two cases >> that this function is called, so I moved the wiring up to the upper layer in >> the other case and cut out a bunch of the locking gymnastics as a result. 
>> >> Index: kern_proc.c >> =================================================================== >> RCS file: /usr/cvs/src/sys/kern/kern_proc.c,v >> retrieving revision 1.231 >> diff -u -r1.231 kern_proc.c >> --- kern_proc.c 27 Sep 2005 18:03:15 -0000 1.231 >> +++ kern_proc.c 30 Sep 2005 17:04:57 -0000 >> @@ -875,22 +875,16 @@ >> >> if (flags & KERN_PROC_NOTHREADS) { >> fill_kinfo_proc(p, &kinfo_proc); >> - PROC_UNLOCK(p); >> error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, >> sizeof(kinfo_proc)); >> - PROC_LOCK(p); >> } else { >> - _PHOLD(p); >> FOREACH_THREAD_IN_PROC(p, td) { >> fill_kinfo_thread(td, &kinfo_proc); >> - PROC_UNLOCK(p); >> error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, >> sizeof(kinfo_proc)); >> - PROC_LOCK(p); >> if (error) >> break; >> } >> - _PRELE(p); >> } >> PROC_UNLOCK(p); >> if (error) >> @@ -932,6 +926,9 @@ >> if (oid_number == KERN_PROC_PID) { >> if (namelen != 1) >> return (EINVAL); >> + error = sysctl_wire_old_buffer(req, 0); >> + if (error) >> + return (error); >> p = pfind((pid_t)name[0]); >> if (!p) >> return (ESRCH); >> > > sched_lock needs to be grabbed before the FOREACH_THREAD_IN_PROC loop. > > Can _PHOLD()/_PRELE() be dropped? It turns out that fill_kinfo_thread() grabs a bunch of locks to grab things out of struct proc, which breaks badly if sched_lock is grabbed before calling fill_kinfo_thread(). I refactored fill_kinfo_thread() into two functions, one of which doesn't need any additional locks and only gathers per-thread data, and a new function, fill_kinfo_proc_only(), which gathers the data that is common to all threads and can be called before grabbing sched_lock. This should be more efficient if there is more than one thread because the per-process data is only gathered once, and only the per-thread data in kinfo_proc is overwritten for each thread. 
Index: kern_proc.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_proc.c,v retrieving revision 1.231 diff -u -r1.231 kern_proc.c --- kern_proc.c 27 Sep 2005 18:03:15 -0000 1.231 +++ kern_proc.c 2 Oct 2005 08:48:56 -0000 @@ -73,6 +73,8 @@ static void doenterpgrp(struct proc *, struct pgrp *); static void orphanpg(struct pgrp *pg); +static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp); +static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp); static void pgadjustjobc(struct pgrp *pgrp, int entering); static void pgdelete(struct pgrp *); static int proc_ctor(void *mem, int size, void *arg, int flags); @@ -596,33 +598,22 @@ } } #endif /* DDB */ -void -fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp); /* - * Fill in a kinfo_proc structure for the specified process. + * Clear kinfo_proc and fill in any information that is common + * to all threads in the process. * Must be called with the target process locked. 
*/ -void -fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp) -{ - fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp); -} - -void -fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp) +static void +fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) { - struct proc *p; struct thread *td0; - struct ksegrp *kg; struct tty *tp; struct session *sp; struct timeval tv; struct ucred *cred; struct sigacts *ps; - p = td->td_proc; - bzero(kp, sizeof(*kp)); kp->ki_structsize = sizeof(*kp); @@ -684,78 +675,14 @@ kp->ki_tsize = vm->vm_tsize; kp->ki_dsize = vm->vm_dsize; kp->ki_ssize = vm->vm_ssize; - } + } else if (p->p_state == PRS_ZOMBIE) + kp->ki_stat = SZOMB; kp->ki_sflag = p->p_sflag; kp->ki_swtime = p->p_swtime; kp->ki_pid = p->p_pid; kp->ki_nice = p->p_nice; bintime2timeval(&p->p_rux.rux_runtime, &tv); kp->ki_runtime = tv.tv_sec * (u_int64_t)1000000 + tv.tv_usec; - if (p->p_state != PRS_ZOMBIE) { -#if 0 - if (td == NULL) { - /* XXXKSE: This should never happen. */ - printf("fill_kinfo_proc(): pid %d has no threads!\n", - p->p_pid); - mtx_unlock_spin(&sched_lock); - return; - } -#endif - if (td->td_wmesg != NULL) { - strlcpy(kp->ki_wmesg, td->td_wmesg, - sizeof(kp->ki_wmesg)); - } - if (TD_ON_LOCK(td)) { - kp->ki_kiflag |= KI_LOCKBLOCK; - strlcpy(kp->ki_lockname, td->td_lockname, - sizeof(kp->ki_lockname)); - } - - if (p->p_state == PRS_NORMAL) { /* XXXKSE very approximate */ - if (TD_ON_RUNQ(td) || - TD_CAN_RUN(td) || - TD_IS_RUNNING(td)) { - kp->ki_stat = SRUN; - } else if (P_SHOULDSTOP(p)) { - kp->ki_stat = SSTOP; - } else if (TD_IS_SLEEPING(td)) { - kp->ki_stat = SSLEEP; - } else if (TD_ON_LOCK(td)) { - kp->ki_stat = SLOCK; - } else { - kp->ki_stat = SWAIT; - } - } else { - kp->ki_stat = SIDL; - } - - kg = td->td_ksegrp; - - /* things in the KSE GROUP */ - kp->ki_estcpu = kg->kg_estcpu; - kp->ki_slptime = kg->kg_slptime; - kp->ki_pri.pri_user = kg->kg_user_pri; - kp->ki_pri.pri_class = kg->kg_pri_class; - - /* Things in the thread */ - kp->ki_wchan = 
td->td_wchan; - kp->ki_pri.pri_level = td->td_priority; - kp->ki_pri.pri_native = td->td_base_pri; - kp->ki_lastcpu = td->td_lastcpu; - kp->ki_oncpu = td->td_oncpu; - kp->ki_tdflags = td->td_flags; - kp->ki_tid = td->td_tid; - kp->ki_numthreads = p->p_numthreads; - kp->ki_pcb = td->td_pcb; - kp->ki_kstack = (void *)td->td_kstack; - kp->ki_pctcpu = sched_pctcpu(td); - - /* We can't get this anymore but ps etc never used it anyway. */ - kp->ki_rqindex = 0; - - } else { - kp->ki_stat = SZOMB; - } mtx_unlock_spin(&sched_lock); if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) { kp->ki_start = p->p_stats->p_start; @@ -802,8 +729,6 @@ p->p_sysent->sv_name[0] != '\0') strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul)); kp->ki_siglist = p->p_siglist; - SIGSETOR(kp->ki_siglist, td->td_siglist); - kp->ki_sigmask = td->td_sigmask; kp->ki_xstat = p->p_xstat; kp->ki_acflag = p->p_acflag; kp->ki_lock = p->p_lock; @@ -811,6 +736,92 @@ kp->ki_ppid = p->p_pptr->p_pid; } +/* + * Fill in information that is thread specific. + * Must be called with sched_lock locked. 
+ */ +static void +fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp) +{ + struct ksegrp *kg; + struct proc *p; + + p = td->td_proc; + + if (td->td_wmesg != NULL) + strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg)); + else + bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg)); + if (TD_ON_LOCK(td)) { + kp->ki_kiflag |= KI_LOCKBLOCK; + strlcpy(kp->ki_lockname, td->td_lockname, + sizeof(kp->ki_lockname)); + } else { + kp->ki_kiflag &= ~KI_LOCKBLOCK; + bzero(kp->ki_lockname, sizeof(kp->ki_lockname)); + } + + if (p->p_state == PRS_NORMAL) { /* XXXKSE very approximate */ + if (TD_ON_RUNQ(td) || + TD_CAN_RUN(td) || + TD_IS_RUNNING(td)) { + kp->ki_stat = SRUN; + } else if (P_SHOULDSTOP(p)) { + kp->ki_stat = SSTOP; + } else if (TD_IS_SLEEPING(td)) { + kp->ki_stat = SSLEEP; + } else if (TD_ON_LOCK(td)) { + kp->ki_stat = SLOCK; + } else { + kp->ki_stat = SWAIT; + } + } else { + kp->ki_stat = SIDL; + } + + kg = td->td_ksegrp; + + /* things in the KSE GROUP */ + kp->ki_estcpu = kg->kg_estcpu; + kp->ki_slptime = kg->kg_slptime; + kp->ki_pri.pri_user = kg->kg_user_pri; + kp->ki_pri.pri_class = kg->kg_pri_class; + + /* Things in the thread */ + kp->ki_wchan = td->td_wchan; + kp->ki_pri.pri_level = td->td_priority; + kp->ki_pri.pri_native = td->td_base_pri; + kp->ki_lastcpu = td->td_lastcpu; + kp->ki_oncpu = td->td_oncpu; + kp->ki_tdflags = td->td_flags; + kp->ki_tid = td->td_tid; + kp->ki_numthreads = p->p_numthreads; + kp->ki_pcb = td->td_pcb; + kp->ki_kstack = (void *)td->td_kstack; + kp->ki_pctcpu = sched_pctcpu(td); + + /* We can't get this anymore but ps etc never used it anyway. */ + kp->ki_rqindex = 0; + + SIGSETOR(kp->ki_siglist, td->td_siglist); + kp->ki_sigmask = td->td_sigmask; +} + +/* + * Fill in a kinfo_proc structure for the specified process. + * Must be called with the target process locked. 
+ */ +void +fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp) +{ + + fill_kinfo_proc_only(p, kp); + mtx_lock_spin(&sched_lock); + if (FIRST_THREAD_IN_PROC(p) != NULL) + fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp); + mtx_unlock_spin(&sched_lock); +} + struct pstats * pstats_alloc(void) { @@ -873,24 +884,28 @@ PROC_LOCK_ASSERT(p, MA_OWNED); + fill_kinfo_proc(p, &kinfo_proc); if (flags & KERN_PROC_NOTHREADS) { - fill_kinfo_proc(p, &kinfo_proc); - PROC_UNLOCK(p); + mtx_lock_spin(&sched_lock); + if (FIRST_THREAD_IN_PROC(p) != NULL) + fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), &kinfo_proc); + mtx_unlock_spin(&sched_lock); error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); - PROC_LOCK(p); } else { - _PHOLD(p); - FOREACH_THREAD_IN_PROC(p, td) { - fill_kinfo_thread(td, &kinfo_proc); - PROC_UNLOCK(p); + mtx_lock_spin(&sched_lock); + if (FIRST_THREAD_IN_PROC(p) != NULL) + FOREACH_THREAD_IN_PROC(p, td) { + fill_kinfo_thread(td, &kinfo_proc); + error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, + sizeof(kinfo_proc)); + if (error) + break; + } + else error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, - sizeof(kinfo_proc)); - PROC_LOCK(p); - if (error) - break; - } - _PRELE(p); + sizeof(kinfo_proc)); + mtx_unlock_spin(&sched_lock); } PROC_UNLOCK(p); if (error) @@ -932,6 +947,9 @@ if (oid_number == KERN_PROC_PID) { if (namelen != 1) return (EINVAL); + error = sysctl_wire_old_buffer(req, 0); + if (error) + return (error); p = pfind((pid_t)name[0]); if (!p) return (ESRCH);