From: Matt Macy <mmacy@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Date: Wed, 16 May 2018 22:29:20 +0000 (UTC)
Subject: svn commit: r333690 - in head/sys: dev/hwpmc kern sys
Message-Id: <201805162229.w4GMTKJj070790@repo.freebsd.org>

Author: mmacy
Date: Wed May 16 22:29:20 2018
New Revision: 333690
URL: https://svnweb.freebsd.org/changeset/base/333690

Log:
  hwpmc: Implement per-thread counters for PMC sampling

  This implements per-thread counters for PMC sampling. The thread
  descriptors are stored in a list attached to the process descriptor.
  These thread descriptors can store any per-thread information necessary
  for current or future features; for the moment, they only store the
  counters for sampling.

  The thread descriptors are created when the process descriptor is
  created. Additionally, thread descriptors are created or freed when
  threads are started or stopped. Because the thread exit function is
  called in a critical section, we cannot directly free the thread
  descriptors there. Instead, they are freed to a cache, which also
  serves as a source of allocations when descriptors are needed for
  new threads.

  Approved by:	sbruno
  Obtained from:	jtl
  Sponsored by:	Juniper Networks, Limelight Networks
  Differential Revision:	https://reviews.freebsd.org/D15335
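[Editorial note, not part of the commit message: the cache scheme described in
the log reduces to a spin-lock-protected singly-linked free list with a
malloc() fallback. The userland C sketch below illustrates that pattern; the
names (struct entry, entry_pool_get, entry_pool_put, pool_lock) are
hypothetical stand-ins for struct pmc_thread and the
pmc_thread_descriptor_pool_* functions in the diff that follows.]

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical pooled record; the driver's analogue is struct pmc_thread. */
    struct entry {
            struct entry    *next;
            char             payload[64];
    };

    static pthread_spinlock_t pool_lock;
    static struct entry *pool_head;

    /* Take a cached entry if one exists; otherwise fall back to calloc(). */
    static struct entry *
    entry_pool_get(void)
    {
            struct entry *e;

            pthread_spin_lock(&pool_lock);
            if ((e = pool_head) != NULL)
                    pool_head = e->next;
            pthread_spin_unlock(&pool_lock);
            if (e == NULL)
                    e = calloc(1, sizeof(*e));      /* may return NULL */
            return (e);
    }

    /*
     * Return an entry to the cache instead of freeing it. Unlike free(),
     * this is safe in contexts where the allocator must not be entered;
     * the kernel analogue is thread exit inside a critical section.
     */
    static void
    entry_pool_put(struct entry *e)
    {
            if (e == NULL)
                    return;
            memset(e, 0, sizeof(*e));
            pthread_spin_lock(&pool_lock);
            e->next = pool_head;
            pool_head = e;
            pthread_spin_unlock(&pool_lock);
    }

    int
    main(void)
    {
            pthread_spin_init(&pool_lock, PTHREAD_PROCESS_PRIVATE);
            struct entry *e = entry_pool_get();     /* falls back to calloc() */
            entry_pool_put(e);                      /* cached ... */
            e = entry_pool_get();                   /* ... and reused here */
            printf("reused: %p\n", (void *)e);
            free(e);
            return (0);
    }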
Modified:
  head/sys/dev/hwpmc/hwpmc_mod.c
  head/sys/kern/kern_thr.c
  head/sys/kern/kern_thread.c
  head/sys/sys/pmc.h
  head/sys/sys/pmckern.h

Modified: head/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/dev/hwpmc/hwpmc_mod.c	Wed May 16 22:29:20 2018	(r333690)
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 
 #include
 #include
+#include
 #include
 #include
 #include
@@ -92,6 +93,7 @@ enum pmc_flags {
 	PMC_FLAG_NONE	  = 0x00, /* do nothing */
 	PMC_FLAG_REMOVE   = 0x01, /* atomically remove entry from hash */
 	PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */
+	PMC_FLAG_NOWAIT   = 0x04, /* do not wait for mallocs */
 };
 
 /*
@@ -175,8 +177,22 @@ static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerh
 
 static LIST_HEAD(, pmc_owner)			pmc_ss_owners;
 
+/*
+ * List of free thread entries. This is protected by the spin mutex
+ * pmc_threadfreelist_mtx.
+ */
+static struct mtx pmc_threadfreelist_mtx;	/* spin mutex */
+static LIST_HEAD(, pmc_thread) pmc_threadfreelist;
+static int pmc_threadfreelist_entries = 0;
+#define	THREADENTRY_SIZE \
+	(sizeof(struct pmc_thread) + (md->pmd_npmc * sizeof(struct pmc_threadpmcstate)))
+
+/*
+ * Task to free thread descriptors.
+ */
+static struct grouptask free_gtask;
+
 /*
  * A map of row indices to classdep structures.
  */
 static struct pmc_classdep **pmc_rowindex_to_classdep;
@@ -191,6 +207,8 @@ static int pmc_debugflags_parse(char *newstr, char *fe
 #endif
 
 static int	load(struct module *module, int cmd, void *arg);
+static void	pmc_add_thread_descriptors_from_proc(struct proc *p,
+    struct pmc_process *pp);
 static int	pmc_attach_process(struct proc *p, struct pmc *pm);
 static struct pmc *pmc_allocate_pmc_descriptor(void);
 static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p);
@@ -205,12 +223,15 @@ static int pmc_detach_one_process(struct proc *p, stru
     int flags);
 static void	pmc_destroy_owner_descriptor(struct pmc_owner *po);
 static void	pmc_destroy_pmc_descriptor(struct pmc *pm);
+static void	pmc_destroy_process_descriptor(struct pmc_process *pp);
 static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
 static int	pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
 static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
     pmc_id_t pmc);
 static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
     uint32_t mode);
+static struct pmc_thread *pmc_find_thread_descriptor(struct pmc_process *pp,
+    struct thread *td, uint32_t mode);
 static void	pmc_force_context_switch(void);
 static void	pmc_link_target_process(struct pmc *pm,
     struct pmc_process *pp);
@@ -225,6 +246,8 @@ static void pmc_process_fork(void *arg, struct proc *p
     struct proc *p2, int n);
 static void	pmc_process_samples(int cpu, int soft);
 static void	pmc_release_pmc_descriptor(struct pmc *pmc);
+static void	pmc_process_thread_add(struct thread *td);
+static void	pmc_process_thread_delete(struct thread *td);
 static void	pmc_remove_owner(struct pmc_owner *po);
 static void	pmc_remove_process_descriptor(struct pmc_process *pp);
 static void	pmc_restore_cpu_binding(struct pmc_binding *pb);
@@ -233,6 +256,9 @@ static void pmc_select_cpu(int cpu);
 static int	pmc_start(struct pmc *pm);
 static int	pmc_stop(struct pmc *pm);
 static int	pmc_syscall_handler(struct thread *td, void *syscall_args);
+static struct pmc_thread *pmc_thread_descriptor_pool_alloc(void);
+static void	pmc_thread_descriptor_pool_drain(void);
+static void	pmc_thread_descriptor_pool_free(struct pmc_thread *pt);
 static void	pmc_unlink_target_process(struct pmc *pmc,
     struct pmc_process *pp);
 static int	generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp);
@@ -312,6 +338,24 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG
 
 /*
+ * kern.hwpmc.threadfreelist_entries -- number of free entries
+ */
+
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_entries, CTLFLAG_RD,
+    &pmc_threadfreelist_entries, 0, "number of available thread entries");
+
+
+/*
+ * kern.hwpmc.threadfreelist_max -- maximum number of free entries
+ */
+
+static int pmc_threadfreelist_max = PMC_THREADLIST_MAX;
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, threadfreelist_max, CTLFLAG_RW,
+    &pmc_threadfreelist_max, 0,
+    "maximum number of available thread entries before freeing some");
+
+
+/*
  * security.bsd.unprivileged_syspmcs -- allow non-root processes to
  * allocate system-wide PMCs.
  *
@@ -835,6 +879,9 @@ pmc_link_target_process(struct pmc *pm, struct pmc_pro
 {
 	int ri;
 	struct pmc_target *pt;
+#ifdef INVARIANTS
+	struct pmc_thread *pt_td;
+#endif
 
 	sx_assert(&pmc_sx, SX_XLOCKED);
@@ -878,6 +925,18 @@ pmc_link_target_process(struct pmc *pm, struct pmc_pro
 
 	pp->pp_refcnt++;
 
+#ifdef INVARIANTS
+	/* Confirm that the per-thread values at this row index are cleared. */
+	if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
+		mtx_lock_spin(pp->pp_tdslock);
+		LIST_FOREACH(pt_td, &pp->pp_tds, pt_next) {
+			KASSERT(pt_td->pt_pmcs[ri].pt_pmcval == (pmc_value_t) 0,
+			    ("[pmc,%d] pt_pmcval not cleared for pid=%d at "
+			    "ri=%d", __LINE__, pp->pp_proc->p_pid, ri));
+		}
+		mtx_unlock_spin(pp->pp_tdslock);
+	}
+#endif
 }
 
 /*
@@ -890,6 +949,7 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_p
 	int ri;
 	struct proc *p;
 	struct pmc_target *ptgt;
+	struct pmc_thread *pt;
 
 	sx_assert(&pmc_sx, SX_XLOCKED);
@@ -912,6 +972,14 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_p
 	pp->pp_pmcs[ri].pp_pmc = NULL;
 	pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0;
 
+	/* Clear the per-thread values at this row index. */
+	if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
+		mtx_lock_spin(pp->pp_tdslock);
+		LIST_FOREACH(pt, &pp->pp_tds, pt_next)
+			pt->pt_pmcs[ri].pt_pmcval = (pmc_value_t) 0;
+		mtx_unlock_spin(pp->pp_tdslock);
+	}
+
 	/* Remove owner-specific flags */
 	if (pm->pm_owner->po_owner == pp->pp_proc) {
 		pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS;
@@ -1005,7 +1073,7 @@ pmc_can_attach(struct pmc *pm, struct proc *t)
 static int
 pmc_attach_one_process(struct proc *p, struct pmc *pm)
 {
-	int ri;
+	int ri, error;
 	char *fullpath, *freepath;
 	struct pmc_process	*pp;
@@ -1026,15 +1094,26 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
 	 */
 	ri = PMC_TO_ROWINDEX(pm);
 
-	if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL)
-		return ENOMEM;
+	/* mark process as using HWPMCs */
+	PROC_LOCK(p);
+	p->p_flag |= P_HWPMC;
+	PROC_UNLOCK(p);
 
-	if (pp->pp_pmcs[ri].pp_pmc == pm)  /* already present at slot [ri] */
-		return EEXIST;
+	if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) {
+		error = ENOMEM;
+		goto fail;
+	}
 
-	if (pp->pp_pmcs[ri].pp_pmc != NULL)
-		return EBUSY;
+	if (pp->pp_pmcs[ri].pp_pmc == pm) { /* already present at slot [ri] */
+		error = EEXIST;
+		goto fail;
+	}
 
+	if (pp->pp_pmcs[ri].pp_pmc != NULL) {
+		error = EBUSY;
+		goto fail;
+	}
+
 	pmc_link_target_process(pm, pp);
 
 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) &&
@@ -1056,12 +1135,13 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
 		if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 			pmc_log_process_mappings(pm->pm_owner, p);
 	}
-	/* mark process as using HWPMCs */
+
+	return (0);
+ fail:
 	PROC_LOCK(p);
-	p->p_flag |= P_HWPMC;
+	p->p_flag &= ~P_HWPMC;
 	PROC_UNLOCK(p);
-
-	return 0;
+	return (error);
 }
 
 /*
@@ -1173,7 +1253,7 @@ pmc_detach_one_process(struct proc *p, struct pmc *pm,
 	pmc_remove_process_descriptor(pp);
 
 	if (flags & PMC_FLAG_REMOVE)
-		free(pp, M_PMC);
+		pmc_destroy_process_descriptor(pp);
 
 	PROC_LOCK(p);
 	p->p_flag &= ~P_HWPMC;
@@ -1250,10 +1330,11 @@ pmc_process_csw_in(struct thread *td)
 	struct pmc_hw *phw;
 	pmc_value_t newvalue;
 	struct pmc_process *pp;
+	struct pmc_thread *pt;
 	struct pmc_classdep *pcd;
 
 	p = td->td_proc;
-
+	pt = NULL;
 	if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL)
 		return;
@@ -1312,23 +1393,54 @@ pmc_process_csw_in(struct thread *td)
 		/*
 		 * Write out saved value and start the PMC.
 		 *
-		 * Sampling PMCs use a per-process value, while
+		 * Sampling PMCs use a per-thread value, while
 		 * counting mode PMCs use a per-pmc value that is
 		 * inherited across descendants.
 		 */
 		if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
+			if (pt == NULL)
+				pt = pmc_find_thread_descriptor(pp, td,
+				    PMC_FLAG_NONE);
+
+			KASSERT(pt != NULL,
+			    ("[pmc,%d] No thread found for td=%p", __LINE__,
+			    td));
+
 			mtx_pool_lock_spin(pmc_mtxpool, pm);
+
 			/*
-			 * Use the saved value calculated after the most recent
-			 * thread switch out to start this counter. Reset
-			 * the saved count in case another thread from this
-			 * process switches in before any threads switch out.
+			 * If we have a thread descriptor, use the per-thread
+			 * counter in the descriptor. If not, we will use
+			 * a per-process counter.
+			 *
+			 * TODO: Remove the per-process "safety net" once
+			 * we have thoroughly tested that we don't hit the
+			 * above assert.
+			 */
-			newvalue = PMC_PCPU_SAVED(cpu,ri) =
-			    pp->pp_pmcs[ri].pp_pmcval;
-			pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount;
+			if (pt != NULL) {
+				if (pt->pt_pmcs[ri].pt_pmcval > 0)
+					newvalue = pt->pt_pmcs[ri].pt_pmcval;
+				else
+					newvalue = pm->pm_sc.pm_reloadcount;
+			} else {
+				/*
+				 * Use the saved value calculated after the most
+				 * recent time a thread using the shared counter
+				 * switched out. Reset the saved count in case
+				 * another thread from this process switches in
+				 * before any threads switch out.
+				 */
+
+				newvalue = pp->pp_pmcs[ri].pp_pmcval;
+				pp->pp_pmcs[ri].pp_pmcval =
+				    pm->pm_sc.pm_reloadcount;
+			}
 			mtx_pool_unlock_spin(pmc_mtxpool, pm);
+			KASSERT(newvalue > 0 && newvalue <=
+			    pm->pm_sc.pm_reloadcount,
+			    ("[pmc,%d] pmcval outside of expected range cpu=%d "
+			    "ri=%d pmcval=%jx pm_reloadcount=%jx", __LINE__,
+			    cpu, ri, newvalue, pm->pm_sc.pm_reloadcount));
 		} else {
 			KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
 			    ("[pmc,%d] illegal mode=%d", __LINE__,
@@ -1381,6 +1493,7 @@ pmc_process_csw_out(struct thread *td)
 	pmc_value_t newvalue;
 	unsigned int adjri, ri;
 	struct pmc_process *pp;
+	struct pmc_thread *pt = NULL;
 	struct pmc_classdep *pcd;
 
@@ -1476,37 +1589,50 @@ pmc_process_csw_out(struct thread *td)
 			pcd->pcd_read_pmc(cpu, adjri, &newvalue);
 
 			if (mode == PMC_MODE_TS) {
-				PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (samp)",
-				    cpu, ri, PMC_PCPU_SAVED(cpu,ri) - newvalue);
+				PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d val=%jd (samp)",
+				    cpu, ri, newvalue);
 
+				if (pt == NULL)
+					pt = pmc_find_thread_descriptor(pp, td,
+					    PMC_FLAG_NONE);
+
+				KASSERT(pt != NULL,
+				    ("[pmc,%d] No thread found for td=%p",
+				    __LINE__, td));
+
+				mtx_pool_lock_spin(pmc_mtxpool, pm);
+
 				/*
-				 * For sampling process-virtual PMCs,
-				 * newvalue is the number of events to be seen
-				 * until the next sampling interrupt.
-				 * We can just add the events left from this
-				 * invocation to the counter, then adjust
-				 * in case we overflow our range.
+				 * If we have a thread descriptor, save the
+				 * per-thread counter in the descriptor. If not,
+				 * we will update the per-process counter.
 				 *
-				 * (Recall that we reload the counter every
-				 * time we use it.)
+				 * TODO: Remove the per-process "safety net"
+				 * once we have thoroughly tested that we
+				 * don't hit the above assert.
 				 */
-				mtx_pool_lock_spin(pmc_mtxpool, pm);
-
-				pp->pp_pmcs[ri].pp_pmcval += newvalue;
-				if (pp->pp_pmcs[ri].pp_pmcval >
-				    pm->pm_sc.pm_reloadcount)
-					pp->pp_pmcs[ri].pp_pmcval -=
-					    pm->pm_sc.pm_reloadcount;
-				KASSERT(pp->pp_pmcs[ri].pp_pmcval > 0 &&
-				    pp->pp_pmcs[ri].pp_pmcval <=
-				    pm->pm_sc.pm_reloadcount,
-				    ("[pmc,%d] pp_pmcval outside of expected "
-				    "range cpu=%d ri=%d pp_pmcval=%jx "
-				    "pm_reloadcount=%jx", __LINE__, cpu, ri,
-				    pp->pp_pmcs[ri].pp_pmcval,
-				    pm->pm_sc.pm_reloadcount));
+				if (pt != NULL)
+					pt->pt_pmcs[ri].pt_pmcval = newvalue;
+				else {
+					/*
+					 * For sampling process-virtual PMCs,
+					 * newvalue is the number of events to
+					 * be seen until the next sampling
+					 * interrupt. We can just add the events
+					 * left from this invocation to the
+					 * counter, then adjust in case we
+					 * overflow our range.
+					 *
+					 * (Recall that we reload the counter
+					 * every time we use it.)
+					 */
+					pp->pp_pmcs[ri].pp_pmcval += newvalue;
+					if (pp->pp_pmcs[ri].pp_pmcval >
+					    pm->pm_sc.pm_reloadcount)
+						pp->pp_pmcs[ri].pp_pmcval -=
+						    pm->pm_sc.pm_reloadcount;
+				}
 				mtx_pool_unlock_spin(pmc_mtxpool, pm);
-
 			} else {
 				tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
@@ -1550,6 +1676,33 @@ pmc_process_csw_out(struct thread *td)
 }
 
 /*
+ * A new thread for a process.
+ */
+static void
+pmc_process_thread_add(struct thread *td)
+{
+	struct pmc_process *pmc;
+
+	pmc = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE);
+	if (pmc != NULL)
+		pmc_find_thread_descriptor(pmc, td, PMC_FLAG_ALLOCATE);
+}
+
+/*
+ * A thread delete for a process.
+ */
+static void
+pmc_process_thread_delete(struct thread *td)
+{
+	struct pmc_process *pmc;
+
+	pmc = pmc_find_process_descriptor(td->td_proc, PMC_FLAG_NONE);
+	if (pmc != NULL)
+		pmc_thread_descriptor_pool_free(pmc_find_thread_descriptor(pmc,
+		    td, PMC_FLAG_REMOVE));
+}
+
+/*
  * A mapping change for a process.
  */
 
@@ -1873,13 +2026,16 @@ const char *pmc_hooknames[] = {
 	"MUNMAP",
 	"CALLCHAIN-NMI",
 	"CALLCHAIN-SOFT",
-	"SOFTSAMPLING"
+	"SOFTSAMPLING",
+	"THR-CREATE",
+	"THR-EXIT",
 };
 #endif
 
 static int
 pmc_hook_handler(struct thread *td, int function, void *arg)
 {
+	int cpu;
 
 	PMCDBG4(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p",
 	    td, function, pmc_hooknames[function], arg);
@@ -1996,7 +2152,7 @@ pmc_hook_handler(struct thread *td, int function, void
 
 		if (pp->pp_refcnt == 0) {
 			pmc_remove_process_descriptor(pp);
-			free(pp, M_PMC);
+			pmc_destroy_process_descriptor(pp);
 			break;
 		}
@@ -2034,8 +2190,9 @@ pmc_hook_handler(struct thread *td, int function, void
 		 * lose the interrupt sample.
 		 */
 		DPCPU_SET(pmc_sampled, 0);
-		pmc_process_samples(PCPU_GET(cpuid), PMC_HR);
-		pmc_process_samples(PCPU_GET(cpuid), PMC_SR);
+		cpu = PCPU_GET(cpuid);
+		pmc_process_samples(cpu, PMC_HR);
+		pmc_process_samples(cpu, PMC_SR);
 		break;
 
 	case PMC_FN_MMAP:
@@ -2078,6 +2235,16 @@ pmc_hook_handler(struct thread *td, int function, void
 		pmc_soft_intr((struct pmckern_soft *) arg);
 		break;
 
+	case PMC_FN_THR_CREATE:
+		pmc_process_thread_add(td);
+		break;
+
+	case PMC_FN_THR_EXIT:
+		KASSERT(td == curthread, ("[pmc,%d] td != curthread",
+		    __LINE__));
+		pmc_process_thread_delete(td);
+		break;
+
 	default:
 #ifdef HWPMC_DEBUG
 		KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
#endif
@@ -2129,6 +2296,198 @@ pmc_destroy_owner_descriptor(struct pmc_owner *po)
 }
 
 /*
+ * Allocate a thread descriptor from the free pool.
+ *
+ * NOTE: This *can* return NULL.
+ */
+static struct pmc_thread *
+pmc_thread_descriptor_pool_alloc(void)
+{
+	struct pmc_thread *pt;
+
+	mtx_lock_spin(&pmc_threadfreelist_mtx);
+	if ((pt = LIST_FIRST(&pmc_threadfreelist)) != NULL) {
+		LIST_REMOVE(pt, pt_next);
+		pmc_threadfreelist_entries--;
+	}
+	mtx_unlock_spin(&pmc_threadfreelist_mtx);
+
+	return (pt);
+}
+
+/*
+ * Add a thread descriptor to the free pool. We use this instead of free()
+ * to maintain a cache of free entries. Additionally, we can safely call
+ * this function when we cannot call free(), such as in a critical section.
+ */
+static void
+pmc_thread_descriptor_pool_free(struct pmc_thread *pt)
+{
+
+	if (pt == NULL)
+		return;
+
+	memset(pt, 0, THREADENTRY_SIZE);
+	mtx_lock_spin(&pmc_threadfreelist_mtx);
+	LIST_INSERT_HEAD(&pmc_threadfreelist, pt, pt_next);
+	pmc_threadfreelist_entries++;
+	if (pmc_threadfreelist_entries > pmc_threadfreelist_max)
+		GROUPTASK_ENQUEUE(&free_gtask);
+	mtx_unlock_spin(&pmc_threadfreelist_mtx);
+}
+
+/*
+ * A task to manage the free list.
+ */
+static void
+pmc_thread_descriptor_pool_free_task(void *arg __unused)
+{
+	struct pmc_thread *pt;
+	LIST_HEAD(, pmc_thread) tmplist;
+	int delta;
+
+	LIST_INIT(&tmplist);
+	/* Determine what changes, if any, we need to make. */
+	mtx_lock_spin(&pmc_threadfreelist_mtx);
+	delta = pmc_threadfreelist_entries - pmc_threadfreelist_max;
+	while (delta > 0) {
+		pt = LIST_FIRST(&pmc_threadfreelist);
+		MPASS(pt);
+		LIST_REMOVE(pt, pt_next);
+		LIST_INSERT_HEAD(&tmplist, pt, pt_next);
+		pmc_threadfreelist_entries--;
+		delta--;
+	}
+	mtx_unlock_spin(&pmc_threadfreelist_mtx);
+
+	/* If there are entries to free, free them. */
+	while (!LIST_EMPTY(&tmplist)) {
+		pt = LIST_FIRST(&tmplist);
+		LIST_REMOVE(pt, pt_next);
+		free(pt, M_PMC);
+	}
+}
+
+/*
+ * Drain the thread free pool, freeing all allocations.
+ */
+static void
+pmc_thread_descriptor_pool_drain(void)
+{
+	struct pmc_thread *pt, *next;
+
+	LIST_FOREACH_SAFE(pt, &pmc_threadfreelist, pt_next, next) {
+		LIST_REMOVE(pt, pt_next);
+		free(pt, M_PMC);
+	}
+}
+
+/*
+ * find the descriptor corresponding to thread 'td', adding or removing it
+ * as specified by 'mode'.
+ *
+ * Note that this supports additional mode flags in addition to those
+ * supported by pmc_find_process_descriptor():
+ *
+ * PMC_FLAG_NOWAIT: Causes the function to not wait for mallocs.
+ *     This makes it safe to call while holding certain other locks.
+ */
+static struct pmc_thread *
+pmc_find_thread_descriptor(struct pmc_process *pp, struct thread *td,
+    uint32_t mode)
+{
+	struct pmc_thread *pt = NULL, *ptnew = NULL;
+	int wait_flag;
+
+	KASSERT(td != NULL, ("[pmc,%d] called to add NULL td", __LINE__));
+
+	/*
+	 * Pre-allocate memory in the PMC_FLAG_ALLOCATE case prior to
+	 * acquiring the lock.
+	 */
+	if (mode & PMC_FLAG_ALLOCATE) {
+		if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) {
+			wait_flag = (mode & PMC_FLAG_NOWAIT) ? M_NOWAIT :
+			    M_WAITOK;
+			ptnew = malloc(THREADENTRY_SIZE, M_PMC,
+			    wait_flag | M_ZERO);
+		}
+	}
+
+	mtx_lock_spin(pp->pp_tdslock);
+
+	LIST_FOREACH(pt, &pp->pp_tds, pt_next)
+		if (pt->pt_td == td)
+			break;
+
+	if ((mode & PMC_FLAG_REMOVE) && pt != NULL)
+		LIST_REMOVE(pt, pt_next);
+
+	if ((mode & PMC_FLAG_ALLOCATE) && pt == NULL && ptnew != NULL) {
+		pt = ptnew;
+		ptnew = NULL;
+		pt->pt_td = td;
+		LIST_INSERT_HEAD(&pp->pp_tds, pt, pt_next);
+	}
+
+	mtx_unlock_spin(pp->pp_tdslock);
+
+	if (ptnew != NULL)
+		free(ptnew, M_PMC);
+
+	return (pt);
+}
+
+/*
+ * Try to add thread descriptors for each thread in a process.
+ */
+static void
+pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp)
+{
+	struct thread *curtd;
+	struct pmc_thread **tdlist;
+	int i, tdcnt, tdlistsz;
+
+	KASSERT(!PROC_LOCKED(p), ("[pmc,%d] proc unexpectedly locked",
+	    __LINE__));
+	tdcnt = 32;
+ restart:
+	tdlistsz = roundup2(tdcnt + 1, 32);
+
+	tdcnt = 0;
+	tdlist = malloc(sizeof(struct pmc_thread *) * tdlistsz, M_TEMP,
+	    M_WAITOK);
+
+	PROC_LOCK(p);
+	FOREACH_THREAD_IN_PROC(p, curtd)
+		tdcnt++;
+	if (tdcnt >= tdlistsz) {
+		PROC_UNLOCK(p);
+		free(tdlist, M_TEMP);
+		goto restart;
+	}
+	/*
+	 * Try to add each thread to the list without sleeping. If unable,
+	 * free the partial list and retry after dropping the process lock.
+	 */
+	tdcnt = 0;
+	FOREACH_THREAD_IN_PROC(p, curtd) {
+		tdlist[tdcnt] = pmc_find_thread_descriptor(pp, curtd,
+		    PMC_FLAG_ALLOCATE | PMC_FLAG_NOWAIT);
+		if (tdlist[tdcnt] == NULL) {
+			PROC_UNLOCK(p);
+			for (i = 0; i <= tdcnt; i++)
+				pmc_thread_descriptor_pool_free(tdlist[i]);
+			free(tdlist, M_TEMP);
+			goto restart;
+		}
+		tdcnt++;
+	}
+	PROC_UNLOCK(p);
+	free(tdlist, M_TEMP);
+}
+
 /*
  * find the descriptor corresponding to process 'p', adding or removing it
  * as specified by 'mode'.
  */
 
@@ -2146,7 +2505,7 @@ pmc_find_process_descriptor(struct proc *p, uint32_t m
 	ppnew = NULL;
 
 	/*
-	 * Pre-allocate memory in the FIND_ALLOCATE case since we
+	 * Pre-allocate memory in the PMC_FLAG_ALLOCATE case since we
 	 * cannot call malloc(9) once we hold a spin lock.
 	 */
 	if (mode & PMC_FLAG_ALLOCATE)
@@ -2164,13 +2523,20 @@ pmc_find_process_descriptor(struct proc *p, uint32_t m
 
 	if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL && ppnew != NULL) {
 		ppnew->pp_proc = p;
+		LIST_INIT(&ppnew->pp_tds);
+		ppnew->pp_tdslock = mtx_pool_find(pmc_mtxpool, ppnew);
 		LIST_INSERT_HEAD(pph, ppnew, pp_next);
+		mtx_unlock_spin(&pmc_processhash_mtx);
 		pp = ppnew;
 		ppnew = NULL;
+
+		/* Add thread descriptors for this process' current threads. */
+		pmc_add_thread_descriptors_from_proc(p, pp);
 	}
-	mtx_unlock_spin(&pmc_processhash_mtx);
+	else
+		mtx_unlock_spin(&pmc_processhash_mtx);
 
-	if (pp != NULL && ppnew != NULL)
+	if (ppnew != NULL)
 		free(ppnew, M_PMC);
 
 	return pp;
@@ -2192,7 +2558,23 @@ pmc_remove_process_descriptor(struct pmc_process *pp)
 	mtx_unlock_spin(&pmc_processhash_mtx);
 }
 
+/*
+ * destroy a process descriptor.
+ */
+static void
+pmc_destroy_process_descriptor(struct pmc_process *pp)
+{
+	struct pmc_thread *pmc_td;
+
+	while ((pmc_td = LIST_FIRST(&pp->pp_tds)) != NULL) {
+		LIST_REMOVE(pmc_td, pt_next);
+		pmc_thread_descriptor_pool_free(pmc_td);
+	}
+	free(pp, M_PMC);
+}
+
 /*
  * find an owner descriptor corresponding to proc 'p'
  */
@@ -2420,7 +2802,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 
 		if (pp->pp_refcnt == 0) {
 			pmc_remove_process_descriptor(pp);
-			free(pp, M_PMC);
+			pmc_destroy_process_descriptor(pp);
 		}
 	}
@@ -4582,15 +4964,21 @@ pmc_process_exit(void *arg __unused, struct proc *p)
 				pm->pm_pcpu_state[cpu].pps_cpustate = 0;
 				if (!pm->pm_pcpu_state[cpu].pps_stalled) {
 					(void) pcd->pcd_stop_pmc(cpu, adjri);
-					pcd->pcd_read_pmc(cpu, adjri,
-					    &newvalue);
-					tmp = newvalue -
-					    PMC_PCPU_SAVED(cpu,ri);
 
-					mtx_pool_lock_spin(pmc_mtxpool, pm);
-					pm->pm_gv.pm_savedvalue += tmp;
-					pp->pp_pmcs[ri].pp_pmcval += tmp;
-					mtx_pool_unlock_spin(pmc_mtxpool, pm);
+					if (PMC_TO_MODE(pm) == PMC_MODE_TC) {
+						pcd->pcd_read_pmc(cpu, adjri,
+						    &newvalue);
+						tmp = newvalue -
+						    PMC_PCPU_SAVED(cpu,ri);
+
+						mtx_pool_lock_spin(pmc_mtxpool,
+						    pm);
+						pm->pm_gv.pm_savedvalue += tmp;
+						pp->pp_pmcs[ri].pp_pmcval +=
+						    tmp;
+						mtx_pool_unlock_spin(
+						    pmc_mtxpool, pm);
+					}
 				}
 			}
@@ -4700,6 +5088,13 @@ pmc_process_fork(void *arg __unused, struct proc *p1,
 	if (do_descendants == 0) /* nothing to do */
 		goto done;
 
+	/*
+	 * Now mark the new process as being tracked by this driver.
+	 */
+	PROC_LOCK(newproc);
+	newproc->p_flag |= P_HWPMC;
+	PROC_UNLOCK(newproc);
+
 	/* allocate a descriptor for the new process  */
 	if ((ppnew = pmc_find_process_descriptor(newproc,
 	    PMC_FLAG_ALLOCATE)) == NULL)
@@ -4724,13 +5119,6 @@ pmc_process_fork(void *arg __unused, struct proc *p1,
 		    newproc->p_pid);
 	}
 
-	/*
-	 * Now mark the new process as being tracked by this driver.
-	 */
-	PROC_LOCK(newproc);
-	newproc->p_flag |= P_HWPMC;
-	PROC_UNLOCK(newproc);
-
  done:
 	sx_xunlock(&pmc_sx);
 }
@@ -5055,6 +5443,16 @@ pmc_initialize(void)
 	    "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask,
 	    pmc_processhash, pmc_processhashmask);
 
+	/* Initialize a spin mutex for the thread free list. */
+	mtx_init(&pmc_threadfreelist_mtx, "pmc-threadfreelist", "pmc-leaf",
+	    MTX_SPIN);
+
+	/*
+	 * Initialize the task that trims the thread descriptor free list
+	 * whenever it grows past pmc_threadfreelist_max.
+	 */
+	taskqgroup_config_gtask_init(NULL, &free_gtask,
+	    pmc_thread_descriptor_pool_free_task,
+	    "thread descriptor pool free task");
+
 	/* register process {exit,fork,exec} handlers */
 	pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit,
 	    pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY);
@@ -5152,6 +5550,9 @@ pmc_cleanup(void)
 	}
 
 	/* reclaim allocated data structures */
+	mtx_destroy(&pmc_threadfreelist_mtx);
+	pmc_thread_descriptor_pool_drain();
+
 	if (pmc_mtxpool)
 		mtx_pool_destroy(&pmc_mtxpool);

Modified: head/sys/kern/kern_thr.c
==============================================================================
--- head/sys/kern/kern_thr.c	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/kern/kern_thr.c	Wed May 16 22:29:20 2018	(r333690)
@@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$");
 
 #include "opt_posix.h"
+#include "opt_hwpmc_hooks.h"
 
 #include
 #include
 #include
@@ -55,6 +56,9 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#ifdef HWPMC_HOOKS
+#include
+#endif
 
 #include
@@ -258,6 +262,10 @@ thread_create(struct thread *td, struct rtprio *rtp,
 	newtd->td_dbgflags |= TDB_BORN;
 	PROC_UNLOCK(p);
 
+#ifdef HWPMC_HOOKS
+	if (PMC_PROC_IS_USING_PMCS(p))
+		PMC_CALL_HOOK(newtd, PMC_FN_THR_CREATE, NULL);
+#endif
+
 	tidhash_add(newtd);

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/kern/kern_thread.c	Wed May 16 22:29:20 2018	(r333690)
@@ -586,8 +586,10 @@ thread_exit(void)
 	 * If this thread is part of a process that is being tracked by hwpmc(4),
 	 * inform the module of the thread's impending exit.
 	 */
-	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
+	if (PMC_PROC_IS_USING_PMCS(td->td_proc)) {
 		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
+		PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT, NULL);
+	}
 #endif
 	PROC_UNLOCK(p);
 	PROC_STATLOCK(p);

Modified: head/sys/sys/pmc.h
==============================================================================
--- head/sys/sys/pmc.h	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/sys/pmc.h	Wed May 16 22:29:20 2018	(r333690)
@@ -647,6 +647,7 @@ struct pmc_op_getdyneventinfo {
 #define	PMC_NLOGBUFFERS_PCPU		8
 #define	PMC_NSAMPLES			64
 #define	PMC_CALLCHAIN_DEPTH		32
+#define	PMC_THREADLIST_MAX		64
 
 #define	PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
@@ -786,8 +787,27 @@ struct pmc {
 #define	PMC_TO_ROWINDEX(P)	PMC_ID_TO_ROWINDEX((P)->pm_id)
 #define	PMC_TO_CPU(P)		PMC_ID_TO_CPU((P)->pm_id)
 
+/*
+ * struct pmc_threadpmcstate
+ *
+ * Record per-PMC, per-thread state.
+ */
+struct pmc_threadpmcstate {
+	pmc_value_t	pt_pmcval;	/* per-thread reload count */
+};
 
 /*
+ * struct pmc_thread
+ *
+ * Record a 'target' thread being profiled.
+ */
+struct pmc_thread {
+	LIST_ENTRY(pmc_thread) pt_next;	/* linked list */
+	struct thread	*pt_td;		/* target thread */
+	struct pmc_threadpmcstate pt_pmcs[];	/* per-PMC state */
+};
+
+/*
  * struct pmc_process
  *
  * Record a 'target' process being profiled.
@@ -808,9 +828,11 @@ struct pmc_targetstate {
 
 struct pmc_process {
 	LIST_ENTRY(pmc_process) pp_next;	/* hash chain */
+	LIST_HEAD(,pmc_thread) pp_tds;		/* list of threads */
+	struct mtx *pp_tdslock;			/* lock on pp_tds thread list */
 	int		pp_refcnt;		/* reference count */
 	uint32_t	pp_flags;		/* flags PMC_PP_* */
-	struct proc	*pp_proc;		/* target thread */
+	struct proc	*pp_proc;		/* target process */
 	struct pmc_targetstate pp_pmcs[];	/* NHWPMCs */
 };

Modified: head/sys/sys/pmckern.h
==============================================================================
--- head/sys/sys/pmckern.h	Wed May 16 22:25:47 2018	(r333689)
+++ head/sys/sys/pmckern.h	Wed May 16 22:29:20 2018	(r333690)
@@ -60,6 +60,8 @@
 #define	PMC_FN_USER_CALLCHAIN		9
 #define	PMC_FN_USER_CALLCHAIN_SOFT	10
 #define	PMC_FN_SOFT_SAMPLING		11
+#define	PMC_FN_THR_CREATE		12
+#define	PMC_FN_THR_EXIT			13
 
 #define	PMC_HR	0	/* Hardware ring buffer */
 #define	PMC_SR	1	/* Software ring buffer */
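[Editorial note: struct pmc_thread above ends in a C99 flexible array member,
so each descriptor is allocated in one shot with room for one
pmc_threadpmcstate slot per PMC row; this is what the THREADENTRY_SIZE macro
in hwpmc_mod.c computes from md->pmd_npmc. A minimal userland sketch of the
same allocation pattern, using hypothetical names (struct threadrec,
struct pmcstate, npmc):]

    #include <stdio.h>
    #include <stdlib.h>

    struct pmcstate {
            unsigned long   pmcval;         /* per-thread reload count */
    };

    /* Hypothetical analogue of struct pmc_thread. */
    struct threadrec {
            int             tid;
            struct pmcstate pmcs[];         /* flexible array member */
    };

    int
    main(void)
    {
            const size_t npmc = 8;          /* stands in for md->pmd_npmc */

            /* One calloc() covers the header plus all npmc per-PMC slots. */
            struct threadrec *tr = calloc(1,
                sizeof(*tr) + npmc * sizeof(struct pmcstate));
            if (tr == NULL)
                    return (1);
            tr->pmcs[npmc - 1].pmcval = 42; /* last slot is within bounds */
            printf("pmcval=%lu\n", tr->pmcs[npmc - 1].pmcval);
            free(tr);
            return (0);
    }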