Date: Fri, 18 Nov 2011 13:15:12 +0530 From: "Jayachandran C." <jchandra@freebsd.org> To: freebsd-mips@freebsd.org Subject: pmap changes for mips SMP Message-ID: <CA%2B7sy7Aj89iTTD=7ywiatyBkBW2OjborNydZTf8dgXLNjY8G1w@mail.gmail.com>
next in thread | raw e-mail | index | archive | help
I've committed this change to MIPS pmap.c. This optimization gives a significant improvement in fork/exec performance on SMP systems with a large number of CPUs. I have stress tested it on XLR/XLP, but let me know if this causes regressions on any other platforms. Thanks, JC. ---------- Forwarded message ---------- From: Jayachandran C. <jchandra@freebsd.org> Date: Thu, Nov 17, 2011 at 6:44 PM Subject: svn commit: r227623 - head/sys/mips/mips Author: jchandra Date: Thu Nov 17 13:14:59 2011 New Revision: 227623 URL: http://svn.freebsd.org/changeset/base/227623 Log: Do pmap update only on active CPUs. The pmap update_page/invalidate_page/invalidate_all operations have to be done only on active CPUs. In the simplest case, if the process is not active on any other CPUs, we can just do the operation on the current CPU. This change replaces the call to smp_rendezvous() for these operations with smp_rendezvous_cpus() in case there is more than one active CPU, or with a direct function call if there is just one active CPU. This change gives a significant performance increase in fork/exec benchmarks on XLR/XLS/XLP with 32 CPUs. Reviewed by: alc Modified: head/sys/mips/mips/pmap.c Modified: head/sys/mips/mips/pmap.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D --- head/sys/mips/mips/pmap.c =A0 Thu Nov 17 13:14:07 2011 =A0 =A0 =A0 =A0(= r227622) +++ head/sys/mips/mips/pmap.c =A0 Thu Nov 17 13:14:59 2011 =A0 =A0 =A0 =A0(= r227623) @@ -181,7 +181,6 @@ static pt_entry_t init_pte_prot(vm_offse =A0#ifdef SMP =A0static void pmap_invalidate_page_action(void *arg); -static void pmap_invalidate_all_action(void *arg); =A0static void pmap_update_page_action(void *arg); =A0#endif @@ -622,119 +621,94 @@ pmap_init(void) =A0* Low level helper routines..... 
=A0***************************************************/ +#ifdef SMP =A0static __inline void -pmap_invalidate_all_local(pmap_t pmap) +pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) =A0{ - =A0 =A0 =A0 u_int cpuid; + =A0 =A0 =A0 int =A0 =A0 cpuid, cpu, self; + =A0 =A0 =A0 cpuset_t active_cpus; + =A0 =A0 =A0 sched_pin(); + =A0 =A0 =A0 if (is_kernel_pmap(pmap)) { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 smp_rendezvous(NULL, fn, NULL, arg); + =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out; + =A0 =A0 =A0 } + =A0 =A0 =A0 /* Force ASID update on inactive CPUs */ + =A0 =A0 =A0 CPU_FOREACH(cpu) { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (!CPU_ISSET(cpu, &pmap->pm_active)) + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 pmap->pm_asid[cpu].gen =3D 0; + =A0 =A0 =A0 } =A0 =A0 =A0 =A0cpuid =3D PCPU_GET(cpuid); + =A0 =A0 =A0 /* + =A0 =A0 =A0 =A0* XXX: barrier/locking for active? + =A0 =A0 =A0 =A0* + =A0 =A0 =A0 =A0* Take a snapshot of active here, any further changes are = ignored. + =A0 =A0 =A0 =A0* tlb update/invalidate should be harmless on inactive CPU= s + =A0 =A0 =A0 =A0*/ + =A0 =A0 =A0 active_cpus =3D pmap->pm_active; + =A0 =A0 =A0 self =3D CPU_ISSET(cpuid, &active_cpus); + =A0 =A0 =A0 CPU_CLR(cpuid, &active_cpus); + =A0 =A0 =A0 /* Optimize for the case where this cpu is the only active on= e */ + =A0 =A0 =A0 if (CPU_EMPTY(&active_cpus)) { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (self) + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 fn(arg); + =A0 =A0 =A0 } else { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (self) + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CPU_SET(cpuid, &active_cpus); + =A0 =A0 =A0 =A0 =A0 =A0 =A0 smp_rendezvous_cpus(active_cpus, NULL, fn, NU= LL, arg); + =A0 =A0 =A0 } +out: + =A0 =A0 =A0 sched_unpin(); +} +#else /* !SMP */ +static __inline void +pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) +{ + =A0 =A0 =A0 int =A0 =A0 cpuid; - =A0 =A0 =A0 if (pmap =3D=3D kernel_pmap) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_invalidate_all(); + =A0 =A0 =A0 if 
(is_kernel_pmap(pmap)) { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 fn(arg); =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return; =A0 =A0 =A0 =A0} - =A0 =A0 =A0 if (CPU_ISSET(cpuid, &pmap->pm_active)) - =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_invalidate_all_user(pmap); - =A0 =A0 =A0 else + =A0 =A0 =A0 cpuid =3D PCPU_GET(cpuid); + =A0 =A0 =A0 if (!CPU_ISSET(cpuid, &pmap->pm_active)) =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0pmap->pm_asid[cpuid].gen =3D 0; + =A0 =A0 =A0 else + =A0 =A0 =A0 =A0 =A0 =A0 =A0 fn(arg); =A0} +#endif /* SMP */ -#ifdef SMP =A0static void =A0pmap_invalidate_all(pmap_t pmap) =A0{ - =A0 =A0 =A0 smp_rendezvous(0, pmap_invalidate_all_action, 0, pmap); + =A0 =A0 =A0 pmap_call_on_active_cpus(pmap, + =A0 =A0 =A0 =A0 =A0 (void (*)(void *))tlb_invalidate_all_user, pmap); =A0} -static void -pmap_invalidate_all_action(void *arg) -{ - - =A0 =A0 =A0 pmap_invalidate_all_local((pmap_t)arg); -} -#else -static void -pmap_invalidate_all(pmap_t pmap) -{ - - =A0 =A0 =A0 pmap_invalidate_all_local(pmap); -} -#endif - -static __inline void -pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va) -{ - =A0 =A0 =A0 u_int cpuid; - - =A0 =A0 =A0 cpuid =3D PCPU_GET(cpuid); - - =A0 =A0 =A0 if (is_kernel_pmap(pmap)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_invalidate_address(pmap, va); - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 } - =A0 =A0 =A0 if (pmap->pm_asid[cpuid].gen !=3D PCPU_GET(asid_generation)) - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 else if (!CPU_ISSET(cpuid, &pmap->pm_active)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 pmap->pm_asid[cpuid].gen =3D 0; - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 } - =A0 =A0 =A0 tlb_invalidate_address(pmap, va); -} - -#ifdef SMP =A0struct pmap_invalidate_page_arg { =A0 =A0 =A0 =A0pmap_t pmap; =A0 =A0 =A0 =A0vm_offset_t va; =A0}; =A0static void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) -{ - =A0 =A0 =A0 struct pmap_invalidate_page_arg arg; - - =A0 =A0 =A0 arg.pmap =3D pmap; - =A0 =A0 =A0 arg.va =3D va; - =A0 =A0 =A0 smp_rendezvous(0, 
pmap_invalidate_page_action, 0, &arg); -} - -static void =A0pmap_invalidate_page_action(void *arg) =A0{ =A0 =A0 =A0 =A0struct pmap_invalidate_page_arg *p =3D arg; - =A0 =A0 =A0 pmap_invalidate_page_local(p->pmap, p->va); + =A0 =A0 =A0 tlb_invalidate_address(p->pmap, p->va); =A0} -#else + =A0static void =A0pmap_invalidate_page(pmap_t pmap, vm_offset_t va) =A0{ + =A0 =A0 =A0 struct pmap_invalidate_page_arg arg; - =A0 =A0 =A0 pmap_invalidate_page_local(pmap, va); -} -#endif - -static __inline void -pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte) -{ - =A0 =A0 =A0 u_int cpuid; - - =A0 =A0 =A0 cpuid =3D PCPU_GET(cpuid); - - =A0 =A0 =A0 if (is_kernel_pmap(pmap)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_update(pmap, va, pte); - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 } - =A0 =A0 =A0 if (pmap->pm_asid[cpuid].gen !=3D PCPU_GET(asid_generation)) - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 else if (!CPU_ISSET(cpuid, &pmap->pm_active)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 pmap->pm_asid[cpuid].gen =3D 0; - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 } - =A0 =A0 =A0 tlb_update(pmap, va, pte); + =A0 =A0 =A0 arg.pmap =3D pmap; + =A0 =A0 =A0 arg.va =3D va; + =A0 =A0 =A0 pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &= arg); =A0} -#ifdef SMP =A0struct pmap_update_page_arg { =A0 =A0 =A0 =A0pmap_t pmap; =A0 =A0 =A0 =A0vm_offset_t va; @@ -742,31 +716,23 @@ struct pmap_update_page_arg { =A0}; =A0static void -pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) -{ - =A0 =A0 =A0 struct pmap_update_page_arg arg; - - =A0 =A0 =A0 arg.pmap =3D pmap; - =A0 =A0 =A0 arg.va =3D va; - =A0 =A0 =A0 arg.pte =3D pte; - =A0 =A0 =A0 smp_rendezvous(0, pmap_update_page_action, 0, &arg); -} - -static void =A0pmap_update_page_action(void *arg) =A0{ =A0 =A0 =A0 =A0struct pmap_update_page_arg *p =3D arg; - =A0 =A0 =A0 pmap_update_page_local(p->pmap, p->va, p->pte); + =A0 =A0 =A0 tlb_update(p->pmap, p->va, p->pte); =A0} -#else + =A0static void 
=A0pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) =A0{ + =A0 =A0 =A0 struct pmap_update_page_arg arg; - =A0 =A0 =A0 pmap_update_page_local(pmap, va, pte); + =A0 =A0 =A0 arg.pmap =3D pmap; + =A0 =A0 =A0 arg.va =3D va; + =A0 =A0 =A0 arg.pte =3D pte; + =A0 =A0 =A0 pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg)= ; =A0} -#endif =A0/* =A0* =A0 =A0 Routine: =A0 =A0 =A0 =A0pmap_extract @@ -3213,7 +3179,7 @@ pmap_emulate_modified(pmap_t pmap, vm_of =A0#ifdef SMP =A0 =A0 =A0 =A0/* It is possible that some other CPU changed m-bit */ =A0 =A0 =A0 =A0if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 pmap_update_page_local(pmap, va, *pte); + =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_update(pmap, va, *pte); =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0PMAP_UNLOCK(pmap); =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return (0); =A0 =A0 =A0 =A0} @@ -3227,7 +3193,7 @@ pmap_emulate_modified(pmap_t pmap, vm_of =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return (1); =A0 =A0 =A0 =A0} =A0 =A0 =A0 =A0pte_set(pte, PTE_D); - =A0 =A0 =A0 pmap_update_page_local(pmap, va, *pte); + =A0 =A0 =A0 tlb_update(pmap, va, *pte); =A0 =A0 =A0 =A0pa =3D TLBLO_PTE_TO_PA(*pte); =A0 =A0 =A0 =A0if (!page_is_managed(pa)) =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0panic("pmap_emulate_modified: unmanaged page= ");
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CA%2B7sy7Aj89iTTD=7ywiatyBkBW2OjborNydZTf8dgXLNjY8G1w>