Date:      Fri, 18 Nov 2011 13:15:12 +0530
From:      "Jayachandran C." <jchandra@freebsd.org>
To:        freebsd-mips@freebsd.org
Subject:   pmap changes for mips SMP
Message-ID:  <CA+7sy7Aj89iTTD=7ywiatyBkBW2OjborNydZTf8dgXLNjY8G1w@mail.gmail.com>

I've committed this change to the MIPS pmap.c; this optimization gives a
significant improvement in fork/exec performance on SMP systems with a
large number of CPUs.

I have stress-tested it on XLR/XLP, but let me know if this causes
regressions on any other platforms.
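
For reference, the heart of the change is a new helper that runs a TLB
operation only on the CPUs where the pmap is active, instead of IPI'ing
every CPU through smp_rendezvous(). A simplified sketch of the idea follows
(the function name here is just for illustration; the committed
pmap_call_on_active_cpus() in the diff below additionally handles the
kernel pmap and forces an ASID update on inactive CPUs):

/*
 * Sketch only: dispatch fn(arg) to the CPUs on which the pmap is active.
 */
static __inline void
call_on_active_cpus_sketch(pmap_t pmap, void (*fn)(void *), void *arg)
{
        cpuset_t active_cpus;
        int cpuid, self;

        sched_pin();                    /* stay on this CPU for the snapshot */
        cpuid = PCPU_GET(cpuid);
        active_cpus = pmap->pm_active;  /* snapshot; later changes are ignored */
        self = CPU_ISSET(cpuid, &active_cpus);
        CPU_CLR(cpuid, &active_cpus);
        if (CPU_EMPTY(&active_cpus)) {
                /* At most the current CPU is active: no IPIs needed. */
                if (self)
                        fn(arg);
        } else {
                if (self)
                        CPU_SET(cpuid, &active_cpus);
                /* IPI only the CPUs that currently have this pmap active. */
                smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg);
        }
        sched_unpin();
}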

Thanks,
JC.

---------- Forwarded message ----------
From: Jayachandran C. <jchandra@freebsd.org>
Date: Thu, Nov 17, 2011 at 6:44 PM
Subject: svn commit: r227623 - head/sys/mips/mips


Author: jchandra
Date: Thu Nov 17 13:14:59 2011
New Revision: 227623
URL: http://svn.freebsd.org/changeset/base/227623

Log:
  Do pmap update only on active CPUs.

  The pmap update_page/invalidate_page/invalidate_all operations have to be
  done only on active CPUs. In the simplest case, if the process is not
  active on any other CPUs, we can just do the operation on the current CPU.

  This change replaces the call to smp_rendezvous() for these operations with
  smp_rendezvous_cpus() when there is more than one active CPU, or with a direct
  function call if there is just one active CPU.

  This change gives a significant performance increase in fork/exec benchmarks
  on XLR/XLS/XLP with 32 CPUs.

  Reviewed by:  alc

Modified:
  head/sys/mips/mips/pmap.c

Modified: head/sys/mips/mips/pmap.c
==============================================================================
--- head/sys/mips/mips/pmap.c      Thu Nov 17 13:14:07 2011        (r227622)
+++ head/sys/mips/mips/pmap.c      Thu Nov 17 13:14:59 2011        (r227623)
@@ -181,7 +181,6 @@ static pt_entry_t init_pte_prot(vm_offse
 
 #ifdef SMP
 static void pmap_invalidate_page_action(void *arg);
-static void pmap_invalidate_all_action(void *arg);
 static void pmap_update_page_action(void *arg);
 #endif
 
@@ -622,119 +621,94 @@ pmap_init(void)
  * Low level helper routines.....
  ***************************************************/
 
+#ifdef SMP
 static __inline void
-pmap_invalidate_all_local(pmap_t pmap)
+pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
 {
-        u_int cpuid;
+        int     cpuid, cpu, self;
+        cpuset_t active_cpus;
 
+        sched_pin();
+        if (is_kernel_pmap(pmap)) {
+                smp_rendezvous(NULL, fn, NULL, arg);
+                goto out;
+        }
+        /* Force ASID update on inactive CPUs */
+        CPU_FOREACH(cpu) {
+                if (!CPU_ISSET(cpu, &pmap->pm_active))
+                        pmap->pm_asid[cpu].gen = 0;
+        }
         cpuid = PCPU_GET(cpuid);
+        /*
+         * XXX: barrier/locking for active?
+         *
+         * Take a snapshot of active here, any further changes are ignored.
+         * tlb update/invalidate should be harmless on inactive CPUs
+         */
+        active_cpus = pmap->pm_active;
+        self = CPU_ISSET(cpuid, &active_cpus);
+        CPU_CLR(cpuid, &active_cpus);
+        /* Optimize for the case where this cpu is the only active one */
+        if (CPU_EMPTY(&active_cpus)) {
+                if (self)
+                        fn(arg);
+        } else {
+                if (self)
+                        CPU_SET(cpuid, &active_cpus);
+                smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg);
+        }
+out:
+        sched_unpin();
+}
+#else /* !SMP */
+static __inline void
+pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
+{
+        int     cpuid;
 
-        if (pmap == kernel_pmap) {
-                tlb_invalidate_all();
+        if (is_kernel_pmap(pmap)) {
+                fn(arg);
                 return;
         }
-        if (CPU_ISSET(cpuid, &pmap->pm_active))
-                tlb_invalidate_all_user(pmap);
-        else
+        cpuid = PCPU_GET(cpuid);
+        if (!CPU_ISSET(cpuid, &pmap->pm_active))
                 pmap->pm_asid[cpuid].gen = 0;
+        else
+                fn(arg);
 }
+#endif /* SMP */
 
-#ifdef SMP
 static void
 pmap_invalidate_all(pmap_t pmap)
 {
 
-        smp_rendezvous(0, pmap_invalidate_all_action, 0, pmap);
+        pmap_call_on_active_cpus(pmap,
+            (void (*)(void *))tlb_invalidate_all_user, pmap);
 }
 
-static void
-pmap_invalidate_all_action(void *arg)
-{
-
-        pmap_invalidate_all_local((pmap_t)arg);
-}
-#else
-static void
-pmap_invalidate_all(pmap_t pmap)
-{
-
-        pmap_invalidate_all_local(pmap);
-}
-#endif
-
-static __inline void
-pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va)
-{
-        u_int cpuid;
-
-        cpuid = PCPU_GET(cpuid);
-
-        if (is_kernel_pmap(pmap)) {
-                tlb_invalidate_address(pmap, va);
-                return;
-        }
-        if (pmap->pm_asid[cpuid].gen != PCPU_GET(asid_generation))
-                return;
-        else if (!CPU_ISSET(cpuid, &pmap->pm_active)) {
-                pmap->pm_asid[cpuid].gen = 0;
-                return;
-        }
-        tlb_invalidate_address(pmap, va);
-}
-
-#ifdef SMP
 struct pmap_invalidate_page_arg {
         pmap_t pmap;
         vm_offset_t va;
 };
 
 static void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
-        struct pmap_invalidate_page_arg arg;
-
-        arg.pmap = pmap;
-        arg.va = va;
-        smp_rendezvous(0, pmap_invalidate_page_action, 0, &arg);
-}
-
-static void
 pmap_invalidate_page_action(void *arg)
 {
         struct pmap_invalidate_page_arg *p = arg;
 
-        pmap_invalidate_page_local(p->pmap, p->va);
+        tlb_invalidate_address(p->pmap, p->va);
 }
-#else
+
 static void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
+        struct pmap_invalidate_page_arg arg;
 
-        pmap_invalidate_page_local(pmap, va);
-}
-#endif
-
-static __inline void
-pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
-{
-        u_int cpuid;
-
-        cpuid = PCPU_GET(cpuid);
-
-        if (is_kernel_pmap(pmap)) {
-                tlb_update(pmap, va, pte);
-                return;
-        }
-        if (pmap->pm_asid[cpuid].gen != PCPU_GET(asid_generation))
-                return;
-        else if (!CPU_ISSET(cpuid, &pmap->pm_active)) {
-                pmap->pm_asid[cpuid].gen = 0;
-                return;
-        }
-        tlb_update(pmap, va, pte);
+        arg.pmap = pmap;
+        arg.va = va;
+        pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &arg);
 }
 
-#ifdef SMP
 struct pmap_update_page_arg {
         pmap_t pmap;
         vm_offset_t va;
@@ -742,31 +716,23 @@ struct pmap_update_page_arg {
 };
 
 static void
-pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
-{
-        struct pmap_update_page_arg arg;
-
-        arg.pmap = pmap;
-        arg.va = va;
-        arg.pte = pte;
-        smp_rendezvous(0, pmap_update_page_action, 0, &arg);
-}
-
-static void
 pmap_update_page_action(void *arg)
 {
         struct pmap_update_page_arg *p = arg;
 
-        pmap_update_page_local(p->pmap, p->va, p->pte);
+        tlb_update(p->pmap, p->va, p->pte);
 }
-#else
+
 static void
 pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
 {
+        struct pmap_update_page_arg arg;
 
-        pmap_update_page_local(pmap, va, pte);
+        arg.pmap = pmap;
+        arg.va = va;
+        arg.pte = pte;
+        pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg);
 }
-#endif
 
 /*
  *      Routine:        pmap_extract
@@ -3213,7 +3179,7 @@ pmap_emulate_modified(pmap_t pmap, vm_of
 #ifdef SMP
         /* It is possible that some other CPU changed m-bit */
         if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) {
-                pmap_update_page_local(pmap, va, *pte);
+                tlb_update(pmap, va, *pte);
                 PMAP_UNLOCK(pmap);
                 return (0);
         }
@@ -3227,7 +3193,7 @@ pmap_emulate_modified(pmap_t pmap, vm_of
                 return (1);
         }
         pte_set(pte, PTE_D);
-        pmap_update_page_local(pmap, va, *pte);
+        tlb_update(pmap, va, *pte);
         pa = TLBLO_PTE_TO_PA(*pte);
         if (!page_is_managed(pa))
                 panic("pmap_emulate_modified: unmanaged page");


