From owner-svn-src-projects@FreeBSD.ORG Fri May 20 14:53:17 2011 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 4D80A106564A; Fri, 20 May 2011 14:53:17 +0000 (UTC) (envelope-from attilio@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 3C40D8FC18; Fri, 20 May 2011 14:53:17 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id p4KErHru088945; Fri, 20 May 2011 14:53:17 GMT (envelope-from attilio@svn.freebsd.org) Received: (from attilio@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id p4KErGem088936; Fri, 20 May 2011 14:53:16 GMT (envelope-from attilio@svn.freebsd.org) Message-Id: <201105201453.p4KErGem088936@svn.freebsd.org> From: Attilio Rao Date: Fri, 20 May 2011 14:53:16 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r222131 - in projects/largeSMP/sys/i386: i386 include xen X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 20 May 2011 14:53:17 -0000 Author: attilio Date: Fri May 20 14:53:16 2011 New Revision: 222131 URL: http://svn.freebsd.org/changeset/base/222131 Log: Reintroduce the lazypmap infrastructure and convert it to using cpuset_t. 
Requested by: alc Modified: projects/largeSMP/sys/i386/i386/apic_vector.s projects/largeSMP/sys/i386/i386/db_trace.c projects/largeSMP/sys/i386/i386/mp_machdep.c projects/largeSMP/sys/i386/i386/pmap.c projects/largeSMP/sys/i386/i386/swtch.s projects/largeSMP/sys/i386/include/smp.h projects/largeSMP/sys/i386/xen/mp_machdep.c projects/largeSMP/sys/i386/xen/pmap.c Modified: projects/largeSMP/sys/i386/i386/apic_vector.s ============================================================================== --- projects/largeSMP/sys/i386/i386/apic_vector.s Fri May 20 14:32:28 2011 (r222130) +++ projects/largeSMP/sys/i386/i386/apic_vector.s Fri May 20 14:53:16 2011 (r222131) @@ -357,4 +357,20 @@ IDTVEC(rendezvous) POP_FRAME iret +/* + * Clean up when we lose out on the lazy context switch optimization. + * ie: when we are about to release a PTD but a cpu is still borrowing it. + */ + SUPERALIGN_TEXT +IDTVEC(lazypmap) + PUSH_FRAME + SET_KERNEL_SREGS + cld + + call pmap_lazyfix_action + + movl lapic, %eax + movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + POP_FRAME + iret #endif /* SMP */ Modified: projects/largeSMP/sys/i386/i386/db_trace.c ============================================================================== --- projects/largeSMP/sys/i386/i386/db_trace.c Fri May 20 14:32:28 2011 (r222130) +++ projects/largeSMP/sys/i386/i386/db_trace.c Fri May 20 14:53:16 2011 (r222131) @@ -312,7 +312,8 @@ db_nextframe(struct i386_frame **fp, db_ frame_type = TRAP_TIMERINT; else if (strcmp(name, "Xcpustop") == 0 || strcmp(name, "Xrendezvous") == 0 || - strcmp(name, "Xipi_intr_bitmap_handler") == 0) + strcmp(name, "Xipi_intr_bitmap_handler") == 0 || + strcmp(name, "Xlazypmap") == 0) frame_type = TRAP_INTERRUPT; } Modified: projects/largeSMP/sys/i386/i386/mp_machdep.c ============================================================================== --- projects/largeSMP/sys/i386/i386/mp_machdep.c Fri May 20 14:32:28 2011 (r222130) +++ projects/largeSMP/sys/i386/i386/mp_machdep.c Fri May 
20 14:53:16 2011 (r222131) @@ -166,6 +166,7 @@ u_long *ipi_invlrng_counts[MAXCPU]; u_long *ipi_invlpg_counts[MAXCPU]; u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; +u_long *ipi_lazypmap_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif @@ -575,6 +576,10 @@ cpu_mp_start(void) setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + /* Install an inter-CPU IPI for lazy pmap release */ + setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -1718,6 +1723,8 @@ mp_ipi_intrcnt(void *dummy) intrcnt_add(buf, &ipi_ast_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); intrcnt_add(buf, &ipi_rendezvous_counts[i]); + snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i); + intrcnt_add(buf, &ipi_lazypmap_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); } Modified: projects/largeSMP/sys/i386/i386/pmap.c ============================================================================== --- projects/largeSMP/sys/i386/i386/pmap.c Fri May 20 14:32:28 2011 (r222130) +++ projects/largeSMP/sys/i386/i386/pmap.c Fri May 20 14:53:16 2011 (r222131) @@ -1900,6 +1900,104 @@ retry: * Pmap allocation/deallocation routines. ***************************************************/ +#ifdef SMP +/* + * Deal with a SMP shootdown of other users of the pmap that we are + * trying to dispose of. This can be a bit hairy. 
+ */ +static cpuset_t *lazymask; +static u_int lazyptd; +static volatile u_int lazywait; + +void pmap_lazyfix_action(void); + +void +pmap_lazyfix_action(void) +{ + +#ifdef COUNT_IPIS + (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; +#endif + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); + atomic_store_rel_int(&lazywait, 1); +} + +static void +pmap_lazyfix_self(cpuset_t mymask) +{ + + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + CPU_NAND_ATOMIC(lazymask, &mymask); +} + + +static void +pmap_lazyfix(pmap_t pmap) +{ + cpuset_t mymask, mask; + u_int spins; + int lsb; + + mask = pmap->pm_active; + while (!CPU_EMPTY(&mask)) { + spins = 50000000; + + /* Find least significant set bit. */ + lsb = cpusetobj_ffs(&mask); + lsb--; + CPU_SETOF(lsb, &mask); + mtx_lock_spin(&smp_ipi_mtx); +#ifdef PAE + lazyptd = vtophys(pmap->pm_pdpt); +#else + lazyptd = vtophys(pmap->pm_pdir); +#endif + mymask = PCPU_GET(cpumask); + if (!CPU_CMP(&mask, &mymask)) { /* equal sets: it is us */ + lazymask = &pmap->pm_active; + pmap_lazyfix_self(mymask); + } else { + atomic_store_rel_int((u_int *)&lazymask, + (u_int)&pmap->pm_active); + atomic_store_rel_int(&lazywait, 0); + ipi_selected(mask, IPI_LAZYPMAP); + while (lazywait == 0) { + ia32_pause(); + if (--spins == 0) + break; + } + } + mtx_unlock_spin(&smp_ipi_mtx); + if (spins == 0) + printf("pmap_lazyfix: spun for 50000000\n"); + mask = pmap->pm_active; + } +} + +#else /* SMP */ + +/* + * Cleaning up on uniprocessor is easy. For various reasons, we're + * unlikely to have to even execute this code, including the fact + * that the cleanup is deferred until the parent does a wait(2), which + * means that another userland process has run.
+ */ +static void +pmap_lazyfix(pmap_t pmap) +{ + u_int cr3; + + cr3 = vtophys(pmap->pm_pdir); + if (cr3 == rcr3()) { + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); + } +} +#endif /* SMP */ + /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. @@ -1917,6 +2015,7 @@ pmap_release(pmap_t pmap) KASSERT(pmap->pm_root == NULL, ("pmap_release: pmap has reserved page table page(s)")); + pmap_lazyfix(pmap); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); Modified: projects/largeSMP/sys/i386/i386/swtch.s ============================================================================== --- projects/largeSMP/sys/i386/i386/swtch.s Fri May 20 14:32:28 2011 (r222130) +++ projects/largeSMP/sys/i386/i386/swtch.s Fri May 20 14:53:16 2011 (r222131) @@ -174,6 +174,12 @@ ENTRY(cpu_switch) /* switch address space */ movl PCB_CR3(%edx),%eax +#ifdef PAE + cmpl %eax,IdlePDPT /* Kernel address space? */ +#else + cmpl %eax,IdlePTD /* Kernel address space? */ +#endif + je sw0 READ_CR3(%ebx) /* The same address space? 
*/ cmpl %ebx,%eax je sw0 Modified: projects/largeSMP/sys/i386/include/smp.h ============================================================================== --- projects/largeSMP/sys/i386/include/smp.h Fri May 20 14:32:28 2011 (r222130) +++ projects/largeSMP/sys/i386/include/smp.h Fri May 20 14:53:16 2011 (r222131) @@ -42,6 +42,7 @@ extern u_long *ipi_invlrng_counts[MAXCPU extern u_long *ipi_invlpg_counts[MAXCPU]; extern u_long *ipi_invlcache_counts[MAXCPU]; extern u_long *ipi_rendezvous_counts[MAXCPU]; +extern u_long *ipi_lazypmap_counts[MAXCPU]; #endif /* IPI handlers */ @@ -52,7 +53,8 @@ inthand_t IDTVEC(invlcache), /* Write back and invalidate cache */ IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ IDTVEC(cpustop), /* CPU stops & waits to be restarted */ - IDTVEC(rendezvous); /* handle CPU rendezvous */ + IDTVEC(rendezvous), /* handle CPU rendezvous */ + IDTVEC(lazypmap); /* handle lazy pmap release */ /* functions in mp_machdep.c */ void cpu_add(u_int apic_id, char boot_cpu); Modified: projects/largeSMP/sys/i386/xen/mp_machdep.c ============================================================================== --- projects/largeSMP/sys/i386/xen/mp_machdep.c Fri May 20 14:32:28 2011 (r222130) +++ projects/largeSMP/sys/i386/xen/mp_machdep.c Fri May 20 14:53:16 2011 (r222131) @@ -154,6 +154,7 @@ static cpuset_t hyperthreading_cpus_mask extern void Xhypervisor_callback(void); extern void failsafe_callback(void); +extern void pmap_lazyfix_action(void); struct cpu_group * cpu_topo(void) @@ -341,16 +342,24 @@ iv_invlcache(uintptr_t a, uintptr_t b) atomic_add_int(&smp_tlb_wait, 1); } +static void +iv_lazypmap(uintptr_t a, uintptr_t b) +{ + pmap_lazyfix_action(); + atomic_add_int(&smp_tlb_wait, 1); +} + /* * These start from "IPI offset" APIC_IPI_INTS */ -static call_data_func_t *ipi_vectors[5] = +static call_data_func_t *ipi_vectors[6] = { iv_rendezvous, iv_invltlb, iv_invlpg, iv_invlrng, iv_invlcache, + iv_lazypmap, }; /* Modified: 
projects/largeSMP/sys/i386/xen/pmap.c ============================================================================== --- projects/largeSMP/sys/i386/xen/pmap.c Fri May 20 14:32:28 2011 (r222130) +++ projects/largeSMP/sys/i386/xen/pmap.c Fri May 20 14:53:16 2011 (r222131) @@ -1683,6 +1683,104 @@ retry: * Pmap allocation/deallocation routines. ***************************************************/ +#ifdef SMP +/* + * Deal with a SMP shootdown of other users of the pmap that we are + * trying to dispose of. This can be a bit hairy. + */ +static cpuset_t *lazymask; +static u_int lazyptd; +static volatile u_int lazywait; + +void pmap_lazyfix_action(void); + +void +pmap_lazyfix_action(void) +{ + +#ifdef COUNT_IPIS + (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; +#endif + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); + atomic_store_rel_int(&lazywait, 1); +} + +static void +pmap_lazyfix_self(cpuset_t mymask) +{ + + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + CPU_NAND_ATOMIC(lazymask, &mymask); +} + + +static void +pmap_lazyfix(pmap_t pmap) +{ + cpuset_t mymask, mask; + u_int spins; + int lsb; + + mask = pmap->pm_active; + while (!CPU_EMPTY(&mask)) { + spins = 50000000; + + /* Find least significant set bit.
*/ + lsb = cpusetobj_ffs(&mask); + lsb--; + CPU_SETOF(lsb, &mask); + mtx_lock_spin(&smp_ipi_mtx); +#ifdef PAE + lazyptd = vtophys(pmap->pm_pdpt); +#else + lazyptd = vtophys(pmap->pm_pdir); +#endif + mymask = PCPU_GET(cpumask); + if (!CPU_CMP(&mask, &mymask)) { /* equal sets: it is us */ + lazymask = &pmap->pm_active; + pmap_lazyfix_self(mymask); + } else { + atomic_store_rel_int((u_int *)&lazymask, + (u_int)&pmap->pm_active); + atomic_store_rel_int(&lazywait, 0); + ipi_selected(mask, IPI_LAZYPMAP); + while (lazywait == 0) { + ia32_pause(); + if (--spins == 0) + break; + } + } + mtx_unlock_spin(&smp_ipi_mtx); + if (spins == 0) + printf("pmap_lazyfix: spun for 50000000\n"); + mask = pmap->pm_active; + } +} + +#else /* SMP */ + +/* + * Cleaning up on uniprocessor is easy. For various reasons, we're + * unlikely to have to even execute this code, including the fact + * that the cleanup is deferred until the parent does a wait(2), which + * means that another userland process has run. + */ +static void +pmap_lazyfix(pmap_t pmap) +{ + u_int cr3; + + cr3 = vtophys(pmap->pm_pdir); + if (cr3 == rcr3()) { + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); + } +} +#endif /* SMP */ + /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. @@ -1708,6 +1806,7 @@ pmap_release(pmap_t pmap) mtx_lock(&createdelete_lock); #endif + pmap_lazyfix(pmap); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock);