Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 20 May 2011 14:53:16 +0000 (UTC)
From:      Attilio Rao <attilio@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r222131 - in projects/largeSMP/sys/i386: i386 include xen
Message-ID:  <201105201453.p4KErGem088936@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: attilio
Date: Fri May 20 14:53:16 2011
New Revision: 222131
URL: http://svn.freebsd.org/changeset/base/222131

Log:
  Reintroduce the lazypmap infrastructure and convert it to using
  cpuset_t.
  
  Requested by:	alc

Modified:
  projects/largeSMP/sys/i386/i386/apic_vector.s
  projects/largeSMP/sys/i386/i386/db_trace.c
  projects/largeSMP/sys/i386/i386/mp_machdep.c
  projects/largeSMP/sys/i386/i386/pmap.c
  projects/largeSMP/sys/i386/i386/swtch.s
  projects/largeSMP/sys/i386/include/smp.h
  projects/largeSMP/sys/i386/xen/mp_machdep.c
  projects/largeSMP/sys/i386/xen/pmap.c

Modified: projects/largeSMP/sys/i386/i386/apic_vector.s
==============================================================================
--- projects/largeSMP/sys/i386/i386/apic_vector.s	Fri May 20 14:32:28 2011	(r222130)
+++ projects/largeSMP/sys/i386/i386/apic_vector.s	Fri May 20 14:53:16 2011	(r222131)
@@ -357,4 +357,20 @@ IDTVEC(rendezvous)
 	POP_FRAME
 	iret
 	
+/*
+ * IPI_LAZYPMAP handler: clean up when we lose out on the lazy context switch
+ * optimization, i.e. when a PTD is about to be released but a CPU still borrows it.
+ */
+	SUPERALIGN_TEXT
+IDTVEC(lazypmap)
+	PUSH_FRAME
+	SET_KERNEL_SREGS
+	cld				/* clear the direction flag before calling C */
+
+	call	pmap_lazyfix_action	/* C handler, defined in pmap.c */
+
+	movl	lapic, %eax
+	movl	$0, LA_EOI(%eax)	/* End Of Interrupt to APIC */
+	POP_FRAME
+	iret
 #endif /* SMP */

Modified: projects/largeSMP/sys/i386/i386/db_trace.c
==============================================================================
--- projects/largeSMP/sys/i386/i386/db_trace.c	Fri May 20 14:32:28 2011	(r222130)
+++ projects/largeSMP/sys/i386/i386/db_trace.c	Fri May 20 14:53:16 2011	(r222131)
@@ -312,7 +312,8 @@ db_nextframe(struct i386_frame **fp, db_
 			frame_type = TRAP_TIMERINT;
 		else if (strcmp(name, "Xcpustop") == 0 ||
 		    strcmp(name, "Xrendezvous") == 0 ||
-		    strcmp(name, "Xipi_intr_bitmap_handler") == 0)
+		    strcmp(name, "Xipi_intr_bitmap_handler") == 0 ||
+		    strcmp(name, "Xlazypmap") == 0)
 			frame_type = TRAP_INTERRUPT;
 	}
 

Modified: projects/largeSMP/sys/i386/i386/mp_machdep.c
==============================================================================
--- projects/largeSMP/sys/i386/i386/mp_machdep.c	Fri May 20 14:32:28 2011	(r222130)
+++ projects/largeSMP/sys/i386/i386/mp_machdep.c	Fri May 20 14:53:16 2011	(r222131)
@@ -166,6 +166,7 @@ u_long *ipi_invlrng_counts[MAXCPU];
 u_long *ipi_invlpg_counts[MAXCPU];
 u_long *ipi_invlcache_counts[MAXCPU];
 u_long *ipi_rendezvous_counts[MAXCPU];
+u_long *ipi_lazypmap_counts[MAXCPU];
 static u_long *ipi_hardclock_counts[MAXCPU];
 #endif
 
@@ -575,6 +576,10 @@ cpu_mp_start(void)
 	setidt(IPI_INVLCACHE, IDTVEC(invlcache),
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
+	/* Install an inter-CPU IPI for lazy pmap release */
+	setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
+	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
 	/* Install an inter-CPU IPI for all-CPU rendezvous */
 	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
@@ -1718,6 +1723,8 @@ mp_ipi_intrcnt(void *dummy)
 		intrcnt_add(buf, &ipi_ast_counts[i]);
 		snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
 		intrcnt_add(buf, &ipi_rendezvous_counts[i]);
+		snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i);
+		intrcnt_add(buf, &ipi_lazypmap_counts[i]);
 		snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
 		intrcnt_add(buf, &ipi_hardclock_counts[i]);
 	}		

Modified: projects/largeSMP/sys/i386/i386/pmap.c
==============================================================================
--- projects/largeSMP/sys/i386/i386/pmap.c	Fri May 20 14:32:28 2011	(r222130)
+++ projects/largeSMP/sys/i386/i386/pmap.c	Fri May 20 14:53:16 2011	(r222131)
@@ -1900,6 +1900,104 @@ retry:
 * Pmap allocation/deallocation routines.
  ***************************************************/
 
+#ifdef SMP
+/*
+ * Shared state for the SMP shootdown of other users of the pmap that we are
+ * trying to dispose of.  Set up by pmap_lazyfix(); used by pmap_lazyfix_action().
+ */
+static cpuset_t *lazymask;		/* &pm_active of the pmap being released */
+static u_int lazyptd;			/* vtophys() of that pmap's pm_pdpt (PAE) or pm_pdir */
+static volatile u_int lazywait;	/* 0 while an IPI is outstanding; the handler stores 1 */
+
+void pmap_lazyfix_action(void);	/* called from IDTVEC(lazypmap) in apic_vector.s */
+
+void
+pmap_lazyfix_action(void)
+{
+
+#ifdef COUNT_IPIS
+	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;	/* per-CPU count of this IPI */
+#endif
+	if (rcr3() == lazyptd)		/* is this CPU still on the released pmap's tables? */
+		load_cr3(PCPU_GET(curpcb)->pcb_cr3);	/* reload cr3 from the current pcb */
+	CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);	/* remove this CPU from *lazymask */
+	atomic_store_rel_int(&lazywait, 1);	/* lets the spin loop in pmap_lazyfix() exit */
+}
+
+static void	/* local-CPU variant of pmap_lazyfix_action(): no IPI, no lazywait handshake */
+pmap_lazyfix_self(cpuset_t mymask)
+{
+
+	if (rcr3() == lazyptd)		/* is this CPU still on the released pmap's tables? */
+		load_cr3(PCPU_GET(curpcb)->pcb_cr3);	/* reload cr3 from the current pcb */
+	CPU_NAND_ATOMIC(lazymask, &mymask);	/* presumably clears mymask's bits in *lazymask - confirm */
+}
+
+
+static void	/* Evict pmap from every CPU still in pm_active, one CPU per pass. */
+pmap_lazyfix(pmap_t pmap)
+{
+	cpuset_t mymask, mask;
+	u_int spins;
+	int lsb;
+
+	mask = pmap->pm_active;
+	while (!CPU_EMPTY(&mask)) {
+		spins = 50000000;	/* bound on the IPI wait below */
+
+		/* Narrow mask to its least significant set bit (ffs result - 1). */
+		lsb = cpusetobj_ffs(&mask);
+		lsb--;
+		CPU_SETOF(lsb, &mask);
+		mtx_lock_spin(&smp_ipi_mtx);	/* lazy* state is set up under this lock */
+#ifdef PAE
+		lazyptd = vtophys(pmap->pm_pdpt);
+#else
+		lazyptd = vtophys(pmap->pm_pdir);
+#endif
+		mymask = PCPU_GET(cpumask);
+		if (!CPU_CMP(&mask, &mymask)) {	/* sets equal: target is this CPU, no IPI */
+			lazymask = &pmap->pm_active;
+			pmap_lazyfix_self(mymask);
+		} else {
+			atomic_store_rel_int((u_int *)&lazymask,
+			    (u_int)&pmap->pm_active);
+			atomic_store_rel_int(&lazywait, 0);
+			ipi_selected(mask, IPI_LAZYPMAP);
+			while (lazywait == 0) {	/* pmap_lazyfix_action() stores 1 */
+				ia32_pause();
+				if (--spins == 0)
+					break;
+			}
+		}
+		mtx_unlock_spin(&smp_ipi_mtx);
+		if (spins == 0)
+			printf("pmap_lazyfix: spun for 50000000\n");
+		mask = pmap->pm_active;	/* re-read: loop until no CPU remains */
+	}
+}
+
+#else	/* SMP */
+
+/*
+ * Cleaning up on uniprocessor is easy.  For various reasons, we're
+ * unlikely to have to even execute this code, including the fact
+ * that the cleanup is deferred until the parent does a wait(2), which
+ * means that another userland process has run.
+ */
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+	u_int cr3;
+
+	cr3 = vtophys(pmap->pm_pdir);	/* NOTE(review): SMP variant uses pm_pdpt under PAE - confirm */
+	if (cr3 == rcr3()) {		/* this CPU is still on the released pmap's tables */
+		load_cr3(PCPU_GET(curpcb)->pcb_cr3);	/* reload cr3 from the current pcb */
+		CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);	/* and drop this CPU from pm_active */
+	}
+}
+#endif	/* SMP */
+
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
@@ -1917,6 +2015,7 @@ pmap_release(pmap_t pmap)
 	KASSERT(pmap->pm_root == NULL,
 	    ("pmap_release: pmap has reserved page table page(s)"));
 
+	pmap_lazyfix(pmap);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_REMOVE(pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);

Modified: projects/largeSMP/sys/i386/i386/swtch.s
==============================================================================
--- projects/largeSMP/sys/i386/i386/swtch.s	Fri May 20 14:32:28 2011	(r222130)
+++ projects/largeSMP/sys/i386/i386/swtch.s	Fri May 20 14:53:16 2011	(r222131)
@@ -174,6 +174,12 @@ ENTRY(cpu_switch)
 
 	/* switch address space */
 	movl	PCB_CR3(%edx),%eax
+#ifdef PAE
+	cmpl	%eax,IdlePDPT			/* Kernel address space? */
+#else
+	cmpl	%eax,IdlePTD			/* Kernel address space? */
+#endif
+	je	sw0
 	READ_CR3(%ebx)				/* The same address space? */
 	cmpl	%ebx,%eax
 	je	sw0

Modified: projects/largeSMP/sys/i386/include/smp.h
==============================================================================
--- projects/largeSMP/sys/i386/include/smp.h	Fri May 20 14:32:28 2011	(r222130)
+++ projects/largeSMP/sys/i386/include/smp.h	Fri May 20 14:53:16 2011	(r222131)
@@ -42,6 +42,7 @@ extern u_long *ipi_invlrng_counts[MAXCPU
 extern u_long *ipi_invlpg_counts[MAXCPU];
 extern u_long *ipi_invlcache_counts[MAXCPU];
 extern u_long *ipi_rendezvous_counts[MAXCPU];
+extern u_long *ipi_lazypmap_counts[MAXCPU];
 #endif
 
 /* IPI handlers */
@@ -52,7 +53,8 @@ inthand_t
 	IDTVEC(invlcache),	/* Write back and invalidate cache */
 	IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ 
 	IDTVEC(cpustop),	/* CPU stops & waits to be restarted */
-	IDTVEC(rendezvous);	/* handle CPU rendezvous */
+	IDTVEC(rendezvous),	/* handle CPU rendezvous */
+	IDTVEC(lazypmap);	/* handle lazy pmap release */
 
 /* functions in mp_machdep.c */
 void	cpu_add(u_int apic_id, char boot_cpu);

Modified: projects/largeSMP/sys/i386/xen/mp_machdep.c
==============================================================================
--- projects/largeSMP/sys/i386/xen/mp_machdep.c	Fri May 20 14:32:28 2011	(r222130)
+++ projects/largeSMP/sys/i386/xen/mp_machdep.c	Fri May 20 14:53:16 2011	(r222131)
@@ -154,6 +154,7 @@ static cpuset_t	hyperthreading_cpus_mask
 
 extern void Xhypervisor_callback(void);
 extern void failsafe_callback(void);
+extern void pmap_lazyfix_action(void);
 
 struct cpu_group *
 cpu_topo(void)
@@ -341,16 +342,24 @@ iv_invlcache(uintptr_t a, uintptr_t b)
 	atomic_add_int(&smp_tlb_wait, 1);
 }
 
+static void	/* ipi_vectors[] entry for the lazy pmap IPI; a and b are not used */
+iv_lazypmap(uintptr_t a, uintptr_t b)
+{
+	pmap_lazyfix_action();		/* defined in pmap.c */
+	atomic_add_int(&smp_tlb_wait, 1);	/* NOTE(review): pmap_lazyfix() waits on lazywait, not smp_tlb_wait - confirm */
+}
+
 /*
  * These start from "IPI offset" APIC_IPI_INTS
  */
-static call_data_func_t *ipi_vectors[5] = 
+static call_data_func_t *ipi_vectors[6] = 
 {
   iv_rendezvous,
   iv_invltlb,
   iv_invlpg,
   iv_invlrng,
   iv_invlcache,
+  iv_lazypmap,
 };
 
 /*

Modified: projects/largeSMP/sys/i386/xen/pmap.c
==============================================================================
--- projects/largeSMP/sys/i386/xen/pmap.c	Fri May 20 14:32:28 2011	(r222130)
+++ projects/largeSMP/sys/i386/xen/pmap.c	Fri May 20 14:53:16 2011	(r222131)
@@ -1683,6 +1683,104 @@ retry:
 * Pmap allocation/deallocation routines.
  ***************************************************/
 
+#ifdef SMP
+/*
+ * Shared state for the SMP shootdown of other users of the pmap that we are
+ * trying to dispose of.  Set up by pmap_lazyfix(); used by pmap_lazyfix_action().
+ */
+static cpuset_t *lazymask;		/* &pm_active of the pmap being released */
+static u_int lazyptd;			/* vtophys() of that pmap's pm_pdpt (PAE) or pm_pdir */
+static volatile u_int lazywait;	/* 0 while an IPI is outstanding; the handler stores 1 */
+
+void pmap_lazyfix_action(void);	/* called from iv_lazypmap() in xen/mp_machdep.c */
+
+void
+pmap_lazyfix_action(void)
+{
+
+#ifdef COUNT_IPIS
+	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;	/* per-CPU count of this IPI */
+#endif
+	if (rcr3() == lazyptd)		/* is this CPU still on the released pmap's tables? */
+		load_cr3(PCPU_GET(curpcb)->pcb_cr3);	/* reload cr3 from the current pcb */
+	CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);	/* remove this CPU from *lazymask */
+	atomic_store_rel_int(&lazywait, 1);	/* lets the spin loop in pmap_lazyfix() exit */
+}
+
+static void	/* local-CPU variant of pmap_lazyfix_action(): no IPI, no lazywait handshake */
+pmap_lazyfix_self(cpuset_t mymask)
+{
+
+	if (rcr3() == lazyptd)		/* is this CPU still on the released pmap's tables? */
+		load_cr3(PCPU_GET(curpcb)->pcb_cr3);	/* reload cr3 from the current pcb */
+	CPU_NAND_ATOMIC(lazymask, &mymask);	/* presumably clears mymask's bits in *lazymask - confirm */
+}
+
+
+static void	/* Evict pmap from every CPU still in pm_active, one CPU per pass. */
+pmap_lazyfix(pmap_t pmap)
+{
+	cpuset_t mymask, mask;
+	u_int spins;
+	int lsb;
+
+	mask = pmap->pm_active;
+	while (!CPU_EMPTY(&mask)) {
+		spins = 50000000;	/* bound on the IPI wait below */
+
+		/* Narrow mask to its least significant set bit (ffs result - 1). */
+		lsb = cpusetobj_ffs(&mask);
+		lsb--;
+		CPU_SETOF(lsb, &mask);
+		mtx_lock_spin(&smp_ipi_mtx);	/* lazy* state is set up under this lock */
+#ifdef PAE
+		lazyptd = vtophys(pmap->pm_pdpt);
+#else
+		lazyptd = vtophys(pmap->pm_pdir);
+#endif
+		mymask = PCPU_GET(cpumask);
+		if (!CPU_CMP(&mask, &mymask)) {	/* sets equal: target is this CPU, no IPI */
+			lazymask = &pmap->pm_active;
+			pmap_lazyfix_self(mymask);
+		} else {
+			atomic_store_rel_int((u_int *)&lazymask,
+			    (u_int)&pmap->pm_active);
+			atomic_store_rel_int(&lazywait, 0);
+			ipi_selected(mask, IPI_LAZYPMAP);
+			while (lazywait == 0) {	/* pmap_lazyfix_action() stores 1 */
+				ia32_pause();
+				if (--spins == 0)
+					break;
+			}
+		}
+		mtx_unlock_spin(&smp_ipi_mtx);
+		if (spins == 0)
+			printf("pmap_lazyfix: spun for 50000000\n");
+		mask = pmap->pm_active;	/* re-read: loop until no CPU remains */
+	}
+}
+
+#else	/* SMP */
+
+/*
+ * Cleaning up on uniprocessor is easy.  For various reasons, we're
+ * unlikely to have to even execute this code, including the fact
+ * that the cleanup is deferred until the parent does a wait(2), which
+ * means that another userland process has run.
+ */
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+	u_int cr3;
+
+	cr3 = vtophys(pmap->pm_pdir);	/* NOTE(review): SMP variant uses pm_pdpt under PAE - confirm */
+	if (cr3 == rcr3()) {		/* this CPU is still on the released pmap's tables */
+		load_cr3(PCPU_GET(curpcb)->pcb_cr3);	/* reload cr3 from the current pcb */
+		CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);	/* and drop this CPU from pm_active */
+	}
+}
+#endif	/* SMP */
+
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
@@ -1708,6 +1806,7 @@ pmap_release(pmap_t pmap)
 	mtx_lock(&createdelete_lock);
 #endif
 
+	pmap_lazyfix(pmap);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_REMOVE(pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201105201453.p4KErGem088936>