Date:      Sat, 9 May 2015 19:11:02 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r282684 - in head/sys: amd64/amd64 amd64/include x86/include x86/xen
Message-ID:  <201505091911.t49JB2gh067512@svn.freebsd.org>

Author: kib
Date: Sat May  9 19:11:01 2015
New Revision: 282684
URL: https://svnweb.freebsd.org/changeset/base/282684

Log:
  Rewrite the amd64 PCID implementation to follow an algorithm
  described in Vahalia's "Unix Internals", section 15.12 "Other TLB
  Consistency Algorithms".  The same algorithm is already utilized by
  the MIPS pmap to handle ASIDs.
  
  The PCID for an address space is now allocated per-cpu during the
  context switch to a thread using the pmap, either when no PCID was
  ever allocated for the pmap on this cpu, or when the current PCID
  was invalidated.  If the PCID is reused, bit 63 of %cr3
  (CR3_PCID_SAVE) can be set to avoid the TLB flush.
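
  As an illustrative sketch (not the committed code; the truncated
  diff below only shows the call site in cpu_switch.S), the
  context-switch path described above amounts to the following, where
  a pmap_pcid_alloc() helper returns CR3_PCID_SAVE when the cached
  PCID can be reused and 0 when a new one was allocated:

	void
	pmap_activate_sw(struct thread *td)	/* sketch only */
	{
		pmap_t pmap;
		uint64_t cached;
		u_int cpuid;

		pmap = vmspace_pmap(td->td_proc->p_vmspace);
		cpuid = PCPU_GET(cpuid);
		CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
		/* Deactivation of the old pmap is omitted here. */
		if (pmap_pcid_enabled) {
			cached = pmap_pcid_alloc(pmap, cpuid);
			load_cr3(pmap->pm_cr3 |
			    pmap->pm_pcids[cpuid].pm_pcid | cached);
		} else {
			load_cr3(pmap->pm_cr3);
		}
		PCPU_SET(curpmap, pmap);
	}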
  
  Each cpu has a PCID algorithm generation count, which is saved in
  the pmap's pcpu block when a pcpu PCID is allocated.  On
  invalidation, the pmap's generation count is zeroed, which signals
  to the context switch code that the already allocated PCID is no
  longer valid.  The allocation of a new PCID then effects the TLB
  shootdown for the given cpu/address space.
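
  A minimal sketch of the generation-count allocation (again
  illustrative, not the committed code; PMAP_PCID_OVERMAX is a
  hypothetical bound for the 12-bit PCID space, while PMAP_PCID_KERN
  and the pc_pcid_next/pc_pcid_gen pcpu fields are visible in the
  diff below):

	static uint64_t
	pmap_pcid_alloc(pmap_t pmap, u_int cpuid)	/* sketch only */
	{
		uint64_t gen, next;

		gen = PCPU_GET(pcid_gen);
		if (pmap->pm_pcids[cpuid].pm_gen == gen)
			return (CR3_PCID_SAVE);	/* reuse, skip TLB flush */
		next = PCPU_GET(pcid_next);
		if (next == PMAP_PCID_OVERMAX) {
			/*
			 * Wraparound: a new generation implicitly
			 * invalidates all PCIDs handed out on this cpu
			 * during the previous generation.
			 */
			if (++gen == 0)	/* pm_gen == 0 means "invalid" */
				gen = 1;
			PCPU_SET(pcid_gen, gen);
			next = PMAP_PCID_KERN + 1;
		}
		pmap->pm_pcids[cpuid].pm_pcid = next;
		pmap->pm_pcids[cpuid].pm_gen = gen;
		PCPU_SET(pcid_next, next + 1);
		return (0);	/* new PCID, the %cr3 load will flush */
	}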
  
  The pm_save mask no longer has to be tracked, which (significantly)
  reduces the targets of the TLB shootdown IPIs.  Previously, pm_save
  was reset only on pmap_invalidate_all(), which made it accumulate the
  cpuids of all processors on which the thread was scheduled between
  full TLB shootdowns.
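
  The invalidation side, condensed from the pmap_invalidate_page()
  hunk below, zeroes the pmap's pcpu generations on the other cpus
  and sends the IPI only to the cpus in pm_active:

	cpuid = PCPU_GET(cpuid);
	if (pmap == PCPU_GET(curpmap))
		invlpg(va);
	else if (pmap_pcid_enabled)
		pmap->pm_pcids[cpuid].pm_gen = 0;
	if (pmap_pcid_enabled) {
		CPU_FOREACH(i) {
			if (cpuid != i)
				pmap->pm_pcids[i].pm_gen = 0;
		}
	}
	smp_masked_invlpg(pmap->pm_active, va);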
  
  Besides reducing the number of TLB shootdowns and removing the
  atomic updates of pm_save from the context switch code, the
  algorithm is much simpler than the maintenance of pm_save and the
  selection of the right address space in the shootdown IPI handler.
  
  Reviewed by:	alc
  Tested by:	pho
  Sponsored by:	The FreeBSD Foundation
  MFC after:	3 weeks

Modified:
  head/sys/amd64/amd64/apic_vector.S
  head/sys/amd64/amd64/cpu_switch.S
  head/sys/amd64/amd64/genassym.c
  head/sys/amd64/amd64/machdep.c
  head/sys/amd64/amd64/mp_machdep.c
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/amd64/vm_machdep.c
  head/sys/amd64/include/cpufunc.h
  head/sys/amd64/include/pcpu.h
  head/sys/amd64/include/pmap.h
  head/sys/amd64/include/smp.h
  head/sys/x86/include/specialreg.h
  head/sys/x86/xen/xen_apic.c

Modified: head/sys/amd64/amd64/apic_vector.S
==============================================================================
--- head/sys/amd64/amd64/apic_vector.S	Sat May  9 19:09:34 2015	(r282683)
+++ head/sys/amd64/amd64/apic_vector.S	Sat May  9 19:11:01 2015	(r282684)
@@ -203,30 +203,28 @@ invltlb_ret:
 	jmp	doreti_iret
 
 	SUPERALIGN_TEXT
+IDTVEC(invltlb)
+	PUSH_FRAME
+
+	call	invltlb_handler
+	jmp	invltlb_ret
+
 IDTVEC(invltlb_pcid)
 	PUSH_FRAME
 
 	call	invltlb_pcid_handler
 	jmp	invltlb_ret
 
-
-	SUPERALIGN_TEXT
-IDTVEC(invltlb)
+IDTVEC(invltlb_invpcid)
 	PUSH_FRAME
 
-	call	invltlb_handler
+	call	invltlb_invpcid_handler
 	jmp	invltlb_ret
 
 /*
  * Single page TLB shootdown
  */
 	.text
-	SUPERALIGN_TEXT
-IDTVEC(invlpg_pcid)
-	PUSH_FRAME
-
-	call	invlpg_pcid_handler
-	jmp	invltlb_ret
 
 	SUPERALIGN_TEXT
 IDTVEC(invlpg)

Modified: head/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- head/sys/amd64/amd64/cpu_switch.S	Sat May  9 19:09:34 2015	(r282683)
+++ head/sys/amd64/amd64/cpu_switch.S	Sat May  9 19:11:01 2015	(r282684)
@@ -69,16 +69,10 @@
  * %rsi = newtd
  */
 ENTRY(cpu_throw)
-	movl	PCPU(CPUID),%eax
-	testq	%rdi,%rdi
-	jz	1f
-	/* release bit from old pm_active */
-	movq	PCPU(CURPMAP),%rdx
-	LK btrl	%eax,PM_ACTIVE(%rdx)		/* clear old */
-1:
-	movq	TD_PCB(%rsi),%r8		/* newtd->td_pcb */
-	movq	PCB_CR3(%r8),%rcx		/* new address space */
-	jmp	swact
+	movq	%rsi,%r12
+	movq	%rsi,%rdi
+	call	pmap_activate_sw
+	jmp	sw1
 END(cpu_throw)
 
 /*
@@ -132,59 +126,20 @@ ctx_switch_xsave:
 	xorl	%eax,%eax
 	movq	%rax,PCPU(FPCURTHREAD)
 3:
-
 	/* Save is done.  Now fire up new thread. Leave old vmspace. */
-	movq	TD_PCB(%rsi),%r8
-
-	/* switch address space */
-	movq	PCB_CR3(%r8),%rcx
-	movq	%cr3,%rax
-	cmpq	%rcx,%rax			/* Same address space? */
-	jne	swinact
-	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
-	jmp	sw1
-swinact:
-	movl	PCPU(CPUID),%eax
-	/* Release bit from old pmap->pm_active */
-	movq	PCPU(CURPMAP),%r12
-	LK btrl	%eax,PM_ACTIVE(%r12)		/* clear old */
-	SETLK	%rdx,TD_LOCK(%rdi)		/* Release the old thread */
-swact:
-	/* Set bit in new pmap->pm_active */
-	movq	TD_PROC(%rsi),%rdx		/* newproc */
-	movq	P_VMSPACE(%rdx), %rdx
-	addq	$VM_PMAP,%rdx
-	cmpl	$-1,PM_PCID(%rdx)
-	je	1f
-	LK btsl	%eax,PM_SAVE(%rdx)
-	jnc	1f
-	btsq	$63,%rcx			/* CR3_PCID_SAVE */
-	incq	PCPU(PM_SAVE_CNT)
-1:
-	movq	%rcx,%cr3			/* new address space */
-	LK btsl	%eax,PM_ACTIVE(%rdx)		/* set new */
-	movq	%rdx,PCPU(CURPMAP)
-
-	/*
-	 * We might lose the race and other CPU might have changed
-	 * the pmap after we set our bit in pmap->pm_save.  Recheck.
-	 * Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was
-	 * modified, causing TLB flush for this pcid.
-	 */
-	btrq	$63,%rcx
-	jnc	1f
-	LK btsl	%eax,PM_SAVE(%rdx)
-	jc	1f
-	decq	PCPU(PM_SAVE_CNT)
-	movq	%rcx,%cr3
-1:
-
+	movq	%rsi,%r12
+	movq	%rdi,%r13
+	movq	%rdx,%r15
+	movq	%rsi,%rdi
+	callq	pmap_activate_sw
+	SETLK	%r15,TD_LOCK(%r13)		/* Release the old thread */
 sw1:
+	movq	TD_PCB(%r12),%r8
 #if defined(SCHED_ULE) && defined(SMP)
 	/* Wait for the new thread to become unblocked */
 	movq	$blocked_lock, %rdx
 1:
-	movq	TD_LOCK(%rsi),%rcx
+	movq	TD_LOCK(%r12),%rcx
 	cmpq	%rcx, %rdx
 	pause
 	je	1b
@@ -195,13 +150,13 @@ sw1:
 	 */
 
 	/* Skip loading user fsbase/gsbase for kthreads */
-	testl	$TDP_KTHREAD,TD_PFLAGS(%rsi)
+	testl	$TDP_KTHREAD,TD_PFLAGS(%r12)
 	jnz	do_kthread
 
 	/*
 	 * Load ldt register
 	 */
-	movq	TD_PROC(%rsi),%rcx
+	movq	TD_PROC(%r12),%rcx
 	cmpq	$0, P_MD+MD_LDT(%rcx)
 	jne	do_ldt
 	xorl	%eax,%eax
@@ -238,7 +193,7 @@ done_tss:
 	movq	%r8,PCPU(CURPCB)
 	/* Update the TSS_RSP0 pointer for the next interrupt */
 	movq	%r8,COMMON_TSS_RSP0(%rdx)
-	movq	%rsi,PCPU(CURTHREAD)		/* into next thread */
+	movq	%r12,PCPU(CURTHREAD)		/* into next thread */
 
 	/* Test if debug registers should be restored. */
 	testl	$PCB_DBREGS,PCB_FLAGS(%r8)

Modified: head/sys/amd64/amd64/genassym.c
==============================================================================
--- head/sys/amd64/amd64/genassym.c	Sat May  9 19:09:34 2015	(r282683)
+++ head/sys/amd64/amd64/genassym.c	Sat May  9 19:11:01 2015	(r282684)
@@ -71,8 +71,6 @@ __FBSDID("$FreeBSD$");
 ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
 ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
 ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
-ASSYM(PM_SAVE, offsetof(struct pmap, pm_save));
-ASSYM(PM_PCID, offsetof(struct pmap, pm_pcid));
 
 ASSYM(P_MD, offsetof(struct proc, p_md));
 ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));

Modified: head/sys/amd64/amd64/machdep.c
==============================================================================
--- head/sys/amd64/amd64/machdep.c	Sat May  9 19:09:34 2015	(r282683)
+++ head/sys/amd64/amd64/machdep.c	Sat May  9 19:11:01 2015	(r282684)
@@ -1718,7 +1718,6 @@ hammer_time(u_int64_t modulep, u_int64_t
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0;
-	thread0.td_pcb->pcb_cr3 = KPML4phys; /* PCID 0 is reserved for kernel */
 	thread0.td_frame = &proc0_tf;
 
         env = kern_getenv("kernelname");

Modified: head/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- head/sys/amd64/amd64/mp_machdep.c	Sat May  9 19:09:34 2015	(r282683)
+++ head/sys/amd64/amd64/mp_machdep.c	Sat May  9 19:11:01 2015	(r282684)
@@ -88,12 +88,9 @@ char *doublefault_stack;
 char *nmi_stack;
 
 /* Variables needed for SMP tlb shootdown. */
-vm_offset_t smp_tlb_addr2;
-struct invpcid_descr smp_tlb_invpcid;
+static vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
+static pmap_t smp_tlb_pmap;
 volatile int smp_tlb_wait;
-uint64_t pcid_cr3;
-pmap_t smp_tlb_pmap;
-extern int invpcid_works;
 
 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
 
@@ -139,14 +136,17 @@ cpu_mp_start(void)
 
 	/* Install an inter-CPU IPI for TLB invalidation */
 	if (pmap_pcid_enabled) {
-		setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT,
-		    SEL_KPL, 0);
-		setidt(IPI_INVLPG, IDTVEC(invlpg_pcid), SDT_SYSIGT,
-		    SEL_KPL, 0);
+		if (invpcid_works) {
+			setidt(IPI_INVLTLB, IDTVEC(invltlb_invpcid),
+			    SDT_SYSIGT, SEL_KPL, 0);
+		} else {
+			setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT,
+			    SEL_KPL, 0);
+		}
 	} else {
 		setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
-		setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
 	}
+	setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
 	setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
 
 	/* Install an inter-CPU IPI for cache invalidation. */
@@ -242,6 +242,9 @@ init_secondary(void)
 	pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
 	pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
 	    GUSERLDT_SEL];
+	pc->pc_curpmap = kernel_pmap;
+	pc->pc_pcid_gen = 1;
+	pc->pc_pcid_next = PMAP_PCID_KERN + 1;
 
 	/* Save the per-cpu pointer for use by the NMI handler. */
 	np->np_pcpu = (register_t) pc;
@@ -407,35 +410,8 @@ start_ap(int apic_id)
 }
 
 /*
- * Flush the TLB on all other CPU's
+ * Flush the TLB on other CPU's
  */
-static void
-smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1,
-    vm_offset_t addr2)
-{
-	u_int ncpu;
-
-	ncpu = mp_ncpus - 1;	/* does not shootdown self */
-	if (ncpu < 1)
-		return;		/* no other cpus */
-	if (!(read_rflags() & PSL_I))
-		panic("%s: interrupts disabled", __func__);
-	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_invpcid.addr = addr1;
-	if (pmap == NULL) {
-		smp_tlb_invpcid.pcid = 0;
-	} else {
-		smp_tlb_invpcid.pcid = pmap->pm_pcid;
-		pcid_cr3 = pmap->pm_cr3;
-	}
-	smp_tlb_addr2 = addr2;
-	smp_tlb_pmap = pmap;
-	atomic_store_rel_int(&smp_tlb_wait, 0);
-	ipi_all_but_self(vector);
-	while (smp_tlb_wait < ncpu)
-		ia32_pause();
-	mtx_unlock_spin(&smp_ipi_mtx);
-}
 
 static void
 smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
@@ -443,7 +419,11 @@ smp_targeted_tlb_shootdown(cpuset_t mask
 {
 	int cpu, ncpu, othercpus;
 
-	othercpus = mp_ncpus - 1;
+	othercpus = mp_ncpus - 1;	/* does not shootdown self */
+
+	/*
+	 * Check for other cpus.  Return if none.
+	 */
 	if (CPU_ISFULLSET(&mask)) {
 		if (othercpus < 1)
 			return;
@@ -452,16 +432,11 @@ smp_targeted_tlb_shootdown(cpuset_t mask
 		if (CPU_EMPTY(&mask))
 			return;
 	}
+
 	if (!(read_rflags() & PSL_I))
 		panic("%s: interrupts disabled", __func__);
 	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_invpcid.addr = addr1;
-	if (pmap == NULL) {
-		smp_tlb_invpcid.pcid = 0;
-	} else {
-		smp_tlb_invpcid.pcid = pmap->pm_pcid;
-		pcid_cr3 = pmap->pm_cr3;
-	}
+	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	smp_tlb_pmap = pmap;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
@@ -485,65 +460,39 @@ smp_targeted_tlb_shootdown(cpuset_t mask
 }
 
 void
-smp_invlpg(pmap_t pmap, vm_offset_t addr)
-{
-
-	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLPG, pmap, addr, 0);
-#ifdef COUNT_XINVLTLB_HITS
-		ipi_page++;
-#endif
-	}
-}
-
-void
-smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2)
-{
-
-	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLRNG, pmap, addr1, addr2);
-#ifdef COUNT_XINVLTLB_HITS
-		ipi_range++;
-		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
-#endif
-	}
-}
-
-void
 smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
 {
 
 	if (smp_started) {
 		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
 #ifdef COUNT_XINVLTLB_HITS
-		ipi_masked_global++;
+		ipi_global++;
 #endif
 	}
 }
 
 void
-smp_masked_invlpg(cpuset_t mask, pmap_t pmap, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, NULL, addr, 0);
 #ifdef COUNT_XINVLTLB_HITS
-		ipi_masked_page++;
+		ipi_page++;
 #endif
 	}
 }
 
 void
-smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
-    vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
 {
 
 	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1,
-		    addr2);
+		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, NULL,
+		    addr1, addr2);
 #ifdef COUNT_XINVLTLB_HITS
-		ipi_masked_range++;
-		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+		ipi_range++;
+		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
 #endif
 	}
 }
@@ -552,19 +501,9 @@ void
 smp_cache_flush(void)
 {
 
-	if (smp_started)
-		smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
-}
-
-void
-smp_invltlb(pmap_t pmap)
-{ 
-
 	if (smp_started) {
-		smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
-#ifdef COUNT_XINVLTLB_HITS
-		ipi_global++;
-#endif
+		smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL,
+		    0, 0);
 	}
 }
 
@@ -586,10 +525,10 @@ invltlb_handler(void)
 }
 
 void
-invltlb_pcid_handler(void)
+invltlb_invpcid_handler(void)
 {
-	uint64_t cr3;
-	u_int cpuid;
+	struct invpcid_descr d;
+
 #ifdef COUNT_XINVLTLB_HITS
 	xhits_gbl[PCPU_GET(cpuid)]++;
 #endif /* COUNT_XINVLTLB_HITS */
@@ -597,49 +536,45 @@ invltlb_pcid_handler(void)
 	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
-	if (smp_tlb_invpcid.pcid != (uint64_t)-1 &&
-	    smp_tlb_invpcid.pcid != 0) {
-		if (invpcid_works) {
-			invpcid(&smp_tlb_invpcid, INVPCID_CTX);
-		} else {
-			/* Otherwise reload %cr3 twice. */
-			cr3 = rcr3();
-			if (cr3 != pcid_cr3) {
-				load_cr3(pcid_cr3);
-				cr3 |= CR3_PCID_SAVE;
-			}
-			load_cr3(cr3);
-		}
-	} else {
-		invltlb_globpcid();
-	}
-	if (smp_tlb_pmap != NULL) {
-		cpuid = PCPU_GET(cpuid);
-		if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active))
-			CPU_CLR_ATOMIC(cpuid, &smp_tlb_pmap->pm_save);
-	}
-
+	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
+	d.pad = 0;
+	d.addr = 0;
+	invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
+	    INVPCID_CTX);
 	atomic_add_int(&smp_tlb_wait, 1);
 }
 
 void
-invlpg_handler(void)
+invltlb_pcid_handler(void)
 {
 #ifdef COUNT_XINVLTLB_HITS
-	xhits_pg[PCPU_GET(cpuid)]++;
+	xhits_gbl[PCPU_GET(cpuid)]++;
 #endif /* COUNT_XINVLTLB_HITS */
 #ifdef COUNT_IPIS
-	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
+	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
-	invlpg(smp_tlb_invpcid.addr);
+	if (smp_tlb_pmap == kernel_pmap) {
+		invltlb_globpcid();
+	} else {
+		/*
+		 * The current pmap might not be equal to
+		 * smp_tlb_pmap.  The clearing of the pm_gen in
+		 * pmap_invalidate_all() takes care of TLB
+		 * invalidation when switching to the pmap on this
+		 * CPU.
+		 */
+		if (PCPU_GET(curpmap) == smp_tlb_pmap) {
+			load_cr3(smp_tlb_pmap->pm_cr3 |
+			    smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid);
+		}
+	}
 	atomic_add_int(&smp_tlb_wait, 1);
 }
 
 void
-invlpg_pcid_handler(void)
+invlpg_handler(void)
 {
-	uint64_t cr3;
 #ifdef COUNT_XINVLTLB_HITS
 	xhits_pg[PCPU_GET(cpuid)]++;
 #endif /* COUNT_XINVLTLB_HITS */
@@ -647,45 +582,15 @@ invlpg_pcid_handler(void)
 	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
-	if (smp_tlb_invpcid.pcid == (uint64_t)-1) {
-		invltlb_globpcid();
-	} else if (smp_tlb_invpcid.pcid == 0) {
-		invlpg(smp_tlb_invpcid.addr);
-	} else if (invpcid_works) {
-		invpcid(&smp_tlb_invpcid, INVPCID_ADDR);
-	} else {
-		/*
-		 * PCID supported, but INVPCID is not.
-		 * Temporarily switch to the target address
-		 * space and do INVLPG.
-		 */
-		cr3 = rcr3();
-		if (cr3 != pcid_cr3)
-			load_cr3(pcid_cr3 | CR3_PCID_SAVE);
-		invlpg(smp_tlb_invpcid.addr);
-		load_cr3(cr3 | CR3_PCID_SAVE);
-	}
-
+	invlpg(smp_tlb_addr1);
 	atomic_add_int(&smp_tlb_wait, 1);
 }
 
-static inline void
-invlpg_range(vm_offset_t start, vm_offset_t end)
-{
-
-	do {
-		invlpg(start);
-		start += PAGE_SIZE;
-	} while (start < end);
-}
-
 void
 invlrng_handler(void)
 {
-	struct invpcid_descr d;
 	vm_offset_t addr;
-	uint64_t cr3;
-	u_int cpuid;
+
 #ifdef COUNT_XINVLTLB_HITS
 	xhits_rng[PCPU_GET(cpuid)]++;
 #endif /* COUNT_XINVLTLB_HITS */
@@ -693,38 +598,11 @@ invlrng_handler(void)
 	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
-	addr = smp_tlb_invpcid.addr;
-	if (pmap_pcid_enabled) {
-		if (smp_tlb_invpcid.pcid == 0) {
-			/*
-			 * kernel pmap - use invlpg to invalidate
-			 * global mapping.
-			 */
-			invlpg_range(addr, smp_tlb_addr2);
-		} else if (smp_tlb_invpcid.pcid == (uint64_t)-1) {
-			invltlb_globpcid();
-			if (smp_tlb_pmap != NULL) {
-				cpuid = PCPU_GET(cpuid);
-				if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active))
-					CPU_CLR_ATOMIC(cpuid,
-					    &smp_tlb_pmap->pm_save);
-			}
-		} else if (invpcid_works) {
-			d = smp_tlb_invpcid;
-			do {
-				invpcid(&d, INVPCID_ADDR);
-				d.addr += PAGE_SIZE;
-			} while (d.addr <= smp_tlb_addr2);
-		} else {
-			cr3 = rcr3();
-			if (cr3 != pcid_cr3)
-				load_cr3(pcid_cr3 | CR3_PCID_SAVE);
-			invlpg_range(addr, smp_tlb_addr2);
-			load_cr3(cr3 | CR3_PCID_SAVE);
-		}
-	} else {
-		invlpg_range(addr, smp_tlb_addr2);
-	}
+	addr = smp_tlb_addr1;
+	do {
+		invlpg(addr);
+		addr += PAGE_SIZE;
+	} while (addr < smp_tlb_addr2);
 
 	atomic_add_int(&smp_tlb_wait, 1);
 }

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Sat May  9 19:09:34 2015	(r282683)
+++ head/sys/amd64/amd64/pmap.c	Sat May  9 19:11:01 2015	(r282684)
@@ -273,6 +273,8 @@ pmap_modified_bit(pmap_t pmap)
 	return (mask);
 }
 
+extern	struct pcpu __pcpu[];
+
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
@@ -379,8 +381,6 @@ caddr_t CADDR1 = 0;
 
 static int pmap_flags = PMAP_PDE_SUPERPAGE;	/* flags for x86 pmaps */
 
-static struct unrhdr pcid_unr;
-static struct mtx pcid_mtx;
 int pmap_pcid_enabled = 0;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?");
@@ -827,6 +827,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 {
 	vm_offset_t va;
 	pt_entry_t *pte;
+	int i;
 
 	/*
 	 * Create an initial set of page tables to run the kernel in.
@@ -861,7 +862,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
 	kernel_pmap->pm_cr3 = KPML4phys;
 	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
-	CPU_FILL(&kernel_pmap->pm_save);	/* always superset of pm_active */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 	kernel_pmap->pm_flags = pmap_flags;
 
@@ -895,18 +895,28 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	/* Initialize TLB Context Id. */
 	TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
 	if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
-		load_cr4(rcr4() | CR4_PCIDE);
-		mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF);
-		init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx);
 		/* Check for INVPCID support */
 		invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
 		    != 0;
-		kernel_pmap->pm_pcid = 0;
-#ifndef SMP
+		for (i = 0; i < MAXCPU; i++) {
+			kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN;
+			kernel_pmap->pm_pcids[i].pm_gen = 1;
+		}
+		__pcpu[0].pc_pcid_next = PMAP_PCID_KERN + 1;
+		__pcpu[0].pc_pcid_gen = 1;
+		/*
+		 * pcpu area for APs is zeroed during AP startup.
+		 * pc_pcid_next and pc_pcid_gen are initialized by AP
+		 * during pcpu setup.
+		 */
+#ifdef SMP
+		load_cr4(rcr4() | CR4_PCIDE);
+#else
 		pmap_pcid_enabled = 0;
 #endif
-	} else
+	} else {
 		pmap_pcid_enabled = 0;
+	}
 }
 
 /*
@@ -1277,28 +1287,6 @@ pmap_update_pde_invalidate(pmap_t pmap, 
 }
 #ifdef SMP
 
-static void
-pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va)
-{
-	struct invpcid_descr d;
-	uint64_t cr3;
-
-	if (invpcid_works) {
-		d.pcid = pmap->pm_pcid;
-		d.pad = 0;
-		d.addr = va;
-		invpcid(&d, INVPCID_ADDR);
-		return;
-	}
-
-	cr3 = rcr3();
-	critical_enter();
-	load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE);
-	invlpg(va);
-	load_cr3(cr3 | CR3_PCID_SAVE);
-	critical_exit();
-}
-
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
  *
@@ -1361,8 +1349,8 @@ pmap_invalidate_ept(pmap_t pmap)
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
-	cpuset_t other_cpus;
-	u_int cpuid;
+	cpuset_t *mask;
+	u_int cpuid, i;
 
 	if (pmap_type_guest(pmap)) {
 		pmap_invalidate_ept(pmap);
@@ -1373,74 +1361,33 @@ pmap_invalidate_page(pmap_t pmap, vm_off
 	    ("pmap_invalidate_page: invalid type %d", pmap->pm_type));
 
 	sched_pin();
-	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
-		if (!pmap_pcid_enabled) {
-			invlpg(va);
-		} else {
-			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
-				if (pmap == PCPU_GET(curpmap))
-					invlpg(va);
-				else
-					pmap_invalidate_page_pcid(pmap, va);
-			} else {
-				invltlb_globpcid();
-			}
-		}
-		smp_invlpg(pmap, va);
+	if (pmap == kernel_pmap) {
+		invlpg(va);
+		mask = &all_cpus;
 	} else {
 		cpuid = PCPU_GET(cpuid);
-		other_cpus = all_cpus;
-		CPU_CLR(cpuid, &other_cpus);
-		if (CPU_ISSET(cpuid, &pmap->pm_active))
+		if (pmap == PCPU_GET(curpmap))
 			invlpg(va);
-		else if (pmap_pcid_enabled) {
-			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
-				pmap_invalidate_page_pcid(pmap, va);
-			else
-				invltlb_globpcid();
+		else if (pmap_pcid_enabled)
+			pmap->pm_pcids[cpuid].pm_gen = 0;
+		if (pmap_pcid_enabled) {
+			CPU_FOREACH(i) {
+				if (cpuid != i)
+					pmap->pm_pcids[i].pm_gen = 0;
+			}
 		}
-		if (pmap_pcid_enabled)
-			CPU_AND(&other_cpus, &pmap->pm_save);
-		else
-			CPU_AND(&other_cpus, &pmap->pm_active);
-		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invlpg(other_cpus, pmap, va);
+		mask = &pmap->pm_active;
 	}
+	smp_masked_invlpg(*mask, va);
 	sched_unpin();
 }
 
-static void
-pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
-	struct invpcid_descr d;
-	uint64_t cr3;
-	vm_offset_t addr;
-
-	if (invpcid_works) {
-		d.pcid = pmap->pm_pcid;
-		d.pad = 0;
-		for (addr = sva; addr < eva; addr += PAGE_SIZE) {
-			d.addr = addr;
-			invpcid(&d, INVPCID_ADDR);
-		}
-		return;
-	}
-
-	cr3 = rcr3();
-	critical_enter();
-	load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE);
-	for (addr = sva; addr < eva; addr += PAGE_SIZE)
-		invlpg(addr);
-	load_cr3(cr3 | CR3_PCID_SAVE);
-	critical_exit();
-}
-
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	cpuset_t other_cpus;
+	cpuset_t *mask;
 	vm_offset_t addr;
-	u_int cpuid;
+	u_int cpuid, i;
 
 	if (pmap_type_guest(pmap)) {
 		pmap_invalidate_ept(pmap);
@@ -1451,55 +1398,36 @@ pmap_invalidate_range(pmap_t pmap, vm_of
 	    ("pmap_invalidate_range: invalid type %d", pmap->pm_type));
 
 	sched_pin();
-	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
-		if (!pmap_pcid_enabled) {
-			for (addr = sva; addr < eva; addr += PAGE_SIZE)
-				invlpg(addr);
-		} else {
-			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
-				if (pmap == PCPU_GET(curpmap)) {
-					for (addr = sva; addr < eva;
-					    addr += PAGE_SIZE)
-						invlpg(addr);
-				} else {
-					pmap_invalidate_range_pcid(pmap,
-					    sva, eva);
-				}
-			} else {
-				invltlb_globpcid();
-			}
-		}
-		smp_invlpg_range(pmap, sva, eva);
+	cpuid = PCPU_GET(cpuid);
+	if (pmap == kernel_pmap) {
+		for (addr = sva; addr < eva; addr += PAGE_SIZE)
+			invlpg(addr);
+		mask = &all_cpus;
 	} else {
-		cpuid = PCPU_GET(cpuid);
-		other_cpus = all_cpus;
-		CPU_CLR(cpuid, &other_cpus);
-		if (CPU_ISSET(cpuid, &pmap->pm_active)) {
+		if (pmap == PCPU_GET(curpmap)) {
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
 		} else if (pmap_pcid_enabled) {
-			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
-				pmap_invalidate_range_pcid(pmap, sva, eva);
-			else
-				invltlb_globpcid();
+			pmap->pm_pcids[cpuid].pm_gen = 0;
 		}
-		if (pmap_pcid_enabled)
-			CPU_AND(&other_cpus, &pmap->pm_save);
-		else
-			CPU_AND(&other_cpus, &pmap->pm_active);
-		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invlpg_range(other_cpus, pmap, sva, eva);
+		if (pmap_pcid_enabled) {
+			CPU_FOREACH(i) {
+				if (cpuid != i)
+					pmap->pm_pcids[i].pm_gen = 0;
+			}
+		}
+		mask = &pmap->pm_active;
 	}
+	smp_masked_invlpg_range(*mask, sva, eva);
 	sched_unpin();
 }
 
 void
 pmap_invalidate_all(pmap_t pmap)
 {
-	cpuset_t other_cpus;
+	cpuset_t *mask;
 	struct invpcid_descr d;
-	uint64_t cr3;
-	u_int cpuid;
+	u_int cpuid, i;
 
 	if (pmap_type_guest(pmap)) {
 		pmap_invalidate_ept(pmap);
@@ -1510,60 +1438,42 @@ pmap_invalidate_all(pmap_t pmap)
 	    ("pmap_invalidate_all: invalid type %d", pmap->pm_type));
 
 	sched_pin();
-	cpuid = PCPU_GET(cpuid);
-	if (pmap == kernel_pmap ||
-	    (pmap_pcid_enabled && !CPU_CMP(&pmap->pm_save, &all_cpus)) ||
-	    !CPU_CMP(&pmap->pm_active, &all_cpus)) {
-		if (invpcid_works) {
+	if (pmap == kernel_pmap) {
+		if (pmap_pcid_enabled && invpcid_works) {
 			bzero(&d, sizeof(d));
 			invpcid(&d, INVPCID_CTXGLOB);
 		} else {
 			invltlb_globpcid();
 		}
-		if (!CPU_ISSET(cpuid, &pmap->pm_active))
-			CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
-		smp_invltlb(pmap);
+		mask = &all_cpus;
 	} else {
-		other_cpus = all_cpus;
-		CPU_CLR(cpuid, &other_cpus);
-
-		/*
-		 * This logic is duplicated in the Xinvltlb shootdown
-		 * IPI handler.
-		 */
-		if (pmap_pcid_enabled) {
-			if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
+		cpuid = PCPU_GET(cpuid);
+		if (pmap == PCPU_GET(curpmap)) {
+			if (pmap_pcid_enabled) {
 				if (invpcid_works) {
-					d.pcid = pmap->pm_pcid;
+					d.pcid = pmap->pm_pcids[cpuid].pm_pcid;
 					d.pad = 0;
 					d.addr = 0;
 					invpcid(&d, INVPCID_CTX);
 				} else {
-					cr3 = rcr3();
-					critical_enter();
-
-					/*
-					 * Bit 63 is clear, pcid TLB
-					 * entries are invalidated.
-					 */
-					load_cr3(pmap->pm_cr3);
-					load_cr3(cr3 | CR3_PCID_SAVE);
-					critical_exit();
+					load_cr3(pmap->pm_cr3 | pmap->pm_pcids
+					    [PCPU_GET(cpuid)].pm_pcid);
 				}
 			} else {
-				invltlb_globpcid();
+				invltlb();
 			}
-		} else if (CPU_ISSET(cpuid, &pmap->pm_active))
-			invltlb();
-		if (!CPU_ISSET(cpuid, &pmap->pm_active))
-			CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
-		if (pmap_pcid_enabled)
-			CPU_AND(&other_cpus, &pmap->pm_save);
-		else
-			CPU_AND(&other_cpus, &pmap->pm_active);
-		if (!CPU_EMPTY(&other_cpus))
-			smp_masked_invltlb(other_cpus, pmap);
+		} else if (pmap_pcid_enabled) {
+			pmap->pm_pcids[cpuid].pm_gen = 0;
+		}
+		if (pmap_pcid_enabled) {
+			CPU_FOREACH(i) {
+				if (cpuid != i)
+					pmap->pm_pcids[i].pm_gen = 0;
+			}
+		}
+		mask = &pmap->pm_active;
 	}
+	smp_masked_invltlb(*mask, pmap);
 	sched_unpin();
 }
 
@@ -1627,7 +1537,6 @@ pmap_update_pde(pmap_t pmap, vm_offset_t
 		active = all_cpus;
 	else {
 		active = pmap->pm_active;
-		CPU_AND_ATOMIC(&pmap->pm_save, &active);
 	}
 	if (CPU_OVERLAP(&active, &other_cpus)) { 
 		act.store = cpuid;
@@ -2205,11 +2114,9 @@ pmap_pinit0(pmap_t pmap)
 	pmap->pm_cr3 = KPML4phys;
 	pmap->pm_root.rt_root = 0;
 	CPU_ZERO(&pmap->pm_active);
-	CPU_ZERO(&pmap->pm_save);
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-	pmap->pm_pcid = pmap_pcid_enabled ? 0 : -1;
 	pmap->pm_flags = pmap_flags;
 }
 
@@ -2233,7 +2140,10 @@ pmap_pinit_type(pmap_t pmap, enum pmap_t
 
 	pml4phys = VM_PAGE_TO_PHYS(pml4pg);
 	pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys);
-	pmap->pm_pcid = -1;
+	CPU_FOREACH(i) {
+		pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
+		pmap->pm_pcids[i].pm_gen = 0;
+	}
 	pmap->pm_cr3 = ~0;	/* initialize to an invalid value */
 
 	if ((pml4pg->flags & PG_ZERO) == 0)
@@ -2260,12 +2170,6 @@ pmap_pinit_type(pmap_t pmap, enum pmap_t
 		/* install self-referential address mapping entry(s) */
 		pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) |
 		    X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
-
-		if (pmap_pcid_enabled) {
-			pmap->pm_pcid = alloc_unr(&pcid_unr);
-			if (pmap->pm_pcid != -1)
-				pmap->pm_cr3 |= pmap->pm_pcid;
-		}
 	}
 
 	pmap->pm_root.rt_root = 0;
@@ -2274,7 +2178,6 @@ pmap_pinit_type(pmap_t pmap, enum pmap_t
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 	pmap->pm_flags = flags;
 	pmap->pm_eptgen = 0;
-	CPU_ZERO(&pmap->pm_save);
 
 	return (1);
 }
@@ -2535,14 +2438,6 @@ pmap_release(pmap_t pmap)
 	KASSERT(CPU_EMPTY(&pmap->pm_active),
 	    ("releasing active pmap %p", pmap));
 
-	if (pmap_pcid_enabled) {
-		/*
-		 * Invalidate any left TLB entries, to allow the reuse
-		 * of the pcid.
-		 */
-		pmap_invalidate_all(pmap);
-	}
-
 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4));
 
 	for (i = 0; i < NKPML4E; i++)	/* KVA */
@@ -2554,8 +2449,6 @@ pmap_release(pmap_t pmap)
 	m->wire_count--;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


