Date:      Sun, 6 Oct 2013 09:37:58 +0000 (UTC)
From:      Mark Murray <markm@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r256080 - in projects/random_number_generator: lib/libvmmapi sys/amd64/amd64 sys/amd64/include sys/amd64/vmm sys/amd64/vmm/amd sys/amd64/vmm/intel sys/amd64/vmm/io sys/cam/ctl sys/conf ...
Message-ID:  <201310060937.r969bw3v038058@svn.freebsd.org>

Author: markm
Date: Sun Oct  6 09:37:57 2013
New Revision: 256080
URL: http://svnweb.freebsd.org/changeset/base/256080

Log:
  MFC - tracking commit

Deleted:
  projects/random_number_generator/sys/kern/uipc_cow.c
Modified:
  projects/random_number_generator/lib/libvmmapi/vmmapi.c
  projects/random_number_generator/lib/libvmmapi/vmmapi.h
  projects/random_number_generator/sys/amd64/amd64/machdep.c
  projects/random_number_generator/sys/amd64/amd64/pmap.c
  projects/random_number_generator/sys/amd64/amd64/trap.c
  projects/random_number_generator/sys/amd64/include/pcpu.h
  projects/random_number_generator/sys/amd64/include/pmap.h
  projects/random_number_generator/sys/amd64/include/vmm.h   (contents, props changed)
  projects/random_number_generator/sys/amd64/include/vmm_dev.h   (contents, props changed)
  projects/random_number_generator/sys/amd64/include/vmm_instruction_emul.h   (contents, props changed)
  projects/random_number_generator/sys/amd64/vmm/amd/amdv.c
  projects/random_number_generator/sys/amd64/vmm/intel/ept.c
  projects/random_number_generator/sys/amd64/vmm/intel/ept.h
  projects/random_number_generator/sys/amd64/vmm/intel/vmcs.c
  projects/random_number_generator/sys/amd64/vmm/intel/vmcs.h
  projects/random_number_generator/sys/amd64/vmm/intel/vmx.c
  projects/random_number_generator/sys/amd64/vmm/intel/vmx.h
  projects/random_number_generator/sys/amd64/vmm/intel/vmx_genassym.c
  projects/random_number_generator/sys/amd64/vmm/intel/vmx_support.S
  projects/random_number_generator/sys/amd64/vmm/io/ppt.c
  projects/random_number_generator/sys/amd64/vmm/io/ppt.h
  projects/random_number_generator/sys/amd64/vmm/vmm.c
  projects/random_number_generator/sys/amd64/vmm/vmm_dev.c
  projects/random_number_generator/sys/amd64/vmm/vmm_instruction_emul.c
  projects/random_number_generator/sys/amd64/vmm/vmm_mem.c
  projects/random_number_generator/sys/amd64/vmm/vmm_mem.h
  projects/random_number_generator/sys/cam/ctl/ctl_frontend_iscsi.c
  projects/random_number_generator/sys/conf/files
  projects/random_number_generator/sys/dev/e1000/if_igb.c
  projects/random_number_generator/sys/dev/hyperv/vmbus/hv_hv.c
  projects/random_number_generator/sys/dev/ixgbe/ixgbe.c
  projects/random_number_generator/sys/dev/virtio/network/if_vtnet.c
  projects/random_number_generator/sys/dev/xen/timer/timer.c
  projects/random_number_generator/sys/i386/include/pcpu.h
  projects/random_number_generator/sys/i386/xen/mp_machdep.c
  projects/random_number_generator/sys/i386/xen/mptable.c
  projects/random_number_generator/sys/kern/kern_malloc.c
  projects/random_number_generator/sys/x86/acpica/madt.c
  projects/random_number_generator/sys/x86/xen/hvm.c
  projects/random_number_generator/sys/x86/xen/xen_intr.c
  projects/random_number_generator/usr.sbin/bhyve/bhyverun.c
  projects/random_number_generator/usr.sbin/bhyve/pci_emul.c
  projects/random_number_generator/usr.sbin/bhyve/rtc.c
  projects/random_number_generator/usr.sbin/bhyvectl/bhyvectl.c
  projects/random_number_generator/usr.sbin/bhyveload/bhyveload.c
Directory Properties:
  projects/random_number_generator/   (props changed)
  projects/random_number_generator/lib/libvmmapi/   (props changed)
  projects/random_number_generator/sys/   (props changed)
  projects/random_number_generator/sys/amd64/vmm/   (props changed)
  projects/random_number_generator/sys/conf/   (props changed)
  projects/random_number_generator/sys/dev/hyperv/   (props changed)
  projects/random_number_generator/usr.sbin/bhyve/   (props changed)
  projects/random_number_generator/usr.sbin/bhyvectl/   (props changed)
  projects/random_number_generator/usr.sbin/bhyveload/   (props changed)

Modified: projects/random_number_generator/lib/libvmmapi/vmmapi.c
==============================================================================
--- projects/random_number_generator/lib/libvmmapi/vmmapi.c	Sun Oct  6 06:57:28 2013	(r256079)
+++ projects/random_number_generator/lib/libvmmapi/vmmapi.c	Sun Oct  6 09:37:57 2013	(r256080)
@@ -124,7 +124,8 @@ vm_destroy(struct vmctx *vm)
 }
 
 int
-vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len)
+vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
+		  int *wired)
 {
 	int error;
 	struct vm_memory_segment seg;
@@ -133,6 +134,8 @@ vm_get_memory_seg(struct vmctx *ctx, vm_
 	seg.gpa = gpa;
 	error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
 	*ret_len = seg.len;
+	if (wired != NULL)
+		*wired = seg.wired;
 	return (error);
 }
 
@@ -741,3 +744,23 @@ vcpu_reset(struct vmctx *vmctx, int vcpu
 done:
 	return (error);
 }
+
+int
+vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
+{
+	int error, i;
+	struct vm_gpa_pte gpapte;
+
+	bzero(&gpapte, sizeof(gpapte));
+	gpapte.gpa = gpa;
+
+	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);
+
+	if (error == 0) {
+		*num = gpapte.ptenum;
+		for (i = 0; i < gpapte.ptenum; i++)
+			pte[i] = gpapte.pte[i];
+	}
+
+	return (error);
+}

Modified: projects/random_number_generator/lib/libvmmapi/vmmapi.h
==============================================================================
--- projects/random_number_generator/lib/libvmmapi/vmmapi.h	Sun Oct  6 06:57:28 2013	(r256079)
+++ projects/random_number_generator/lib/libvmmapi/vmmapi.h	Sun Oct  6 09:37:57 2013	(r256080)
@@ -45,9 +45,11 @@ enum vm_mmap_style {
 int	vm_create(const char *name);
 struct vmctx *vm_open(const char *name);
 void	vm_destroy(struct vmctx *ctx);
-int	vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len);
+int	vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
+			  int *wired);
 int	vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
 void	*vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
+int	vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
 uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
 void	vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
 int	vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
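
The libvmmapi changes above extend vm_get_memory_seg() with an output
argument reporting whether the segment is wired, and add vm_get_gpa_pmap()
to fetch the nested page-table entries that translate a guest physical
address.  A minimal caller sketch, assuming only the prototypes shown
above; the four-entry PTE array (one per amd64 paging level) and the
error handling are illustrative rather than part of the commit:

#include <sys/types.h>
#include <stdint.h>
#include <stdio.h>

#include <machine/vmm.h>
#include <vmmapi.h>

static int
dump_gpa_mapping(struct vmctx *ctx, vm_paddr_t gpa)
{
	uint64_t pte[4];	/* assumes at most one PTE per paging level */
	size_t len;
	int error, i, num, wired;

	/* New form: pass a pointer to receive the wired status, or NULL. */
	error = vm_get_memory_seg(ctx, gpa, &len, &wired);
	if (error)
		return (error);
	printf("segment at %#lx: len %zu, wired %d\n", (u_long)gpa, len, wired);

	/* Walk the page-table entries backing this guest physical address. */
	error = vm_get_gpa_pmap(ctx, gpa, pte, &num);
	if (error)
		return (error);
	for (i = 0; i < num; i++)
		printf("  level %d pte: %#lx\n", i, (u_long)pte[i]);
	return (0);
}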

Modified: projects/random_number_generator/sys/amd64/amd64/machdep.c
==============================================================================
--- projects/random_number_generator/sys/amd64/amd64/machdep.c	Sun Oct  6 06:57:28 2013	(r256079)
+++ projects/random_number_generator/sys/amd64/amd64/machdep.c	Sun Oct  6 09:37:57 2013	(r256080)
@@ -1574,7 +1574,7 @@ getmemsize(caddr_t kmdp, u_int64_t first
 			/*
 			 * map page into kernel: valid, read/write,non-cacheable
 			 */
-			*pte = pa | PG_V | PG_RW | PG_N;
+			*pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
 			invltlb();
 
 			tmp = *(int *)ptr;

Modified: projects/random_number_generator/sys/amd64/amd64/pmap.c
==============================================================================
--- projects/random_number_generator/sys/amd64/amd64/pmap.c	Sun Oct  6 06:57:28 2013	(r256079)
+++ projects/random_number_generator/sys/amd64/amd64/pmap.c	Sun Oct  6 09:37:57 2013	(r256080)
@@ -76,6 +76,8 @@
  * SUCH DAMAGE.
  */
 
+#define	AMD64_NPT_AWARE
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
@@ -143,6 +145,120 @@ __FBSDID("$FreeBSD$");
 #include <machine/smp.h>
 #endif
 
+static __inline boolean_t
+pmap_emulate_ad_bits(pmap_t pmap)
+{
+
+	return ((pmap->pm_flags & PMAP_EMULATE_AD_BITS) != 0);
+}
+
+static __inline pt_entry_t
+pmap_valid_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_V;
+		break;
+	case PT_EPT:
+		if (pmap_emulate_ad_bits(pmap))
+			mask = EPT_PG_EMUL_V;
+		else
+			mask = EPT_PG_READ;
+		break;
+	default:
+		panic("pmap_valid_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+static __inline pt_entry_t
+pmap_rw_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_RW;
+		break;
+	case PT_EPT:
+		if (pmap_emulate_ad_bits(pmap))
+			mask = EPT_PG_EMUL_RW;
+		else
+			mask = EPT_PG_WRITE;
+		break;
+	default:
+		panic("pmap_rw_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+static __inline pt_entry_t
+pmap_global_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_G;
+		break;
+	case PT_EPT:
+		mask = 0;
+		break;
+	default:
+		panic("pmap_global_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+static __inline pt_entry_t
+pmap_accessed_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_A;
+		break;
+	case PT_EPT:
+		if (pmap_emulate_ad_bits(pmap))
+			mask = EPT_PG_READ;
+		else
+			mask = EPT_PG_A;
+		break;
+	default:
+		panic("pmap_accessed_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+static __inline pt_entry_t
+pmap_modified_bit(pmap_t pmap)
+{
+	pt_entry_t mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = X86_PG_M;
+		break;
+	case PT_EPT:
+		if (pmap_emulate_ad_bits(pmap))
+			mask = EPT_PG_WRITE;
+		else
+			mask = EPT_PG_M;
+		break;
+	default:
+		panic("pmap_modified_bit: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
@@ -247,6 +363,8 @@ static struct md_page *pv_table;
 pt_entry_t *CMAP1 = 0;
 caddr_t CADDR1 = 0;
 
+static int pmap_flags = PMAP_PDE_SUPERPAGE;	/* flags for x86 pmaps */
+
 static struct unrhdr pcid_unr;
 static struct mtx pcid_mtx;
 int pmap_pcid_enabled = 1;
@@ -306,12 +424,12 @@ static void pmap_fill_ptp(pt_entry_t *fi
 static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
-static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
+static void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
     struct rwlock **lockp);
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
     vm_prot_t prot);
-static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
+static void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask);
 static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
     struct spglist *free, struct rwlock **lockp);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
@@ -323,7 +441,7 @@ static boolean_t pmap_try_insert_pv_entr
     vm_page_t m, struct rwlock **lockp);
 static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
     pd_entry_t newpde);
-static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
+static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde);
 
 static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
 		struct rwlock **lockp);
@@ -416,7 +534,9 @@ static __inline pdp_entry_t *
 pmap_pdpe(pmap_t pmap, vm_offset_t va)
 {
 	pml4_entry_t *pml4e;
+	pt_entry_t PG_V;
 
+	PG_V = pmap_valid_bit(pmap);
 	pml4e = pmap_pml4e(pmap, va);
 	if ((*pml4e & PG_V) == 0)
 		return (NULL);
@@ -438,7 +558,9 @@ static __inline pd_entry_t *
 pmap_pde(pmap_t pmap, vm_offset_t va)
 {
 	pdp_entry_t *pdpe;
+	pt_entry_t PG_V;
 
+	PG_V = pmap_valid_bit(pmap);
 	pdpe = pmap_pdpe(pmap, va);
 	if (pdpe == NULL || (*pdpe & PG_V) == 0)
 		return (NULL);
@@ -460,7 +582,9 @@ static __inline pt_entry_t *
 pmap_pte(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t *pde;
+	pt_entry_t PG_V;
 
+	PG_V = pmap_valid_bit(pmap);
 	pde = pmap_pde(pmap, va);
 	if (pde == NULL || (*pde & PG_V) == 0)
 		return (NULL);
@@ -490,6 +614,8 @@ vtopte(vm_offset_t va)
 {
 	u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
 
+	KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopte on a uva/gpa 0x%0lx", va));
+
 	return (PTmap + ((va >> PAGE_SHIFT) & mask));
 }
 
@@ -498,6 +624,8 @@ vtopde(vm_offset_t va)
 {
 	u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
 
+	KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopde on a uva/gpa 0x%0lx", va));
+
 	return (PDmap + ((va >> PDRSHIFT) & mask));
 }
 
@@ -601,22 +729,24 @@ create_pagetables(vm_paddr_t *firstaddr)
 	/* XXX not fully used, underneath 2M pages */
 	pt_p = (pt_entry_t *)KPTphys;
 	for (i = 0; ptoa(i) < *firstaddr; i++)
-		pt_p[i] = ptoa(i) | PG_RW | PG_V | PG_G;
+		pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | X86_PG_G;
 
 	/* Now map the page tables at their location within PTmap */
 	pd_p = (pd_entry_t *)KPDphys;
 	for (i = 0; i < nkpt; i++)
-		pd_p[i] = (KPTphys + ptoa(i)) | PG_RW | PG_V;
+		pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V;
 
 	/* Map from zero to end of allocations under 2M pages */
 	/* This replaces some of the KPTphys entries above */
 	for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
-		pd_p[i] = (i << PDRSHIFT) | PG_RW | PG_V | PG_PS | PG_G;
+		pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS |
+		    X86_PG_G;
 
 	/* And connect up the PD to the PDP (leaving room for L4 pages) */
 	pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE));
 	for (i = 0; i < nkpdpe; i++)
-		pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | PG_RW | PG_V | PG_U;
+		pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | X86_PG_RW | X86_PG_V |
+		    PG_U;
 
 	/*
 	 * Now, set up the direct map region using 2MB and/or 1GB pages.  If
@@ -630,36 +760,36 @@ create_pagetables(vm_paddr_t *firstaddr)
 	for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) {
 		pd_p[j] = (vm_paddr_t)i << PDRSHIFT;
 		/* Preset PG_M and PG_A because demotion expects it. */
-		pd_p[j] |= PG_RW | PG_V | PG_PS | PG_G |
-		    PG_M | PG_A;
+		pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G |
+		    X86_PG_M | X86_PG_A;
 	}
 	pdp_p = (pdp_entry_t *)DMPDPphys;
 	for (i = 0; i < ndm1g; i++) {
 		pdp_p[i] = (vm_paddr_t)i << PDPSHIFT;
 		/* Preset PG_M and PG_A because demotion expects it. */
-		pdp_p[i] |= PG_RW | PG_V | PG_PS | PG_G |
-		    PG_M | PG_A;
+		pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G |
+		    X86_PG_M | X86_PG_A;
 	}
 	for (j = 0; i < ndmpdp; i++, j++) {
 		pdp_p[i] = DMPDphys + ptoa(j);
-		pdp_p[i] |= PG_RW | PG_V | PG_U;
+		pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_U;
 	}
 
 	/* And recursively map PML4 to itself in order to get PTmap */
 	p4_p = (pml4_entry_t *)KPML4phys;
 	p4_p[PML4PML4I] = KPML4phys;
-	p4_p[PML4PML4I] |= PG_RW | PG_V | PG_U;
+	p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | PG_U;
 
 	/* Connect the Direct Map slot(s) up to the PML4. */
 	for (i = 0; i < ndmpdpphys; i++) {
 		p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
-		p4_p[DMPML4I + i] |= PG_RW | PG_V | PG_U;
+		p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | PG_U;
 	}
 
 	/* Connect the KVA slots up to the PML4 */
 	for (i = 0; i < NKPML4E; i++) {
 		p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
-		p4_p[KPML4BASE + i] |= PG_RW | PG_V | PG_U;
+		p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V | PG_U;
 	}
 }
 
@@ -705,6 +835,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
 	CPU_ZERO(&kernel_pmap->pm_save);
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
+	kernel_pmap->pm_flags = pmap_flags;
 
  	/*
 	 * Initialize the global pv list lock.
@@ -948,35 +1079,131 @@ SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, de
  * Low level helper routines.....
  ***************************************************/
 
+static pt_entry_t
+pmap_swap_pat(pmap_t pmap, pt_entry_t entry)
+{
+	int x86_pat_bits = X86_PG_PTE_PAT | X86_PG_PDE_PAT;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		/* Verify that both PAT bits are not set at the same time */
+		KASSERT((entry & x86_pat_bits) != x86_pat_bits,
+		    ("Invalid PAT bits in entry %#lx", entry));
+
+		/* Swap the PAT bits if one of them is set */
+		if ((entry & x86_pat_bits) != 0)
+			entry ^= x86_pat_bits;
+		break;
+	case PT_EPT:
+		/*
+		 * Nothing to do - the memory attributes are represented
+		 * the same way for regular pages and superpages.
+		 */
+		break;
+	default:
+		panic("pmap_switch_pat_bits: bad pm_type %d", pmap->pm_type);
+	}
+
+	return (entry);
+}
+
 /*
  * Determine the appropriate bits to set in a PTE or PDE for a specified
  * caching mode.
  */
 static int
-pmap_cache_bits(int mode, boolean_t is_pde)
+pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde)
 {
 	int cache_bits, pat_flag, pat_idx;
 
 	if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0)
 		panic("Unknown caching mode %d\n", mode);
 
-	/* The PAT bit is different for PTE's and PDE's. */
-	pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
+	switch (pmap->pm_type) {
+	case PT_X86:
+		/* The PAT bit is different for PTE's and PDE's. */
+		pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT;
+
+		/* Map the caching mode to a PAT index. */
+		pat_idx = pat_index[mode];
+
+		/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
+		cache_bits = 0;
+		if (pat_idx & 0x4)
+			cache_bits |= pat_flag;
+		if (pat_idx & 0x2)
+			cache_bits |= PG_NC_PCD;
+		if (pat_idx & 0x1)
+			cache_bits |= PG_NC_PWT;
+		break;
+
+	case PT_EPT:
+		cache_bits = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(mode);
+		break;
 
-	/* Map the caching mode to a PAT index. */
-	pat_idx = pat_index[mode];
+	default:
+		panic("unsupported pmap type %d", pmap->pm_type);
+	}
 
-	/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
-	cache_bits = 0;
-	if (pat_idx & 0x4)
-		cache_bits |= pat_flag;
-	if (pat_idx & 0x2)
-		cache_bits |= PG_NC_PCD;
-	if (pat_idx & 0x1)
-		cache_bits |= PG_NC_PWT;
 	return (cache_bits);
 }
 
+static int
+pmap_cache_mask(pmap_t pmap, boolean_t is_pde)
+{
+	int mask;
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE;
+		break;
+	case PT_EPT:
+		mask = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(0x7);
+		break;
+	default:
+		panic("pmap_cache_mask: invalid pm_type %d", pmap->pm_type);
+	}
+
+	return (mask);
+}
+
+static __inline boolean_t
+pmap_ps_enabled(pmap_t pmap)
+{
+
+	return (pg_ps_enabled && (pmap->pm_flags & PMAP_PDE_SUPERPAGE) != 0);
+}
+
+static void
+pmap_update_pde_store(pmap_t pmap, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+	switch (pmap->pm_type) {
+	case PT_X86:
+		break;
+	case PT_EPT:
+		/*
+		 * XXX
+		 * This is a little bogus since the generation number is
+		 * supposed to be bumped up when a region of the address
+		 * space is invalidated in the page tables.
+		 *
+		 * In this case the old PDE entry is valid but yet we want
+		 * to make sure that any mappings using the old entry are
+		 * invalidated in the TLB.
+		 *
+		 * The reason this works as expected is because we rendezvous
+		 * "all" host cpus and force any vcpu context to exit as a
+		 * side-effect.
+		 */
+		atomic_add_acq_long(&pmap->pm_eptgen, 1);
+		break;
+	default:
+		panic("pmap_update_pde_store: bad pm_type %d", pmap->pm_type);
+	}
+	pde_store(pde, newpde);
+}
+
 /*
  * After changing the page size for the specified virtual address in the page
  * table, flush the corresponding entries from the processor's TLB.  Only the
@@ -985,8 +1212,17 @@ pmap_cache_bits(int mode, boolean_t is_p
  * The calling thread must be pinned to a processor.
  */
 static void
-pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
 {
+	pt_entry_t PG_G;
+
+	if (pmap->pm_type == PT_EPT)
+		return;
+
+	KASSERT(pmap->pm_type == PT_X86,
+	    ("pmap_update_pde_invalidate: invalid type %d", pmap->pm_type));
+
+	PG_G = pmap_global_bit(pmap);
 
 	if ((newpde & PG_PS) == 0)
 		/* Demotion: flush a specific 2MB page mapping. */
@@ -1048,12 +1284,61 @@ pmap_invalidate_page_pcid(pmap_t pmap, v
  * immutable.  The kernel page table is always active on every
  * processor.
  */
+
+/*
+ * Interrupt the cpus that are executing in the guest context.
+ * This will force the vcpu to exit and the cached EPT mappings
+ * will be invalidated by the host before the next vmresume.
+ */
+static __inline void
+pmap_invalidate_ept(pmap_t pmap)
+{
+
+	sched_pin();
+	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
+	    ("pmap_invalidate_ept: absurd pm_active"));
+
+	/*
+	 * The TLB mappings associated with a vcpu context are not
+	 * flushed each time a different vcpu is chosen to execute.
+	 *
+	 * This is in contrast with a process's vtop mappings that
+	 * are flushed from the TLB on each context switch.
+	 *
+	 * Therefore we need to do more than just a TLB shootdown on
+	 * the active cpus in 'pmap->pm_active'. To do this we keep
+	 * track of the number of invalidations performed on this pmap.
+	 *
+	 * Each vcpu keeps a cache of this counter and compares it
+	 * just before a vmresume. If the counter is out-of-date an
+	 * invept will be done to flush stale mappings from the TLB.
+	 */
+	atomic_add_acq_long(&pmap->pm_eptgen, 1);
+
+	/*
+	 * Force the vcpu to exit and trap back into the hypervisor.
+	 *
+	 * XXX this is not optimal because IPI_AST builds a trapframe
+	 * whereas all we need is an 'eoi' followed by 'iret'.
+	 */
+	ipi_selected(pmap->pm_active, IPI_AST);
+	sched_unpin();
+}
+
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 	cpuset_t other_cpus;
 	u_int cpuid;
 
+	if (pmap->pm_type == PT_EPT) {
+		pmap_invalidate_ept(pmap);
+		return;
+	}
+
+	KASSERT(pmap->pm_type == PT_X86,
+	    ("pmap_invalidate_page: invalid type %d", pmap->pm_type));
+
 	sched_pin();
 	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		if (!pmap_pcid_enabled) {
@@ -1124,6 +1409,14 @@ pmap_invalidate_range(pmap_t pmap, vm_of
 	vm_offset_t addr;
 	u_int cpuid;
 
+	if (pmap->pm_type == PT_EPT) {
+		pmap_invalidate_ept(pmap);
+		return;
+	}
+
+	KASSERT(pmap->pm_type == PT_X86,
+	    ("pmap_invalidate_range: invalid type %d", pmap->pm_type));
+
 	sched_pin();
 	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		if (!pmap_pcid_enabled) {
@@ -1175,6 +1468,14 @@ pmap_invalidate_all(pmap_t pmap)
 	uint64_t cr3;
 	u_int cpuid;
 
+	if (pmap->pm_type == PT_EPT) {
+		pmap_invalidate_ept(pmap);
+		return;
+	}
+
+	KASSERT(pmap->pm_type == PT_X86,
+	    ("pmap_invalidate_all: invalid type %d", pmap->pm_type));
+
 	sched_pin();
 	cpuid = PCPU_GET(cpuid);
 	if (pmap == kernel_pmap ||
@@ -1243,6 +1544,7 @@ pmap_invalidate_cache(void)
 
 struct pde_action {
 	cpuset_t invalidate;	/* processors that invalidate their TLB */
+	pmap_t pmap;
 	vm_offset_t va;
 	pd_entry_t *pde;
 	pd_entry_t newpde;
@@ -1255,7 +1557,7 @@ pmap_update_pde_action(void *arg)
 	struct pde_action *act = arg;
 
 	if (act->store == PCPU_GET(cpuid))
-		pde_store(act->pde, act->newpde);
+		pmap_update_pde_store(act->pmap, act->pde, act->newpde);
 }
 
 static void
@@ -1264,7 +1566,7 @@ pmap_update_pde_teardown(void *arg)
 	struct pde_action *act = arg;
 
 	if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate))
-		pmap_update_pde_invalidate(act->va, act->newpde);
+		pmap_update_pde_invalidate(act->pmap, act->va, act->newpde);
 }
 
 /*
@@ -1286,7 +1588,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t
 	cpuid = PCPU_GET(cpuid);
 	other_cpus = all_cpus;
 	CPU_CLR(cpuid, &other_cpus);
-	if (pmap == kernel_pmap)
+	if (pmap == kernel_pmap || pmap->pm_type == PT_EPT)
 		active = all_cpus;
 	else {
 		active = pmap->pm_active;
@@ -1296,6 +1598,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t
 		act.store = cpuid;
 		act.invalidate = active;
 		act.va = va;
+		act.pmap = pmap;
 		act.pde = pde;
 		act.newpde = newpde;
 		CPU_SET(cpuid, &active);
@@ -1303,9 +1606,9 @@ pmap_update_pde(pmap_t pmap, vm_offset_t
 		    smp_no_rendevous_barrier, pmap_update_pde_action,
 		    pmap_update_pde_teardown, &act);
 	} else {
-		pde_store(pde, newpde);
+		pmap_update_pde_store(pmap, pde, newpde);
 		if (CPU_ISSET(cpuid, &active))
-			pmap_update_pde_invalidate(va, newpde);
+			pmap_update_pde_invalidate(pmap, va, newpde);
 	}
 	sched_unpin();
 }
@@ -1318,8 +1621,17 @@ PMAP_INLINE void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 
-	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
-		invlpg(va);
+	switch (pmap->pm_type) {
+	case PT_X86:
+		if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+			invlpg(va);
+		break;
+	case PT_EPT:
+		pmap->pm_eptgen++;
+		break;
+	default:
+		panic("pmap_invalidate_page: unknown type: %d", pmap->pm_type);
+	}
 }
 
 PMAP_INLINE void
@@ -1327,17 +1639,35 @@ pmap_invalidate_range(pmap_t pmap, vm_of
 {
 	vm_offset_t addr;
 
-	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
-		for (addr = sva; addr < eva; addr += PAGE_SIZE)
-			invlpg(addr);
+	switch (pmap->pm_type) {
+	case PT_X86:
+		if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+			for (addr = sva; addr < eva; addr += PAGE_SIZE)
+				invlpg(addr);
+		break;
+	case PT_EPT:
+		pmap->pm_eptgen++;
+		break;
+	default:
+		panic("pmap_invalidate_range: unknown type: %d", pmap->pm_type);
+	}
 }
 
 PMAP_INLINE void
 pmap_invalidate_all(pmap_t pmap)
 {
 
-	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
-		invltlb();
+	switch (pmap->pm_type) {
+	case PT_X86:
+		if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+			invltlb();
+		break;
+	case PT_EPT:
+		pmap->pm_eptgen++;
+		break;
+	default:
+		panic("pmap_invalidate_all: unknown type %d", pmap->pm_type);
+	}
 }
 
 PMAP_INLINE void
@@ -1351,9 +1681,9 @@ static void
 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 {
 
-	pde_store(pde, newpde);
+	pmap_update_pde_store(pmap, pde, newpde);
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
-		pmap_update_pde_invalidate(va, newpde);
+		pmap_update_pde_invalidate(pmap, va, newpde);
 	else
 		CPU_ZERO(&pmap->pm_save);
 }
@@ -1455,10 +1785,11 @@ pmap_extract(pmap_t pmap, vm_offset_t va
 {
 	pdp_entry_t *pdpe;
 	pd_entry_t *pde;
-	pt_entry_t *pte;
+	pt_entry_t *pte, PG_V;
 	vm_paddr_t pa;
 
 	pa = 0;
+	PG_V = pmap_valid_bit(pmap);
 	PMAP_LOCK(pmap);
 	pdpe = pmap_pdpe(pmap, va);
 	if (pdpe != NULL && (*pdpe & PG_V) != 0) {
@@ -1493,12 +1824,14 @@ vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
 	pd_entry_t pde, *pdep;
-	pt_entry_t pte;
+	pt_entry_t pte, PG_RW, PG_V;
 	vm_paddr_t pa;
 	vm_page_t m;
 
 	pa = 0;
 	m = NULL;
+	PG_RW = pmap_rw_bit(pmap);
+	PG_V = pmap_valid_bit(pmap);
 	PMAP_LOCK(pmap);
 retry:
 	pdep = pmap_pde(pmap, va);
@@ -1571,16 +1904,18 @@ pmap_kenter(vm_offset_t va, vm_paddr_t p
 	pt_entry_t *pte;
 
 	pte = vtopte(va);
-	pte_store(pte, pa | PG_RW | PG_V | PG_G);
+	pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G);
 }
 
 static __inline void
 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 {
 	pt_entry_t *pte;
+	int cache_bits;
 
 	pte = vtopte(va);
-	pte_store(pte, pa | PG_RW | PG_V | PG_G | pmap_cache_bits(mode, 0));
+	cache_bits = pmap_cache_bits(kernel_pmap, mode, 0);
+	pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G | cache_bits);
 }
 
 /*
@@ -1629,20 +1964,22 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
 {
 	pt_entry_t *endpte, oldpte, pa, *pte;
 	vm_page_t m;
+	int cache_bits;
 
 	oldpte = 0;
 	pte = vtopte(sva);
 	endpte = pte + count;
 	while (pte < endpte) {
 		m = *ma++;
-		pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
-		if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) {
+		cache_bits = pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0);
+		pa = VM_PAGE_TO_PHYS(m) | cache_bits;
+		if ((*pte & (PG_FRAME | X86_PG_PTE_CACHE)) != pa) {
 			oldpte |= *pte;
-			pte_store(pte, pa | PG_G | PG_RW | PG_V);
+			pte_store(pte, pa | X86_PG_G | X86_PG_RW | X86_PG_V);
 		}
 		pte++;
 	}
-	if (__predict_false((oldpte & PG_V) != 0))
+	if (__predict_false((oldpte & X86_PG_V) != 0))
 		pmap_invalidate_range(kernel_pmap, sva, sva + count *
 		    PAGE_SIZE);
 }
@@ -1841,6 +2178,7 @@ pmap_pinit0(pmap_t pmap)
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 	pmap->pm_pcid = pmap_pcid_enabled ? 0 : -1;
+	pmap->pm_flags = pmap_flags;
 }
 
 /*
@@ -1848,9 +2186,10 @@ pmap_pinit0(pmap_t pmap)
  * such as one in a vmspace structure.
  */
 int
-pmap_pinit(pmap_t pmap)
+pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
 {
 	vm_page_t pml4pg;
+	vm_paddr_t pml4phys;
 	int i;
 
 	/*
@@ -1860,41 +2199,61 @@ pmap_pinit(pmap_t pmap)
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
 		VM_WAIT;
 
-	pmap->pm_cr3 = VM_PAGE_TO_PHYS(pml4pg);
-	pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pmap->pm_cr3);
+	pml4phys = VM_PAGE_TO_PHYS(pml4pg);
+	pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys);
+	pmap->pm_pcid = -1;
+	pmap->pm_cr3 = ~0;	/* initialize to an invalid value */
 
 	if ((pml4pg->flags & PG_ZERO) == 0)
 		pagezero(pmap->pm_pml4);
 
-	/* Wire in kernel global address entries. */
-	for (i = 0; i < NKPML4E; i++) {
-		pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + (i << PAGE_SHIFT)) |
-		    PG_RW | PG_V | PG_U;
-	}
-	for (i = 0; i < ndmpdpphys; i++) {
-		pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + (i << PAGE_SHIFT)) |
-		    PG_RW | PG_V | PG_U;
-	}
+	/*
+	 * Do not install the host kernel mappings in the nested page
+	 * tables. These mappings are meaningless in the guest physical
+	 * address space.
+	 */
+	if ((pmap->pm_type = pm_type) == PT_X86) {
+		pmap->pm_cr3 = pml4phys;
+
+		/* Wire in kernel global address entries. */
+		for (i = 0; i < NKPML4E; i++) {
+			pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa(i)) |
+			    X86_PG_RW | X86_PG_V | PG_U;
+		}
+		for (i = 0; i < ndmpdpphys; i++) {
+			pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) |
+			    X86_PG_RW | X86_PG_V | PG_U;
+		}
+
+		/* install self-referential address mapping entry(s) */
+		pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) |
+		    X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
 
-	/* install self-referential address mapping entry(s) */
-	pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M;
+		if (pmap_pcid_enabled) {
+			pmap->pm_pcid = alloc_unr(&pcid_unr);
+			if (pmap->pm_pcid != -1)
+				pmap->pm_cr3 |= pmap->pm_pcid;
+		}
+	}
 
 	pmap->pm_root.rt_root = 0;
 	CPU_ZERO(&pmap->pm_active);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-	if (pmap_pcid_enabled) {
-		pmap->pm_pcid = alloc_unr(&pcid_unr);
-		if (pmap->pm_pcid != -1)
-			pmap->pm_cr3 |= pmap->pm_pcid;
-	} else {
-		pmap->pm_pcid = -1;
-	}
+	pmap->pm_flags = flags;
+	pmap->pm_eptgen = 0;
 	CPU_ZERO(&pmap->pm_save);
 
 	return (1);
 }
 
+int
+pmap_pinit(pmap_t pmap)
+{
+
+	return (pmap_pinit_type(pmap, PT_X86, pmap_flags));
+}
+
 /*
  * This routine is called if the desired page table page does not exist.
  *
@@ -1910,9 +2269,15 @@ static vm_page_t
 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 {
 	vm_page_t m, pdppg, pdpg;
+	pt_entry_t PG_A, PG_M, PG_RW, PG_V;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
+	PG_A = pmap_accessed_bit(pmap);
+	PG_M = pmap_modified_bit(pmap);
+	PG_V = pmap_valid_bit(pmap);
+	PG_RW = pmap_rw_bit(pmap);
+
 	/*
 	 * Allocate a page table page.
 	 */
@@ -2040,9 +2405,11 @@ static vm_page_t
 pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
 	vm_pindex_t pdpindex, ptepindex;
-	pdp_entry_t *pdpe;
+	pdp_entry_t *pdpe, PG_V;
 	vm_page_t pdpg;
 
+	PG_V = pmap_valid_bit(pmap);
+
 retry:
 	pdpe = pmap_pdpe(pmap, va);
 	if (pdpe != NULL && (*pdpe & PG_V) != 0) {
@@ -2064,9 +2431,11 @@ static vm_page_t
 pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
 	vm_pindex_t ptepindex;
-	pd_entry_t *pd;
+	pd_entry_t *pd, PG_V;
 	vm_page_t m;
 
+	PG_V = pmap_valid_bit(pmap);
+
 	/*
 	 * Calculate pagetable page index
 	 */
@@ -2140,7 +2509,7 @@ pmap_release(pmap_t pmap)
 		pmap_invalidate_all(pmap);
 	}
 
-	m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
+	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4));
 
 	for (i = 0; i < NKPML4E; i++)	/* KVA */
 		pmap->pm_pml4[KPML4BASE + i] = 0;
@@ -2211,7 +2580,7 @@ pmap_growkernel(vm_offset_t addr)
 		addr = kernel_map->max_offset;
 	while (kernel_vm_end < addr) {
 		pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
-		if ((*pdpe & PG_V) == 0) {
+		if ((*pdpe & X86_PG_V) == 0) {
 			/* We need a new PDP entry */
 			nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT,
 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
@@ -2221,12 +2590,12 @@ pmap_growkernel(vm_offset_t addr)
 			if ((nkpg->flags & PG_ZERO) == 0)
 				pmap_zero_page(nkpg);
 			paddr = VM_PAGE_TO_PHYS(nkpg);
-			*pdpe = (pdp_entry_t)
-				(paddr | PG_V | PG_RW | PG_A | PG_M);
+			*pdpe = (pdp_entry_t)(paddr | X86_PG_V | X86_PG_RW |
+			    X86_PG_A | X86_PG_M);
 			continue; /* try again */
 		}
 		pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
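
The pmap.c changes above replace conventional TLB shootdowns for EPT
pmaps with a generation counter: invalidation bumps pm_eptgen and IPIs
the CPUs running guest context, and each vcpu is expected to compare a
cached copy of the counter just before vmresume, flushing stale guest
mappings if it is out of date.  A rough, hypothetical sketch of that
vcpu-side check; cached_eptgen, eptp_for() and flush_guest_mappings()
are stand-ins, not code from this commit (the hypervisor side is not
shown in this truncated diff):

static void
vcpu_sync_eptgen(pmap_t pmap, long *cached_eptgen)
{
	long gen;

	/* A real implementation would want an acquire load here. */
	gen = pmap->pm_eptgen;
	if (*cached_eptgen != gen) {
		/* Flush guest-physical translations, e.g. via invept. */
		flush_guest_mappings(eptp_for(pmap));
		*cached_eptgen = gen;
	}
}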


