Date:      Wed, 18 Dec 2013 23:39:42 +0000 (UTC)
From:      Peter Grehan <grehan@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r259579 - in projects/bhyve_svm/sys/amd64: amd64 vmm vmm/amd
Message-ID:  <201312182339.rBINdg0w027032@svn.freebsd.org>

Author: grehan
Date: Wed Dec 18 23:39:42 2013
New Revision: 259579
URL: http://svnweb.freebsd.org/changeset/base/259579

Log:
  Enable memory overcommit for AMD processors.
  
   - No emulation of A/D bits is required, since AMD-V RVI
  supports A/D bits in hardware.
   - Enable pmap PT_RVI support (w/o PAT), which is required for
  memory overcommit support.
   - Other minor fixes:
   * Make use of the VMCB EXITINTINFO field. If a #VMEXIT happens
  while delivering an interrupt, EXITINTINFO has all the details
  that bhyve needs to inject the same interrupt (see the sketch
  following this log).
   * The SVM h/w decode assist code was incomplete; removed for now.
   * Some minor code clean-up (more coming).
  
  Submitted by:	Anish Gupta (akgupt3@gmail.com)
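
  For reference, APMv2 specifies that EXITINTINFO has the same layout
  as the EVENTINJ field, so re-injecting an event that was cut short
  by a #VMEXIT amounts to copying one field into the other.  A minimal
  sketch in the spirit of the svm.c change below; the field and macro
  names (exitintinfo, eventinj, VMCB_EXITINTINFO_VALID) are
  illustrative assumptions rather than the exact identifiers in
  vmcb.h:

	static void
	svm_reinject_event(struct svm_softc *svm_sc, int vcpu)
	{
		struct vmcb_ctrl *ctrl;
		uint64_t intinfo;

		ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
		intinfo = ctrl->exitintinfo;	/* event in flight at #VMEXIT */

		/* Bit 31 is the 'valid' bit in both field layouts. */
		if ((intinfo & VMCB_EXITINTINFO_VALID) == 0)
			return;

		/* Identical layouts: replay the pending event verbatim. */
		ctrl->eventinj = intinfo;
		vmm_stat_incr(svm_sc->vm, vcpu, VCPU_EXITINTINFO, 1);
	}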

Modified:
  projects/bhyve_svm/sys/amd64/amd64/pmap.c
  projects/bhyve_svm/sys/amd64/vmm/amd/amdv.c
  projects/bhyve_svm/sys/amd64/vmm/amd/npt.c
  projects/bhyve_svm/sys/amd64/vmm/amd/npt.h
  projects/bhyve_svm/sys/amd64/vmm/amd/svm.c
  projects/bhyve_svm/sys/amd64/vmm/amd/svm_softc.h
  projects/bhyve_svm/sys/amd64/vmm/amd/vmcb.c
  projects/bhyve_svm/sys/amd64/vmm/amd/vmcb.h
  projects/bhyve_svm/sys/amd64/vmm/vmm.c
  projects/bhyve_svm/sys/amd64/vmm/vmm_instruction_emul.c
  projects/bhyve_svm/sys/amd64/vmm/vmm_msr.c

Modified: projects/bhyve_svm/sys/amd64/amd64/pmap.c
==============================================================================
--- projects/bhyve_svm/sys/amd64/amd64/pmap.c	Wed Dec 18 22:31:53 2013	(r259578)
+++ projects/bhyve_svm/sys/amd64/amd64/pmap.c	Wed Dec 18 23:39:42 2013	(r259579)
@@ -146,6 +146,13 @@ __FBSDID("$FreeBSD$");
 #endif
 
 static __inline boolean_t
+pmap_type_guest(pmap_t pmap)
+{
+
+	return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI));
+}
+
+static __inline boolean_t
 pmap_emulate_ad_bits(pmap_t pmap)
 {
 
@@ -159,6 +166,7 @@ pmap_valid_bit(pmap_t pmap)
 
 	switch (pmap->pm_type) {
 	case PT_X86:
+	case PT_RVI:
 		mask = X86_PG_V;
 		break;
 	case PT_EPT:
@@ -181,6 +189,7 @@ pmap_rw_bit(pmap_t pmap)
 
 	switch (pmap->pm_type) {
 	case PT_X86:
+	case PT_RVI:
 		mask = X86_PG_RW;
 		break;
 	case PT_EPT:
@@ -205,6 +214,7 @@ pmap_global_bit(pmap_t pmap)
 	case PT_X86:
 		mask = X86_PG_G;
 		break;
+	case PT_RVI:
 	case PT_EPT:
 		mask = 0;
 		break;
@@ -222,6 +232,7 @@ pmap_accessed_bit(pmap_t pmap)
 
 	switch (pmap->pm_type) {
 	case PT_X86:
+	case PT_RVI:
 		mask = X86_PG_A;
 		break;
 	case PT_EPT:
@@ -244,6 +255,7 @@ pmap_modified_bit(pmap_t pmap)
 
 	switch (pmap->pm_type) {
 	case PT_X86:
+	case PT_RVI:
 		mask = X86_PG_M;
 		break;
 	case PT_EPT:
@@ -1094,6 +1106,9 @@ pmap_swap_pat(pmap_t pmap, pt_entry_t en
 		if ((entry & x86_pat_bits) != 0)
 			entry ^= x86_pat_bits;
 		break;
+	case PT_RVI:
+		/* XXX: PAT support. */
+		break;
 	case PT_EPT:
 		/*
 		 * Nothing to do - the memory attributes are represented
@@ -1137,6 +1152,11 @@ pmap_cache_bits(pmap_t pmap, int mode, b
 			cache_bits |= PG_NC_PWT;
 		break;
 
+	case PT_RVI:
+		/* XXX: PAT support. */
+		cache_bits = 0;
+		break;
+
 	case PT_EPT:
 		cache_bits = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(mode);
 		break;
@@ -1157,6 +1177,10 @@ pmap_cache_mask(pmap_t pmap, boolean_t i
 	case PT_X86:
 		mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE;
 		break;
+	case PT_RVI:
+		/* XXX: PAT support. */
+		mask = 0;
+		break;
 	case PT_EPT:
 		mask = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(0x7);
 		break;
@@ -1181,6 +1205,7 @@ pmap_update_pde_store(pmap_t pmap, pd_en
 	switch (pmap->pm_type) {
 	case PT_X86:
 		break;
+	case PT_RVI:
 	case PT_EPT:
 		/*
 		 * XXX
@@ -1216,9 +1241,9 @@ pmap_update_pde_invalidate(pmap_t pmap, 
 {
 	pt_entry_t PG_G;
 
-	if (pmap->pm_type == PT_EPT)
+	if (pmap_type_guest(pmap))
 		return;
-
+	
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_update_pde_invalidate: invalid type %d", pmap->pm_type));
 
@@ -1331,11 +1356,11 @@ pmap_invalidate_page(pmap_t pmap, vm_off
 	cpuset_t other_cpus;
 	u_int cpuid;
 
-	if (pmap->pm_type == PT_EPT) {
+	if (pmap_type_guest(pmap)) {
 		pmap_invalidate_ept(pmap);
 		return;
 	}
-
+	
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_page: invalid type %d", pmap->pm_type));
 
@@ -1409,7 +1434,7 @@ pmap_invalidate_range(pmap_t pmap, vm_of
 	vm_offset_t addr;
 	u_int cpuid;
 
-	if (pmap->pm_type == PT_EPT) {
+	if (pmap_type_guest(pmap)) {
 		pmap_invalidate_ept(pmap);
 		return;
 	}
@@ -1468,7 +1493,7 @@ pmap_invalidate_all(pmap_t pmap)
 	uint64_t cr3;
 	u_int cpuid;
 
-	if (pmap->pm_type == PT_EPT) {
+	if (pmap_type_guest(pmap)) {
 		pmap_invalidate_ept(pmap);
 		return;
 	}
@@ -1588,7 +1613,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t
 	cpuid = PCPU_GET(cpuid);
 	other_cpus = all_cpus;
 	CPU_CLR(cpuid, &other_cpus);
-	if (pmap == kernel_pmap || pmap->pm_type == PT_EPT)
+	if (pmap == kernel_pmap || pmap_type_guest(pmap)) 
 		active = all_cpus;
 	else {
 		active = pmap->pm_active;
@@ -1626,6 +1651,7 @@ pmap_invalidate_page(pmap_t pmap, vm_off
 		if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 			invlpg(va);
 		break;
+	case PT_RVI:
 	case PT_EPT:
 		pmap->pm_eptgen++;
 		break;
@@ -1645,6 +1671,7 @@ pmap_invalidate_range(pmap_t pmap, vm_of
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
 		break;
+	case PT_RVI:
 	case PT_EPT:
 		pmap->pm_eptgen++;
 		break;
@@ -1662,6 +1689,7 @@ pmap_invalidate_all(pmap_t pmap)
 		if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 			invltlb();
 		break;
+	case PT_RVI:
 	case PT_EPT:
 		pmap->pm_eptgen++;
 		break;
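
  Note the pattern in the three pmap_invalidate_* variants above: for a
  guest pmap (PT_RVI or PT_EPT) there is no host TLB entry to shoot
  down, so the routines only bump pm_eptgen and the flush happens
  lazily at the next VM entry.  A minimal sketch of that consumer side,
  assuming the VM-entry path keeps a per-vcpu copy of the generation
  (the names below are illustrative, not bhyve's actual identifiers):

	static void
	guest_tlb_flush_if_stale(pmap_t pmap, long *vcpu_eptgen,
	    struct vmcb_ctrl *ctrl)
	{
		/*
		 * pmap_invalidate_*() bumped pm_eptgen; ask the hardware
		 * to flush this guest's TLB entries (on SVM, via the VMCB
		 * TLB control field) and record the generation we synced.
		 */
		if (*vcpu_eptgen != pmap->pm_eptgen) {
			*vcpu_eptgen = pmap->pm_eptgen;
			ctrl->tlb_ctrl = VMCB_TLB_FLUSH_GUEST;	/* illustrative */
		}
	}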

Modified: projects/bhyve_svm/sys/amd64/vmm/amd/amdv.c
==============================================================================
--- projects/bhyve_svm/sys/amd64/vmm/amd/amdv.c	Wed Dec 18 22:31:53 2013	(r259578)
+++ projects/bhyve_svm/sys/amd64/vmm/amd/amdv.c	Wed Dec 18 23:39:42 2013	(r259579)
@@ -38,136 +38,6 @@ __FBSDID("$FreeBSD$");
 #include "io/iommu.h"
 
 static int
-amdv_init(void)
-{
-
-	printf("amdv_init: not implemented\n");
-	return (ENXIO);
-}
-
-static int
-amdv_cleanup(void)
-{
-
-	printf("amdv_cleanup: not implemented\n");
-	return (ENXIO);
-}
-
-static void *
-amdv_vminit(struct vm *vm, struct pmap *pmap)
-{
-
-	printf("amdv_vminit: not implemented\n");
-	return (NULL);
-}
-
-static int
-amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap)
-{
-
-	printf("amdv_vmrun: not implemented\n");
-	return (ENXIO);
-}
-
-static void
-amdv_vmcleanup(void *arg)
-{
-
-	printf("amdv_vmcleanup: not implemented\n");
-	return;
-}
-
-static int
-amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
-{
-	
-	printf("amdv_getreg: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val)
-{
-	
-	printf("amdv_setreg: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
-{
-
-	printf("amdv_get_desc: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_setdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
-{
-
-	printf("amdv_get_desc: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_inject_event(void *vmi, int vcpu, int type, int vector,
-		  uint32_t error_code, int error_code_valid)
-{
-
-	printf("amdv_inject_event: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_getcap(void *arg, int vcpu, int type, int *retval)
-{
-
-	printf("amdv_getcap: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_setcap(void *arg, int vcpu, int type, int val)
-{
-
-	printf("amdv_setcap: not implemented\n");
-	return (EINVAL);
-}
-
-static struct vmspace *
-amdv_vmspace_alloc(vm_offset_t min, vm_offset_t max)
-{
-
-	printf("amdv_vmspace_alloc: not implemented\n");
-	return (NULL);
-}
-
-static void
-amdv_vmspace_free(struct vmspace *vmspace)
-{
-
-	printf("amdv_vmspace_free: not implemented\n");
-	return;
-}
-
-struct vmm_ops vmm_ops_amd = {
-	amdv_init,
-	amdv_cleanup,
-	amdv_vminit,
-	amdv_vmrun,
-	amdv_vmcleanup,
-	amdv_getreg,
-	amdv_setreg,
-	amdv_getdesc,
-	amdv_setdesc,
-	amdv_inject_event,
-	amdv_getcap,
-	amdv_setcap,
-	amdv_vmspace_alloc,
-	amdv_vmspace_free,
-};
-
-static int
 amd_iommu_init(void)
 {
 

Modified: projects/bhyve_svm/sys/amd64/vmm/amd/npt.c
==============================================================================
--- projects/bhyve_svm/sys/amd64/vmm/amd/npt.c	Wed Dec 18 22:31:53 2013	(r259578)
+++ projects/bhyve_svm/sys/amd64/vmm/amd/npt.c	Wed Dec 18 23:39:42 2013	(r259579)
@@ -28,11 +28,13 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/kernel.h>
 #include <sys/systm.h>
-#include <sys/malloc.h>
+#include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
+#include <vm/vm_extern.h>
 
 #include <machine/pmap.h>
 #include <machine/md_var.h>
@@ -44,268 +46,46 @@ __FBSDID("$FreeBSD$");
 #include "svm_softc.h"
 #include "npt.h"
 
-/*
- * "Nested Paging" is an optional SVM feature that provides two levels of
- * address translation, thus eliminating the need for the VMM to maintain
- * shadow page tables.
- *
- * Documented in APMv2, section 15.25, Nested Paging.
- */
-
-#define PAGE_4KB		(4 * 1024)
-#define PAGE_2MB		(2 * 1024 * 1024UL)
-#define PAGE_1GB		(1024 * 1024 * 1024UL)
-
-#define GPA_UNMAPPED		((vm_paddr_t)~0)
-
-/* Get page entry to physical address. */
-#define PTE2PA(x)		((uint64_t)(x) & ~PAGE_MASK)
-
-MALLOC_DECLARE(M_SVM);
-
-static uint64_t svm_npt_create(pml4_entry_t *pml4, vm_paddr_t gpa, 
-				vm_paddr_t hpa, vm_memattr_t attr, 
-				int prot, uint64_t size);
-
-static const int PT_INDEX_MASK = 0x1FF;
-static const int PT_SHIFT = 9;
-
-/*
- * Helper function to create nested page table entries for a page
- * of size 1GB, 2MB or 4KB.
- *
- * Starting from PML4 create a PDPTE, PDE or PTE depending on 'pg_size'
- * value of 1GB, 2MB or 4KB respectively.
- *
- * Return size of the mapping created on success and 0 on failure.
- *
- * XXX: NPT PAT settings. 
- */
-static  uint64_t
-svm_npt_create(pml4_entry_t * pml4, vm_paddr_t gpa, vm_paddr_t hpa,
-    		vm_memattr_t attr, int prot, uint64_t pg_size)
-{
-	uint64_t *pt, *page, pa;
-	pt_entry_t mode;
-	int shift, index;
-
-	KASSERT(pg_size, ("Size of page must be 1GB, 2MB or 4KB"));
-	if (hpa & (pg_size - 1)) {
-		ERR("HPA(0x%lx) is not aligned, size:0x%lx\n", hpa, pg_size);
-		return (0);
-	}
-
-	if (gpa & (pg_size - 1)) {
-		ERR("GPA(0x%lx) is not aligned, size (0x%lx)\n", gpa, pg_size);
-		return (0);
-	}
-
-	/* Find out mode bits for PTE */
-	mode = PG_U | PG_V;
-	if (prot & VM_PROT_WRITE)
-		mode |= PG_RW;
-	if ((prot & VM_PROT_EXECUTE) == 0) 	
-		mode |= pg_nx;
-		
-	pt = (uint64_t *)pml4;
-	shift = PML4SHIFT;
-
-	while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) {
-		/* Get PDP, PD or PT index from guest physical address. */
-		index = (gpa >> shift) & PT_INDEX_MASK;
-
-		/* If page entry is missing, allocate new page for table.*/
-		if (pt[index] == 0) {
-			page = malloc(PAGE_SIZE, M_SVM, M_WAITOK | M_ZERO);
-			pt[index] = vtophys(page) | mode;
-		}
-
-		pa = PTE2PA(pt[index]);;
-		pt = (uint64_t *)PHYS_TO_DMAP(pa);
-		shift -= PT_SHIFT;
-	}
-
-	/* Create leaf entry mapping. */
-	index = (gpa >> shift) & PT_INDEX_MASK;
-	
-	if (prot != VM_PROT_NONE) {
-		pt[index] = hpa | mode;
-		pt[index] |= (pg_size > PAGE_SIZE) ? PG_PS : 0;
-	} else
-		pt[index] = 0;
-	
-	return (1UL << shift);
-}
-
-/*
- * Map guest physical address to host physical address.
- */
-int
-svm_npt_vmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
-	size_t size, vm_memattr_t attr, int prot, boolean_t spok)
-{
-	pml4_entry_t *pml4;
-	struct svm_softc *svm_sc;
-	uint64_t len, mapped, pg_size;
-
-	svm_sc = arg;
-	pml4 = svm_sc->np_pml4;
-
-	pg_size = PAGE_4KB;
-	if (spok) {
-		pg_size = PAGE_2MB;
-		if (amd_feature & AMDID_PAGE1GB)
-			pg_size = PAGE_1GB;
-	}
-
-	/* Compute the largest page mapping that can be used */
-	while (pg_size > PAGE_4KB) {
-		if (size >= pg_size &&
-		    (gpa & (pg_size - 1)) == 0 &&
-		    (hpa & (pg_size - 1)) == 0) {
-			break;
-		}
-		pg_size >>= PT_SHIFT;
-	}
-
-	len = 0;
-	while (len < size) {
-		mapped = svm_npt_create(pml4, gpa + len, hpa + len, attr, prot,
-					pg_size);
-		len += mapped;
-	}
-
-	return (0);
-}
-
-/*
- * Get HPA for a given GPA.
- */
-vm_paddr_t
-svm_npt_vmmap_get(void *arg, vm_paddr_t gpa)
-{
-	struct svm_softc *svm_sc;
-	pml4_entry_t *pml4;
-	uint64_t *pt, pa, hpa, pgmask;
-	int shift, index;
-
-	svm_sc = arg;
-	pml4 = svm_sc->np_pml4;
-
-	pt = (uint64_t *)pml4;
-	shift = PML4SHIFT;
-	
-	while (shift > PAGE_SHIFT) {
-		 /* Get PDP, PD or PT index from GPA */
-		index = (gpa >> shift) & PT_INDEX_MASK;
-		if (pt[index] == 0) {
-			ERR("No entry for GPA:0x%lx.", gpa);
-			return (GPA_UNMAPPED);
-		}
-
-		if (pt[index] & PG_PS) {
-			break;
-		}
-
-		pa = PTE2PA(pt[index]);;
-		pt = (uint64_t *)PHYS_TO_DMAP(pa);
-		shift -= PT_SHIFT;
-	}
-
-	index = (gpa >> shift) & PT_INDEX_MASK;
-	if (pt[index] == 0) {
-		ERR("No mapping for GPA:0x%lx.\n", gpa);
-		return (GPA_UNMAPPED);
-	}
-
-	/* Add GPA offset to HPA */
-	pgmask = (1UL << shift) - 1;
-	hpa = (PTE2PA(pt[index]) & ~pgmask) | (gpa & pgmask);
-
-	return (hpa);
-}
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, npt, CTLFLAG_RW, NULL, NULL);
 
+static int npt_flags;
+SYSCTL_INT(_hw_vmm_npt, OID_AUTO, pmap_flags, CTLFLAG_RD,
+	&npt_flags, 0, NULL);
 /*
  * AMD nested page table init.
  */
 int
 svm_npt_init(void)
 {
+	int enable_superpage = 1;
+
+	TUNABLE_INT_FETCH("hw.vmm.npt.enable_superpage", &enable_superpage);
+	if (enable_superpage)
+		npt_flags |= PMAP_PDE_SUPERPAGE; 
 	
 	return (0);
 }
 
-/*
- * Free Page Table page.
- */
-static void
-free_pt(pd_entry_t pde)
-{
-	pt_entry_t *pt;
 
-	pt = (pt_entry_t *)PHYS_TO_DMAP(PTE2PA(pde));
-	free(pt, M_SVM);
-}
 
-/*
- * Free Page Directory page.
- */
-static void
-free_pd(pdp_entry_t pdpe)
+static int
+npt_pinit(pmap_t pmap)
 {
-	pd_entry_t *pd;
-	int i;
-
-	pd = (pd_entry_t *)PHYS_TO_DMAP(PTE2PA(pdpe));
-	for (i = 0; i < NPDEPG; i++) {
-		/* Skip not-present or superpage entries */
-		if ((pd[i] == 0) || (pd[i] & PG_PS))
-			continue;
 
-		free_pt(pd[i]);
-	}
-
-	free(pd, M_SVM);
+	return (pmap_pinit_type(pmap, PT_RVI, npt_flags));
 }
 
-/*
- * Free Page Directory Pointer page.
- */
-static void
-free_pdp(pml4_entry_t pml4e)
+struct vmspace *
+svm_npt_alloc(vm_offset_t min, vm_offset_t max)
 {
-	pdp_entry_t *pdp;
-	int i;
-
-	pdp = (pdp_entry_t *)PHYS_TO_DMAP(PTE2PA(pml4e));
-	for (i = 0; i < NPDPEPG; i++) {
-		/* Skip not-present or superpage entries */
-		if ((pdp[i] == 0) || (pdp[i] & PG_PS))
-			continue;
-
-		free_pd(pdp[i]);
-	}
-
-	free(pdp, M_SVM);
+	
+	return (vmspace_alloc(min, max, npt_pinit));
 }
 
-/*
- * Free the guest's nested page table.
- */
-int
-svm_npt_cleanup(struct svm_softc *svm_sc)
+void
+svm_npt_free(struct vmspace *vmspace)
 {
-	pml4_entry_t *pml4;
-	int i;
 
-	pml4 = svm_sc->np_pml4;
-
-	for (i = 0; i < NPML4EPG; i++) {
-		if (pml4[i] != 0) {
-			free_pdp(pml4[i]);
-			pml4[i] = 0;
-		}
-	}
-
-	return (0);
+	vmspace_free(vmspace);
 }
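
  The superpage policy is now driven by a loader tunable, and the
  resulting pmap flags are exported read-only via sysctl.  Assuming the
  usual loader.conf/sysctl workflow (a usage sketch, not taken from
  this commit):

	# /boot/loader.conf: opt out of NPT superpages at boot
	hw.vmm.npt.enable_superpage="0"

	# after boot, inspect the flags svm_npt_init() computed
	sysctl hw.vmm.npt.pmap_flags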

Modified: projects/bhyve_svm/sys/amd64/vmm/amd/npt.h
==============================================================================
--- projects/bhyve_svm/sys/amd64/vmm/amd/npt.h	Wed Dec 18 22:31:53 2013	(r259578)
+++ projects/bhyve_svm/sys/amd64/vmm/amd/npt.h	Wed Dec 18 23:39:42 2013	(r259579)
@@ -31,10 +31,7 @@
 
 struct svm_softc;
 
-int 	   svm_npt_init(void);
-int 	   svm_npt_cleanup(struct svm_softc *sc);
-vm_paddr_t svm_npt_vmmap_get(void *arg, vm_paddr_t gpa);
-int	   svm_npt_vmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, 
-			     size_t len, vm_memattr_t attr, int prot,
-			     boolean_t sp);
+int 	svm_npt_init(void);
+struct	vmspace *svm_npt_alloc(vm_offset_t min, vm_offset_t max);
+void	svm_npt_free(struct vmspace *vmspace);
 #endif /* _SVM_NPT_H_ */

Modified: projects/bhyve_svm/sys/amd64/vmm/amd/svm.c
==============================================================================
--- projects/bhyve_svm/sys/amd64/vmm/amd/svm.c	Wed Dec 18 22:31:53 2013	(r259578)
+++ projects/bhyve_svm/sys/amd64/vmm/amd/svm.c	Wed Dec 18 23:39:42 2013	(r259579)
@@ -61,7 +61,7 @@ __FBSDID("$FreeBSD$");
 #include "npt.h"
 
 /*
- * SVM CPUID function 0x8000_000Ai, edx bit decoding.
+ * SVM CPUID function 0x8000_000A, edx bit decoding.
  */
 #define AMD_CPUID_SVM_NP		BIT(0)  /* Nested paging or RVI */
 #define AMD_CPUID_SVM_LBR		BIT(1)  /* Last branch virtualization */
@@ -79,7 +79,7 @@ MALLOC_DEFINE(M_SVM, "svm", "svm");
 /* Per-CPU context area. */
 extern struct pcpu __pcpu[];
 
-static int svm_vmexit(struct svm_softc *svm_sc, int vcpu,
+static bool svm_vmexit(struct svm_softc *svm_sc, int vcpu,
 			struct vm_exit *vmexit);
 static int svm_msr_rw_ok(uint8_t *btmap, uint64_t msr);
 static int svm_msr_index(uint64_t msr, int *index, int *bit);
@@ -98,11 +98,6 @@ static uint32_t guest_asid = 1;
  */
 static int max_asid;
 
-/*
- * Statistics
- */
-static VMM_STAT_AMD(VMEXIT_NPF_LAPIC, "vm exits due to Local APIC access");
-
 /* 
  * SVM host state saved area of size 4KB for each core.
  */
@@ -113,6 +108,8 @@ static uint8_t hsave[MAXCPU][PAGE_SIZE] 
  */
 static struct svm_regctx host_ctx[MAXCPU];
 
+static VMM_STAT_AMD(VCPU_EXITINTINFO, "Valid EXITINTINFO");
+
 /* 
  * Common function to enable or disable SVM for a CPU.
  */
@@ -123,19 +120,13 @@ cpu_svm_enable_disable(boolean_t enable)
 	
 	efer_msr = rdmsr(MSR_EFER);
 
-	if (enable) {
+	if (enable) 
 		efer_msr |= EFER_SVM;
-	} else {
+	else 
 		efer_msr &= ~EFER_SVM;
-	}
 
 	wrmsr(MSR_EFER, efer_msr);
 
-	if(rdmsr(MSR_EFER) != efer_msr) {
-		ERR("SVM couldn't be enabled on CPU%d.\n", curcpu);
-		return (EIO);
-	}
-
 	return(0);
 }
 
@@ -199,20 +190,16 @@ svm_cpuid_features(void)
 	}
 
 	/*
-	 * XXX: BHyVe need EPT or RVI to work.
+	 * bhyve needs RVI to work.
 	 */
 	if (!(svm_feature & AMD_CPUID_SVM_NP)) {
 		printf("Missing Nested paging or RVI SVM support in processor.\n");
 		return (EIO);
 	}
 	
-	if (svm_feature & (AMD_CPUID_SVM_NRIP_SAVE |
-			AMD_CPUID_SVM_DECODE_ASSIST)) {
+	if (svm_feature & AMD_CPUID_SVM_NRIP_SAVE) 
 		return (0);
-	}
-	/* XXX: Should never be here? */
-	printf("Processor doesn't support nRIP or decode assist, can't"
-		"run BhyVe.\n");
+	
 	return (EIO);
 }
 
@@ -267,16 +254,16 @@ svm_init(void)
 	int err;
 
 	err = is_svm_enabled();
-	if (err) {
+	if (err) 
 		return (err);
-	}
+	
 
 	svm_npt_init();
 	
 	/* Start SVM on all CPUs */
 	smp_rendezvous(NULL, svm_enable, NULL, NULL);
 		
-	return(0);
+	return (0);
 }
 
 /*
@@ -383,7 +370,7 @@ svm_init_vcpu(struct svm_vcpu *vcpu, vm_
  * Initialise a virtual machine.
  */
 static void *
-svm_vminit(struct vm *vm)
+svm_vminit(struct vm *vm, pmap_t pmap)
 {
 	struct svm_softc *svm_sc;
 	vm_paddr_t msrpm_pa, iopm_pa, pml4_pa;	
@@ -401,10 +388,10 @@ svm_vminit(struct vm *vm)
 	svm_sc->vm = vm;
 	svm_sc->svm_feature = svm_feature;
 	svm_sc->vcpu_cnt = VM_MAXCPU;
-
+	svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pml4);
 	/*
 	 * Each guest has its own unique ASID.
-	 * ASID(Addres Space Identifier) are used by TLB entries.
+	 * ASID (Address Space Identifier) is used by TLB entries.
 	 */
 	svm_sc->asid = guest_asid++;
 	
@@ -438,7 +425,7 @@ svm_vminit(struct vm *vm)
 	/* Cache physical address for multiple vcpus. */
 	iopm_pa = vtophys(svm_sc->iopm_bitmap);
 	msrpm_pa = vtophys(svm_sc->msr_bitmap);
-	pml4_pa = vtophys(svm_sc->np_pml4);
+	pml4_pa = svm_sc->nptp;
 
 	for (i = 0; i < svm_sc->vcpu_cnt; i++) {
 		if (svm_init_vcpu(svm_get_vcpu(svm_sc, i), iopm_pa, msrpm_pa,
@@ -458,7 +445,7 @@ cleanup:
 /*
  * Handle guest I/O intercept.
  */
-static int
+static bool
 svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
 {
 	struct vmcb_ctrl *ctrl;
@@ -477,74 +464,39 @@ svm_handle_io(struct svm_softc *svm_sc, 
 	vmexit->u.inout.port 	= (uint16_t)(info1 >> 16);
 	vmexit->u.inout.eax 	= (uint32_t)(state->rax);
 
-	return (1);
+	return (false);
 }
 
-/*
- * SVM Nested Page(RVI) Fault handler.
- * Nested page fault handler used by local APIC emulation.
- */
-static int
-svm_handle_npf(struct vm *vm, int vcpu, uint64_t gpa, uint64_t rip,
-		uint64_t exitinfo1, uint64_t cr3, struct vie *vie)
+static void
+svm_npf_paging(uint64_t exitinfo1, int *type, int *prot)
 {
-	int err;
 
-	if (exitinfo1 & VMCB_NPF_INFO1_ID) {
- 		VMM_CTR0(vm, vcpu, "SVM:NPF for code access.");
-		return (0);
-	}
+	if (exitinfo1 & VMCB_NPF_INFO1_W)
+		*type = VM_PROT_WRITE;
+	else
+		*type = VM_PROT_READ;
 	
-	if (exitinfo1 & VMCB_NPF_INFO1_RSV) {
- 		VMM_CTR0(vm, vcpu, "SVM:NPF reserved bits are set.");
-		return (0);
-	}
-	
-	if (exitinfo1 & VMCB_NPF_INFO1_GPT) {
- 		VMM_CTR0(vm, vcpu, "SVM:NPF during guest page table walk.");
-		return (0);
-	}
+	/* XXX: protection is not used. */
+	*prot = 0;
+}
 
-	/*
-	 * nRIP is NULL for NPF so we don't have the length of instruction,
-	 * we rely on instruction decode s/w to determine the size of
-	 * instruction.
-	 *
-	 * XXX: DecodeAssist can use instruction from buffer.
-	 */
-	if (vmm_fetch_instruction(vm, vcpu, rip, VIE_INST_SIZE,
-				cr3, vie) != 0) {
- 		ERR("SVM:NPF instruction fetch failed, RIP:0x%lx\n", rip);
-		return (EINVAL);
+static bool
+svm_npf_emul_fault(uint64_t exitinfo1)
+{
+	
+	if (exitinfo1 & VMCB_NPF_INFO1_ID) {
+		return (false);
 	}
 
-	KASSERT(vie->num_valid, ("No instruction to emulate."));
-	/*
-	 * SVM doesn't provide GLA unlike Intel VM-x. VIE_INVALID_GLA
-	 * which is a non-cannonical address indicate that GLA is not
-	 * available to instruction emulation.
-	 *
-	 * XXX: Which SVM capability can provided GLA?
-	 */
-	if(vmm_decode_instruction(vm, vcpu, VIE_INVALID_GLA, vie)) {
-		ERR("SVM: Couldn't decode instruction.\n");
-		return (0);
+	if (exitinfo1 & VMCB_NPF_INFO1_GPT) {
+		return (false);
 	}
 
-	/*
-	 * XXX: Decoding for user space(IOAPIC) should be done in
-	 * user space.
-	 */	
-	if (gpa < DEFAULT_APIC_BASE || gpa >= (DEFAULT_APIC_BASE + PAGE_SIZE)) {
-		VMM_CTR2(vm, vcpu, "SVM:NPF GPA(0x%lx) outside of local APIC"
-			" range(0x%x)\n", gpa, DEFAULT_APIC_BASE);
-		return (0);
+	if ((exitinfo1 & VMCB_NPF_INFO1_GPA) == 0) {
+		return (false);
 	}
 
-	err = vmm_emulate_instruction(vm, vcpu, gpa, vie, lapic_mmio_read,
-		lapic_mmio_write, 0);
-
-	return (err ? 0 : 1);
+	return (true);	
 }
 
 /*
@@ -571,12 +523,12 @@ svm_efer(struct svm_softc *svm_sc, int v
 }
 
 /*
- * Determine the cause of virtual cpu exit and return to user space if exit
- * demand so.
- * Return: 1 - Return to user space.
- *	   0 - Continue vcpu run.
+ * Determine the cause of virtual cpu exit and handle VMEXIT.
+ * Return: false - Break vcpu execution loop and handle vmexit
+ *		   in kernel or user space.
+ *	   true  - Continue vcpu run.
  */
-static int
+static bool 
 svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
 {
 	struct vmcb_state *state;
@@ -584,35 +536,27 @@ svm_vmexit(struct svm_softc *svm_sc, int
 	struct svm_regctx *ctx;
 	uint64_t code, info1, info2, val;
 	uint32_t eax, ecx, edx;
-	int user;		/* Flag for user mode */
-	int update_rip;		/* Flag for updating RIP */
-	int inst_len;
+	bool update_rip, loop;
 
 	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
 
 	state = svm_get_vmcb_state(svm_sc, vcpu);
 	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
 	ctx   = svm_get_guest_regctx(svm_sc, vcpu);
-	update_rip = 1;
-	user = 0;
-	
-	vmexit->exitcode = VM_EXITCODE_VMX;
-	vmexit->u.vmx.error = 0;
-	code = ctrl->exitcode;
+	code  = ctrl->exitcode;
 	info1 = ctrl->exitinfo1;
 	info2 = ctrl->exitinfo2;
 
-	if (ctrl->nrip) {
-		inst_len = ctrl->nrip - state->rip;
-	} else {
-		inst_len = ctrl->inst_decode_size;
-	}
+	update_rip = true;
+	loop = true;
+	vmexit->exitcode = VM_EXITCODE_VMX;
+	vmexit->u.vmx.error = 0;
 
 	switch (code) {
 		case	VMCB_EXIT_MC: /* Machine Check. */
 			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1);
 			vmexit->exitcode = VM_EXITCODE_MTRAP;
-			user = 1;
+			loop = false;
 			break;
 
 		case	VMCB_EXIT_MSR:	/* MSR access. */
@@ -628,27 +572,29 @@ svm_vmexit(struct svm_softc *svm_sc, int
 
 			if (info1) {
 				/* VM exited because of write MSR */
-				vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1);
+				vmm_stat_incr(svm_sc->vm, vcpu, 
+					VMEXIT_WRMSR, 1);
 				vmexit->exitcode = VM_EXITCODE_WRMSR;
 				vmexit->u.msr.code = ecx;
 				val = (uint64_t)edx << 32 | eax;
 				if (emulate_wrmsr(svm_sc->vm, vcpu, ecx, val)) {
 					vmexit->u.msr.wval = val;
-					user = 1;
+					loop = false;
 				}
 				VMM_CTR3(svm_sc->vm, vcpu,
 					"VMEXIT WRMSR(%s handling) 0x%lx @0x%x",
-					user ? "user" : "kernel", val, ecx);
+					loop ? "kernel" : "user", val, ecx);
 			} else {
-				vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1);
+				vmm_stat_incr(svm_sc->vm, vcpu, 
+					VMEXIT_RDMSR, 1);
 				vmexit->exitcode = VM_EXITCODE_RDMSR;
 				vmexit->u.msr.code = ecx;
 				if (emulate_rdmsr(svm_sc->vm, vcpu, ecx)) {
-					user = 1; 
+					loop = false; 
 				}
 				VMM_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR"
-					" 0x%lx,%lx @0x%x", ctx->e.g.sctx_rdx, 
-					state->rax, ecx);
+					" MSB=0x%08x, LSB=%08x @0x%x", 
+					ctx->e.g.sctx_rdx, state->rax, ecx);
 			}
 
 #define MSR_AMDK8_IPM           0xc0010055
@@ -659,17 +605,16 @@ svm_vmexit(struct svm_softc *svm_sc, int
 			 * XXX: special handling of AMD C1E - Ignore.
 			 */
 			 if (ecx == MSR_AMDK8_IPM)
-				user = 0;
+				loop = true;
 			break;
 
-		case 	VMCB_EXIT_INTR:
+		case VMCB_EXIT_INTR:
 			/*
 			 * Exit on External Interrupt.
 			 * Let the host interrupt handler run; if it is a guest
 			 * interrupt, the local APIC will inject the event into the guest.
 			 */
-				user = 0;
-			update_rip = 0;
+			update_rip = false;
 			VMM_CTR1(svm_sc->vm, vcpu, "SVM:VMEXIT ExtInt"
 				" RIP:0x%lx.\n", state->rip);
 			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
@@ -677,9 +622,8 @@ svm_vmexit(struct svm_softc *svm_sc, int
 
 		case VMCB_EXIT_IO:

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


