Date:      Fri, 23 Aug 2013 00:37:27 +0000 (UTC)
From:      Peter Grehan <grehan@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r254677 - in projects/bhyve_svm/sys: amd64/vmm amd64/vmm/amd modules/vmm
Message-ID:  <201308230037.r7N0bRG1048920@svn.freebsd.org>

Author: grehan
Date: Fri Aug 23 00:37:26 2013
New Revision: 254677
URL: http://svnweb.freebsd.org/changeset/base/254677

Log:
  Add in last remaining files to get AMD-SVM operational.
  
  Submitted by:	Anish Gupta (akgupt3@gmail.com)

Added:
  projects/bhyve_svm/sys/amd64/vmm/amd/svm.c   (contents, props changed)
  projects/bhyve_svm/sys/amd64/vmm/amd/svm_msr.h   (contents, props changed)
  projects/bhyve_svm/sys/amd64/vmm/amd/svm_support.S   (contents, props changed)
Modified:
  projects/bhyve_svm/sys/amd64/vmm/amd/amdv.c
  projects/bhyve_svm/sys/amd64/vmm/vmm_instruction_emul.c
  projects/bhyve_svm/sys/modules/vmm/Makefile

Modified: projects/bhyve_svm/sys/amd64/vmm/amd/amdv.c
==============================================================================
--- projects/bhyve_svm/sys/amd64/vmm/amd/amdv.c	Thu Aug 22 22:29:27 2013	(r254676)
+++ projects/bhyve_svm/sys/amd64/vmm/amd/amdv.c	Fri Aug 23 00:37:26 2013	(r254677)
@@ -38,137 +38,6 @@ __FBSDID("$FreeBSD$");
 #include "io/iommu.h"
 
 static int
-amdv_init(void)
-{
-
-	printf("amdv_init: not implemented\n");
-	return (ENXIO);
-}
-
-static int
-amdv_cleanup(void)
-{
-
-	printf("amdv_cleanup: not implemented\n");
-	return (ENXIO);
-}
-
-static void *
-amdv_vminit(struct vm *vm)
-{
-
-	printf("amdv_vminit: not implemented\n");
-	return (NULL);
-}
-
-static int
-amdv_vmrun(void *arg, int vcpu, register_t rip)
-{
-
-	printf("amdv_vmrun: not implemented\n");
-	return (ENXIO);
-}
-
-static void
-amdv_vmcleanup(void *arg)
-{
-
-	printf("amdv_vmcleanup: not implemented\n");
-	return;
-}
-
-static int
-amdv_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
-	    vm_memattr_t attr, int prot, boolean_t spok)
-{
-
-	printf("amdv_vmmmap_set: not implemented\n");
-	return (EINVAL);
-}
-
-static vm_paddr_t
-amdv_vmmmap_get(void *arg, vm_paddr_t gpa)
-{
-
-	printf("amdv_vmmmap_get: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
-{
-	
-	printf("amdv_getreg: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val)
-{
-	
-	printf("amdv_setreg: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
-{
-
-	printf("amdv_get_desc: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_setdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
-{
-
-	printf("amdv_get_desc: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_inject_event(void *vmi, int vcpu, int type, int vector,
-		  uint32_t error_code, int error_code_valid)
-{
-
-	printf("amdv_inject_event: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_getcap(void *arg, int vcpu, int type, int *retval)
-{
-
-	printf("amdv_getcap: not implemented\n");
-	return (EINVAL);
-}
-
-static int
-amdv_setcap(void *arg, int vcpu, int type, int val)
-{
-
-	printf("amdv_setcap: not implemented\n");
-	return (EINVAL);
-}
-
-struct vmm_ops vmm_ops_amd = {
-	amdv_init,
-	amdv_cleanup,
-	amdv_vminit,
-	amdv_vmrun,
-	amdv_vmcleanup,
-	amdv_vmmmap_set,
-	amdv_vmmmap_get,
-	amdv_getreg,
-	amdv_setreg,
-	amdv_getdesc,
-	amdv_setdesc,
-	amdv_inject_event,
-	amdv_getcap,
-	amdv_setcap
-};
-
-static int
 amd_iommu_init(void)
 {
 

Added: projects/bhyve_svm/sys/amd64/vmm/amd/svm.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/bhyve_svm/sys/amd64/vmm/amd/svm.c	Fri Aug 23 00:37:26 2013	(r254677)
@@ -0,0 +1,1363 @@
+/*-
+ * Copyright (c) 2013, Anish Gupta (akgupt3@gmail.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/cpufunc.h>
+#include <machine/psl.h>
+#include <machine/pmap.h>
+#include <machine/md_var.h>
+#include <machine/vmparam.h>
+#include <machine/specialreg.h>
+#include <machine/segments.h>
+#include <machine/vmm.h>
+
+#include <x86/apicreg.h>
+
+#include "vmm_lapic.h"
+#include "vmm_msr.h"
+#include "vmm_stat.h"
+#include "vmm_ktr.h"
+
+#include "x86.h"
+#include "vmcb.h"
+#include "svm.h"
+#include "svm_softc.h"
+#include "npt.h"
+
+/*
+ * SVM CPUID function 0x8000_000A, edx bit decoding.
+ */
+#define AMD_CPUID_SVM_NP		BIT(0)  /* Nested paging or RVI */
+#define AMD_CPUID_SVM_LBR		BIT(1)  /* Last branch virtualization */
+#define AMD_CPUID_SVM_SVML		BIT(2)  /* SVM lock */
+#define AMD_CPUID_SVM_NRIP_SAVE		BIT(3)  /* Next RIP is saved */
+#define AMD_CPUID_SVM_TSC_RATE		BIT(4)  /* TSC rate control. */
+#define AMD_CPUID_SVM_VMCB_CLEAN	BIT(5)  /* VMCB state caching */
+#define AMD_CPUID_SVM_ASID_FLUSH	BIT(6)  /* Flush by ASID */
+#define AMD_CPUID_SVM_DECODE_ASSIST	BIT(7)  /* Decode assist */
+#define AMD_CPUID_SVM_PAUSE_INC		BIT(10) /* Pause intercept filter. */
+#define AMD_CPUID_SVM_PAUSE_FTH		BIT(12) /* Pause filter threshold */
+
+MALLOC_DEFINE(M_SVM, "svm", "svm");
+
+/* Per-CPU context area. */
+extern struct pcpu __pcpu[];
+
+static int svm_vmexit(struct svm_softc *svm_sc, int vcpu,
+			struct vm_exit *vmexit);
+static int svm_msr_rw_ok(uint8_t *btmap, uint64_t msr);
+static int svm_msr_index(uint64_t msr, int *index, int *bit);
+
+static uint32_t svm_feature; /* AMD SVM features. */
+
+/*
+ * Starting guest ASID; ASID 0 is reserved for the host.
+ * Each guest will have its own unique ASID.
+ */
+static uint32_t guest_asid = 1;
+
+/*
+ * Maximum ASID the processor can support.
+ * This limits the maximum number of virtual machines that can be created.
+ */
+static int max_asid;
+
+/*
+ * Statistics
+ */
+static VMM_STAT_AMD(VMEXIT_NPF_LAPIC, "vm exits due to Local APIC access");
+
+/* 
+ * SVM host state saved area of size 4KB for each core.
+ */
+static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
+
+/*
+ * S/w saved host context.
+ */
+static struct svm_regctx host_ctx[MAXCPU];
+
+/* 
+ * Common function to enable or disable SVM for a CPU.
+ */
+static int
+cpu_svm_enable_disable(boolean_t enable)
+{
+	uint64_t efer_msr;
+	
+	efer_msr = rdmsr(MSR_EFER);
+
+	if (enable) {
+		efer_msr |= EFER_SVM;
+	} else {
+		efer_msr &= ~EFER_SVM;
+	}
+
+	wrmsr(MSR_EFER, efer_msr);
+
+	if (rdmsr(MSR_EFER) != efer_msr) {
+		ERR("SVM couldn't be enabled on CPU%d.\n", curcpu);
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Disable SVM on a CPU.
+ */
+static void
+svm_disable(void *arg __unused)
+{
+
+	(void)cpu_svm_enable_disable(FALSE);
+}
+
+/*
+ * Disable SVM for all CPUs.
+ */
+static int
+svm_cleanup(void)
+{
+
+	smp_rendezvous(NULL, svm_disable, NULL, NULL);
+	return (0);
+}
+
+/*
+ * Check for the SVM features required by bhyve in the CPU.
+ */
+static int
+svm_cpuid_features(void)
+{
+	u_int regs[4];
+
+	/* CPUID Fn8000_000A is for SVM */
+	do_cpuid(0x8000000A, regs);
+	svm_feature = regs[3];
+
+	printf("SVM rev: 0x%x NASID:0x%x\n", regs[0] & 0xFF, regs[1]);
+	max_asid = regs[1];
+	
+	printf("SVM Features:0x%b\n", svm_feature,
+		"\020"
+		"\001NP"		/* Nested paging */
+		"\002LbrVirt"		/* LBR virtualization */
+		"\003SVML"		/* SVM lock */
+		"\004NRIPS"		/* NRIP save */
+		"\005TscRateMsr"	/* MSR based TSC rate control */
+		"\006VmcbClean"		/* VMCB clean bits */
+		"\007FlushByAsid"	/* Flush by ASID */
+		"\010DecodeAssist"	/* Decode assist */
+		"\011<b8>"
+		"\012<b9>"
+		"\013PauseFilter"
+		"\014<b11>"
+		"\015PauseFilterThreshold"
+		);
+
+	/* SVM Lock */ 
+	if (!(svm_feature & AMD_CPUID_SVM_SVML)) {
+		printf("SVM is disabled by BIOS, please enable in BIOS.\n");
+		return (ENXIO);
+	}
+
+	/*
+	 * XXX: bhyve needs nested paging (RVI) to work.
+	 */
+	if (!(svm_feature & AMD_CPUID_SVM_NP)) {
+		printf("Missing Nested paging or RVI SVM support in processor.\n");
+		return (EIO);
+	}
+	
+	if (svm_feature & (AMD_CPUID_SVM_NRIP_SAVE |
+			AMD_CPUID_SVM_DECODE_ASSIST)) {
+		return (0);
+	}
+	/* XXX: Should never be here? */
+	printf("Processor doesn't support nRIP or decode assist; can't "
+		"run bhyve.\n");
+	return (EIO);
+}
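+
+/*
+ * For illustration only (hypothetical value, not from real hardware):
+ * a svm_feature of 0x0f would decode above as NP, LbrVirt, SVML and
+ * NRIPS, which satisfies the SVM lock, nested paging and nRIP/decode
+ * assist checks in svm_cpuid_features().
+ */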
+
+/*
+ * Enable SVM for a CPU.
+ */
+static void
+svm_enable(void *arg __unused)
+{
+	uint64_t hsave_pa;
+
+	(void)cpu_svm_enable_disable(TRUE);
+
+	hsave_pa = vtophys(hsave[curcpu]);
+	wrmsr(MSR_VM_HSAVE_PA, hsave_pa);
+	
+	if (rdmsr(MSR_VM_HSAVE_PA) != hsave_pa) {
+		panic("VM_HSAVE_PA is wrong on CPU%d\n", curcpu);
+	}
+}
+
+/*
+ * Check if the processor supports SVM.
+ */
+static int
+is_svm_enabled(void)
+{
+	uint64_t msr;
+
+	 /* Section 15.4 Enabling SVM from APM2. */
+	if ((amd_feature2 & AMDID2_SVM) == 0) {
+		printf("SVM is not supported on this processor.\n");
+		return (ENXIO);
+	}
+
+	msr = rdmsr(MSR_VM_CR);
+	/* Make sure SVM is not disabled by BIOS. */
+	if ((msr & VM_CR_SVMDIS) == 0) {
+		return svm_cpuid_features();
+	}
+
+	printf("SVM disabled by Key, consult TPM/BIOS manual.\n");
+	return (ENXIO);
+}
+
+/*
+ * Enable SVM on CPU and initialize nested page table h/w.
+ */
+static int
+svm_init(void)
+{
+	int err;
+
+	err = is_svm_enabled();
+	if (err) {
+		return (err);
+	}
+
+	svm_npt_init();
+	
+	/* Start SVM on all CPUs */
+	smp_rendezvous(NULL, svm_enable, NULL, NULL);
+		
+	return (0);
+}
+
+/*
+ * Get the byte index and bit position for an MSR in the MSR permission
+ * bitmap. Two bits are used for each MSR: the lower bit intercepts reads
+ * and the higher bit intercepts writes.
+ */
+static int
+svm_msr_index(uint64_t msr, int *index, int *bit)
+{
+	uint32_t base, off;
+
+/* Pentium compatible MSRs */
+#define MSR_PENTIUM_START 	0	
+#define MSR_PENTIUM_END 	0x1FFF
+/* AMD 6th generation and Intel compatible MSRs */
+#define MSR_AMD6TH_START 	0xC0000000UL	
+#define MSR_AMD6TH_END 		0xC0001FFFUL	
+/* AMD 7th and 8th generation compatible MSRs */
+#define MSR_AMD7TH_START 	0xC0010000UL	
+#define MSR_AMD7TH_END 		0xC0011FFFUL	
+	
+	*index = -1;
+	*bit = (msr % 4) * 2;
+	base = 0;
+
+	if (msr >= MSR_PENTIUM_START && msr <= MSR_PENTIUM_END) {
+		*index = msr / 4;
+		return (0);
+	}
+	
+	base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1); 
+	if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) {
+		off = (msr - MSR_AMD6TH_START); 
+		*index = (off + base) / 4;
+		return (0);
+	} 
+	
+	base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1);
+	if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) {
+		off = (msr - MSR_AMD7TH_START);
+		*index = (off + base) / 4;
+		return (0);
+	}
+
+	return (EIO);
+}
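+
+/*
+ * Worked example of the indexing above (for illustration): MSR_LSTAR
+ * (0xC0000082) falls in the AMD 6th-generation range, so base = 0x2000,
+ * off = 0x82, *index = (0x82 + 0x2000) / 4 = 2080 and
+ * *bit = (0xC0000082 % 4) * 2 = 4; bit 4 of byte 2080 covers reads and
+ * bit 5 covers writes.
+ */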
+
+/*
+ * Give the virtual CPU complete access to an MSR (read and write).
+ */
+static int
+svm_msr_rw_ok(uint8_t *perm_bitmap, uint64_t msr)
+{
+	int index, bit, err;
+
+	err = svm_msr_index(msr, &index, &bit);
+	if (err) {
+		ERR("MSR 0x%lx is not writeable by guest.\n", msr);
+		return (err);
+	}
+	
+	if (index < 0 || index > (SVM_MSR_BITMAP_SIZE)) {
+		ERR("MSR 0x%lx index out of range(%d).\n", msr, index);
+		return (EINVAL);
+	}
+	if (bit < 0 || bit > 8) {
+		ERR("MSR 0x%lx bit out of range(%d).\n", msr, bit);
+		return (EINVAL);
+	}
+
+	/* Disable intercept for read and write. */
+	perm_bitmap[index] &= ~(3 << bit);
+	CTR1(KTR_VMM, "Guest has full control on SVM:MSR(0x%lx).\n", msr);
+	
+	return (0);
+}
+
+/*
+ * Initialise VCPU.
+ */
+static int
+svm_init_vcpu(struct svm_vcpu *vcpu, vm_paddr_t iopm_pa, vm_paddr_t msrpm_pa,
+		vm_paddr_t pml4_pa, uint8_t asid)
+{
+	
+	vcpu->lastcpu = NOCPU;
+	vcpu->vmcb_pa = vtophys(&vcpu->vmcb);
+	
+	/* 
+	 * Initialise the VMCB persistent area of the vcpu:
+	 * 1. Permission bitmap for MSR and IO space.
+	 * 2. Nested paging.
+	 * 3. ASID of virtual machine. 
+	 */
+	if (svm_init_vmcb(&vcpu->vmcb, iopm_pa, msrpm_pa, pml4_pa)) {
+		return (EIO);
+	}
+	
+	return (0);
+}
+		
+/*
+ * Initialise a virtual machine.
+ */
+static void *
+svm_vminit(struct vm *vm)
+{
+	struct svm_softc *svm_sc;
+	vm_paddr_t msrpm_pa, iopm_pa, pml4_pa;	
+	int i;
+
+	if (guest_asid >= max_asid) {
+		ERR("Host supports max ASID:%d, can't create more guests.\n",
+			max_asid);
+		return (NULL);
+	}
+	
+	svm_sc = (struct svm_softc *)malloc(sizeof (struct svm_softc),
+			M_SVM, M_WAITOK | M_ZERO);
+
+	svm_sc->vm = vm;
+	svm_sc->svm_feature = svm_feature;
+	svm_sc->vcpu_cnt = VM_MAXCPU;
+
+	/*
+	 * Each guest has its own unique ASID.
+	 * ASIDs (Address Space Identifiers) are used to tag TLB entries.
+	 */
+	svm_sc->asid = guest_asid++;
+	
+	/*
+	 * Intercept access to all MSRs except GSBASE, FSBASE, etc.
+	 */
+	memset(svm_sc->msr_bitmap, 0xFF, sizeof(svm_sc->msr_bitmap));
+
+	/*
+	 * The following MSRs can be controlled completely by the virtual
+	 * machine since accesses to them are translated into accesses to
+	 * the VMCB.
+	 */
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_GSBASE);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_FSBASE);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_KGSBASE);
+	
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_STAR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_LSTAR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_CSTAR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SF_MASK);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_CS_MSR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_ESP_MSR);
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_SYSENTER_EIP_MSR);
+	
+	/* For Nested Paging/RVI only. */
+	svm_msr_rw_ok(svm_sc->msr_bitmap, MSR_PAT);
+
+	/* Intercept access to all I/O ports. */
+	memset(svm_sc->iopm_bitmap, 0xFF, sizeof(svm_sc->iopm_bitmap));
+
+	/* Cache physical address for multiple vcpus. */
+	iopm_pa = vtophys(svm_sc->iopm_bitmap);
+	msrpm_pa = vtophys(svm_sc->msr_bitmap);
+	pml4_pa = vtophys(svm_sc->np_pml4);
+
+	for (i = 0; i < svm_sc->vcpu_cnt; i++) {
+		if (svm_init_vcpu(svm_get_vcpu(svm_sc, i), iopm_pa, msrpm_pa,
+				pml4_pa, svm_sc->asid)) {
+			ERR("SVM couldn't initialise VCPU%d\n", i);
+			goto cleanup;
+		}
+	}
+	
+	return (svm_sc);
+
+cleanup:
+	free(svm_sc, M_SVM);
+	return (NULL);
+}
+
+/*
+ * Handle guest I/O intercept.
+ */
+static int
+svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
+{
+	struct vmcb_ctrl *ctrl;
+	struct vmcb_state *state;
+	uint64_t info1;
+	
+	state = svm_get_vmcb_state(svm_sc, vcpu);
+	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
+	info1 = ctrl->exitinfo1;
+	
+	vmexit->exitcode 	= VM_EXITCODE_INOUT;
+	vmexit->u.inout.in 	= (info1 & BIT(0)) ? 1 : 0;
+	vmexit->u.inout.string 	= (info1 & BIT(2)) ? 1 : 0;
+	vmexit->u.inout.rep 	= (info1 & BIT(3)) ? 1 : 0;
+	vmexit->u.inout.bytes 	= (info1 >> 4) & 0x7;
+	vmexit->u.inout.port 	= (uint16_t)(info1 >> 16);
+	vmexit->u.inout.eax 	= (uint32_t)(state->rax);
+
+	return (1);
+}
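+
+/*
+ * Example of the decoding above (hypothetical EXITINFO1 value): an
+ * exitinfo1 of 0x00640011 yields in = 1, string = 0, rep = 0, bytes = 1
+ * and port = 0x64, i.e. a one-byte IN from port 0x64, with the low
+ * 32 bits of the guest %rax reported in u.inout.eax.
+ */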
+
+/*
+ * SVM nested page (RVI) fault handler, used for local APIC emulation.
+ */
+static int
+svm_handle_npf(struct vm *vm, int vcpu, uint64_t gpa, uint64_t rip,
+		uint64_t exitinfo1, uint64_t cr3, struct vie *vie)
+{
+	int err;
+
+	if (exitinfo1 & VMCB_NPF_INFO1_ID) {
+ 		VMM_CTR0(vm, vcpu, "SVM:NPF for code access.");
+		return (0);
+	}
+	
+	if (exitinfo1 & VMCB_NPF_INFO1_RSV) {
+ 		VMM_CTR0(vm, vcpu, "SVM:NPF reserved bits are set.");
+		return (0);
+	}
+	
+	if (exitinfo1 & VMCB_NPF_INFO1_GPT) {
+ 		VMM_CTR0(vm, vcpu, "SVM:NPF during guest page table walk.");
+		return (0);
+	}
+
+	/*
+	 * nRIP is zero for an NPF, so we don't have the instruction
+	 * length; rely on the software instruction decoder to determine
+	 * the instruction size.
+	 *
+	 * XXX: DecodeAssist could supply the instruction bytes from the VMCB.
+	 */
+	if (vmm_fetch_instruction(vm, vcpu, rip, VIE_INST_SIZE,
+				cr3, vie) != 0) {
+ 		ERR("SVM:NPF instruction fetch failed, RIP:0x%lx\n", rip);
+		return (EINVAL);
+	}
+
+	KASSERT(vie->num_valid, ("No instruction to emulate."));
+	/*
+	 * Unlike Intel VT-x, SVM doesn't provide the guest linear address
+	 * (GLA). VIE_INVALID_GLA, a non-canonical address, indicates to the
+	 * instruction emulation that the GLA is not available.
+	 *
+	 * XXX: Which SVM capability can provide the GLA?
+	 */
+	if (vmm_decode_instruction(vm, vcpu, VIE_INVALID_GLA, vie)) {
+		ERR("SVM: Couldn't decode instruction.\n");
+		return (0);
+	}
+
+	/*
+	 * XXX: Decoding for user-space devices (IOAPIC) should be done in
+	 * user space.
+	 */
+	if (gpa < DEFAULT_APIC_BASE || gpa >= (DEFAULT_APIC_BASE + PAGE_SIZE)) {
+		VMM_CTR2(vm, vcpu, "SVM:NPF GPA(0x%lx) outside of local APIC"
+			" range(0x%x)\n", gpa, DEFAULT_APIC_BASE);
+		return (0);
+	}
+
+	err = vmm_emulate_instruction(vm, vcpu, gpa, vie, lapic_mmio_read,
+		lapic_mmio_write, 0);
+
+	return (err ? 0 : 1);
+}
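+
+/*
+ * For illustration: with the range check above, an NPF at GPA 0xfee00030
+ * (inside the local APIC page at DEFAULT_APIC_BASE) is emulated in the
+ * kernel via lapic_mmio_read/lapic_mmio_write, while a fault at any other
+ * GPA returns 0 so the caller can bounce it to user space as a
+ * VM_EXITCODE_PAGING exit.
+ */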
+
+/*
+ * Special handling of EFER MSR.
+ * An SVM guest must run with the SVM bit set in EFER; prohibit the guest
+ * from clearing the SVM enable bit in EFER.
+ */
+static void
+svm_efer(struct svm_softc *svm_sc, int vcpu, boolean_t write)
+{
+	struct svm_regctx *swctx;
+	struct vmcb_state *state;
+	
+	state = svm_get_vmcb_state(svm_sc, vcpu);
+	swctx = svm_get_guest_regctx(svm_sc, vcpu);
+
+	if (write) {
+		state->efer = ((swctx->e.g.sctx_rdx & (uint32_t)~0) << 32) |
+				((uint32_t)state->rax) | EFER_SVM;
+	} else {
+		state->rax = (uint32_t)state->efer;
+		swctx->e.g.sctx_rdx = (uint32_t)(state->efer >> 32);
+	}
+}
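+
+/*
+ * Illustration of the EDX:EAX split above (hypothetical value): a guest
+ * WRMSR of EFER with %edx = 0 and %eax = 0xd01 (SCE, LME, LMA, NXE)
+ * results in a VMCB EFER of 0x1d01, since EFER_SVM (bit 12) is always
+ * forced on; the matching RDMSR returns the low and high halves in
+ * %eax and %edx respectively.
+ */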
+
+/*
+ * Determine the cause of the virtual cpu exit and return to user space if
+ * the exit demands it.
+ * Return: 1 - Return to user space.
+ *	   0 - Continue vcpu run.
+ */
+static int
+svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
+{
+	struct vmcb_state *state;
+	struct vmcb_ctrl *ctrl;
+	struct svm_regctx *ctx;
+	uint64_t code, info1, info2, val;
+	uint32_t eax, ecx, edx;
+	int user;		/* Flag for user mode */
+	int update_rip;		/* Flag for updating RIP */
+	int inst_len;
+
+	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
+
+	state = svm_get_vmcb_state(svm_sc, vcpu);
+	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
+	ctx   = svm_get_guest_regctx(svm_sc, vcpu);
+	update_rip = 1;
+	user = 0;
+	
+	vmexit->exitcode = VM_EXITCODE_VMX;
+	vmexit->u.vmx.error = 0;
+	code = ctrl->exitcode;
+	info1 = ctrl->exitinfo1;
+	info2 = ctrl->exitinfo2;
+
+	if (ctrl->nrip) {
+		inst_len = ctrl->nrip - state->rip;
+	} else {
+		inst_len = ctrl->inst_decode_size;
+	}
+
+	switch (code) {
+		case	VMCB_EXIT_MC: /* Machine Check. */
+			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1);
+			vmexit->exitcode = VM_EXITCODE_MTRAP;
+			user = 1;
+			break;
+
+		case	VMCB_EXIT_MSR:	/* MSR access. */
+			eax = state->rax;
+			ecx = ctx->sctx_rcx;
+			edx = ctx->e.g.sctx_rdx;
+			
+			if (ecx == MSR_EFER) {
+				VMM_CTR0(svm_sc->vm, vcpu, "VMEXIT EFER\n");
+				svm_efer(svm_sc, vcpu, info1);
+				break;
+			}
+
+			if (info1) {
+				/* VM exited because of write MSR */
+				vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1);
+				vmexit->exitcode = VM_EXITCODE_WRMSR;
+				vmexit->u.msr.code = ecx;
+				val = (uint64_t)edx << 32 | eax;
+				if (emulate_wrmsr(svm_sc->vm, vcpu, ecx, val)) {
+					vmexit->u.msr.wval = val;
+					user = 1;
+				}
+				VMM_CTR3(svm_sc->vm, vcpu,
+					"VMEXIT WRMSR(%s handling) 0x%lx @0x%x",
+					user ? "user" : "kernel", val, ecx);
+			} else {
+				vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1);
+				vmexit->exitcode = VM_EXITCODE_RDMSR;
+				vmexit->u.msr.code = ecx;
+				if (emulate_rdmsr(svm_sc->vm, vcpu, ecx)) {
+					user = 1; 
+				}
+				VMM_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR"
+					" 0x%lx,%lx @0x%x", ctx->e.g.sctx_rdx, 
+					state->rax, ecx);
+			}
+
+#define MSR_AMDK8_IPM           0xc0010055
+			/*
+			 * We can't hide the AMD C1E idle capability since it
+			 * is based on CPU generation; for now, ignore guest
+			 * accesses to this MSR.
+			 * XXX: special handling of AMD C1E - ignore.
+			 */
+			if (ecx == MSR_AMDK8_IPM)
+				user = 0;
+			break;
+
+		case VMCB_EXIT_INTR:
+			/*
+			 * Exit on external interrupt.
+			 * Let the host interrupt handler run; if it is a guest
+			 * interrupt, the local APIC will inject the event into
+			 * the guest.
+			 */
+			user = 0;
+			update_rip = 0;
+			VMM_CTR1(svm_sc->vm, vcpu, "SVM:VMEXIT ExtInt"
+				" RIP:0x%lx.\n", state->rip);
+			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
+			break;
+
+		case VMCB_EXIT_IO:
+			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
+			user = svm_handle_io(svm_sc, vcpu, vmexit);
+			break;
+
+		case VMCB_EXIT_CPUID:
+			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1);
+			(void)x86_emulate_cpuid(svm_sc->vm, vcpu,
+					(uint32_t *)&state->rax,
+					(uint32_t *)&ctx->sctx_rbx,
+					(uint32_t *)&ctx->sctx_rcx,
+					(uint32_t *)&ctx->e.g.sctx_rdx);
+			VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT CPUID\n");
+			user = 0;
+			break;
+
+		case VMCB_EXIT_HLT:
+			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1);
+			if (ctrl->v_irq) {
+				/* Interrupt is pending, can't halt guest. */
+				user = 0;
+				vmm_stat_incr(svm_sc->vm, vcpu,
+					VMEXIT_HLT_IGNORED, 1);
+				VMM_CTR0(svm_sc->vm, vcpu, 
+					"VMEXIT halt ignored.");
+			} else {
+				VMM_CTR0(svm_sc->vm, vcpu,
+					"VMEXIT halted CPU.");
+				vmexit->exitcode = VM_EXITCODE_HLT;
+				user = 1;
+
+			}
+			break;
+
+		case VMCB_EXIT_PAUSE:
+			VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT pause");
+			vmexit->exitcode = VM_EXITCODE_PAUSE;
+			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1);
+
+			break;
+
+		case VMCB_EXIT_NPF:
+			 /* EXITINFO2 has the physical fault address (GPA). */
+			if (!svm_handle_npf(svm_sc->vm, vcpu, info2,
+					state->rip, info1, state->cr3,
+					&vmexit->u.paging.vie)) {
+				/* I/O APIC for MSI/X. */
+				vmexit->exitcode = VM_EXITCODE_PAGING;
+				user = 1;
+				vmexit->u.paging.gpa = info2;
+			} else {
+				/* Local APIC NPF */
+				update_rip = 1;
+				vmm_stat_incr(svm_sc->vm, vcpu,
+						VMEXIT_NPF_LAPIC, 1);
+			}
+			
+			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EPT_FAULT, 1);
+			inst_len = vmexit->u.paging.vie.num_processed;
+ 			VMM_CTR3(svm_sc->vm, vcpu, "VMEXIT NPF, GPA:0x%lx "
+				"user=%d instr len=%d.\n", info2, user,
+				inst_len);
+			break;
+
+		case VMCB_EXIT_SHUTDOWN:
+			VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT guest shutdown.");
+			user = 1;
+			vmexit->exitcode = VM_EXITCODE_VMX;
+			break;
+
+		case VMCB_EXIT_INVALID:
+			VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT INVALID.");
+			user = 1;
+			vmexit->exitcode = VM_EXITCODE_VMX;
+			break;
+
+		default:
+			 /* Return to user space. */
+			user = 1;
+			update_rip = 0;
+			VMM_CTR3(svm_sc->vm, vcpu, "VMEXIT=0x%lx"
+				" EXITINFO1: 0x%lx EXITINFO2:0x%lx\n",
+		 		ctrl->exitcode, info1, info2);
+			VMM_CTR3(svm_sc->vm, vcpu, "SVM:RIP: 0x%lx nRIP:0x%lx"
+				" Inst decoder len:%d\n", state->rip,
+				ctrl->nrip, ctrl->inst_decode_size);
+			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
+			break;
+	}	
+
+	if (ctrl->v_irq) {
+		VMM_CTR2(svm_sc->vm, vcpu, "SVM:SVM intr pending vector:0x%x"
+		" priority:0x%x", ctrl->v_intr_vector, ctrl->v_intr_prio);
+	}
+
+	vmexit->rip = state->rip;
+	if (update_rip) {
+		vmexit->rip += inst_len;
+	}
+
+	/* Return to userland for APs to start. */
+	if (vmexit->exitcode == VM_EXITCODE_SPINUP_AP) {
+		VMM_CTR1(svm_sc->vm, vcpu, "SVM:Starting APs, RIP:0x%lx.\n",
+			vmexit->rip);
+		user = 1;
+	}
+
+	 /* XXX: Set next RIP before restarting virtual cpus. */
+	if (ctrl->nrip == 0) {
+		ctrl->nrip = state->rip;
+	}
+	
+	return (user);
+}
+
+/*
+ * Inject NMI to virtual cpu.
+ */
+static int
+svm_inject_nmi(struct svm_softc *svm_sc, int vcpu)
+{
+	struct vmcb_ctrl *ctrl;
+
+	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
+
+	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
+	/* Nothing to do if no NMI is pending for injection. */
+	if (!vm_nmi_pending(svm_sc->vm, vcpu))
+		return (0);
+
+	/* Inject NMI, vector number is not used. */
+	if (vmcb_eventinject(ctrl, VM_NMI, IDT_NMI, 0, FALSE)) {
+		VMM_CTR0(svm_sc->vm, vcpu, "SVM:NMI injection failed.\n");
+		return (EIO);
+	}
+
+	/* Acknowledge that the request has been accepted. */
+	vm_nmi_clear(svm_sc->vm, vcpu);
+
+	VMM_CTR0(svm_sc->vm, vcpu, "SVM:Injected NMI.\n");
+
+	return (1);
+}
+
+/*
+ * Inject event to virtual cpu.
+ */
+static void
+svm_inj_interrupts(struct svm_softc *svm_sc, int vcpu)
+{
+	struct vmcb_ctrl *ctrl;
+	struct vmcb_state *state;
+	int vector;
+	
+	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
+
+	state = svm_get_vmcb_state(svm_sc, vcpu);
+	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
+
+	/* Can't inject multiple events at once. */
+	if (ctrl->eventinj & VMCB_EVENTINJ_VALID) {
+		VMM_CTR1(svm_sc->vm, vcpu,
+			"SVM:Last event(0x%lx) is pending.\n", ctrl->eventinj);
+		return;
+	}
+
+	/* Wait for guest to come out of interrupt shadow. */
+	if (ctrl->intr_shadow) {
+		VMM_CTR0(svm_sc->vm, vcpu, "SVM:Guest in interrupt shadow.\n");
+		goto inject_failed;
+	}
+	
+	/* Make sure no interrupt is pending.*/
+	if (ctrl->v_irq) {
+		VMM_CTR0(svm_sc->vm, vcpu, 
+			"SVM:virtual interrupt is pending.\n");
+		goto inject_failed;
+	}
+
+	/* NMI event has priority over interrupts.*/

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


