Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 14 Sep 2014 04:39:04 +0000 (UTC)
From:      Neel Natu <neel@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r271570 - in projects/bhyve_svm: sys/amd64/include sys/amd64/vmm/amd usr.sbin/bhyve
Message-ID:  <201409140439.s8E4d43V064115@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: neel
Date: Sun Sep 14 04:39:04 2014
New Revision: 271570
URL: http://svnweb.freebsd.org/changeset/base/271570

Log:
  Set the 'vmexit->inst_length' field properly depending on the type of the
  VM-exit and ultimately on whether nRIP is valid. This allows us to update
  the %rip after the emulation is finished so any exceptions triggered during
  the emulation will point to the right instruction.
  
  Don't attempt to handle INS/OUTS VM-exits unless the DecodeAssist capability
  is available. The effective segment field in EXITINFO1 is not valid without
  this capability.
  
  Add VM_EXITCODE_SVM to flag SVM VM-exits that cannot be handled. Provide the
  VMCB fields exitinfo1 and exitinfo2 as collateral to help with debugging.
  
  Provide a SVM VM-exit handler to dump the exitcode, exitinfo1 and exitinfo2
  fields in bhyve(8).
  
  Reviewed by:	Anish Gupta (akgupt3@gmail.com)
  Reviewed by:	grehan

Modified:
  projects/bhyve_svm/sys/amd64/include/vmm.h
  projects/bhyve_svm/sys/amd64/vmm/amd/svm.c
  projects/bhyve_svm/usr.sbin/bhyve/bhyverun.c

Modified: projects/bhyve_svm/sys/amd64/include/vmm.h
==============================================================================
--- projects/bhyve_svm/sys/amd64/include/vmm.h	Sun Sep 14 03:01:18 2014	(r271569)
+++ projects/bhyve_svm/sys/amd64/include/vmm.h	Sun Sep 14 04:39:04 2014	(r271570)
@@ -485,6 +485,7 @@ enum vm_exitcode {
 	VM_EXITCODE_SUSPENDED,
 	VM_EXITCODE_INOUT_STR,
 	VM_EXITCODE_TASK_SWITCH,
+	VM_EXITCODE_SVM,
 	VM_EXITCODE_MAX
 };
 
@@ -562,6 +563,14 @@ struct vm_exit {
 			int		inst_type;
 			int		inst_error;
 		} vmx;
+		/*
+		 * SVM specific payload.
+		 */
+		struct {
+			uint64_t	exitcode;
+			uint64_t	exitinfo1;
+			uint64_t	exitinfo2;
+		} svm;
 		struct {
 			uint32_t	code;		/* ecx value */
 			uint64_t	wval;

Modified: projects/bhyve_svm/sys/amd64/vmm/amd/svm.c
==============================================================================
--- projects/bhyve_svm/sys/amd64/vmm/amd/svm.c	Sun Sep 14 03:01:18 2014	(r271569)
+++ projects/bhyve_svm/sys/amd64/vmm/amd/svm.c	Sun Sep 14 04:39:04 2014	(r271570)
@@ -735,11 +735,12 @@ svm_paging_info(struct vmcb *vmcb, struc
 	    state->efer);
 }
 
+#define	UNHANDLED 0
 
 /*
  * Handle guest I/O intercept.
  */
-static bool
+static int
 svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
 {
 	struct vmcb_ctrl *ctrl;
@@ -747,21 +748,34 @@ svm_handle_io(struct svm_softc *svm_sc, 
 	struct svm_regctx *regs;
 	struct vm_inout_str *vis;
 	uint64_t info1;
+	int inout_string;
 
 	state = svm_get_vmcb_state(svm_sc, vcpu);
 	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
 	regs  = svm_get_guest_regctx(svm_sc, vcpu);
+
 	info1 = ctrl->exitinfo1;
-	
+	inout_string = info1 & BIT(2) ? 1 : 0;
+
+	/*
+	 * The effective segment number in EXITINFO1[12:10] is populated
+	 * only if the processor has the DecodeAssist capability.
+	 *
+	 * XXX this is not specified explicitly in APMv2 but can be verified
+	 * empirically.
+	 */
+	if (inout_string && !decode_assist())
+		return (UNHANDLED);
+
 	vmexit->exitcode 	= VM_EXITCODE_INOUT;
 	vmexit->u.inout.in 	= (info1 & BIT(0)) ? 1 : 0;
-	vmexit->u.inout.string 	= (info1 & BIT(2)) ? 1 : 0;
+	vmexit->u.inout.string 	= inout_string;
 	vmexit->u.inout.rep 	= (info1 & BIT(3)) ? 1 : 0;
 	vmexit->u.inout.bytes 	= (info1 >> 4) & 0x7;
 	vmexit->u.inout.port 	= (uint16_t)(info1 >> 16);
 	vmexit->u.inout.eax 	= (uint32_t)(state->rax);
 
-	if (vmexit->u.inout.string) {
+	if (inout_string) {
 		vmexit->exitcode = VM_EXITCODE_INOUT_STR;
 		vis = &vmexit->u.inout_str;
 		svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &vis->paging);
@@ -773,8 +787,8 @@ svm_handle_io(struct svm_softc *svm_sc, 
 		svm_inout_str_seginfo(svm_sc, vcpu, info1,
 		    vmexit->u.inout.in, vis);
 	}
-	
-	return (false);
+
+	return (UNHANDLED);
 }
 
 static int
@@ -823,11 +837,6 @@ svm_handle_inst_emul(struct vmcb *vmcb, 
 	vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
 	svm_paging_info(vmcb, paging);
 
-	/*
-	 * The inst_length will be determined by decoding the instruction.
-	 */
-	vmexit->inst_length = 0;
-
 	seg = vmcb_seg(vmcb, VM_REG_GUEST_CS);
 	switch(paging->cpu_mode) {
 	case CPU_MODE_PROTECTED:
@@ -1054,122 +1063,167 @@ exit_reason_to_str(uint64_t reason)
 #endif	/* KTR */
 
 /*
- * Determine the cause of virtual cpu exit and handle VMEXIT.
- * Return: false - Break vcpu execution loop and handle vmexit
- *		   in kernel or user space.
- *	   true  - Continue vcpu run.
+ * From section "State Saved on Exit" in APMv2: nRIP is saved for all #VMEXITs
+ * that are due to instruction intercepts as well as MSR and IOIO intercepts
+ * and exceptions caused by INT3, INTO and BOUND instructions.
+ *
+ * Return 1 if the nRIP is valid and 0 otherwise.
  */
-static bool 
+static int
+nrip_valid(uint64_t exitcode)
+{
+	switch (exitcode) {
+	case 0x00 ... 0x0F:	/* read of CR0 through CR15 */
+	case 0x10 ... 0x1F:	/* write of CR0 through CR15 */
+	case 0x20 ... 0x2F:	/* read of DR0 through DR15 */
+	case 0x30 ... 0x3F:	/* write of DR0 through DR15 */
+	case 0x43:		/* INT3 */
+	case 0x44:		/* INTO */
+	case 0x45:		/* BOUND */
+	case 0x65 ... 0x7C:	/* VMEXIT_CR0_SEL_WRITE ... VMEXIT_MSR */
+	case 0x80 ... 0x8D:	/* VMEXIT_VMRUN ... VMEXIT_XSETBV */
+		return (1);
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Collateral for a generic SVM VM-exit.
+ */
+static void
+vm_exit_svm(struct vm_exit *vme, uint64_t code, uint64_t info1, uint64_t info2)
+{
+
+	vme->exitcode = VM_EXITCODE_SVM;
+	vme->u.svm.exitcode = code;
+	vme->u.svm.exitinfo1 = info1;
+	vme->u.svm.exitinfo2 = info2;
+}
+
+static int
 svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
 {
+	struct vmcb *vmcb;
 	struct vmcb_state *state;
 	struct vmcb_ctrl *ctrl;
 	struct svm_regctx *ctx;
 	uint64_t code, info1, info2, val;
 	uint32_t eax, ecx, edx;
-	bool update_rip, loop, retu;
+	int handled;
+	bool retu;
 
-	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));
+	ctx = svm_get_guest_regctx(svm_sc, vcpu);
+	vmcb = svm_get_vmcb(svm_sc, vcpu);
+	state = &vmcb->state;
+	ctrl = &vmcb->ctrl;
 
-	state = svm_get_vmcb_state(svm_sc, vcpu);
-	ctrl  = svm_get_vmcb_ctrl(svm_sc, vcpu);
-	ctx   = svm_get_guest_regctx(svm_sc, vcpu);
-	code  = ctrl->exitcode;
+	handled = 0;
+	code = ctrl->exitcode;
 	info1 = ctrl->exitinfo1;
 	info2 = ctrl->exitinfo2;
 
-	update_rip = true;
-	loop = true;
-	vmexit->exitcode = VM_EXITCODE_VMX;
-	vmexit->u.vmx.status = 0;
+	vmexit->exitcode = VM_EXITCODE_BOGUS;
+	vmexit->rip = state->rip;
+	vmexit->inst_length = nrip_valid(code) ? ctrl->nrip - state->rip : 0;
 
 	vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_COUNT, 1);
 
+	/*
+	 * #VMEXIT(INVALID) needs to be handled early because the VMCB is
+	 * in an inconsistent state and can trigger assertions that would
+	 * never happen otherwise.
+	 */
+	if (code == VMCB_EXIT_INVALID) {
+		vm_exit_svm(vmexit, code, info1, info2);
+		return (0);
+	}
+
 	KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0, ("%s: event "
 	    "injection valid bit is set %#lx", __func__, ctrl->eventinj));
 
+	KASSERT(vmexit->inst_length >= 0 && vmexit->inst_length <= 15,
+	    ("invalid inst_length %d: code (%#lx), info1 (%#lx), info2 (%#lx)",
+	    vmexit->inst_length, code, info1, info2));
+
 	svm_save_intinfo(svm_sc, vcpu);
 
 	switch (code) {
-	case VMCB_EXIT_VINTR:
-		update_rip = false;
+	case VMCB_EXIT_VINTR:	/* interrupt window exiting */
 		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_VINTR, 1);
+		handled = 1;
 		break;
-	case VMCB_EXIT_MC:	/* Machine Check. */
-		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1);
-		vmexit->exitcode = VM_EXITCODE_MTRAP;
-		loop = false;
+	case VMCB_EXIT_INTR:	/* external interrupt */
+		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
+		handled = 1;
+		break;
+	case VMCB_EXIT_NMI:	/* external NMI */
+		handled = 1;
+		break;
+	case VMCB_EXIT_MC:	/* machine check */
+		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1);
 		break;
 	case VMCB_EXIT_MSR:	/* MSR access. */
 		eax = state->rax;
 		ecx = ctx->sctx_rcx;
 		edx = ctx->e.g.sctx_rdx;
+		retu = false;	
 
 		if (ecx == MSR_EFER) {
-			KASSERT(info1 != 0, ("rdmsr(MSR_EFER) is not "
-			    "emulated: info1(%#lx) info2(%#lx)",
-			    info1, info2));
+			KASSERT(info1 != 0, ("rdmsr(MSR_EFER) is not emulated: "
+			    "info1(%#lx) info2(%#lx)", info1, info2));
 			svm_write_efer(svm_sc, vcpu, edx, eax);
+			handled = 1;
+			break;
+		}
+
+#define MSR_AMDK8_IPM           0xc0010055
+		/*
+		 * Ignore access to the "Interrupt Pending Message" MSR.
+		 */
+		if (ecx == MSR_AMDK8_IPM) {
+			if (!info1)
+				state->rax = ctx->e.g.sctx_rdx = 0;
+			handled = 1;
 			break;
 		}
 
-		retu = false;	
 		if (info1) {
-			/* VM exited because of write MSR */
 			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1);
-			vmexit->exitcode = VM_EXITCODE_WRMSR;
-			vmexit->u.msr.code = ecx;
 			val = (uint64_t)edx << 32 | eax;
+			VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %#x val %#lx",
+			    ecx, val);
 			if (emulate_wrmsr(svm_sc->vm, vcpu, ecx, val, &retu)) {
+				vmexit->exitcode = VM_EXITCODE_WRMSR;
+				vmexit->u.msr.code = ecx;
 				vmexit->u.msr.wval = val;
-				loop = false;
-			} else
-				loop = retu ? false : true;
-			VCPU_CTR3(svm_sc->vm, vcpu,
-			    "VMEXIT WRMSR(%s handling) 0x%lx @0x%x",
-			    loop ? "kernel" : "user", val, ecx);
+			} else if (!retu) {
+				handled = 1;
+			} else {
+				KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+				    ("emulate_wrmsr retu with bogus exitcode"));
+			}
 		} else {
+			VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %#x", ecx);
 			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1);
-			vmexit->exitcode = VM_EXITCODE_RDMSR;
-			vmexit->u.msr.code = ecx;
-			if (emulate_rdmsr(svm_sc->vm, vcpu, ecx, &retu))
-				loop = false; 
-			else
-				loop = retu ? false : true;
-			VCPU_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR"
-			    " MSB=0x%08x, LSB=%08x @0x%x", 
-			    ctx->e.g.sctx_rdx, state->rax, ecx);
+			if (emulate_rdmsr(svm_sc->vm, vcpu, ecx, &retu)) {
+				vmexit->exitcode = VM_EXITCODE_RDMSR;
+				vmexit->u.msr.code = ecx;
+			} else if (!retu) {
+				handled = 1;
+			} else {
+				KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
+				    ("emulate_rdmsr retu with bogus exitcode"));
+			}
 		}
-
-#define MSR_AMDK8_IPM           0xc0010055
-		/*
-		 * We can't hide AMD C1E idle capability since its
-		 * based on CPU generation, for now ignore access to
-		 * this MSR by vcpus
-		 * XXX: special handling of AMD C1E - Ignore.
-		 */
-		 if (ecx == MSR_AMDK8_IPM)
-			loop = true;
-		break;
-	case VMCB_EXIT_INTR:
-		/*
-		 * Exit on External Interrupt.
-		 * Give host interrupt handler to run and if its guest
-		 * interrupt, local APIC will inject event in guest.
-		 */
-		update_rip = false;
-		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);
-		break;
-	case VMCB_EXIT_NMI:
-		update_rip = false;
 		break;
 	case VMCB_EXIT_IO:
-		loop = svm_handle_io(svm_sc, vcpu, vmexit);
+		handled = svm_handle_io(svm_sc, vcpu, vmexit);
 		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
 		break;
 	case VMCB_EXIT_CPUID:
 		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_CPUID, 1);
-		loop = x86_emulate_cpuid(svm_sc->vm, vcpu,
+		handled = x86_emulate_cpuid(svm_sc->vm, vcpu,
 		    (uint32_t *)&state->rax,
 		    (uint32_t *)&ctx->sctx_rbx,
 		    (uint32_t *)&ctx->sctx_rcx,
@@ -1179,25 +1233,18 @@ svm_vmexit(struct svm_softc *svm_sc, int
 		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1);
 		vmexit->exitcode = VM_EXITCODE_HLT;
 		vmexit->u.hlt.rflags = state->rflags;
-		loop = false;
 		break;
 	case VMCB_EXIT_PAUSE:
 		vmexit->exitcode = VM_EXITCODE_PAUSE;
 		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_PAUSE, 1);
-		loop = false;
 		break;
 	case VMCB_EXIT_NPF:
-		loop = false;
-		update_rip = false;
+		/* EXITINFO2 contains the faulting guest physical address */
 		if (info1 & VMCB_NPF_INFO1_RSV) {
 			VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with "
 			    "reserved bits set: info1(%#lx) info2(%#lx)",
 			    info1, info2);
-			break;
-		}
-
-		/* EXITINFO2 has the physical fault address (GPA). */
-		if(vm_mem_allocated(svm_sc->vm, info2)) {
+		} else if (vm_mem_allocated(svm_sc->vm, info2)) {
 			vmexit->exitcode = VM_EXITCODE_PAGING;
 			vmexit->u.paging.gpa = info2;
 			vmexit->u.paging.fault_type = svm_npf_paging(info1);
@@ -1206,54 +1253,41 @@ svm_vmexit(struct svm_softc *svm_sc, int
 			    "on gpa %#lx/%#lx at rip %#lx",
 			    info2, info1, state->rip);
 		} else if (svm_npf_emul_fault(info1)) {
-			svm_handle_inst_emul(svm_get_vmcb(svm_sc, vcpu),
-				info2, vmexit);
+			svm_handle_inst_emul(vmcb, info2, vmexit);
 			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INST_EMUL, 1);
 			VCPU_CTR3(svm_sc->vm, vcpu, "inst_emul fault "
 			    "for gpa %#lx/%#lx at rip %#lx",
 			    info2, info1, state->rip);
 		}
 		break;
-	case VMCB_EXIT_SHUTDOWN:
-		loop = false;
-		break;
-	case VMCB_EXIT_INVALID:
-		loop = false;
-		break;
 	default:
-		/* Return to user space. */
-		loop = false;
-		update_rip = false;
-		VCPU_CTR3(svm_sc->vm, vcpu, "VMEXIT=0x%lx"
-			" EXITINFO1: 0x%lx EXITINFO2:0x%lx\n",
-			ctrl->exitcode, info1, info2);
-		VCPU_CTR3(svm_sc->vm, vcpu, "SVM:RIP: 0x%lx nRIP:0x%lx"
-			" Inst decoder len:%d\n", state->rip,
-			ctrl->nrip, ctrl->inst_len);
 		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
 		break;
 	}	
 
-	VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx nrip %#lx",
-	    loop ? "handled" : "unhandled", exit_reason_to_str(code),
-	    state->rip, update_rip ? ctrl->nrip : state->rip);
+	VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d",
+	    handled ? "handled" : "unhandled", exit_reason_to_str(code),
+	    vmexit->rip, vmexit->inst_length);
 
-	vmexit->rip = state->rip;
-	if (update_rip) {
-		if (ctrl->nrip == 0) {
- 			VCPU_CTR1(svm_sc->vm, vcpu, "SVM_ERR:nRIP is not set "
-				 "for RIP0x%lx.\n", state->rip);
-			vmexit->exitcode = VM_EXITCODE_VMX;
-		} else 
-			vmexit->rip = ctrl->nrip;
-	}
-
-	/* If vcpu execution is continued, update RIP. */
-	if (loop) {
+	if (handled) {
+		vmexit->rip += vmexit->inst_length;
+		vmexit->inst_length = 0;
 		state->rip = vmexit->rip;
+	} else {
+		if (vmexit->exitcode == VM_EXITCODE_BOGUS) {
+			/*
+			 * If this VM exit was not claimed by anybody then
+			 * treat it as a generic SVM exit.
+			 */
+			vm_exit_svm(vmexit, code, info1, info2);
+		} else {
+			/*
+			 * The exitcode and collateral have been populated.
+			 * The VM exit will be processed further in userland.
+			 */
+		}
 	}
-
-	return (loop);
+	return (handled);
 }
 
 static void
@@ -1562,9 +1596,8 @@ svm_vmrun(void *arg, int vcpu, register_
 	struct vm *vm;
 	uint64_t vmcb_pa;
 	u_int thiscpu;
-	bool loop;	/* Continue vcpu execution loop. */
+	int handled;
 
-	loop = true;
 	svm_sc = arg;
 	vm = svm_sc->vm;
 
@@ -1616,8 +1649,6 @@ svm_vmrun(void *arg, int vcpu, register_
 	state->rip = rip;
 
 	do {
-		vmexit->inst_length = 0;
-
 		/*
 		 * Disable global interrupts to guarantee atomicity during
 		 * loading of guest state. This includes not only the state
@@ -1692,8 +1723,8 @@ svm_vmrun(void *arg, int vcpu, register_
 		enable_gintr();
 
 		/* Handle #VMEXIT and if required return to user space. */
-		loop = svm_vmexit(svm_sc, vcpu, vmexit);
-	} while (loop);
+		handled = svm_vmexit(svm_sc, vcpu, vmexit);
+	} while (handled);
 
 	return (0);
 }

Modified: projects/bhyve_svm/usr.sbin/bhyve/bhyverun.c
==============================================================================
--- projects/bhyve_svm/usr.sbin/bhyve/bhyverun.c	Sun Sep 14 03:01:18 2014	(r271569)
+++ projects/bhyve_svm/usr.sbin/bhyve/bhyverun.c	Sun Sep 14 04:39:04 2014	(r271570)
@@ -445,6 +445,20 @@ vmexit_vmx(struct vmctx *ctx, struct vm_
 }
 
 static int
+vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
+	fprintf(stderr, "\treason\t\tSVM\n");
+	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
+	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
+	fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode);
+	fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1);
+	fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2);
+	return (VMEXIT_ABORT);
+}
+
+static int
 vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
@@ -555,6 +569,7 @@ static vmexit_handler_t handler[VM_EXITC
 	[VM_EXITCODE_INOUT]  = vmexit_inout,
 	[VM_EXITCODE_INOUT_STR]  = vmexit_inout,
 	[VM_EXITCODE_VMX]    = vmexit_vmx,
+	[VM_EXITCODE_SVM]    = vmexit_svm,
 	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
 	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
 	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201409140439.s8E4d43V064115>