Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 11 Feb 2013 20:36:08 +0000 (UTC)
From:      Neel Natu <neel@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r246686 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/io usr.sbin/bhyve usr.sbin/bhyvectl
Message-ID:  <201302112036.r1BKa8PW037243@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: neel
Date: Mon Feb 11 20:36:07 2013
New Revision: 246686
URL: http://svnweb.freebsd.org/changeset/base/246686

Log:
  Implement guest vcpu pinning using 'pthread_setaffinity_np(3)'.
  
  Prior to this change, pinning was implemented via an ioctl (VM_SET_PINNING)
  that called 'sched_bind()' on behalf of the user thread.
  
  The ULE implementation of 'sched_bind()' bumps up 'td_pinned', which in turn
  runs afoul of the assertion '(td_pinned == 0)' in userret().
  
  Using the cpuset affinity to implement pinning of the vcpu threads works with
  both 4BSD and ULE schedulers and has the happy side-effect of getting rid
  of a bunch of code in vmm.ko.
  
  Discussed with:	grehan

Modified:
  head/lib/libvmmapi/vmmapi.c
  head/lib/libvmmapi/vmmapi.h
  head/sys/amd64/include/vmm.h
  head/sys/amd64/include/vmm_dev.h
  head/sys/amd64/vmm/io/ppt.c
  head/sys/amd64/vmm/vmm.c
  head/sys/amd64/vmm/vmm_dev.c
  head/usr.sbin/bhyve/bhyverun.c
  head/usr.sbin/bhyvectl/bhyvectl.c

Modified: head/lib/libvmmapi/vmmapi.c
==============================================================================
--- head/lib/libvmmapi/vmmapi.c	Mon Feb 11 19:23:23 2013	(r246685)
+++ head/lib/libvmmapi/vmmapi.c	Mon Feb 11 20:36:07 2013	(r246686)
@@ -250,34 +250,6 @@ vm_get_register(struct vmctx *ctx, int v
 }
 
 int
-vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid)
-{
-	int error;
-	struct vm_pin vmpin;
-
-	bzero(&vmpin, sizeof(vmpin));
-	vmpin.vm_cpuid = vcpu;
-
-	error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin);
-	*host_cpuid = vmpin.host_cpuid;
-	return (error);
-}
-
-int
-vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid)
-{
-	int error;
-	struct vm_pin vmpin;
-
-	bzero(&vmpin, sizeof(vmpin));
-	vmpin.vm_cpuid = vcpu;
-	vmpin.host_cpuid = host_cpuid;
-
-	error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin);
-	return (error);
-}
-
-int
 vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
 {
 	int error;

Modified: head/lib/libvmmapi/vmmapi.h
==============================================================================
--- head/lib/libvmmapi/vmmapi.h	Mon Feb 11 19:23:23 2013	(r246685)
+++ head/lib/libvmmapi/vmmapi.h	Mon Feb 11 20:36:07 2013	(r246686)
@@ -56,8 +56,6 @@ int	vm_get_desc(struct vmctx *ctx, int v
 		    uint64_t *base, uint32_t *limit, uint32_t *access);
 int	vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
 int	vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
-int	vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid);
-int	vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid);
 int	vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
 	       struct vm_exit *ret_vmexit);
 int	vm_apicid2vcpu(struct vmctx *ctx, int apicid);

Modified: head/sys/amd64/include/vmm.h
==============================================================================
--- head/sys/amd64/include/vmm.h	Mon Feb 11 19:23:23 2013	(r246685)
+++ head/sys/amd64/include/vmm.h	Mon Feb 11 20:36:07 2013	(r246686)
@@ -102,8 +102,6 @@ int vm_get_seg_desc(struct vm *vm, int v
 		    struct seg_desc *ret_desc);
 int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 		    struct seg_desc *desc);
-int vm_get_pinning(struct vm *vm, int vcpu, int *cpuid);
-int vm_set_pinning(struct vm *vm, int vcpu, int cpuid);
 int vm_run(struct vm *vm, struct vm_run *vmrun);
 int vm_inject_event(struct vm *vm, int vcpu, int type,
 		    int vector, uint32_t error_code, int error_code_valid);

Modified: head/sys/amd64/include/vmm_dev.h
==============================================================================
--- head/sys/amd64/include/vmm_dev.h	Mon Feb 11 19:23:23 2013	(r246685)
+++ head/sys/amd64/include/vmm_dev.h	Mon Feb 11 20:36:07 2013	(r246686)
@@ -51,11 +51,6 @@ struct vm_seg_desc {			/* data or code s
 	struct seg_desc desc;
 };
 
-struct vm_pin {
-	int		vm_cpuid;
-	int		host_cpuid;	/* -1 to unpin */
-};
-
 struct vm_run {
 	int		cpuid;
 	uint64_t	rip;		/* start running here */
@@ -142,8 +137,6 @@ struct vm_x2apic {
 
 enum {
 	IOCNUM_RUN,
-	IOCNUM_SET_PINNING,
-	IOCNUM_GET_PINNING,
 	IOCNUM_MAP_MEMORY,
 	IOCNUM_GET_MEMORY_SEG,
 	IOCNUM_SET_REGISTER,
@@ -168,10 +161,6 @@ enum {
 
 #define	VM_RUN		\
 	_IOWR('v', IOCNUM_RUN, struct vm_run)
-#define	VM_SET_PINNING	\
-	_IOW('v', IOCNUM_SET_PINNING, struct vm_pin)
-#define	VM_GET_PINNING	\
-	_IOWR('v', IOCNUM_GET_PINNING, struct vm_pin)
 #define	VM_MAP_MEMORY	\
 	_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
 #define	VM_GET_MEMORY_SEG \

Modified: head/sys/amd64/vmm/io/ppt.c
==============================================================================
--- head/sys/amd64/vmm/io/ppt.c	Mon Feb 11 19:23:23 2013	(r246685)
+++ head/sys/amd64/vmm/io/ppt.c	Mon Feb 11 20:36:07 2013	(r246686)
@@ -402,31 +402,6 @@ pptintr(void *arg)
 		return (FILTER_HANDLED);
 }
 
-/*
- * XXX
- * When we try to free the MSI resource the kernel will bind the thread to
- * the host cpu was originally handling the MSI. The function freeing the
- * MSI vector (apic_free_vector()) will panic the kernel if the thread
- * is already bound to a cpu.
- * 
- * So, we temporarily unbind the vcpu thread before freeing the MSI resource.
- */
-static void
-PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
-{
-	int pincpu = -1;
-
-	vm_get_pinning(vm, vcpu, &pincpu);
-
-	if (pincpu >= 0)
-		vm_set_pinning(vm, vcpu, -1);
-
-	ppt_teardown_msi(ppt);
-
-	if (pincpu >= 0)
-		vm_set_pinning(vm, vcpu, pincpu);
-}
-
 int
 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
 	      int destcpu, int vector, int numvec)
@@ -447,7 +422,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, i
 		return (EBUSY);
 
 	/* Free any allocated resources */
-	PPT_TEARDOWN_MSI(vm, vcpu, ppt);
+	ppt_teardown_msi(ppt);
 
 	if (numvec == 0)		/* nothing more to do */
 		return (0);
@@ -513,7 +488,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, i
 	}
 	
 	if (i < numvec) {
-		PPT_TEARDOWN_MSI(vm, vcpu, ppt);
+		ppt_teardown_msi(ppt);
 		return (ENXIO);
 	}
 

Modified: head/sys/amd64/vmm/vmm.c
==============================================================================
--- head/sys/amd64/vmm/vmm.c	Mon Feb 11 19:23:23 2013	(r246685)
+++ head/sys/amd64/vmm/vmm.c	Mon Feb 11 20:36:07 2013	(r246686)
@@ -70,7 +70,6 @@ struct vcpu {
 	int		flags;
 	enum vcpu_state	state;
 	struct mtx	mtx;
-	int		pincpu;		/* host cpuid this vcpu is bound to */
 	int		hostcpu;	/* host cpuid this vcpu last ran on */
 	uint64_t	guest_msrs[VMM_MSR_NUM];
 	struct vlapic	*vlapic;
@@ -81,18 +80,6 @@ struct vcpu {
 	enum x2apic_state x2apic_state;
 	int		nmi_pending;
 };
-#define	VCPU_F_PINNED	0x0001
-
-#define	VCPU_PINCPU(vm, vcpuid)	\
-    ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)
-
-#define	VCPU_UNPIN(vm, vcpuid)	(vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED)
-
-#define	VCPU_PIN(vm, vcpuid, host_cpuid)				\
-do {									\
-	vm->vcpu[vcpuid].flags |= VCPU_F_PINNED;			\
-	vm->vcpu[vcpuid].pincpu = host_cpuid;				\
-} while(0)
 
 #define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
 #define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
@@ -594,52 +581,6 @@ vm_set_seg_desc(struct vm *vm, int vcpu,
 	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
 }
 
-int
-vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
-{
-
-	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
-		return (EINVAL);
-
-	*cpuid = VCPU_PINCPU(vm, vcpuid);
-
-	return (0);
-}
-
-int
-vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
-{
-	struct thread *td;
-
-	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
-		return (EINVAL);
-
-	td = curthread;		/* XXXSMP only safe when muxing vcpus */
-
-	/* unpin */
-	if (host_cpuid < 0) {
-		VCPU_UNPIN(vm, vcpuid);
-		thread_lock(td);
-		sched_unbind(td);
-		thread_unlock(td);
-		return (0);
-	}
-
-	if (CPU_ABSENT(host_cpuid))
-		return (EINVAL);
-
-	/*
-	 * XXX we should check that 'host_cpuid' has not already been pinned
-	 * by another vm.
-	 */
-	thread_lock(td);
-	sched_bind(td, host_cpuid);
-	thread_unlock(td);
-	VCPU_PIN(vm, vcpuid, host_cpuid);
-
-	return (0);
-}
-
 static void
 restore_guest_fpustate(struct vcpu *vcpu)
 {

Modified: head/sys/amd64/vmm/vmm_dev.c
==============================================================================
--- head/sys/amd64/vmm/vmm_dev.c	Mon Feb 11 19:23:23 2013	(r246685)
+++ head/sys/amd64/vmm/vmm_dev.c	Mon Feb 11 20:36:07 2013	(r246686)
@@ -144,7 +144,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 	struct vm_memory_segment *seg;
 	struct vm_register *vmreg;
 	struct vm_seg_desc* vmsegdesc;
-	struct vm_pin *vmpin;
 	struct vm_run *vmrun;
 	struct vm_event *vmevent;
 	struct vm_lapic_irq *vmirq;
@@ -170,7 +169,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 	 */
 	switch (cmd) {
 	case VM_RUN:
-	case VM_SET_PINNING:
 	case VM_GET_REGISTER:
 	case VM_SET_REGISTER:
 	case VM_GET_SEGMENT_DESCRIPTOR:
@@ -301,16 +299,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
 		break;
-	case VM_SET_PINNING:
-		vmpin = (struct vm_pin *)data;
-		error = vm_set_pinning(sc->vm, vmpin->vm_cpuid,
-				       vmpin->host_cpuid);
-		break;
-	case VM_GET_PINNING:
-		vmpin = (struct vm_pin *)data;
-		error = vm_get_pinning(sc->vm, vmpin->vm_cpuid,
-				       &vmpin->host_cpuid);
-		break;
 	case VM_MAP_MEMORY:
 		seg = (struct vm_memory_segment *)data;
 		error = vm_malloc(sc->vm, seg->gpa, seg->len);

Modified: head/usr.sbin/bhyve/bhyverun.c
==============================================================================
--- head/usr.sbin/bhyve/bhyverun.c	Mon Feb 11 19:23:23 2013	(r246685)
+++ head/usr.sbin/bhyve/bhyverun.c	Mon Feb 11 20:36:07 2013	(r246686)
@@ -520,13 +520,17 @@ static vmexit_handler_t handler[VM_EXITC
 static void
 vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
 {
+	cpuset_t mask;
 	int error, rc, prevcpu;
 
 	if (guest_vcpu_mux)
 		setup_timeslice();
 
 	if (pincpu >= 0) {
-		error = vm_set_pinning(ctx, vcpu, pincpu + vcpu);
+		CPU_ZERO(&mask);
+		CPU_SET(pincpu + vcpu, &mask);
+		error = pthread_setaffinity_np(pthread_self(),
+					       sizeof(mask), &mask);
 		assert(error == 0);
 	}
 

Modified: head/usr.sbin/bhyvectl/bhyvectl.c
==============================================================================
--- head/usr.sbin/bhyvectl/bhyvectl.c	Mon Feb 11 19:23:23 2013	(r246685)
+++ head/usr.sbin/bhyvectl/bhyvectl.c	Mon Feb 11 20:36:07 2013	(r246686)
@@ -183,8 +183,6 @@ usage(void)
 	"       [--get-vmcs-exit-interruption-info]\n"
 	"       [--get-vmcs-exit-interruption-error]\n"
 	"       [--get-vmcs-interruptibility]\n"
-	"       [--set-pinning=<host_cpuid>]\n"
-	"       [--get-pinning]\n"
 	"       [--set-x2apic-state=<state>]\n"
 	"       [--get-x2apic-state]\n"
 	"       [--set-lowmem=<memory below 4GB in units of MB>]\n"
@@ -218,7 +216,6 @@ static int set_desc_tr, get_desc_tr;
 static int set_desc_ldtr, get_desc_ldtr;
 static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr;
 static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr;
-static int set_pinning, get_pinning, pincpu;
 static int set_x2apic_state, get_x2apic_state;
 enum x2apic_state x2apic_state;
 static int run;
@@ -374,7 +371,6 @@ enum {
 	SET_SS,
 	SET_TR,
 	SET_LDTR,
-	SET_PINNING,
 	SET_X2APIC_STATE,
 	SET_VMCS_EXCEPTION_BITMAP,
 	SET_VMCS_ENTRY_INTERRUPTION_INFO,
@@ -423,7 +419,6 @@ main(int argc, char *argv[])
 		{ "set-ss",	REQ_ARG,	0,	SET_SS },
 		{ "set-tr",	REQ_ARG,	0,	SET_TR },
 		{ "set-ldtr",	REQ_ARG,	0,	SET_LDTR },
-		{ "set-pinning",REQ_ARG,	0,	SET_PINNING },
 		{ "set-x2apic-state",REQ_ARG,	0,	SET_X2APIC_STATE },
 		{ "set-vmcs-exception-bitmap",
 				REQ_ARG,	0, SET_VMCS_EXCEPTION_BITMAP },
@@ -552,7 +547,6 @@ main(int argc, char *argv[])
 				NO_ARG,	&get_vmcs_exit_interruption_error, 1},
 		{ "get-vmcs-interruptibility",
 				NO_ARG, &get_vmcs_interruptibility, 1 },
-		{ "get-pinning",NO_ARG,		&get_pinning,	1 },
 		{ "get-x2apic-state",NO_ARG,	&get_x2apic_state, 1 },
 		{ "get-all",	NO_ARG,		&get_all,	1 },
 		{ "run",	NO_ARG,		&run,		1 },
@@ -659,10 +653,6 @@ main(int argc, char *argv[])
 			ldtr = strtoul(optarg, NULL, 0);
 			set_ldtr = 1;
 			break;
-		case SET_PINNING:
-			pincpu = strtol(optarg, NULL, 0);
-			set_pinning = 1;
-			break;
 		case SET_X2APIC_STATE:
 			x2apic_state = strtol(optarg, NULL, 0);
 			set_x2apic_state = 1;
@@ -812,9 +802,6 @@ main(int argc, char *argv[])
 	if (!error && set_ldtr)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr);
 
-	if (!error && set_pinning)
-		error = vm_set_pinning(ctx, vcpu, pincpu);
-
 	if (!error && set_x2apic_state)
 		error = vm_set_x2apic_state(ctx, vcpu, x2apic_state);
 
@@ -1135,16 +1122,6 @@ main(int argc, char *argv[])
 			printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr);
 	}
 
-	if (!error && (get_pinning || get_all)) {
-		error = vm_get_pinning(ctx, vcpu, &pincpu);
-		if (error == 0) {
-			if (pincpu < 0)
-				printf("pincpu[%d]\tunpinned\n", vcpu);
-			else
-				printf("pincpu[%d]\t%d\n", vcpu, pincpu);
-		}
-	}
-
 	if (!error && (get_x2apic_state || get_all)) {
 		error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state);
 		if (error == 0)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201302112036.r1BKa8PW037243>