Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 16 Dec 2013 19:59:32 +0000 (UTC)
From:      Neel Natu <neel@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r259482 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/io usr.sbin/bhyve
Message-ID:  <201312161959.rBGJxWp2091559@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: neel
Date: Mon Dec 16 19:59:31 2013
New Revision: 259482
URL: http://svnweb.freebsd.org/changeset/base/259482

Log:
  Add an API to deliver message signalled interrupts to vcpus. This allows
  callers treat the MSI 'addr' and 'data' fields as opaque and also lets
  bhyve implement multiple destination modes: physical, flat and clustered.
  
  Submitted by:	Tycho Nightingale (tycho.nightingale@pluribusnetworks.com)
  Reviewed by:	grehan@

Modified:
  head/lib/libvmmapi/vmmapi.c
  head/lib/libvmmapi/vmmapi.h
  head/sys/amd64/include/vmm_dev.h
  head/sys/amd64/vmm/io/ppt.c
  head/sys/amd64/vmm/io/ppt.h
  head/sys/amd64/vmm/io/vhpet.c
  head/sys/amd64/vmm/io/vioapic.c
  head/sys/amd64/vmm/io/vlapic.c
  head/sys/amd64/vmm/io/vlapic.h
  head/sys/amd64/vmm/vmm_dev.c
  head/sys/amd64/vmm/vmm_lapic.c
  head/sys/amd64/vmm/vmm_lapic.h
  head/usr.sbin/bhyve/pci_emul.c
  head/usr.sbin/bhyve/pci_emul.h
  head/usr.sbin/bhyve/pci_passthru.c

Modified: head/lib/libvmmapi/vmmapi.c
==============================================================================
--- head/lib/libvmmapi/vmmapi.c	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/lib/libvmmapi/vmmapi.c	Mon Dec 16 19:59:31 2013	(r259482)
@@ -397,6 +397,18 @@ vm_lapic_irq(struct vmctx *ctx, int vcpu
 }
 
 int
+vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
+{
+	struct vm_lapic_msi vmmsi;
+
+	bzero(&vmmsi, sizeof(vmmsi));
+	vmmsi.addr = addr;
+	vmmsi.msg = msg;
+
+	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
+}
+
+int
 vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
@@ -552,7 +564,7 @@ vm_map_pptdev_mmio(struct vmctx *ctx, in
 
 int
 vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-	     int destcpu, int vector, int numvec)
+	     uint64_t addr, uint64_t msg, int numvec)
 {
 	struct vm_pptdev_msi pptmsi;
 
@@ -561,8 +573,8 @@ vm_setup_msi(struct vmctx *ctx, int vcpu
 	pptmsi.bus = bus;
 	pptmsi.slot = slot;
 	pptmsi.func = func;
-	pptmsi.destcpu = destcpu;
-	pptmsi.vector = vector;
+	pptmsi.msg = msg;
+	pptmsi.addr = addr;
 	pptmsi.numvec = numvec;
 
 	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
@@ -570,7 +582,7 @@ vm_setup_msi(struct vmctx *ctx, int vcpu
 
 int	
 vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-	      int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
+	      int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct vm_pptdev_msix pptmsix;
 

Modified: head/lib/libvmmapi/vmmapi.h
==============================================================================
--- head/lib/libvmmapi/vmmapi.h	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/lib/libvmmapi/vmmapi.h	Mon Dec 16 19:59:31 2013	(r259482)
@@ -67,6 +67,7 @@ int	vm_inject_event(struct vmctx *ctx, i
 int	vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type,
 			 int vector, int error_code);
 int	vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
+int	vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
 int	vm_ioapic_assert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_deassert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pulse_irq(struct vmctx *ctx, int irq);
@@ -82,9 +83,9 @@ int	vm_unassign_pptdev(struct vmctx *ctx
 int	vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 			   vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int	vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-		     int dest, int vector, int numvec);
+	    uint64_t addr, uint64_t msg, int numvec);
 int	vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
-		      int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
+	    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control);
 
 /*
  * Return a pointer to the statistics buffer. Note that this is not MT-safe.

Modified: head/sys/amd64/include/vmm_dev.h
==============================================================================
--- head/sys/amd64/include/vmm_dev.h	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/include/vmm_dev.h	Mon Dec 16 19:59:31 2013	(r259482)
@@ -66,6 +66,11 @@ struct vm_event {
 	int		error_code_valid;
 };
 
+struct vm_lapic_msi {
+	uint64_t	msg;
+	uint64_t	addr;
+};
+
 struct vm_lapic_irq {
 	int		cpuid;
 	int		vector;
@@ -103,8 +108,8 @@ struct vm_pptdev_msi {
 	int		slot;
 	int		func;
 	int		numvec;		/* 0 means disabled */
-	int		vector;
-	int		destcpu;
+	uint64_t	msg;
+	uint64_t	addr;
 };
 
 struct vm_pptdev_msix {
@@ -113,7 +118,7 @@ struct vm_pptdev_msix {
 	int		slot;
 	int		func;
 	int		idx;
-	uint32_t	msg;
+	uint64_t	msg;
 	uint32_t	vector_control;
 	uint64_t	addr;
 };
@@ -175,6 +180,7 @@ enum {
 	IOCNUM_IOAPIC_ASSERT_IRQ = 33,
 	IOCNUM_IOAPIC_DEASSERT_IRQ = 34,
 	IOCNUM_IOAPIC_PULSE_IRQ = 35,
+	IOCNUM_LAPIC_MSI = 36,
 
 	/* PCI pass-thru */
 	IOCNUM_BIND_PPTDEV = 40,
@@ -211,6 +217,8 @@ enum {
 	_IOW('v', IOCNUM_INJECT_EVENT, struct vm_event)
 #define	VM_LAPIC_IRQ 		\
 	_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
+#define	VM_LAPIC_MSI		\
+	_IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi)
 #define	VM_IOAPIC_ASSERT_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_DEASSERT_IRQ	\

Modified: head/sys/amd64/vmm/io/ppt.c
==============================================================================
--- head/sys/amd64/vmm/io/ppt.c	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/vmm/io/ppt.c	Mon Dec 16 19:59:31 2013	(r259482)
@@ -72,8 +72,8 @@ MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Pas
 
 struct pptintr_arg {				/* pptintr(pptintr_arg) */
 	struct pptdev	*pptdev;
-	int		vec;
-	int 		vcpu;
+	uint64_t	addr;
+	uint64_t	msg_data;
 };
 
 static struct pptdev {
@@ -412,16 +412,14 @@ ppt_map_mmio(struct vm *vm, int bus, int
 static int
 pptintr(void *arg)
 {
-	int vec;
 	struct pptdev *ppt;
 	struct pptintr_arg *pptarg;
 	
 	pptarg = arg;
 	ppt = pptarg->pptdev;
-	vec = pptarg->vec;
 
 	if (ppt->vm != NULL)
-		lapic_intr_edge(ppt->vm, pptarg->vcpu, vec);
+		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
 	else {
 		/*
 		 * XXX
@@ -441,15 +439,13 @@ pptintr(void *arg)
 
 int
 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
-	      int destcpu, int vector, int numvec)
+	      uint64_t addr, uint64_t msg, int numvec)
 {
 	int i, rid, flags;
 	int msi_count, startrid, error, tmp;
 	struct pptdev *ppt;
 
-	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
-	    (vector < 0 || vector > 255) ||
-	    (numvec < 0 || numvec > MAX_MSIMSGS))
+	if (numvec < 0 || numvec > MAX_MSIMSGS)
 		return (EINVAL);
 
 	ppt = ppt_find(bus, slot, func);
@@ -513,8 +509,8 @@ ppt_setup_msi(struct vm *vm, int vcpu, i
 			break;
 
 		ppt->msi.arg[i].pptdev = ppt;
-		ppt->msi.arg[i].vec = vector + i;
-		ppt->msi.arg[i].vcpu = destcpu;
+		ppt->msi.arg[i].addr = addr;
+		ppt->msi.arg[i].msg_data = msg + i;
 
 		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
 				       INTR_TYPE_NET | INTR_MPSAFE,
@@ -534,7 +530,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, i
 
 int
 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
-	       int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
+	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct pptdev *ppt;
 	struct pci_devinfo *dinfo;
@@ -605,8 +601,8 @@ ppt_setup_msix(struct vm *vm, int vcpu, 
 			return (ENXIO);
 	
 		ppt->msix.arg[idx].pptdev = ppt;
-		ppt->msix.arg[idx].vec = msg & 0xFF;
-		ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
+		ppt->msix.arg[idx].addr = addr;
+		ppt->msix.arg[idx].msg_data = msg;
 	
 		/* Setup the MSI-X interrupt */
 		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],

Modified: head/sys/amd64/vmm/io/ppt.h
==============================================================================
--- head/sys/amd64/vmm/io/ppt.h	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/vmm/io/ppt.h	Mon Dec 16 19:59:31 2013	(r259482)
@@ -33,9 +33,9 @@ int	ppt_unassign_all(struct vm *vm);
 int	ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
 		     vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int	ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
-		      int destcpu, int vector, int numvec);
+		      uint64_t addr, uint64_t msg, int numvec);
 int	ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
-		int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
+		int idx, uint64_t addr, uint64_t msg, uint32_t vector_control);
 int	ppt_num_devices(struct vm *vm);
 boolean_t ppt_is_mmio(struct vm *vm, vm_paddr_t gpa);
 

Modified: head/sys/amd64/vmm/io/vhpet.c
==============================================================================
--- head/sys/amd64/vmm/io/vhpet.c	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/vmm/io/vhpet.c	Mon Dec 16 19:59:31 2013	(r259482)
@@ -240,8 +240,7 @@ vhpet_timer_edge_trig(struct vhpet *vhpe
 static void
 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
 {
-	int apicid, vector, vcpuid, pin;
-	cpuset_t dmask;
+	int pin;
 
 	/* If interrupts are not enabled for this timer then just return. */
 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
@@ -256,26 +255,8 @@ vhpet_timer_interrupt(struct vhpet *vhpe
 	}
 
 	if (vhpet_timer_msi_enabled(vhpet, n)) {
-		/*
-		 * XXX should have an API 'vlapic_deliver_msi(vm, addr, data)'
-		 * - assuming physical delivery mode
-		 * - no need to interpret contents of 'msireg' here
-		 */
-		vector = vhpet->timer[n].msireg & 0xff;
-		apicid = (vhpet->timer[n].msireg >> (32 + 12)) & 0xff;
-		if (apicid != 0xff) {
-			/* unicast */
-			vcpuid = vm_apicid2vcpuid(vhpet->vm, apicid);
-			lapic_intr_edge(vhpet->vm, vcpuid, vector);
-		} else {
-			/* broadcast */
-			dmask = vm_active_cpus(vhpet->vm);
-			while ((vcpuid = CPU_FFS(&dmask)) != 0) {
-				vcpuid--;
-				CPU_CLR(vcpuid, &dmask);
-				lapic_intr_edge(vhpet->vm, vcpuid, vector);
-			}
-		}
+		lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
+		    vhpet->timer[n].msireg & 0xffffffff);
 		return;
 	}	
 

Modified: head/sys/amd64/vmm/io/vioapic.c
==============================================================================
--- head/sys/amd64/vmm/io/vioapic.c	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/vmm/io/vioapic.c	Mon Dec 16 19:59:31 2013	(r259482)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 
 #include "vmm_ktr.h"
 #include "vmm_lapic.h"
+#include "vlapic.h"
 #include "vioapic.h"
 
 #define	IOREGSEL	0x00
@@ -91,25 +92,14 @@ pinstate_str(bool asserted)
 	else
 		return ("deasserted");
 }
-
-static const char *
-trigger_str(bool level)
-{
-
-	if (level)
-		return ("level");
-	else
-		return ("edge");
-}
 #endif
 
 static void
 vioapic_send_intr(struct vioapic *vioapic, int pin)
 {
-	int vector, apicid, vcpuid;
-	uint32_t low, high;
-	cpuset_t dmask;
-	bool level;
+	int vector, delmode;
+	uint32_t low, high, dest;
+	bool level, phys;
 
 	KASSERT(pin >= 0 && pin < REDIR_ENTRIES,
 	    ("vioapic_set_pinstate: invalid pin number %d", pin));
@@ -120,52 +110,20 @@ vioapic_send_intr(struct vioapic *vioapi
 	low = vioapic->rtbl[pin].reg;
 	high = vioapic->rtbl[pin].reg >> 32;
 
-	/*
-	 * XXX We only deal with:
-	 * - physical destination
-	 * - fixed delivery mode
-	 */
-	if ((low & IOART_DESTMOD) != IOART_DESTPHY) {
-		VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported dest mode "
-		    "0x%08x", pin, low);
-		return;
-	}
-
-	if ((low & IOART_DELMOD) != IOART_DELFIXED) {
-		VIOAPIC_CTR2(vioapic, "ioapic pin%d: unsupported delivery mode "
-		    "0x%08x", pin, low);
-		return;
-	}
-
 	if ((low & IOART_INTMASK) == IOART_INTMSET) {
 		VIOAPIC_CTR1(vioapic, "ioapic pin%d: masked", pin);
 		return;
 	}
 
+	phys = ((low & IOART_DESTMOD) == IOART_DESTPHY);
+	delmode = low & IOART_DELMOD;
 	level = low & IOART_TRGRLVL ? true : false;
 	if (level)
 		vioapic->rtbl[pin].reg |= IOART_REM_IRR;
 
 	vector = low & IOART_INTVEC;
-	apicid = high >> APIC_ID_SHIFT;
-	if (apicid != 0xff) {
-		/* unicast */
-		vcpuid = vm_apicid2vcpuid(vioapic->vm, apicid);
-		VIOAPIC_CTR4(vioapic, "ioapic pin%d: %s triggered intr "
-		    "vector %d on vcpuid %d", pin, trigger_str(level),
-		    vector, vcpuid);
-		lapic_set_intr(vioapic->vm, vcpuid, vector, level);
-	} else {
-		/* broadcast */
-		VIOAPIC_CTR3(vioapic, "ioapic pin%d: %s triggered intr "
-		    "vector %d on all vcpus", pin, trigger_str(level), vector);
-		dmask = vm_active_cpus(vioapic->vm);
-		while ((vcpuid = CPU_FFS(&dmask)) != 0) {
-			vcpuid--;
-			CPU_CLR(vcpuid, &dmask);
-			lapic_set_intr(vioapic->vm, vcpuid, vector, level);
-		}
-	}
+	dest = high >> APIC_ID_SHIFT;
+	vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector);
 }
 
 static void

Modified: head/sys/amd64/vmm/io/vlapic.c
==============================================================================
--- head/sys/amd64/vmm/io/vlapic.c	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/vmm/io/vlapic.c	Mon Dec 16 19:59:31 2013	(r259482)
@@ -145,6 +145,84 @@ struct vlapic {
 
 #define VLAPIC_BUS_FREQ	tsc_freq
 
+static __inline uint32_t
+vlapic_get_id(struct vlapic *vlapic)
+{
+
+	if (x2apic(vlapic))
+		return (vlapic->vcpuid);
+	else
+		return (vlapic->vcpuid << 24);
+}
+
+static __inline uint32_t
+vlapic_get_ldr(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+	int apicid;
+	uint32_t ldr;
+
+	lapic = &vlapic->apic;
+	if (x2apic(vlapic)) {
+		apicid = vlapic_get_id(vlapic);
+		ldr = 1 << (apicid & 0xf);
+		ldr |= (apicid & 0xffff0) << 12;
+		return (ldr);
+	} else
+		return (lapic->ldr);
+}
+
+static __inline uint32_t
+vlapic_get_dfr(struct vlapic *vlapic)
+{
+	struct LAPIC *lapic;
+
+	lapic = &vlapic->apic;
+	if (x2apic(vlapic))
+		return (0);
+	else
+		return (lapic->dfr);
+}
+
+static void
+vlapic_set_dfr(struct vlapic *vlapic, uint32_t data)
+{
+	uint32_t dfr;
+	struct LAPIC *lapic;
+	
+	if (x2apic(vlapic)) {
+		VM_CTR1(vlapic->vm, "write to DFR in x2apic mode: %#x", data);
+		return;
+	}
+
+	lapic = &vlapic->apic;
+	dfr = (lapic->dfr & APIC_DFR_RESERVED) | (data & APIC_DFR_MODEL_MASK);
+	if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
+	else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
+		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
+	else
+		VLAPIC_CTR1(vlapic, "vlapic DFR in Unknown Model %#x", dfr);
+
+	lapic->dfr = dfr;
+}
+
+static void
+vlapic_set_ldr(struct vlapic *vlapic, uint32_t data)
+{
+	struct LAPIC *lapic;
+
+	/* LDR is read-only in x2apic mode */
+	if (x2apic(vlapic)) {
+		VLAPIC_CTR1(vlapic, "write to LDR in x2apic mode: %#x", data);
+		return;
+	}
+
+	lapic = &vlapic->apic;
+	lapic->ldr = data & ~APIC_LDR_RESERVED;
+	VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
+}
+
 static int
 vlapic_timer_divisor(uint32_t dcr)
 {
@@ -610,12 +688,115 @@ vlapic_set_icr_timer(struct vlapic *vlap
 	VLAPIC_TIMER_UNLOCK(vlapic);
 }
 
+/*
+ * This function populates 'dmask' with the set of vcpus that match the
+ * addressing specified by the (dest, phys, lowprio) tuple.
+ * 
+ * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
+ * or xAPIC (8-bit) destination field.
+ */
+static void
+vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
+    bool lowprio, bool x2apic_dest)
+{
+	struct vlapic *vlapic;
+	uint32_t dfr, ldr, ldest, cluster;
+	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
+	cpuset_t amask;
+	int vcpuid;
+
+	if ((x2apic_dest && dest == 0xffffffff) ||
+	    (!x2apic_dest && dest == 0xff)) {
+		/*
+		 * Broadcast in both logical and physical modes.
+		 */
+		*dmask = vm_active_cpus(vm);
+		return;
+	}
+
+	if (phys) {
+		/*
+		 * Physical mode: destination is APIC ID.
+		 */
+		CPU_ZERO(dmask);
+		vcpuid = vm_apicid2vcpuid(vm, dest);
+		if (vcpuid < VM_MAXCPU)
+			CPU_SET(vcpuid, dmask);
+	} else {
+		/*
+		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
+		 * bitmask. This model is only avilable in the xAPIC mode.
+		 */
+		mda_flat_ldest = dest & 0xff;
+
+		/*
+		 * In the "Cluster Model" the MDA is used to identify a
+		 * specific cluster and a set of APICs in that cluster.
+		 */
+		if (x2apic_dest) {
+			mda_cluster_id = dest >> 16;
+			mda_cluster_ldest = dest & 0xffff;
+		} else {
+			mda_cluster_id = (dest >> 4) & 0xf;
+			mda_cluster_ldest = dest & 0xf;
+		}
+
+		/*
+		 * Logical mode: match each APIC that has a bit set
+		 * in it's LDR that matches a bit in the ldest.
+		 */
+		CPU_ZERO(dmask);
+		amask = vm_active_cpus(vm);
+		while ((vcpuid = CPU_FFS(&amask)) != 0) {
+			vcpuid--;
+			CPU_CLR(vcpuid, &amask);
+
+			vlapic = vm_lapic(vm, vcpuid);
+			dfr = vlapic_get_dfr(vlapic);
+			ldr = vlapic_get_ldr(vlapic);
+
+			if ((dfr & APIC_DFR_MODEL_MASK) ==
+			    APIC_DFR_MODEL_FLAT) {
+				ldest = ldr >> 24;
+				mda_ldest = mda_flat_ldest;
+			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
+			    APIC_DFR_MODEL_CLUSTER) {
+				if (x2apic(vlapic)) {
+					cluster = ldr >> 16;
+					ldest = ldr & 0xffff;
+				} else {
+					cluster = ldr >> 28;
+					ldest = (ldr >> 24) & 0xf;
+				}
+				if (cluster != mda_cluster_id)
+					continue;
+				mda_ldest = mda_cluster_ldest;
+			} else {
+				/*
+				 * Guest has configured a bad logical
+				 * model for this vcpu - skip it.
+				 */
+				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
+				    "model %x - cannot deliver interrupt", dfr);
+				continue;
+			}
+
+			if ((mda_ldest & ldest) != 0) {
+				CPU_SET(vcpuid, dmask);
+				if (lowprio)
+					break;
+			}
+		}
+	}
+}
+
 static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
 
 static int
 lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu)
 {
 	int i;
+	bool phys;
 	cpuset_t dmask;
 	uint32_t dest, vec, mode;
 	struct vlapic *vlapic2;
@@ -631,7 +812,9 @@ lapic_process_icr(struct vlapic *vlapic,
 	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
 		switch (icrval & APIC_DEST_MASK) {
 		case APIC_DEST_DESTFLD:
-			CPU_SETOF(dest, &dmask);
+			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
+			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
+			    x2apic(vlapic));
 			break;
 		case APIC_DEST_SELF:
 			CPU_SETOF(vlapic->vcpuid, &dmask);
@@ -820,10 +1003,7 @@ vlapic_read(struct vlapic *vlapic, uint6
 	switch(offset)
 	{
 		case APIC_OFFSET_ID:
-			if (x2apic(vlapic))
-				*data = vlapic->vcpuid;
-			else
-				*data = vlapic->vcpuid << 24;
+			*data = vlapic_get_id(vlapic);
 			break;
 		case APIC_OFFSET_VER:
 			*data = lapic->version;
@@ -841,10 +1021,10 @@ vlapic_read(struct vlapic *vlapic, uint6
 			*data = lapic->eoi;
 			break;
 		case APIC_OFFSET_LDR:
-			*data = lapic->ldr;
+			*data = vlapic_get_ldr(vlapic);
 			break;
 		case APIC_OFFSET_DFR:
-			*data = lapic->dfr;
+			*data = vlapic_get_dfr(vlapic);
 			break;
 		case APIC_OFFSET_SVR:
 			*data = lapic->svr;
@@ -921,8 +1101,10 @@ vlapic_write(struct vlapic *vlapic, uint
 			vlapic_process_eoi(vlapic);
 			break;
 		case APIC_OFFSET_LDR:
+			vlapic_set_ldr(vlapic, data);
 			break;
 		case APIC_OFFSET_DFR:
+			vlapic_set_dfr(vlapic, data);
 			break;
 		case APIC_OFFSET_SVR:
 			lapic_set_svr(vlapic, data);
@@ -1041,6 +1223,34 @@ vlapic_set_x2apic_state(struct vm *vm, i
 		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
 }
 
+void
+vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
+    int delmode, int vec)
+{
+	bool lowprio;
+	int vcpuid;
+	cpuset_t dmask;
+
+	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
+		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
+		return;
+	}
+	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
+
+	/*
+	 * We don't provide any virtual interrupt redirection hardware so
+	 * all interrupts originating from the ioapic or MSI specify the
+	 * 'dest' in the legacy xAPIC format.
+	 */
+	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);
+
+	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
+		vcpuid--;
+		CPU_CLR(vcpuid, &dmask);
+		lapic_set_intr(vm, vcpuid, vec, level);
+	}
+}
+
 bool
 vlapic_enabled(struct vlapic *vlapic)
 {

Modified: head/sys/amd64/vmm/io/vlapic.h
==============================================================================
--- head/sys/amd64/vmm/io/vlapic.h	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/vmm/io/vlapic.h	Mon Dec 16 19:59:31 2013	(r259482)
@@ -103,4 +103,6 @@ void vlapic_set_apicbase(struct vlapic *
 void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s);
 bool vlapic_enabled(struct vlapic *vlapic);
 
+void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
+    int delmode, int vec);
 #endif	/* _VLAPIC_H_ */

Modified: head/sys/amd64/vmm/vmm_dev.c
==============================================================================
--- head/sys/amd64/vmm/vmm_dev.c	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/vmm/vmm_dev.c	Mon Dec 16 19:59:31 2013	(r259482)
@@ -152,6 +152,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 	struct vm_run *vmrun;
 	struct vm_event *vmevent;
 	struct vm_lapic_irq *vmirq;
+	struct vm_lapic_msi *vmmsi;
 	struct vm_ioapic_irq *ioapic_irq;
 	struct vm_capability *vmcap;
 	struct vm_pptdev *pptdev;
@@ -254,7 +255,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 		pptmsi = (struct vm_pptdev_msi *)data;
 		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
 				      pptmsi->bus, pptmsi->slot, pptmsi->func,
-				      pptmsi->destcpu, pptmsi->vector,
+				      pptmsi->addr, pptmsi->msg,
 				      pptmsi->numvec);
 		break;
 	case VM_PPTDEV_MSIX:
@@ -262,8 +263,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
 				       pptmsix->bus, pptmsix->slot, 
 				       pptmsix->func, pptmsix->idx,
-				       pptmsix->msg, pptmsix->vector_control,
-				       pptmsix->addr);
+				       pptmsix->addr, pptmsix->msg,
+				       pptmsix->vector_control);
 		break;
 	case VM_MAP_PPTDEV_MMIO:
 		pptmmio = (struct vm_pptdev_mmio *)data;
@@ -296,6 +297,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long c
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
 		break;
+	case VM_LAPIC_MSI:
+		vmmsi = (struct vm_lapic_msi *)data;
+		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
+		break;
 	case VM_IOAPIC_ASSERT_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);

Modified: head/sys/amd64/vmm/vmm_lapic.c
==============================================================================
--- head/sys/amd64/vmm/vmm_lapic.c	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/vmm/vmm_lapic.c	Mon Dec 16 19:59:31 2013	(r259482)
@@ -38,9 +38,18 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/vmm.h>
 #include "vmm_ipi.h"
+#include "vmm_ktr.h"
 #include "vmm_lapic.h"
 #include "vlapic.h"
 
+/*
+ * Some MSI message definitions
+ */
+#define	MSI_X86_ADDR_MASK	0xfff00000
+#define	MSI_X86_ADDR_BASE	0xfee00000
+#define	MSI_X86_ADDR_RH		0x00000008	/* Redirection Hint */
+#define	MSI_X86_ADDR_LOG	0x00000004	/* Destination Mode */
+
 int
 lapic_pending_intr(struct vm *vm, int cpu)
 {
@@ -80,6 +89,44 @@ lapic_set_intr(struct vm *vm, int cpu, i
 	return (0);
 }
 
+int
+lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg)
+{
+	int delmode, vec;
+	uint32_t dest;
+	bool phys;
+
+	VM_CTR2(vm, "lapic MSI addr: %#lx msg: %#lx", addr, msg);
+
+	if ((addr & MSI_X86_ADDR_MASK) != MSI_X86_ADDR_BASE) {
+		VM_CTR1(vm, "lapic MSI invalid addr %#lx", addr);
+		return (-1);
+	}
+
+	/*
+	 * Extract the x86-specific fields from the MSI addr/msg
+	 * params according to the Intel Arch spec, Vol3 Ch 10.
+	 *
+	 * The PCI specification does not support level triggered
+	 * MSI/MSI-X so ignore trigger level in 'msg'.
+	 *
+	 * The 'dest' is interpreted as a logical APIC ID if both
+	 * the Redirection Hint and Destination Mode are '1' and
+	 * physical otherwise.
+	 */
+	dest = (addr >> 12) & 0xff;
+	phys = ((addr & (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG)) !=
+	    (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG));
+	delmode = msg & APIC_DELMODE_MASK;
+	vec = msg & 0xff;
+
+	VM_CTR3(vm, "lapic MSI %s dest %#x, vec %d",
+	    phys ? "physical" : "logical", dest, vec);
+
+	vlapic_deliver_intr(vm, LAPIC_TRIG_EDGE, dest, phys, delmode, vec);
+	return (0);
+}
+
 static boolean_t
 x2apic_msr(u_int msr)
 {

Modified: head/sys/amd64/vmm/vmm_lapic.h
==============================================================================
--- head/sys/amd64/vmm/vmm_lapic.h	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/sys/amd64/vmm/vmm_lapic.h	Mon Dec 16 19:59:31 2013	(r259482)
@@ -84,4 +84,5 @@ lapic_intr_edge(struct vm *vm, int cpu, 
 	return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_EDGE));
 }
 
+int	lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg);
 #endif

Modified: head/usr.sbin/bhyve/pci_emul.c
==============================================================================
--- head/usr.sbin/bhyve/pci_emul.c	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/usr.sbin/bhyve/pci_emul.c	Mon Dec 16 19:59:31 2013	(r259482)
@@ -850,19 +850,14 @@ msicap_cfgwrite(struct pci_devinst *pi, 
 		else
 			msgdata = pci_get_cfgdata16(pi, capoff + 8);
 
-		/*
-		 * XXX check delivery mode, destination mode etc
-		 */
 		mme = msgctrl & PCIM_MSICTRL_MME_MASK;
 		pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
 		if (pi->pi_msi.enabled) {
-			pi->pi_msi.cpu = (addrlo >> 12) & 0xff;
-			pi->pi_msi.vector = msgdata & 0xff;
-			pi->pi_msi.msgnum = 1 << (mme >> 4);
+			pi->pi_msi.addr = addrlo;
+			pi->pi_msi.msg_data = msgdata;
+			pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
 		} else {
-			pi->pi_msi.cpu = 0;
-			pi->pi_msi.vector = 0;
-			pi->pi_msi.msgnum = 0;
+			pi->pi_msi.maxmsgnum = 0;
 		}
 	}
 
@@ -1060,10 +1055,10 @@ pci_msi_enabled(struct pci_devinst *pi)
 }
 
 int
-pci_msi_msgnum(struct pci_devinst *pi)
+pci_msi_maxmsgnum(struct pci_devinst *pi)
 {
 	if (pi->pi_msi.enabled)
-		return (pi->pi_msi.msgnum);
+		return (pi->pi_msi.maxmsgnum);
 	else
 		return (0);
 }
@@ -1092,19 +1087,17 @@ pci_generate_msix(struct pci_devinst *pi
 	mte = &pi->pi_msix.table[index];
 	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
 		/* XXX Set PBA bit if interrupt is disabled */
-		vm_lapic_irq(pi->pi_vmctx,
-			     (mte->addr >> 12) & 0xff, mte->msg_data & 0xff);
+		vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
 	}
 }
 
 void
-pci_generate_msi(struct pci_devinst *pi, int msg)
+pci_generate_msi(struct pci_devinst *pi, int index)
 {
 
-	if (pci_msi_enabled(pi) && msg < pci_msi_msgnum(pi)) {
-		vm_lapic_irq(pi->pi_vmctx,
-			     pi->pi_msi.cpu,
-			     pi->pi_msi.vector + msg);
+	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
+		vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
+			     pi->pi_msi.msg_data + index);
 	}
 }
 
@@ -1511,10 +1504,10 @@ pci_emul_diow(struct vmctx *ctx, int vcp
 		 * Special magic value to generate an interrupt
 		 */
 		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
-			pci_generate_msi(pi, value % pci_msi_msgnum(pi));
+			pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));
 
 		if (value == 0xabcdef) {
-			for (i = 0; i < pci_msi_msgnum(pi); i++)
+			for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
 				pci_generate_msi(pi, i);
 		}
 	}

Modified: head/usr.sbin/bhyve/pci_emul.h
==============================================================================
--- head/usr.sbin/bhyve/pci_emul.h	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/usr.sbin/bhyve/pci_emul.h	Mon Dec 16 19:59:31 2013	(r259482)
@@ -109,10 +109,10 @@ struct pci_devinst {
 	int	  pi_bar_getsize;
 
 	struct {
-		int	enabled;
-		int	cpu;
-		int	vector;
-		int	msgnum;
+		int		enabled;
+		uint64_t	addr;
+		uint64_t	msg_data;
+		int		maxmsgnum;
 	} pi_msi;
 
 	struct {

Modified: head/usr.sbin/bhyve/pci_passthru.c
==============================================================================
--- head/usr.sbin/bhyve/pci_passthru.c	Mon Dec 16 19:54:55 2013	(r259481)
+++ head/usr.sbin/bhyve/pci_passthru.c	Mon Dec 16 19:59:31 2013	(r259482)
@@ -348,9 +348,9 @@ msix_table_write(struct vmctx *ctx, int 
 			error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
 					      sc->psc_sel.pc_dev, 
 					      sc->psc_sel.pc_func,
-					      index, entry->msg_data, 
-					      entry->vector_control,
-					      entry->addr);
+					      index, entry->addr,
+					      entry->msg_data, 
+					      entry->vector_control);
 		}
 	}
 }
@@ -653,8 +653,9 @@ passthru_cfgwrite(struct vmctx *ctx, int
 		msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
 
 		error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus,
-			sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu,
-			pi->pi_msi.vector, pi->pi_msi.msgnum);
+			sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
+			pi->pi_msi.addr, pi->pi_msi.msg_data,
+			pi->pi_msi.maxmsgnum);
 		if (error != 0) {
 			printf("vm_setup_msi returned error %d\r\n", errno);
 			exit(1);
@@ -667,15 +668,16 @@ passthru_cfgwrite(struct vmctx *ctx, int
 		if (pi->pi_msix.enabled) {
 			msix_table_entries = pi->pi_msix.table_count;
 			for (i = 0; i < msix_table_entries; i++) {
-				error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
-						      sc->psc_sel.pc_dev, 
-						      sc->psc_sel.pc_func, i, 
-						      pi->pi_msix.table[i].msg_data,
-						      pi->pi_msix.table[i].vector_control,
-						      pi->pi_msix.table[i].addr);
+				error = vm_setup_msix(ctx, vcpu,
+				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, 
+				    sc->psc_sel.pc_func, i, 
+				    pi->pi_msix.table[i].addr,
+				    pi->pi_msix.table[i].msg_data,
+				    pi->pi_msix.table[i].vector_control);
 		
 				if (error) {
-					printf("vm_setup_msix returned error %d\r\n", errno);
+					printf("vm_setup_msix error %d\r\n",
+					    errno);
 					exit(1);	
 				}
 			}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201312161959.rBGJxWp2091559>