Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 30 Sep 2016 01:39:18 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r306471 - in stable/11: share/man/man9 sys/amd64/vmm sys/amd64/vmm/io sys/dev/pci
Message-ID:  <201609300139.u8U1dILm088609@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Fri Sep 30 01:39:18 2016
New Revision: 306471
URL: https://svnweb.freebsd.org/changeset/base/306471

Log:
  MFC 304858,305485,305497: Fix various issues with PCI pass through and VT-d.
  
  304858:
  Enable I/O MMU when PCI pass through is first used.
  
  Rather than enabling the I/O MMU when the vmm module is loaded,
  defer initialization until the first attempt to pass a PCI device
  through to a guest.  If the I/O MMU fails to initialize or is not
  present, then fail the attempt to pass a PCI device through to a
  guest.
  
  The hw.vmm.force_iommu tunable has been removed since the I/O MMU is
  no longer enabled during boot.  However, the I/O MMU support can be
  disabled by setting the hw.vmm.iommu.enable tunable to 0 to prevent
  use of the I/O MMU on any systems where it is buggy.
  
  305485:
  Leave ppt devices in the host domain when they are not attached to a VM.
  
  This allows a pass through device to be reset to a normal device driver
  on the host and reused on the host.  ppt devices are now always active in
  some I/O MMU domain when the I/O MMU is active, either the host domain
  or the domain of a VM they are attached to.
  
  305497:
  Update the I/O MMU in bhyve when PCI devices are added and removed.
  
  When the I/O MMU is active in bhyve, all PCI devices need valid entries
  in the DMAR context tables. The I/O MMU code does a single enumeration
  of the available PCI devices during initialization to add all existing
  devices to a domain representing the host. The ppt(4) driver then moves
  pass through devices in and out of domains for virtual machines as needed.
  However, when new PCI devices were added at runtime either via SR-IOV or
  HotPlug, the I/O MMU tables were not updated.
  
  This change adds a new set of EVENTHANDLERS that are invoked when PCI
  devices are added and deleted. The I/O MMU driver in bhyve installs
  handlers for these events which it uses to add and remove devices to
  the "host" domain.
  
  Sponsored by:	Chelsio Communications

Modified:
  stable/11/share/man/man9/pci.9
  stable/11/sys/amd64/vmm/io/iommu.c
  stable/11/sys/amd64/vmm/io/iommu.h
  stable/11/sys/amd64/vmm/io/ppt.c
  stable/11/sys/amd64/vmm/vmm.c
  stable/11/sys/dev/pci/pci.c
  stable/11/sys/dev/pci/pcivar.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/share/man/man9/pci.9
==============================================================================
--- stable/11/share/man/man9/pci.9	Fri Sep 30 01:16:09 2016	(r306470)
+++ stable/11/share/man/man9/pci.9	Fri Sep 30 01:39:18 2016	(r306471)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 1, 2016
+.Dd September 6, 2016
 .Dt PCI 9
 .Os
 .Sh NAME
@@ -149,6 +149,10 @@
 .Fn pcie_read_config "device_t dev" "int reg" "int width"
 .Ft void
 .Fn pcie_write_config "device_t dev" "int reg" "uint32_t val" "int width"
+.Ft void
+.Fn pci_event_fn "void *arg" "device_t dev"
+.Fn EVENTHANDLER_REGISTER "pci_add_device" "pci_event_fn"
+.Fn EVENTHANDLER_DEREGISTER "pci_delete_device" "pci_event_fn"
 .In dev/pci/pci_iov.h
 .Ft int
 .Fn pci_iov_attach "device_t dev" "nvlist_t *pf_schema" "nvlist_t *vf_schema"
@@ -910,6 +914,24 @@ with one in the new distribution.
 The
 .Fn pci_remap_msix
 function will fail if this condition is not met.
+.Ss Device Events
+The
+.Va pci_add_device
+event handler is invoked every time a new PCI device is added to the system.
+This includes the creation of Virtual Functions via SR-IOV.
+.Pp
+The
+.Va pci_delete_device
+event handler is invoked every time a PCI device is removed from the system.
+.Pp
+Both event handlers pass the
+.Vt device_t
+object of the relevant PCI device as
+.Fa dev
+to each callback function.
+Both event handlers are invoked while
+.Fa dev
+is unattached but with valid instance variables.
 .Sh SEE ALSO
 .Xr pci 4 ,
 .Xr pciconf 8 ,
@@ -921,6 +943,7 @@ function will fail if this condition is 
 .Xr devclass 9 ,
 .Xr device 9 ,
 .Xr driver 9 ,
+.Xr eventhandler 9 ,
 .Xr rman 9
 .Rs
 .%B FreeBSD Developers' Handbook

Modified: stable/11/sys/amd64/vmm/io/iommu.c
==============================================================================
--- stable/11/sys/amd64/vmm/io/iommu.c	Fri Sep 30 01:16:09 2016	(r306470)
+++ stable/11/sys/amd64/vmm/io/iommu.c	Fri Sep 30 01:39:18 2016	(r306471)
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
+#include <machine/cpu.h>
 #include <machine/md_var.h>
 
 #include "vmm_util.h"
@@ -51,8 +52,13 @@ static int iommu_avail;
 SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, initialized, CTLFLAG_RD, &iommu_avail,
     0, "bhyve iommu initialized?");
 
+static int iommu_enable = 1;
+SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, enable, CTLFLAG_RDTUN, &iommu_enable, 0,
+    "Enable use of I/O MMU (required for PCI passthrough).");
+
 static struct iommu_ops *ops;
 static void *host_domain;
+static eventhandler_tag add_tag, delete_tag;
 
 static __inline int
 IOMMU_INIT(void)
@@ -148,14 +154,31 @@ IOMMU_DISABLE(void)
 		(*ops->disable)();
 }
 
-void
+static void
+iommu_pci_add(void *arg, device_t dev)
+{
+
+	/* Add new devices to the host domain. */
+	iommu_add_device(host_domain, pci_get_rid(dev));
+}
+
+static void
+iommu_pci_delete(void *arg, device_t dev)
+{
+
+	iommu_remove_device(host_domain, pci_get_rid(dev));
+}
+
+static void
 iommu_init(void)
 {
 	int error, bus, slot, func;
 	vm_paddr_t maxaddr;
-	const char *name;
 	device_t dev;
 
+	if (!iommu_enable)
+		return;
+
 	if (vmm_is_intel())
 		ops = &iommu_ops_intel;
 	else if (vmm_is_amd())
@@ -174,8 +197,13 @@ iommu_init(void)
 	 */
 	maxaddr = vmm_mem_maxaddr();
 	host_domain = IOMMU_CREATE_DOMAIN(maxaddr);
-	if (host_domain == NULL)
-		panic("iommu_init: unable to create a host domain");
+	if (host_domain == NULL) {
+		printf("iommu_init: unable to create a host domain");
+		IOMMU_CLEANUP();
+		ops = NULL;
+		iommu_avail = 0;
+		return;
+	}
 
 	/*
 	 * Create 1:1 mappings from '0' to 'maxaddr' for devices assigned to
@@ -183,6 +211,9 @@ iommu_init(void)
 	 */
 	iommu_create_mapping(host_domain, 0, 0, maxaddr);
 
+	add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0);
+	delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete,
+	    NULL, 0);
 	for (bus = 0; bus <= PCI_BUSMAX; bus++) {
 		for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
 			for (func = 0; func <= PCI_FUNCMAX; func++) {
@@ -190,12 +221,7 @@ iommu_init(void)
 				if (dev == NULL)
 					continue;
 
-				/* skip passthrough devices */
-				name = device_get_name(dev);
-				if (name != NULL && strcmp(name, "ppt") == 0)
-					continue;
-
-				/* everything else belongs to the host domain */
+				/* Everything belongs to the host domain. */
 				iommu_add_device(host_domain,
 				    pci_get_rid(dev));
 			}
@@ -208,6 +234,15 @@ iommu_init(void)
 void
 iommu_cleanup(void)
 {
+
+	if (add_tag != NULL) {
+		EVENTHANDLER_DEREGISTER(pci_add_device, add_tag);
+		add_tag = NULL;
+	}
+	if (delete_tag != NULL) {
+		EVENTHANDLER_DEREGISTER(pci_delete_device, delete_tag);
+		delete_tag = NULL;
+	}
 	IOMMU_DISABLE();
 	IOMMU_DESTROY_DOMAIN(host_domain);
 	IOMMU_CLEANUP();
@@ -216,7 +251,16 @@ iommu_cleanup(void)
 void *
 iommu_create_domain(vm_paddr_t maxaddr)
 {
+	static volatile int iommu_initted;
 
+	if (iommu_initted < 2) {
+		if (atomic_cmpset_int(&iommu_initted, 0, 1)) {
+			iommu_init();
+			atomic_store_rel_int(&iommu_initted, 2);
+		} else
+			while (iommu_initted == 1)
+				cpu_spinwait();
+	}
 	return (IOMMU_CREATE_DOMAIN(maxaddr));
 }
 

Modified: stable/11/sys/amd64/vmm/io/iommu.h
==============================================================================
--- stable/11/sys/amd64/vmm/io/iommu.h	Fri Sep 30 01:16:09 2016	(r306470)
+++ stable/11/sys/amd64/vmm/io/iommu.h	Fri Sep 30 01:39:18 2016	(r306471)
@@ -61,7 +61,6 @@ struct iommu_ops {
 extern struct iommu_ops iommu_ops_intel;
 extern struct iommu_ops iommu_ops_amd;
 
-void	iommu_init(void);
 void	iommu_cleanup(void);
 void	*iommu_host_domain(void);
 void	*iommu_create_domain(vm_paddr_t maxaddr);

Modified: stable/11/sys/amd64/vmm/io/ppt.c
==============================================================================
--- stable/11/sys/amd64/vmm/io/ppt.c	Fri Sep 30 01:16:09 2016	(r306470)
+++ stable/11/sys/amd64/vmm/io/ppt.c	Fri Sep 30 01:39:18 2016	(r306471)
@@ -363,6 +363,7 @@ ppt_assign_device(struct vm *vm, int bus
 			return (EBUSY);
 
 		ppt->vm = vm;
+		iommu_remove_device(iommu_host_domain(), pci_get_rid(ppt->dev));
 		iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
 		return (0);
 	}
@@ -385,6 +386,7 @@ ppt_unassign_device(struct vm *vm, int b
 		ppt_teardown_msi(ppt);
 		ppt_teardown_msix(ppt);
 		iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
+		iommu_add_device(iommu_host_domain(), pci_get_rid(ppt->dev));
 		ppt->vm = NULL;
 		return (0);
 	}

Modified: stable/11/sys/amd64/vmm/vmm.c
==============================================================================
--- stable/11/sys/amd64/vmm/vmm.c	Fri Sep 30 01:16:09 2016	(r306470)
+++ stable/11/sys/amd64/vmm/vmm.c	Fri Sep 30 01:39:18 2016	(r306471)
@@ -224,11 +224,6 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, trace_gues
     &trace_guest_exceptions, 0,
     "Trap into hypervisor on all guest exceptions and reflect them back");
 
-static int vmm_force_iommu = 0;
-TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu);
-SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
-    "Force use of I/O MMU even if no passthrough devices were found.");
-
 static void vm_free_memmap(struct vm *vm, int ident);
 static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
 static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
@@ -358,8 +353,6 @@ vmm_handler(module_t mod, int what, void
 	switch (what) {
 	case MOD_LOAD:
 		vmmdev_init();
-		if (vmm_force_iommu || ppt_avail_devices() > 0)
-			iommu_init();
 		error = vmm_init();
 		if (error == 0)
 			vmm_initialized = 1;
@@ -396,9 +389,6 @@ static moduledata_t vmm_kmod = {
 /*
  * vmm initialization has the following dependencies:
  *
- * - iommu initialization must happen after the pci passthru driver has had
- *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
- *
  * - VT-x initialization requires smp_rendezvous() and therefore must happen
  *   after SMP is fully functional (after SI_SUB_SMP).
  */
@@ -893,6 +883,8 @@ vm_assign_pptdev(struct vm *vm, int bus,
 		    ("vm_assign_pptdev: iommu must be NULL"));
 		maxaddr = sysmem_maxaddr(vm);
 		vm->iommu = iommu_create_domain(maxaddr);
+		if (vm->iommu == NULL)
+			return (ENXIO);
 		vm_iommu_map(vm);
 	}
 

Modified: stable/11/sys/dev/pci/pci.c
==============================================================================
--- stable/11/sys/dev/pci/pci.c	Fri Sep 30 01:16:09 2016	(r306470)
+++ stable/11/sys/dev/pci/pci.c	Fri Sep 30 01:39:18 2016	(r306471)
@@ -4070,6 +4070,7 @@ pci_add_child(device_t bus, struct pci_d
 	pci_print_verbose(dinfo);
 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
 	pci_child_added(dinfo->cfg.dev);
+	EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
 }
 
 void
@@ -5311,6 +5312,8 @@ pci_child_deleted(device_t dev, device_t
 	dinfo = device_get_ivars(child);
 	rl = &dinfo->resources;
 
+	EVENTHANDLER_INVOKE(pci_delete_device, child);
+
 	/* Turn off access to resources we're about to free */
 	if (bus_child_present(child) != 0) {
 		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,

Modified: stable/11/sys/dev/pci/pcivar.h
==============================================================================
--- stable/11/sys/dev/pci/pcivar.h	Fri Sep 30 01:16:09 2016	(r306470)
+++ stable/11/sys/dev/pci/pcivar.h	Fri Sep 30 01:39:18 2016	(r306471)
@@ -31,6 +31,7 @@
 #define	_PCIVAR_H_
 
 #include <sys/queue.h>
+#include <sys/eventhandler.h>
 
 /* some PCI bus constants */
 #define	PCI_MAXMAPS_0	6	/* max. no. of memory/port maps */
@@ -631,4 +632,12 @@ void *	vga_pci_map_bios(device_t dev, si
 void	vga_pci_unmap_bios(device_t dev, void *bios);
 int	vga_pci_repost(device_t dev);
 
+/**
+ * Global eventhandlers invoked when PCI devices are added or removed
+ * from the system.
+ */
+typedef void (*pci_event_fn)(void *arg, device_t dev);
+EVENTHANDLER_DECLARE(pci_add_device, pci_event_fn);
+EVENTHANDLER_DECLARE(pci_delete_device, pci_event_fn);
+
 #endif /* _PCIVAR_H_ */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201609300139.u8U1dILm088609>