Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 19 Jun 2019 06:41:07 +0000 (UTC)
From:      Scott Long <scottl@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r349184 - head/sys/amd64/vmm/intel
Message-ID:  <201906190641.x5J6f7ej006327@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: scottl
Date: Wed Jun 19 06:41:07 2019
New Revision: 349184
URL: https://svnweb.freebsd.org/changeset/base/349184

Log:
  Implement VT-d capability detection on chipsets that have multiple
  translation units with differing capabilities
  
  From the author via Bugzilla:
  ---
  When an attempt is made to passthrough a PCI device to a bhyve VM
  (causing initialisation of IOMMU) on certain Intel chipsets using
  VT-d the PCI bus stops working entirely. This issue occurs on the
  E3-1275 v5 processor on C236 chipset and has also been encountered
  by others on the forums with different hardware in the Skylake
  series.
  
  The chipset has two VT-d translation units. The issue is caused by
  an attempt to use the VT-d device-IOTLB capability that is
  supported by only the first unit for devices attached to the
  second unit which lacks that capability. Only the capabilities of
  the first unit are checked and are assumed to be the same for all
  units.
  
  Attached is a patch to rectify this issue by determining which
  unit is responsible for the device being added to a domain and
  then checking that unit's device-IOTLB capability. In addition to
  this a few fixes have been made to other instances where the first
  unit's capabilities are assumed for all units for domains they
  share. In these cases a mutual set of capabilities is determined.
  The patch should hopefully fix any bugs for current/future
  hardware with multiple translation units supporting different
  capabilities.
  
  A description is on the forums at
  https://forums.freebsd.org/threads/pci-passthrough-bhyve-usb-xhci.65235
  The thread includes observations by other users of the bug
  occurring, and description as well as confirmation of the fix.
  I'd also like to thank Ordoban for their help.
  
  ---
  Personally tested on a Skylake laptop, Skylake Xeon server, and
  a Xeon-D-1541, passing through XHCI and NVMe functions.  Passthru
  is hit-or-miss to the point of being unusable without this
  patch.
  
  PR: 229852
  Submitted by: callum@aitchison.org
  MFC after: 1 week

Modified:
  head/sys/amd64/vmm/intel/vtd.c

Modified: head/sys/amd64/vmm/intel/vtd.c
==============================================================================
--- head/sys/amd64/vmm/intel/vtd.c	Wed Jun 19 03:33:00 2019	(r349183)
+++ head/sys/amd64/vmm/intel/vtd.c	Wed Jun 19 06:41:07 2019	(r349184)
@@ -51,6 +51,8 @@ __FBSDID("$FreeBSD$");
  * Architecture Spec, September 2008.
  */
 
+#define VTD_DRHD_INCLUDE_PCI_ALL(Flags)  (((Flags) >> 0) & 0x1)
+
 /* Section 10.4 "Register Descriptions" */
 struct vtdmap {
 	volatile uint32_t	version;
@@ -116,10 +118,11 @@ struct domain {
 static SLIST_HEAD(, domain) domhead;
 
 #define	DRHD_MAX_UNITS	8
-static int		drhd_num;
-static struct vtdmap	*vtdmaps[DRHD_MAX_UNITS];
-static int		max_domains;
-typedef int		(*drhd_ident_func_t)(void);
+static ACPI_DMAR_HARDWARE_UNIT	*drhds[DRHD_MAX_UNITS];
+static int			drhd_num;
+static struct vtdmap		*vtdmaps[DRHD_MAX_UNITS];
+static int			max_domains;
+typedef int			(*drhd_ident_func_t)(void);
 
 static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
 static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
@@ -175,6 +178,69 @@ domain_id(void)
 	return (id);
 }
 
+static struct vtdmap *
+vtd_device_scope(uint16_t rid)
+{
+	int i, remaining, pathremaining;
+	char *end, *pathend;
+	struct vtdmap *vtdmap;
+	ACPI_DMAR_HARDWARE_UNIT *drhd;
+	ACPI_DMAR_DEVICE_SCOPE *device_scope;
+	ACPI_DMAR_PCI_PATH *path;
+
+	for (i = 0; i < drhd_num; i++) {
+		drhd = drhds[i];
+
+		if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
+			/*
+			 * From Intel VT-d arch spec, version 3.0:
+			 * If a DRHD structure with INCLUDE_PCI_ALL flag Set is reported
+			 * for a Segment, it must be enumerated by BIOS after all other
+			 * DRHD structures for the same Segment.
+			 */
+			vtdmap = vtdmaps[i];
+			return(vtdmap);
+		}
+
+		end = (char *)drhd + drhd->Header.Length;
+		remaining = drhd->Header.Length - sizeof(ACPI_DMAR_HARDWARE_UNIT);
+		while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) {
+			device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
+			remaining -= device_scope->Length;
+
+			switch (device_scope->EntryType){
+				/* 0x01 and 0x02 are PCI device entries */
+				case 0x01:
+				case 0x02:
+					break;
+				default:
+					continue;
+			}
+
+			if (PCI_RID2BUS(rid) != device_scope->Bus)
+				continue;
+
+			pathend = (char *)device_scope + device_scope->Length;
+			pathremaining = device_scope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE);
+			while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) {
+				path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining);
+				pathremaining -= sizeof(ACPI_DMAR_PCI_PATH);
+
+				if (PCI_RID2SLOT(rid) != path->Device)
+					continue;
+				if (PCI_RID2FUNC(rid) != path->Function)
+					continue;
+
+				vtdmap = vtdmaps[i];
+				return (vtdmap);
+			}
+		}
+	}
+
+	/* No matching scope */
+	return (NULL);
+}
+
 static void
 vtd_wbflush(struct vtdmap *vtdmap)
 {
@@ -240,7 +306,7 @@ vtd_translation_disable(struct vtdmap *vtdmap)
 static int
 vtd_init(void)
 {
-	int i, units, remaining;
+	int i, units, remaining, tmp;
 	struct vtdmap *vtdmap;
 	vm_paddr_t ctx_paddr;
 	char *end, envname[32];
@@ -291,8 +357,9 @@ vtd_init(void)
 			break;
 
 		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
-		vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
-		if (units >= DRHD_MAX_UNITS)
+		drhds[units] = drhd;
+		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
+		if (++units >= DRHD_MAX_UNITS)
 			break;
 		remaining -= hdr->Length;
 	}
@@ -302,13 +369,19 @@ vtd_init(void)
 
 skip_dmar:
 	drhd_num = units;
-	vtdmap = vtdmaps[0];
 
-	if (VTD_CAP_CM(vtdmap->cap) != 0)
-		panic("vtd_init: invalid caching mode");
+	max_domains = 64 * 1024; /* maximum valid value */
+	for (i = 0; i < drhd_num; i++){
+		vtdmap = vtdmaps[i];
 
-	max_domains = vtd_max_domains(vtdmap);
+		if (VTD_CAP_CM(vtdmap->cap) != 0)
+			panic("vtd_init: invalid caching mode");
 
+		/* take most compatible (minimum) value */
+		if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
+			max_domains = tmp;
+	}
+
 	/*
 	 * Set up the root-table to point to the context-entry tables
 	 */
@@ -373,7 +446,6 @@ vtd_add_device(void *arg, uint16_t rid)
 	struct vtdmap *vtdmap;
 	uint8_t bus;
 
-	vtdmap = vtdmaps[0];
 	bus = PCI_RID2BUS(rid);
 	ctxp = ctx_tables[bus];
 	pt_paddr = vtophys(dom->ptp);
@@ -385,6 +457,10 @@ vtd_add_device(void *arg, uint16_t rid)
 		      (uint16_t)(ctxp[idx + 1] >> 8));
 	}
 
+	if ((vtdmap = vtd_device_scope(rid)) == NULL)
+		panic("vtd_add_device: device %x is not in scope for "
+		      "any DMA remapping unit", rid);
+
 	/*
 	 * Order is important. The 'present' bit is set only after all fields
 	 * of the context pointer are initialized.
@@ -568,8 +644,6 @@ vtd_create_domain(vm_paddr_t maxaddr)
 	if (drhd_num <= 0)
 		panic("vtd_create_domain: no dma remapping hardware available");
 
-	vtdmap = vtdmaps[0];
-
 	/*
 	 * Calculate AGAW.
 	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
@@ -594,7 +668,14 @@ vtd_create_domain(vm_paddr_t maxaddr)
 	pt_levels = 2;
 	sagaw = 30;
 	addrwidth = 0;
-	tmp = VTD_CAP_SAGAW(vtdmap->cap);
+
+	tmp = ~0;
+	for (i = 0; i < drhd_num; i++) {
+		vtdmap = vtdmaps[i];
+		/* take most compatible value */
+		tmp &= VTD_CAP_SAGAW(vtdmap->cap);
+	}
+
 	for (i = 0; i < 5; i++) {
 		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
 			break;
@@ -606,8 +687,8 @@ vtd_create_domain(vm_paddr_t maxaddr)
 	}
 
 	if (i >= 5) {
-		panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
-		      VTD_CAP_SAGAW(vtdmap->cap), agaw);
+		panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
+		      tmp, agaw);
 	}
 
 	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
@@ -634,7 +715,12 @@ vtd_create_domain(vm_paddr_t maxaddr)
 	 * There is not any code to deal with the demotion at the moment
 	 * so we disable superpage mappings altogether.
 	 */
-	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
+	dom->spsmask = ~0;
+	for (i = 0; i < drhd_num; i++) {
+		vtdmap = vtdmaps[i];
+		/* take most compatible value */
+		dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
+	}
 #endif
 
 	SLIST_INSERT_HEAD(&domhead, dom, next);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201906190641.x5J6f7ej006327>