Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 10 Jun 2016 18:34:31 +0000 (UTC)
From:      Garrett Cooper <ngie@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r301811 - stable/10/sys/dev/ntb/ntb_hw
Message-ID:  <201606101834.u5AIYVDm033950@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: ngie
Date: Fri Jun 10 18:34:31 2016
New Revision: 301811
URL: https://svnweb.freebsd.org/changeset/base/301811

Log:
  MFC r295618,r300100,r300531:
  
  r295618 (by cem):
  
  NTB: workaround for high traffic hardware hang
  
  This patch comes from Dave Jiang's Linux tree, davejiang/ntb.  It hasn't
  been accepted into Linus' tree, so I do not have an authoritative SHA1
  to point at.  Original commit log:
  
  =====================================================================
  A hardware errata causes the NTB to hang when heavy bi-directional
  traffic in addition to the usage of BAR0/1 (where the registers reside,
  including the doorbell registers to trigger interrupts).
  
  This workaround is only available on Haswell and Broadwell platform.
  The workaround is to enable split BAR in the BIOS to allow the 64bit
  BAR4 to be split into two 32bit BAR4 and BAR5. The BAR4 shall be pointed
  to LAPIC region of the remote host. We will bypass the db mechanism and
  directly trigger the MSIX interrupts. The offsets and vectors are
  exchanged during transport scratch pad negotiation. The scratch pads are
  now overloaded in order to allow the exchange of the information. This
  gets around using the doorbell and prevents the lockup with additional
  pcode changes in BIOS.
  
  Signed-off-by:	Dave Jiang <dave.jiang@intel.com>
  =====================================================================
  
  Notable changes in the FreeBSD version of this patch:
  * The MSIX BAR is configurable, like hw.ntb.b2b_mw_idx (msix_mw_idx).
    The Linux version of the patch only uses BAR4.
  * MSIX negotiation aborts if the link goes down.
  
  Obtained from:	Linux (Dual BSD/GPL driver)
  
  r300100 (by cem):
  
  ntb_hw(4): Add sysctls for administrative/test link config, state
  
  dev.ntb_hw.0.admin_up=0/1: Like ifconfig UP/DOWN.
  dev.ntb_hw.0.active=0/1:   Like ifconfig 'status'
  
  r300531 (by cem):
  
  ntb_hw(4): Only record the first three MSIX vectors
  
  Don't overrun the msix_data array by reading the (unused) link state
  interrupt information.

Modified:
  stable/10/sys/dev/ntb/ntb_hw/ntb_hw.c
  stable/10/sys/dev/ntb/ntb_hw/ntb_hw.h
  stable/10/sys/dev/ntb/ntb_hw/ntb_regs.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/ntb/ntb_hw/ntb_hw.c
==============================================================================
--- stable/10/sys/dev/ntb/ntb_hw/ntb_hw.c	Fri Jun 10 18:22:21 2016	(r301810)
+++ stable/10/sys/dev/ntb/ntb_hw/ntb_hw.c	Fri Jun 10 18:34:31 2016	(r301811)
@@ -35,6 +35,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/endian.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pciio.h>
 #include <sys/queue.h>
 #include <sys/rman.h>
 #include <sys/sbuf.h>
@@ -42,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <machine/bus.h>
+#include <machine/intr_machdep.h>
 #include <machine/pmap.h>
 #include <machine/resource.h>
 #include <dev/pci/pcireg.h>
@@ -70,6 +73,19 @@ __FBSDID("$FreeBSD$");
 
 #define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
 
+#define	NTB_MSIX_VER_GUARD	0xaabbccdd
+#define	NTB_MSIX_RECEIVED	0xe0f0e0f0
+#define	ONE_MB			(1024u * 1024)
+
+/*
+ * PCI constants could be somewhere more generic, but aren't defined/used in
+ * pci.c.
+ */
+#define	PCI_MSIX_ENTRY_SIZE		16
+#define	PCI_MSIX_ENTRY_LOWER_ADDR	0
+#define	PCI_MSIX_ENTRY_UPPER_ADDR	4
+#define	PCI_MSIX_ENTRY_DATA		8
+
 enum ntb_device_type {
 	NTB_XEON,
 	NTB_ATOM
@@ -95,6 +111,18 @@ enum ntb_bar {
 	NTB_MAX_BARS
 };
 
+enum {
+	NTB_MSIX_GUARD = 0,
+	NTB_MSIX_DATA0,
+	NTB_MSIX_DATA1,
+	NTB_MSIX_DATA2,
+	NTB_MSIX_OFS0,
+	NTB_MSIX_OFS1,
+	NTB_MSIX_OFS2,
+	NTB_MSIX_DONE,
+	NTB_MAX_MSIX_SPAD
+};
+
 /* Device features and workarounds */
 #define HAS_FEATURE(feature)	\
 	((ntb->features & (feature)) != 0)
@@ -131,6 +159,7 @@ struct ntb_int_info {
 struct ntb_vec {
 	struct ntb_softc	*ntb;
 	uint32_t		num;
+	unsigned		masked;
 };
 
 struct ntb_reg {
@@ -169,6 +198,11 @@ struct ntb_b2b_addr {
 	uint64_t	bar5_addr32;
 };
 
+struct ntb_msix_data {
+	uint32_t	nmd_ofs;
+	uint32_t	nmd_data;
+};
+
 struct ntb_softc {
 	device_t		device;
 	enum ntb_device_type	type;
@@ -178,6 +212,13 @@ struct ntb_softc {
 	struct ntb_int_info	int_info[MAX_MSIX_INTERRUPTS];
 	uint32_t		allocated_interrupts;
 
+	struct ntb_msix_data	peer_msix_data[XEON_NONLINK_DB_MSIX_BITS];
+	struct ntb_msix_data	msix_data[XEON_NONLINK_DB_MSIX_BITS];
+	bool			peer_msix_good;
+	bool			peer_msix_done;
+	struct ntb_pci_bar_info	*peer_lapic_bar;
+	struct callout		peer_msix_work;
+
 	struct callout		heartbeat_timer;
 	struct callout		lr_timer;
 
@@ -198,6 +239,7 @@ struct ntb_softc {
 	/* Memory window used to access peer bar0 */
 #define B2B_MW_DISABLED			UINT8_MAX
 	uint8_t				b2b_mw_idx;
+	uint8_t				msix_mw_idx;
 
 	uint8_t				mw_count;
 	uint8_t				spad_count;
@@ -292,6 +334,8 @@ static inline void db_iowrite(struct ntb
 static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
 static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
 static void ntb_free_msix_vec(struct ntb_softc *ntb);
+static void ntb_get_msix_info(struct ntb_softc *ntb);
+static void ntb_exchange_msix(void *);
 static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
 static void ntb_detect_max_mw(struct ntb_softc *ntb);
 static int ntb_detect_xeon(struct ntb_softc *ntb);
@@ -308,7 +352,9 @@ static void xeon_set_pbar_xlat(struct nt
     enum ntb_bar idx);
 static int xeon_setup_b2b_mw(struct ntb_softc *,
     const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
+static int xeon_setup_msix_bar(struct ntb_softc *);
 static inline bool link_is_up(struct ntb_softc *ntb);
+static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
 static inline bool atom_link_is_err(struct ntb_softc *ntb);
 static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
 static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
@@ -319,6 +365,8 @@ static bool ntb_poll_link(struct ntb_sof
 static void save_bar_parameters(struct ntb_pci_bar_info *bar);
 static void ntb_sysctl_init(struct ntb_softc *);
 static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
+static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
+static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
 
@@ -397,6 +445,13 @@ ntb_vm_memattr_to_str(vm_memattr_t pat)
 	}
 }
 
+static int g_ntb_msix_idx = 0;
+SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
+    0, "Use this memory window to access the peer MSIX message complex on "
+    "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
+    "Like b2b_mw_idx, negative values index from the last available memory "
+    "window.  (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)");
+
 static int g_ntb_mw_idx = -1;
 TUNABLE_INT("hw.ntb.b2b_mw_idx", &g_ntb_mw_idx);
 SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
@@ -604,10 +659,12 @@ ntb_attach(device_t device)
 	ntb->type = p->type;
 	ntb->features = p->features;
 	ntb->b2b_mw_idx = B2B_MW_DISABLED;
+	ntb->msix_mw_idx = B2B_MW_DISABLED;
 
 	/* Heartbeat timer for NTB_ATOM since there is no link interrupt */
 	callout_init(&ntb->heartbeat_timer, CALLOUT_MPSAFE);
 	callout_init(&ntb->lr_timer, CALLOUT_MPSAFE);
+	callout_init(&ntb->peer_msix_work, 1);
 	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
 	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
 
@@ -632,6 +689,8 @@ ntb_attach(device_t device)
 	if (error != 0)
 		goto out;
 
+	ntb_spad_clear(ntb);
+
 	ntb_poll_link(ntb);
 
 	ntb_sysctl_init(ntb);
@@ -649,10 +708,14 @@ ntb_detach(device_t device)
 
 	ntb = DEVICE2SOFTC(device);
 
-	if (ntb->self_reg != NULL)
-		ntb_db_set_mask(ntb, ntb->db_valid_mask);
+	if (ntb->self_reg != NULL) {
+		DB_MASK_LOCK(ntb);
+		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask);
+		DB_MASK_UNLOCK(ntb);
+	}
 	callout_drain(&ntb->heartbeat_timer);
 	callout_drain(&ntb->lr_timer);
+	callout_drain(&ntb->peer_msix_work);
 	pci_disable_busmaster(ntb->device);
 	if (ntb->type == NTB_XEON)
 		ntb_teardown_xeon(ntb);
@@ -978,9 +1041,12 @@ ntb_init_isr(struct ntb_softc *ntb)
 	ntb->last_ts = ticks;
 
 	/*
-	 * Mask all doorbell interrupts.
+	 * Mask all doorbell interrupts.  (Except link events!)
 	 */
-	ntb_db_set_mask(ntb, ntb->db_valid_mask);
+	DB_MASK_LOCK(ntb);
+	ntb->db_mask = ntb->db_valid_mask;
+	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	DB_MASK_UNLOCK(ntb);
 
 	num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device),
 	    ntb->db_count);
@@ -1005,12 +1071,28 @@ ntb_init_isr(struct ntb_softc *ntb)
 		num_vectors = 1;
 
 	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
+		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+			device_printf(ntb->device,
+			    "Errata workaround does not support MSI or INTX\n");
+			return (EINVAL);
+		}
+
 		ntb->db_vec_count = 1;
 		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
 		rc = ntb_setup_legacy_interrupt(ntb);
 	} else {
+		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
+		    HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+			device_printf(ntb->device,
+			    "Errata workaround expects %d doorbell bits\n",
+			    XEON_NONLINK_DB_MSIX_BITS);
+			return (EINVAL);
+		}
+
 		ntb_create_msix_vec(ntb, num_vectors);
 		rc = ntb_setup_msix(ntb, num_vectors);
+		if (rc == 0 && HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+			ntb_get_msix_info(ntb);
 	}
 	if (rc != 0) {
 		device_printf(ntb->device,
@@ -1116,6 +1198,9 @@ void
 ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
 {
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+		return;
+
 	DB_MASK_LOCK(ntb);
 	ntb->db_mask |= bits;
 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
@@ -1131,6 +1216,9 @@ ntb_db_clear_mask(struct ntb_softc *ntb,
 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
 	     (uintmax_t)ntb->db_valid_mask));
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+		return;
+
 	DB_MASK_LOCK(ntb);
 	ntb->db_mask &= ~bits;
 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
@@ -1141,6 +1229,18 @@ uint64_t
 ntb_db_read(struct ntb_softc *ntb)
 {
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		uint64_t res;
+		unsigned i;
+
+		res = 0;
+		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+			if (ntb->msix_vec[i].masked != 0)
+				res |= ntb_db_vector_mask(ntb, i);
+		}
+		return (res);
+	}
+
 	return (db_ioread(ntb, ntb->self_reg->db_bell));
 }
 
@@ -1153,6 +1253,25 @@ ntb_db_clear(struct ntb_softc *ntb, uint
 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
 	     (uintmax_t)ntb->db_valid_mask));
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		unsigned i;
+
+		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+			if ((bits & ntb_db_vector_mask(ntb, i)) != 0) {
+				DB_MASK_LOCK(ntb);
+				if (ntb->msix_vec[i].masked != 0) {
+					/* XXX These need a public API. */
+#if 0
+					pci_unmask_msix(ntb->device, i);
+#endif
+					ntb->msix_vec[i].masked = 0;
+				}
+				DB_MASK_UNLOCK(ntb);
+			}
+		}
+		return;
+	}
+
 	db_iowrite(ntb, ntb->self_reg->db_bell, bits);
 }
 
@@ -1179,6 +1298,19 @@ ntb_interrupt(struct ntb_softc *ntb, uin
 			ntb_link_event(ntb);
 	}
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
+	    (vec_mask & ntb->db_link_mask) == 0) {
+		DB_MASK_LOCK(ntb);
+		if (ntb->msix_vec[vec].masked == 0) {
+			/* XXX These need a public API. */
+#if 0
+			pci_mask_msix(ntb->device, vec);
+#endif
+			ntb->msix_vec[vec].masked = 1;
+		}
+		DB_MASK_UNLOCK(ntb);
+	}
+
 	if ((vec_mask & ntb->db_valid_mask) != 0)
 		ntb_db_event(ntb, vec);
 }
@@ -1224,6 +1356,40 @@ ntb_free_msix_vec(struct ntb_softc *ntb)
 	ntb->msix_vec = NULL;
 }
 
+static void
+ntb_get_msix_info(struct ntb_softc *ntb)
+{
+	struct pci_devinfo *dinfo;
+	struct pcicfg_msix *msix;
+	uint32_t laddr, data, i, offset;
+
+	dinfo = device_get_ivars(ntb->device);
+	msix = &dinfo->cfg.msix;
+
+	laddr = data = 0;
+
+	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
+
+	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+		offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE;
+
+		laddr = bus_read_4(msix->msix_table_res, offset +
+		    PCI_MSIX_ENTRY_LOWER_ADDR);
+		ntb_printf(2, "local lower MSIX addr(%u): 0x%x\n", i, laddr);
+
+		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
+		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
+		     MSI_INTEL_ADDR_BASE));
+		ntb->msix_data[i].nmd_ofs = laddr & ~MSI_INTEL_ADDR_BASE;
+
+		data = bus_read_4(msix->msix_table_res, offset +
+		    PCI_MSIX_ENTRY_DATA);
+		ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
+
+		ntb->msix_data[i].nmd_data = data;
+	}
+}
+
 static struct ntb_hw_info *
 ntb_get_device_info(uint32_t device_id)
 {
@@ -1276,9 +1442,12 @@ ntb_detect_xeon(struct ntb_softc *ntb)
 	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
 		ntb->features |= NTB_SPLIT_BAR;
 
-	/* SB01BASE_LOCKUP errata is a superset of SDOORBELL errata */
+	/*
+	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
+	 * errata workaround; only do one at a time.
+	 */
 	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
-		ntb->features |= NTB_SDOORBELL_LOCKUP;
+		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
 
 	conn_type = ppd & XEON_PPD_CONN_TYPE;
 	switch (conn_type) {
@@ -1342,19 +1511,28 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
 	ntb->peer_reg = &xeon_b2b_reg;
 	ntb->xlat_reg = &xeon_sec_xlat;
 
-	/*
-	 * There is a Xeon hardware errata related to writes to SDOORBELL or
-	 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
-	 * which may hang the system.  To workaround this, use a memory
-	 * window to access the interrupt and scratch pad registers on the
-	 * remote system.
-	 */
-	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
+		    ntb->mw_count;
+		ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
+		    g_ntb_msix_idx, ntb->msix_mw_idx);
+		rc = ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
+		    VM_MEMATTR_UNCACHEABLE);
+		KASSERT(rc == 0, ("shouldn't fail"));
+	} else if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+		/*
+		 * There is a Xeon hardware errata related to writes to SDOORBELL or
+		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
+		 * which may hang the system.  To workaround this, use a memory
+		 * window to access the interrupt and scratch pad registers on the
+		 * remote system.
+		 */
 		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
 		    ntb->mw_count;
 		ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
 		    g_ntb_mw_idx, ntb->b2b_mw_idx);
-		rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx, VM_MEMATTR_UNCACHEABLE);
+		rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
+		    VM_MEMATTR_UNCACHEABLE);
 		KASSERT(rc == 0, ("shouldn't fail"));
 	} else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
 		/*
@@ -1385,7 +1563,14 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
 	/*
 	 * Mask all doorbell interrupts.
 	 */
-	ntb_db_set_mask(ntb, ntb->db_valid_mask);
+	DB_MASK_LOCK(ntb);
+	ntb->db_mask = ntb->db_valid_mask;
+	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	DB_MASK_UNLOCK(ntb);
+
+	rc = xeon_setup_msix_bar(ntb);
+	if (rc != 0)
+		return (rc);
 
 	rc = ntb_init_isr(ntb);
 	return (rc);
@@ -1489,6 +1674,15 @@ xeon_reset_sbar_size(struct ntb_softc *n
 			bar_sz--;
 		else
 			bar_sz = 0;
+	} else if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
+	    ntb_mw_to_bar(ntb, ntb->msix_mw_idx) == idx) {
+		/* Restrict LAPIC BAR to 1MB */
+		pci_write_config(ntb->device, bar->psz_off, 20, 1);
+		pci_write_config(ntb->device, bar->ssz_off, 20, 1);
+		bar_sz = pci_read_config(ntb->device, bar->psz_off, 1);
+		bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
+		(void)bar_sz;
+		return;
 	}
 	pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1);
 	bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1);
@@ -1499,28 +1693,37 @@ static void
 xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
     enum ntb_bar idx, enum ntb_bar regbar)
 {
-	uint64_t reg_val;
+	uint64_t reg_val, lmt_addr;
 	uint32_t base_reg, lmt_reg;
 
 	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
 	if (idx == regbar)
 		bar_addr += ntb->b2b_off;
+	lmt_addr = bar_addr;
+
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
+	    ntb_mw_to_bar(ntb, ntb->msix_mw_idx) == idx)
+		lmt_addr += ONE_MB;
 
+	/*
+	 * Set limit registers first to avoid an errata where setting the base
+	 * registers locks the limit registers.
+	 */
 	if (!bar_is_64bit(ntb, idx)) {
-		ntb_reg_write(4, base_reg, bar_addr);
-		reg_val = ntb_reg_read(4, base_reg);
+		ntb_reg_write(4, lmt_reg, lmt_addr);
+		reg_val = ntb_reg_read(4, lmt_reg);
 		(void)reg_val;
 
-		ntb_reg_write(4, lmt_reg, bar_addr);
-		reg_val = ntb_reg_read(4, lmt_reg);
+		ntb_reg_write(4, base_reg, bar_addr);
+		reg_val = ntb_reg_read(4, base_reg);
 		(void)reg_val;
 	} else {
-		ntb_reg_write(8, base_reg, bar_addr);
-		reg_val = ntb_reg_read(8, base_reg);
+		ntb_reg_write(8, lmt_reg, lmt_addr);
+		reg_val = ntb_reg_read(8, lmt_reg);
 		(void)reg_val;
 
-		ntb_reg_write(8, lmt_reg, bar_addr);
-		reg_val = ntb_reg_read(8, lmt_reg);
+		ntb_reg_write(8, base_reg, bar_addr);
+		reg_val = ntb_reg_read(8, base_reg);
 		(void)reg_val;
 	}
 }
@@ -1542,6 +1745,37 @@ xeon_set_pbar_xlat(struct ntb_softc *ntb
 }
 
 static int
+xeon_setup_msix_bar(struct ntb_softc *ntb)
+{
+	struct ntb_pci_bar_info *lapic_bar;
+	enum ntb_bar bar_num;
+	int rc;
+
+	if (!HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+		return (0);
+
+	bar_num = ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
+	lapic_bar = &ntb->bar_info[bar_num];
+
+	/* Restrict LAPIC BAR to 1MB */
+	if (lapic_bar->size > ONE_MB) {
+		rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY,
+		    lapic_bar->pci_resource, lapic_bar->pbase,
+		    lapic_bar->pbase + ONE_MB - 1);
+		if (rc == 0)
+			lapic_bar->size = ONE_MB;
+		else {
+			ntb_printf(0, "Failed to shrink LAPIC BAR resource to "
+			    "1 MB: %d\n", rc);
+			/* Ignore error */
+		}
+	}
+
+	ntb->peer_lapic_bar = lapic_bar;
+	return (0);
+}
+
+static int
 xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
     const struct ntb_b2b_addr *peer_addr)
 {
@@ -1619,6 +1853,43 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb,
 	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
 	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		size_t size, xlatoffset;
+
+		switch (ntb_mw_to_bar(ntb, ntb->msix_mw_idx)) {
+		case NTB_B2B_BAR_1:
+			size = 8;
+			xlatoffset = XEON_SBAR2XLAT_OFFSET;
+			break;
+		case NTB_B2B_BAR_2:
+			xlatoffset = XEON_SBAR4XLAT_OFFSET;
+			if (HAS_FEATURE(NTB_SPLIT_BAR))
+				size = 4;
+			else
+				size = 8;
+			break;
+		case NTB_B2B_BAR_3:
+			xlatoffset = XEON_SBAR5XLAT_OFFSET;
+			size = 4;
+			break;
+		default:
+			KASSERT(false, ("Bogus msix mw idx: %u",
+			    ntb->msix_mw_idx));
+			return (EINVAL);
+		}
+
+		/*
+		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
+		 * workaround
+		 */
+		if (size == 4)
+			ntb_reg_write(4, xlatoffset, MSI_INTEL_ADDR_BASE);
+		else
+			ntb_reg_write(8, xlatoffset, MSI_INTEL_ADDR_BASE);
+	}
+	(void)ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
+	(void)ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
+
 	/* Zero outgoing translation limits (whole bar size windows) */
 	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
 	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
@@ -1656,14 +1927,21 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb,
 }
 
 static inline bool
+_xeon_link_is_up(struct ntb_softc *ntb)
+{
+
+	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
+		return (true);
+	return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
+}
+
+static inline bool
 link_is_up(struct ntb_softc *ntb)
 {
 
-	if (ntb->type == NTB_XEON) {
-		if (ntb->conn_type == NTB_CONN_TRANSPARENT)
-			return (true);
-		return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0);
-	}
+	if (ntb->type == NTB_XEON)
+		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
+		    !HAS_FEATURE(NTB_SB01BASE_LOCKUP)));
 
 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
 	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
@@ -1881,6 +2159,8 @@ ntb_link_enable(struct ntb_softc *ntb, e
 {
 	uint32_t cntl;
 
+	ntb_printf(2, "%s\n", __func__);
+
 	if (ntb->type == NTB_ATOM) {
 		pci_write_config(ntb->device, NTB_PPD_OFFSET,
 		    ntb->ppd | ATOM_PPD_INIT_LINK, 4);
@@ -1919,6 +2199,8 @@ ntb_link_disable(struct ntb_softc *ntb)
 {
 	uint32_t cntl;
 
+	ntb_printf(2, "%s\n", __func__);
+
 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
 		ntb_link_event(ntb);
 		return (0);
@@ -1934,6 +2216,23 @@ ntb_link_disable(struct ntb_softc *ntb)
 	return (0);
 }
 
+bool
+ntb_link_enabled(struct ntb_softc *ntb)
+{
+	uint32_t cntl;
+
+	if (ntb->type == NTB_ATOM) {
+		cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4);
+		return ((cntl & ATOM_PPD_INIT_LINK) != 0);
+	}
+
+	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
+		return (true);
+
+	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
+}
+
 static void
 recover_atom_link(void *arg)
 {
@@ -2002,6 +2301,19 @@ ntb_poll_link(struct ntb_softc *ntb)
 			return (false);
 
 		ntb->lnk_sta = reg_val;
+
+		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+			if (_xeon_link_is_up(ntb)) {
+				if (!ntb->peer_msix_good) {
+					callout_reset(&ntb->peer_msix_work, 0,
+					    ntb_exchange_msix, ntb);
+					return (false);
+				}
+			} else {
+				ntb->peer_msix_good = false;
+				ntb->peer_msix_done = false;
+			}
+		}
 	}
 	return (true);
 }
@@ -2040,16 +2352,26 @@ SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_inf
 static void
 ntb_sysctl_init(struct ntb_softc *ntb)
 {
-	struct sysctl_oid_list *tree_par, *regpar, *statpar, *errpar;
+	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid *tree, *tmptree;
 
 	ctx = device_get_sysctl_ctx(ntb->device);
+	globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device));
+
+	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status",
+	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0,
+	    sysctl_handle_link_status_human, "A",
+	    "Link status (human readable)");
+	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active",
+	    CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status,
+	    "IU", "Link status (1=active, 0=inactive)");
+	SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up",
+	    CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin,
+	    "IU", "Set/get interface status (1=UP, 0=DOWN)");
 
-	tree = SYSCTL_ADD_NODE(ctx,
-	    SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)), OID_AUTO,
-	    "debug_info", CTLFLAG_RD, NULL,
-	    "Driver state, statistics, and HW registers");
+	tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info",
+	    CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers");
 	tree_par = SYSCTL_CHILDREN(tree);
 
 	SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD,
@@ -2081,10 +2403,6 @@ ntb_sysctl_init(struct ntb_softc *ntb)
 	    __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0,
 	    "LNK STA register (cached)");
 
-	SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "link_status",
-	    CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status,
-	    "A", "Link status");
-
 #ifdef notyet
 	SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD,
 	    &ntb->mw_count, 0, "MW count");
@@ -2332,7 +2650,37 @@ sysctl_handle_features(SYSCTL_HANDLER_AR
 }
 
 static int
-sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
+sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
+{
+	struct ntb_softc *ntb;
+	unsigned old, new;
+	int error;
+
+	error = 0;
+	ntb = arg1;
+
+	old = ntb_link_enabled(ntb);
+
+	error = SYSCTL_OUT(req, &old, sizeof(old));
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	error = SYSCTL_IN(req, &new, sizeof(new));
+	if (error != 0)
+		return (error);
+
+	ntb_printf(0, "Admin set interface state to '%sabled'\n",
+	    (new != 0)? "en" : "dis");
+
+	if (new != 0)
+		error = ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	else
+		error = ntb_link_disable(ntb);
+	return (error);
+}
+
+static int
+sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
 {
 	struct ntb_softc *ntb;
 	struct sbuf sb;
@@ -2360,6 +2708,24 @@ sysctl_handle_link_status(SYSCTL_HANDLER
 }
 
 static int
+sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
+{
+	struct ntb_softc *ntb;
+	unsigned res;
+	int error;
+
+	error = 0;
+	ntb = arg1;
+
+	res = ntb_link_is_up(ntb, NULL, NULL);
+
+	error = SYSCTL_OUT(req, &res, sizeof(res));
+	if (error || !req->newptr)
+		return (error);
+	return (EINVAL);
+}
+
+static int
 sysctl_handle_register(SYSCTL_HANDLER_ARGS)
 {
 	struct ntb_softc *ntb;
@@ -2434,12 +2800,70 @@ static unsigned
 ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
 {
 
-	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
-	    uidx >= ntb->b2b_mw_idx)
-		return (uidx + 1);
+	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
+	    uidx >= ntb->b2b_mw_idx) ||
+	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
+		uidx++;
+	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
+	    uidx >= ntb->b2b_mw_idx) &&
+	    (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx))
+		uidx++;
 	return (uidx);
 }
 
+static void
+ntb_exchange_msix(void *ctx)
+{
+	struct ntb_softc *ntb;
+	uint32_t val;
+	unsigned i;
+
+	ntb = ctx;
+
+	if (ntb->peer_msix_done)
+		goto msix_done;
+
+	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+		ntb_peer_spad_write(ntb, NTB_MSIX_DATA0 + i,
+		    ntb->msix_data[i].nmd_data);
+		ntb_peer_spad_write(ntb, NTB_MSIX_OFS0 + i,
+		    ntb->msix_data[i].nmd_ofs);
+	}
+	ntb_peer_spad_write(ntb, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
+
+	ntb_spad_read(ntb, NTB_MSIX_GUARD, &val);
+	if (val != NTB_MSIX_VER_GUARD)
+		goto reschedule;
+
+	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+		ntb_spad_read(ntb, NTB_MSIX_DATA0 + i, &val);
+		ntb->peer_msix_data[i].nmd_data = val;
+		ntb_spad_read(ntb, NTB_MSIX_OFS0 + i, &val);
+		ntb->peer_msix_data[i].nmd_ofs = val;
+	}
+
+	ntb->peer_msix_done = true;
+
+msix_done:
+	ntb_peer_spad_write(ntb, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
+	ntb_spad_read(ntb, NTB_MSIX_DONE, &val);
+	if (val != NTB_MSIX_RECEIVED)
+		goto reschedule;
+
+	ntb->peer_msix_good = true;
+
+	ntb_poll_link(ntb);
+	ntb_link_event(ntb);
+	return;
+
+reschedule:
+	ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2);
+	if (_xeon_link_is_up(ntb))
+		callout_reset(&ntb->peer_msix_work, hz / 100, ntb_exchange_msix, ntb);
+	else
+		ntb_spad_clear(ntb);
+}
+
 /*
  * Public API to the rest of the OS
  */
@@ -2469,10 +2893,14 @@ ntb_get_max_spads(struct ntb_softc *ntb)
 uint8_t
 ntb_mw_count(struct ntb_softc *ntb)
 {
+	uint8_t res;
 
+	res = ntb->mw_count;
 	if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0)
-		return (ntb->mw_count - 1);
-	return (ntb->mw_count);
+		res--;
+	if (ntb->msix_mw_idx != B2B_MW_DISABLED)
+		res--;
+	return (res);
 }
 
 /**
@@ -2498,6 +2926,18 @@ ntb_spad_write(struct ntb_softc *ntb, un
 	return (0);
 }
 
+/*
+ * Zeros the local scratchpad.
+ */
+void
+ntb_spad_clear(struct ntb_softc *ntb)
+{
+	unsigned i;
+
+	for (i = 0; i < ntb->spad_count; i++)
+		ntb_spad_write(ntb, i, 0);
+}
+
 /**
  * ntb_spad_read() - read from the primary scratchpad register
  * @ntb: pointer to ntb_softc instance
@@ -2826,6 +3266,22 @@ void
 ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
 {
 
+	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		struct ntb_pci_bar_info *lapic;
+		unsigned i;
+
+		lapic = ntb->peer_lapic_bar;
+
+		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+			if ((bit & ntb_db_vector_mask(ntb, i)) != 0)
+				bus_space_write_4(lapic->pci_bus_tag,
+				    lapic->pci_bus_handle,
+				    ntb->peer_msix_data[i].nmd_ofs,
+				    ntb->peer_msix_data[i].nmd_data);
+		}
+		return;
+	}
+
 	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
 		ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
 		return;

Modified: stable/10/sys/dev/ntb/ntb_hw/ntb_hw.h
==============================================================================
--- stable/10/sys/dev/ntb/ntb_hw/ntb_hw.h	Fri Jun 10 18:22:21 2016	(r301810)
+++ stable/10/sys/dev/ntb/ntb_hw/ntb_hw.h	Fri Jun 10 18:34:31 2016	(r301811)
@@ -70,6 +70,7 @@ bool ntb_link_is_up(struct ntb_softc *, 
 void ntb_link_event(struct ntb_softc *);
 int ntb_link_enable(struct ntb_softc *, enum ntb_speed, enum ntb_width);
 int ntb_link_disable(struct ntb_softc *);
+bool ntb_link_enabled(struct ntb_softc *);
 
 int ntb_set_ctx(struct ntb_softc *, void *, const struct ntb_ctx_ops *);
 void *ntb_get_ctx(struct ntb_softc *, const struct ntb_ctx_ops **);
@@ -86,6 +87,7 @@ int ntb_mw_get_wc(struct ntb_softc *, un
 int ntb_mw_set_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t mode);
 
 uint8_t ntb_get_max_spads(struct ntb_softc *ntb);
+void ntb_spad_clear(struct ntb_softc *ntb);
 int ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val);
 int ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val);
 int ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx,

Modified: stable/10/sys/dev/ntb/ntb_hw/ntb_regs.h
==============================================================================
--- stable/10/sys/dev/ntb/ntb_hw/ntb_regs.h	Fri Jun 10 18:22:21 2016	(r301810)
+++ stable/10/sys/dev/ntb/ntb_hw/ntb_regs.h	Fri Jun 10 18:34:31 2016	(r301811)
@@ -44,6 +44,7 @@
 #define XEON_DB_MSIX_VECTOR_COUNT	4
 #define XEON_DB_MSIX_VECTOR_SHIFT	5
 #define XEON_DB_LINK_BIT	(1 << XEON_DB_LINK)
+#define XEON_NONLINK_DB_MSIX_BITS	3
 
 #define XEON_SPCICMD_OFFSET	0x0504
 #define XEON_DEVCTRL_OFFSET	0x0598



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201606101834.u5AIYVDm033950>