Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 28 Sep 2009 15:11:49 +0000 (UTC)
From:      Andrew Gallatin <gallatin@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-6@freebsd.org
Subject:   svn commit: r197578 - in stable/6/sys: . conf contrib/pf dev/cxgb dev/mxge
Message-ID:  <200909281511.n8SFBn7B058213@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gallatin
Date: Mon Sep 28 15:11:49 2009
New Revision: 197578
URL: http://svn.freebsd.org/changeset/base/197578

Log:
  MFC 197395: Improve mxge watchdog routine's ability to reliably reset a failed NIC

Modified:
  stable/6/sys/   (props changed)
  stable/6/sys/conf/   (props changed)
  stable/6/sys/contrib/pf/   (props changed)
  stable/6/sys/dev/cxgb/   (props changed)
  stable/6/sys/dev/mxge/if_mxge.c

Modified: stable/6/sys/dev/mxge/if_mxge.c
==============================================================================
--- stable/6/sys/dev/mxge/if_mxge.c	Mon Sep 28 15:10:08 2009	(r197577)
+++ stable/6/sys/dev/mxge/if_mxge.c	Mon Sep 28 15:11:49 2009	(r197578)
@@ -135,7 +135,7 @@ MODULE_DEPEND(mxge, zlib, 1, 1, 1);
 
 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
-static int mxge_close(mxge_softc_t *sc);
+static int mxge_close(mxge_softc_t *sc, int down);
 static int mxge_open(mxge_softc_t *sc);
 static void mxge_tick(void *arg);
 
@@ -1291,8 +1291,7 @@ mxge_reset(mxge_softc_t *sc, int interru
 		ss->lro_queued = 0;
 		ss->lro_flushed = 0;
 		if (ss->fw_stats != NULL) {
-			ss->fw_stats->valid = 0;
-			ss->fw_stats->send_done_count = 0;
+			bzero(ss->fw_stats, sizeof *ss->fw_stats);
 		}
 	}
 	sc->rdma_tags_available = 15;
@@ -1365,7 +1364,7 @@ mxge_change_lro_locked(mxge_softc_t *sc,
 		ifp->if_capenable |= IFCAP_LRO;
 	sc->lro_cnt = lro_cnt;
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-		mxge_close(sc);
+		mxge_close(sc, 0);
 		err = mxge_open(sc);
 	}
 	return err;
@@ -1481,6 +1480,10 @@ mxge_add_sysctls(mxge_softc_t *sc)
 		       "read_write_dma_MBs",
 		       CTLFLAG_RD, &sc->read_write_dma,
 		       0, "DMA concurrent Read/Write speed in MB/s");
+	SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
+		       "watchdog_resets",
+		       CTLFLAG_RD, &sc->watchdog_resets,
+		       0, "Number of times NIC was reset");
 
 
 	/* performance related tunables */
@@ -3377,28 +3380,30 @@ abort:
 }
 
 static int
-mxge_close(mxge_softc_t *sc)
+mxge_close(mxge_softc_t *sc, int down)
 {
 	mxge_cmd_t cmd;
 	int err, old_down_cnt;
 
 	callout_stop(&sc->co_hdl);
 	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-	old_down_cnt = sc->down_cnt;
-	mb();
-	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
-	if (err) {
-		device_printf(sc->dev, "Couldn't bring down link\n");
-	}
-	if (old_down_cnt == sc->down_cnt) {
-		/* wait for down irq */
-		DELAY(10 * sc->intr_coal_delay);
-	}
-	mb();
-	if (old_down_cnt == sc->down_cnt) {
-		device_printf(sc->dev, "never got down irq\n");
+	if (!down) {
+		old_down_cnt = sc->down_cnt;
+		mb();
+		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
+		if (err) {
+			device_printf(sc->dev,
+				      "Couldn't bring down link\n");
+		}
+		if (old_down_cnt == sc->down_cnt) {
+			/* wait for down irq */
+			DELAY(10 * sc->intr_coal_delay);
+		}
+		mb();
+		if (old_down_cnt == sc->down_cnt) {
+			device_printf(sc->dev, "never got down irq\n");
+		}
 	}
-
 	mxge_free_mbufs(sc);
 
 	return 0;
@@ -3451,7 +3456,8 @@ static int
 mxge_watchdog_reset(mxge_softc_t *sc)
 {
 	struct pci_devinfo *dinfo;
-	int err;
+	struct mxge_slice_state *ss;
+	int err, running, s, num_tx_slices = 1;
 	uint32_t reboot;
 	uint16_t cmd;
 
@@ -3485,6 +3491,30 @@ mxge_watchdog_reset(mxge_softc_t *sc)
 		reboot = mxge_read_reboot(sc);
 		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
 			      reboot);
+		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
+		if (running) {
+
+			/* 
+			 * quiesce NIC so that TX routines will not try to
+			 * xmit after restoration of BAR
+			 */
+
+			/* Mark the link as down */
+			if (sc->link_state) {
+				sc->link_state = 0;
+				if_link_state_change(sc->ifp,
+						     LINK_STATE_DOWN);
+			}
+#ifdef IFNET_BUF_RING
+			num_tx_slices = sc->num_slices;
+#endif
+			/* grab all TX locks to ensure no tx  */
+			for (s = 0; s < num_tx_slices; s++) {
+				ss = &sc->ss[s];
+				mtx_lock(&ss->tx.mtx);
+			}
+			mxge_close(sc, 1);
+		}
 		/* restore PCI configuration space */
 		dinfo = device_get_ivars(sc->dev);
 		pci_cfg_restore(sc->dev, dinfo);
@@ -3492,10 +3522,22 @@ mxge_watchdog_reset(mxge_softc_t *sc)
 		/* and redo any changes we made to our config space */
 		mxge_setup_cfg_space(sc);
 
-		if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
-			mxge_close(sc);
-			err = mxge_open(sc);
+		/* reload f/w */
+		err = mxge_load_firmware(sc, 0);
+		if (err) {
+			device_printf(sc->dev,
+				      "Unable to re-load f/w\n");
 		}
+		if (running) {
+			if (!err)
+				err = mxge_open(sc);
+			/* release all TX locks */
+			for (s = 0; s < num_tx_slices; s++) {
+				ss = &sc->ss[s];
+				mtx_unlock(&ss->tx.mtx);
+			}
+		}
+		sc->watchdog_resets++;
 	} else {
 		device_printf(sc->dev, "NIC did not reboot, ring state:\n");
 		device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
@@ -3505,6 +3547,9 @@ mxge_watchdog_reset(mxge_softc_t *sc)
 			      be32toh(sc->ss->fw_stats->send_done_count));
 		device_printf(sc->dev, "not resetting\n");
 	}
+	if (err)
+		device_printf(sc->dev, "watchdog reset failed\n");
+
 	return (err);
 }
 
@@ -3590,11 +3635,11 @@ mxge_change_mtu(mxge_softc_t *sc, int mt
 	old_mtu = ifp->if_mtu;
 	ifp->if_mtu = mtu;
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-		mxge_close(sc);
+		mxge_close(sc, 0);
 		err = mxge_open(sc);
 		if (err != 0) {
 			ifp->if_mtu = old_mtu;
-			mxge_close(sc);
+			mxge_close(sc, 0);
 			(void) mxge_open(sc);
 		}
 	}
@@ -3648,7 +3693,7 @@ mxge_ioctl(struct ifnet *ifp, u_long com
 			}
 		} else {
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-				mxge_close(sc);
+				mxge_close(sc, 0);
 			}
 		}
 		mtx_unlock(&sc->driver_mtx);
@@ -4345,7 +4390,7 @@ mxge_detach(device_t dev)
 	}
 	mtx_lock(&sc->driver_mtx);
 	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
-		mxge_close(sc);
+		mxge_close(sc, 0);
 	mtx_unlock(&sc->driver_mtx);
 	ether_ifdetach(sc->ifp);
 	callout_drain(&sc->co_hdl);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200909281511.n8SFBn7B058213>