Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 20 Oct 2009 18:58:28 +0000 (UTC)
From:      Andrew Gallatin <gallatin@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r198303 - head/sys/dev/mxge
Message-ID:  <200910201858.n9KIwSDS075456@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gallatin
Date: Tue Oct 20 18:58:28 2009
New Revision: 198303
URL: http://svn.freebsd.org/changeset/base/198303

Log:
  Make mxge do a better job recovering from NIC h/w faults
  by checking PCI config space when the NIC is not
  transmitting.  Previously, a h/w fault would not have been
  detected if the NIC was down, or handling an RX only
  workload.

Modified:
  head/sys/dev/mxge/if_mxge.c

Modified: head/sys/dev/mxge/if_mxge.c
==============================================================================
--- head/sys/dev/mxge/if_mxge.c	Tue Oct 20 18:54:51 2009	(r198302)
+++ head/sys/dev/mxge/if_mxge.c	Tue Oct 20 18:58:28 2009	(r198303)
@@ -3640,7 +3640,6 @@ mxge_open(mxge_softc_t *sc)
 #endif
 	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
 
 	return 0;
 
@@ -3661,7 +3660,6 @@ mxge_close(mxge_softc_t *sc, int down)
 	int slice;
 #endif
 
-	callout_stop(&sc->co_hdl);
 #ifdef IFNET_BUF_RING
 	for (slice = 0; slice < sc->num_slices; slice++) {
 		ss = &sc->ss[slice];
@@ -3836,9 +3834,9 @@ mxge_watchdog_reset(mxge_softc_t *sc)
 	if (err) {
 		device_printf(sc->dev, "watchdog reset failed\n");
 	} else {
-		if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
-			callout_reset(&sc->co_hdl, mxge_ticks,
-				      mxge_tick, sc);
+		if (sc->dying == 2)
+			sc->dying = 0;
+		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
 	}
 }
 
@@ -3909,10 +3907,11 @@ mxge_watchdog(mxge_softc_t *sc)
 	return (err);
 }
 
-static void
+static u_long
 mxge_update_stats(mxge_softc_t *sc)
 {
 	struct mxge_slice_state *ss;
+	u_long pkts = 0;
 	u_long ipackets = 0;
 	u_long opackets = 0;
 #ifdef IFNET_BUF_RING
@@ -3934,6 +3933,8 @@ mxge_update_stats(mxge_softc_t *sc)
 #endif
 		oerrors += ss->oerrors;
 	}
+	pkts = (ipackets - sc->ifp->if_ipackets);
+	pkts += (opackets - sc->ifp->if_opackets);
 	sc->ifp->if_ipackets = ipackets;
 	sc->ifp->if_opackets = opackets;
 #ifdef IFNET_BUF_RING
@@ -3942,23 +3943,45 @@ mxge_update_stats(mxge_softc_t *sc)
 	sc->ifp->if_snd.ifq_drops = odrops;
 #endif
 	sc->ifp->if_oerrors = oerrors;
+	return pkts;
 }
 
 static void
 mxge_tick(void *arg)
 {
 	mxge_softc_t *sc = arg;
+	u_long pkts = 0;
 	int err = 0;
+	int running, ticks;
+	uint16_t cmd;
 
-	/* aggregate stats from different slices */
-	mxge_update_stats(sc);
-	if (!sc->watchdog_countdown) {
-		err = mxge_watchdog(sc);
-		sc->watchdog_countdown = 4;
+	ticks = mxge_ticks;
+	mtx_lock(&sc->driver_mtx);
+	running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
+	mtx_unlock(&sc->driver_mtx);
+	if (running) {
+		/* aggregate stats from different slices */
+		pkts = mxge_update_stats(sc);
+		if (!sc->watchdog_countdown) {
+			err = mxge_watchdog(sc);
+			sc->watchdog_countdown = 4;
+		}
+		sc->watchdog_countdown--;
+	}
+	if (pkts == 0) {
+		/* ensure NIC did not suffer h/w fault while idle */
+		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);		
+		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
+			sc->dying = 2;
+			taskqueue_enqueue(sc->tq, &sc->watchdog_task);
+			err = ENXIO;
+		}
+		/* look less often if NIC is idle */
+		ticks *= 4;
 	}
-	sc->watchdog_countdown--;
+
 	if (err == 0)
-		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
+		callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
 
 }
 
@@ -4747,6 +4770,7 @@ mxge_attach(device_t dev)
 	ifp->if_transmit = mxge_transmit;
 	ifp->if_qflush = mxge_qflush;
 #endif
+	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
 	return 0;
 
 abort_with_rings:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200910201858.n9KIwSDS075456>