Date:      Tue, 5 Apr 2011 21:55:43 +0000 (UTC)
From:      Jack F Vogel <jfv@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r220375 - head/sys/dev/e1000
Message-ID:  <201104052155.p35Lthob031767@svn.freebsd.org>

Author: jfv
Date: Tue Apr  5 21:55:43 2011
New Revision: 220375
URL: http://svn.freebsd.org/changeset/base/220375

Log:
  Important update for the igb driver:
    - Add the change made in em so that the actual number of unrefreshed
      descriptors is used as the basis in rxeof, on the way out, to
      determine whether more refreshing is needed. NOTE: the ring setup
      in igb differs from em; this is not accidental, it is necessitated
      by hardware behavior: when you reset the newer adapters the
      hardware will not let you write RDH, it ALWAYS sets it to 0. Thus
      the way em does it is not possible. A small standalone sketch of
      the wrap-around arithmetic involved follows the log message.
    - Change the sysctl handling of flow control; the setting now takes
      effect dynamically when the variable changes rather than requiring
      a reset. A hypothetical userland usage sketch follows the diff.
    - Change the eee sysctl naming; validation found the old name
      unintuitive :)
    - Last but not least, some important performance tweaks in the TX
      path: I found that UDP behavior could be drastically hindered or
      improved with just small changes in the start loop. What I have
      here is what testing has shown to be the best overall. It's
      interesting to note that changing the clean threshold to start at
      a full half of the ring made a BIG difference in performance. I
      hope that this will prove to be advantageous for most workloads.
  
  MFC in a week.
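
For reference, a minimal standalone sketch (illustrative names, not
driver code) of the wrap-around arithmetic that the new
igb_rx_unrefreshed() helper in if_igb.h uses to count the descriptors
still awaiting refresh:

    #include <stdio.h>

    /*
     * Count RX descriptors between the refresh and check pointers on a
     * ring of num_rx_desc entries, handling the wrap past the ring end.
     */
    static unsigned
    rx_unrefreshed(unsigned next_to_check, unsigned next_to_refresh,
        unsigned num_rx_desc)
    {
            if (next_to_check > next_to_refresh)
                    return (next_to_check - next_to_refresh - 1);
            return ((num_rx_desc + next_to_check) - next_to_refresh - 1);
    }

    int
    main(void)
    {
            /* No wrap: check pointer ahead of refresh pointer. */
            printf("%u\n", rx_unrefreshed(512, 300, 1024)); /* 211 */
            /* Wrapped: the count spans the end of the ring. */
            printf("%u\n", rx_unrefreshed(10, 1000, 1024)); /* 33 */
            return (0);
    }

Because the hardware forces RDH to 0 on reset, igb_setup_receive_ring()
now leaves next_to_check at 0 and next_to_refresh at num_rx_desc - 1,
which makes this count start at zero right after ring setup.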

Modified:
  head/sys/dev/e1000/if_igb.c
  head/sys/dev/e1000/if_igb.h

Modified: head/sys/dev/e1000/if_igb.c
==============================================================================
--- head/sys/dev/e1000/if_igb.c	Tue Apr  5 21:26:05 2011	(r220374)
+++ head/sys/dev/e1000/if_igb.c	Tue Apr  5 21:55:43 2011	(r220375)
@@ -1,6 +1,6 @@
 /******************************************************************************
 
-  Copyright (c) 2001-2010, Intel Corporation 
+  Copyright (c) 2001-2011, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
@@ -99,7 +99,7 @@ int	igb_display_debug_stats = 0;
 /*********************************************************************
  *  Driver version:
  *********************************************************************/
-char igb_driver_version[] = "version - 2.1.7";
+char igb_driver_version[] = "version - 2.2.3";
 
 
 /*********************************************************************
@@ -262,6 +262,7 @@ static void	igb_handle_link(void *contex
 
 static void	igb_set_sysctl_value(struct adapter *, const char *,
 		    const char *, int *, int);
+static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
 
 #ifdef DEVICE_POLLING
 static poll_handler_t igb_poll;
@@ -350,8 +351,8 @@ static int igb_fc_setting = e1000_fc_ful
 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
 
 /* Energy Efficient Ethernet - default to off */
-static int igb_eee_setting = FALSE;
-TUNABLE_INT("hw.igb.ee_setting", &igb_eee_setting);
+static int igb_eee_disabled = TRUE;
+TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);
 
 /*
 ** DMA Coalescing, only for i350 - default to off,
@@ -445,6 +446,11 @@ igb_attach(device_t dev)
 	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
 	    &igb_enable_aim, 1, "Interrupt Moderation");
 
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
+	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
+
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
 	/* Determine hardware and mac info */
@@ -471,11 +477,6 @@ igb_attach(device_t dev)
 	    "max number of rx packets to process", &adapter->rx_process_limit,
 	    igb_rx_process_limit);
 
-       /* Sysctl for setting the interface flow control */
-	igb_set_sysctl_value(adapter, "flow_control",
-	    "configure flow control",
-	    &adapter->fc_setting, igb_fc_setting);
-
 	/*
 	 * Validate number of transmit and receive descriptors. It
 	 * must not exceed hardware maximum, and must be multiple
@@ -552,10 +553,10 @@ igb_attach(device_t dev)
 		igb_set_sysctl_value(adapter, "dma_coalesce",
 		    "configure dma coalesce",
 		    &adapter->dma_coalesce, igb_dma_coalesce);
-		igb_set_sysctl_value(adapter, "eee_control",
+		igb_set_sysctl_value(adapter, "eee_disabled",
 		    "enable Energy Efficient Ethernet",
 		    &adapter->hw.dev_spec._82575.eee_disable,
-		    igb_eee_setting);
+		    igb_eee_disabled);
 		e1000_set_eee_i350(&adapter->hw);
 	}
 
@@ -822,11 +823,12 @@ igb_start_locked(struct tx_ring *txr, st
 	if (!adapter->link_active)
 		return;
 
+	/* Call cleanup if number of TX descriptors low */
+	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
+		igb_txeof(txr);
+
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
-		/* Cleanup if TX descriptors are low */
-		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
-			igb_txeof(txr);
-		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
+		if (txr->tx_avail <= IGB_MAX_SCATTER) {
 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			break;
 		}
@@ -932,13 +934,6 @@ igb_mq_start_locked(struct ifnet *ifp, s
 
 	/* Process the queue */
 	while (next != NULL) {
-		/* Call cleanup if number of TX descriptors low */
-		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
-			igb_txeof(txr);
-		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
-			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-			break;
-		}
 		if ((err = igb_xmit(txr, &next)) != 0) {
 			if (next != NULL)
 				err = drbr_enqueue(ifp, txr->br, next);
@@ -949,6 +944,12 @@ igb_mq_start_locked(struct ifnet *ifp, s
 		ETHER_BPF_MTAP(ifp, next);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
+		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
+			igb_txeof(txr);
+		if (txr->tx_avail <= IGB_MAX_SCATTER) {
+			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+			break;
+		}
 		next = drbr_dequeue(ifp, txr->br);
 	}
 	if (enq > 0) {
@@ -1266,10 +1267,13 @@ igb_init_locked(struct adapter *adapter)
 	else
 #endif /* DEVICE_POLLING */
 	{
-	igb_enable_intr(adapter);
-	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
+		igb_enable_intr(adapter);
+		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
 	}
 
+	/* Set Energy Efficient Ethernet */
+	e1000_set_eee_i350(&adapter->hw);
+
 	/* Don't reset the phy next time init gets called */
 	adapter->hw.phy.reset_disable = TRUE;
 }
@@ -1458,10 +1462,6 @@ igb_msix_que(void *arg)
 	more_tx = igb_txeof(txr);
 	IGB_TX_UNLOCK(txr);
 
-	/* If RX ring is depleted do refresh first */
-	if (rxr->next_to_check == rxr->next_to_refresh)
-		igb_refresh_mbufs(rxr, rxr->next_to_check);
-
 	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
 
 	if (igb_enable_aim == FALSE)
@@ -2670,14 +2670,6 @@ igb_reset(struct adapter *adapter)
 	fc->pause_time = IGB_FC_PAUSE_TIME;
 	fc->send_xon = TRUE;
 
-	/* Set Flow control, use the tunable location if sane */
-	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
-		fc->requested_mode = adapter->fc_setting;
-	else
-		fc->requested_mode = e1000_fc_none;
-
-	fc->current_mode = fc->requested_mode;
-
 	/* Issue a global reset */
 	e1000_reset_hw(hw);
 	E1000_WRITE_REG(hw, E1000_WUC, 0);
@@ -2864,7 +2856,7 @@ igb_dma_malloc(struct adapter *adapter, 
 	}
 
 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
-	    BUS_DMA_NOWAIT, &dma->dma_map);
+	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
@@ -3631,19 +3623,17 @@ igb_txeof(struct tx_ring *txr)
          * If we have a minimum free, clear IFF_DRV_OACTIVE
          * to tell the stack that it is OK to send packets.
          */
-        if (txr->tx_avail > IGB_TX_OP_THRESHOLD)               
+        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {                
                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-
-	/* All clean, turn off the watchdog */
-	if (txr->tx_avail == adapter->num_tx_desc) {
-		txr->queue_status = IGB_QUEUE_IDLE;
-		return (FALSE);
-	}
-
+		/* All clean, turn off the watchdog */
+                if (txr->tx_avail == adapter->num_tx_desc) {
+			txr->queue_status = IGB_QUEUE_IDLE;
+			return (FALSE);
+		}
+        }
 	return (TRUE);
 }
 
-
 /*********************************************************************
  *
  *  Refresh mbuf buffers for RX descriptor rings
@@ -3830,13 +3820,11 @@ fail:
 static void
 igb_free_receive_ring(struct rx_ring *rxr)
 {
-	struct	adapter		*adapter;
+	struct	adapter		*adapter = rxr->adapter;
 	struct igb_rx_buf	*rxbuf;
-	int i;
 
-	adapter = rxr->adapter;
-	i = rxr->next_to_check;
-	while (i != rxr->next_to_refresh) {
+
+	for (int i = 0; i < adapter->num_rx_desc; i++) {
 		rxbuf = &rxr->rx_buffers[i];
 		if (rxbuf->m_head != NULL) {
 			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
@@ -3854,12 +3842,7 @@ igb_free_receive_ring(struct rx_ring *rx
 		}
 		rxbuf->m_head = NULL;
 		rxbuf->m_pack = NULL;
-
-		if (++i == adapter->num_rx_desc)
-			i = 0;
 	}
-	rxr->next_to_check = 0;
-	rxr->next_to_refresh = 0;
 }
 
 
@@ -3877,33 +3860,32 @@ igb_setup_receive_ring(struct rx_ring *r
 	struct igb_rx_buf	*rxbuf;
 	bus_dma_segment_t	pseg[1], hseg[1];
 	struct lro_ctrl		*lro = &rxr->lro;
-	int			i, j, nsegs, error = 0;
+	int			rsize, nsegs, error = 0;
 
 	adapter = rxr->adapter;
 	dev = adapter->dev;
 	ifp = adapter->ifp;
 
+	/* Clear the ring contents */
 	IGB_RX_LOCK(rxr);
-	/* Invalidate all descriptors */
-	for (i = 0; i < adapter->num_rx_desc; i++) {
-		union e1000_adv_rx_desc* cur;
-		cur = &rxr->rx_base[i];
-		cur->wb.upper.status_error = 0;
-        }
+	rsize = roundup2(adapter->num_rx_desc *
+	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
+	bzero((void *)rxr->rx_base, rsize);
+
+	/*
+	** Free current RX buffer structures and their mbufs
+	*/
+	igb_free_receive_ring(rxr);
 
 	/* Configure for header split? */
 	if (igb_header_split)
 		rxr->hdr_split = TRUE;
 
-        /* Get our indices */
-	i = j = rxr->next_to_refresh;
-	if (++j == adapter->num_rx_desc)
-		j = 0;
         /* Now replenish the ring mbufs */
-	while (j != rxr->next_to_check) {
+	for (int j = 0; j < adapter->num_rx_desc; ++j) {
 		struct mbuf	*mh, *mp;
 
-		rxbuf = &rxr->rx_buffers[i];
+		rxbuf = &rxr->rx_buffers[j];
 		if (rxr->hdr_split == FALSE)
 			goto skip_head;
 
@@ -3926,7 +3908,7 @@ igb_setup_receive_ring(struct rx_ring *r
 		bus_dmamap_sync(rxr->htag,
 		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
-		rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
+		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
 
 skip_head:
 		/* Now the payload cluster */
@@ -3947,16 +3929,12 @@ skip_head:
 		bus_dmamap_sync(rxr->ptag,
 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
-		rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
-
-		/* Setup for next loop */
-		i = j;
-		if (++j == adapter->num_rx_desc)
-			j = 0;
+		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
         }
 
 	/* Setup our descriptor indices */
-	rxr->next_to_refresh = i;
+	rxr->next_to_check = 0;
+	rxr->next_to_refresh = adapter->num_rx_desc - 1;
 	rxr->lro_enabled = FALSE;
 	rxr->rx_split_packets = 0;
 	rxr->rx_bytes = 0;
@@ -3989,12 +3967,12 @@ skip_head:
 	return (0);
 
 fail:
-	rxr->next_to_refresh = i;
 	igb_free_receive_ring(rxr);
 	IGB_RX_UNLOCK(rxr);
 	return (error);
 }
 
+
 /*********************************************************************
  *
  *  Initialize all receive rings.
@@ -4528,7 +4506,7 @@ next_desc:
 	}
 
 	/* Catch any remainders */
-	if (processed != 0 || i == rxr->next_to_refresh)
+	if (igb_rx_unrefreshed(rxr))
 		igb_refresh_mbufs(rxr, i);
 
 	rxr->next_to_check = i;
@@ -5552,3 +5530,38 @@ igb_set_sysctl_value(struct adapter *ada
 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
 }
 
+/*
+** Set flow control using sysctl:
+** Flow control values:
+** 	0 - off
+**	1 - rx pause
+**	2 - tx pause
+**	3 - full
+*/
+static int
+igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	struct adapter *adapter;
+
+	error = sysctl_handle_int(oidp, &igb_fc_setting, 0, req);
+
+	if (error)
+		return (error);
+
+	adapter = (struct adapter *) arg1;
+	switch (igb_fc_setting) {
+		case e1000_fc_rx_pause:
+		case e1000_fc_tx_pause:
+		case e1000_fc_full:
+			adapter->hw.fc.requested_mode = igb_fc_setting;
+			break;
+		case e1000_fc_none:
+		default:
+			adapter->hw.fc.requested_mode = e1000_fc_none;
+	}
+
+	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
+	e1000_force_mac_fc(&adapter->hw);
+	return error;
+}

Modified: head/sys/dev/e1000/if_igb.h
==============================================================================
--- head/sys/dev/e1000/if_igb.h	Tue Apr  5 21:26:05 2011	(r220374)
+++ head/sys/dev/e1000/if_igb.h	Tue Apr  5 21:55:43 2011	(r220375)
@@ -1,6 +1,6 @@
 /******************************************************************************
 
-  Copyright (c) 2001-2010, Intel Corporation 
+  Copyright (c) 2001-2011, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
@@ -132,10 +132,9 @@
 
 /*
  * This parameter controls when the driver calls the routine to reclaim
- * transmit descriptors.
+ * transmit descriptors. Cleaning earlier seems a win.
  */
-#define IGB_TX_CLEANUP_THRESHOLD	(adapter->num_tx_desc / 8)
-#define IGB_TX_OP_THRESHOLD	(adapter->num_tx_desc / 32)
+#define IGB_TX_CLEANUP_THRESHOLD	(adapter->num_tx_desc / 2)
 
 /*
  * This parameter controls whether or not autonegotation is enabled.
@@ -400,7 +399,6 @@ struct adapter {
 	u16		link_speed;
 	u16		link_duplex;
 	u32		smartspeed;
-	u32		fc_setting;
 	u32		dma_coalesce;
 
 	/* Interface queues */
@@ -483,6 +481,21 @@ struct igb_rx_buf {
 	bus_dmamap_t	pmap;	/* bus_dma map for packet */
 };
 
+/*
+** Find the number of unrefreshed RX descriptors
+*/
+static inline u16
+igb_rx_unrefreshed(struct rx_ring *rxr)
+{
+	struct adapter  *adapter = rxr->adapter;
+ 
+	if (rxr->next_to_check > rxr->next_to_refresh)
+		return (rxr->next_to_check - rxr->next_to_refresh - 1);
+	else
+		return ((adapter->num_rx_desc + rxr->next_to_check) -
+		    rxr->next_to_refresh - 1);
+}
+
 #define	IGB_CORE_LOCK_INIT(_sc, _name) \
 	mtx_init(&(_sc)->core_mtx, _name, "IGB Core Lock", MTX_DEF)
 #define	IGB_CORE_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->core_mtx)

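Usage note for the new flow_control node: igb_set_flowcntl() is attached
to the per-device sysctl tree, so flow control can now be changed at
runtime without resetting the interface. A hypothetical userland sketch
using sysctlbyname(3), assuming the adapter is unit igb0 and using the
value map documented above the handler (0 off, 1 rx pause, 2 tx pause,
3 full):

    #include <sys/types.h>
    #include <sys/sysctl.h>

    #include <err.h>
    #include <stdio.h>

    int
    main(void)
    {
            int fc = 3;     /* 3 == e1000_fc_full in the handler's map */

            /* "dev.igb.0.flow_control" assumes the adapter is unit 0. */
            if (sysctlbyname("dev.igb.0.flow_control", NULL, NULL,
                &fc, sizeof(fc)) == -1)
                    err(1, "sysctlbyname");
            printf("requested full flow control on igb0\n");
            return (0);
    }

The same value can of course be set with sysctl(8) directly; either way
the handler applies it immediately via e1000_force_mac_fc() rather than
waiting for the next reset.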

