Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 20 Jan 2012 13:10:41 +0000 (UTC)
From:      Luigi Rizzo <luigi@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r230378 - in user/luigi/netmap/sys/dev: ixgbe netmap
Message-ID:  <201201201310.q0KDAfEb042963@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: luigi
Date: Fri Jan 20 13:10:40 2012
New Revision: 230378
URL: http://svn.freebsd.org/changeset/base/230378

Log:
  snapshot of current code:
  - implement more aggressive interrupt throttling on the tx queues
  - pass rx interrupt mitigation up to the reading process
  - make interrupt_rate a writable sysctl parameter
  - correct the value used for computations of interrupt rate
    (it was off by a factor of 2)

Modified:
  user/luigi/netmap/sys/dev/ixgbe/ixgbe.c
  user/luigi/netmap/sys/dev/netmap/ixgbe_netmap.h
  user/luigi/netmap/sys/dev/netmap/netmap.c
  user/luigi/netmap/sys/dev/netmap/netmap_kern.h

Modified: user/luigi/netmap/sys/dev/ixgbe/ixgbe.c
==============================================================================
--- user/luigi/netmap/sys/dev/ixgbe/ixgbe.c	Fri Jan 20 12:59:12 2012	(r230377)
+++ user/luigi/netmap/sys/dev/ixgbe/ixgbe.c	Fri Jan 20 13:10:40 2012	(r230378)
@@ -229,10 +229,10 @@ MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
 ** is varied over time based on the
 ** traffic for that interrupt vector
 */
-static int ixgbe_enable_aim = TRUE;
+static int ixgbe_enable_aim = 0; // TRUE;
 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
 
-static int ixgbe_max_interrupt_rate = (8000000 / IXGBE_LOW_LATENCY);
+static int ixgbe_max_interrupt_rate = (8000000 / 250); // IXGBE_LOW_LATENCY);
 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
 
 /* How many packets rxeof tries to clean at a time */
@@ -3385,7 +3385,11 @@ ixgbe_txeof(struct tx_ring *txr)
 #ifdef DEV_NETMAP
 	if (ifp->if_capenable & IFCAP_NETMAP) {
 		struct netmap_adapter *na = NA(ifp);
+		struct netmap_kring *kring = &na->tx_rings[txr->me];
+		tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
 
+		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+		    BUS_DMASYNC_POSTREAD);
 		/*
 		 * In netmap mode, all the work is done in the context
 		 * of the client thread. Interrupt handlers only wake up
@@ -3395,12 +3399,17 @@ ixgbe_txeof(struct tx_ring *txr)
 		 * release and re-acquire txlock to avoid deadlocks.
 		 * XXX see if we can find a better way.
 		 */
-		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
-		IXGBE_TX_UNLOCK(txr);
-		IXGBE_CORE_LOCK(adapter);
-		selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
-		IXGBE_CORE_UNLOCK(adapter);
-		IXGBE_TX_LOCK(txr);
+		if (!netmap_mitigate ||
+		    (kring->nr_kflags < kring->nkr_num_slots &&
+		     tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
+			kring->nr_kflags = kring->nkr_num_slots; // invalidate
+			selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
+			IXGBE_TX_UNLOCK(txr);
+			IXGBE_CORE_LOCK(adapter);
+			selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+			IXGBE_CORE_UNLOCK(adapter);
+			IXGBE_TX_LOCK(txr);
+		}
 		return FALSE;
 	}
 #endif /* DEV_NETMAP */
@@ -4302,6 +4311,7 @@ ixgbe_rxeof(struct ix_queue *que, int co
 		 */
 		struct netmap_adapter *na = NA(ifp);
 
+		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
 		IXGBE_RX_UNLOCK(rxr);
 		IXGBE_CORE_LOCK(adapter);
@@ -4830,7 +4840,7 @@ ixgbe_configure_ivars(struct adapter *ad
 	u32 newitr;
 
 	if (ixgbe_max_interrupt_rate > 0)
-		newitr = (8000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
+		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
 	else
 		newitr = 0;
 
@@ -5193,12 +5203,21 @@ ixgbe_sysctl_interrupt_rate_handler(SYSC
 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
 	usec = ((reg & 0x0FF8) >> 3);
 	if (usec > 0)
-		rate = 1000000 / usec;
+		rate = 500000 / usec;
 	else
 		rate = 0;
 	error = sysctl_handle_int(oidp, &rate, 0, req);
 	if (error || !req->newptr)
 		return error;
+	reg &= ~0xfff; /* default, no limitation */
+	ixgbe_max_interrupt_rate = 0;
+	if (rate > 0 && rate < 500000) {
+		if (rate < 1000)
+			rate = 1000;
+		ixgbe_max_interrupt_rate = rate;
+		reg |= ((4000000/rate) & 0xff8 );
+	}
+	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
 	return 0;
 }
 
@@ -5252,10 +5271,13 @@ ixgbe_add_hw_stats(struct adapter *adapt
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
-				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
+				CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
 				sizeof(&adapter->queues[i]),
 				ixgbe_sysctl_interrupt_rate_handler, "IU",
 				"Interrupt Rate");
+		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
+				CTLFLAG_RD, &(adapter->queues[i].irqs),
+				"irqs on this queue");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
 				CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
 				ixgbe_sysctl_tdh_handler, "IU",

Modified: user/luigi/netmap/sys/dev/netmap/ixgbe_netmap.h
==============================================================================
--- user/luigi/netmap/sys/dev/netmap/ixgbe_netmap.h	Fri Jan 20 12:59:12 2012	(r230377)
+++ user/luigi/netmap/sys/dev/netmap/ixgbe_netmap.h	Fri Jan 20 13:10:40 2012	(r230378)
@@ -191,6 +191,10 @@ fail:
  * (this is also true for every use of ring in the kernel).
  *
  * ring->avail is never used, only checked for bogus values.
+ *
+ * If do_lock is set, it means the function has been called from the ioctl
+ * handler: in this particular case, do_lock also has the special meaning of
+ * forcing the update of the NIC registers.
  */
 static int
 ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -292,10 +296,11 @@ ring_reset:
 			 * need this.
 			 */
 			curr->read.buffer_addr = htole64(paddr);
-			curr->read.olinfo_status = 0;
+			curr->read.olinfo_status = htole32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
 			curr->read.cmd_type_len =
 			    htole32(txr->txd_cmd | len |
 				(IXGBE_ADVTXD_DTYP_DATA |
+				    IXGBE_ADVTXD_DCMD_DEXT | // XXX
 				    IXGBE_ADVTXD_DCMD_IFCS |
 				    IXGBE_TXD_CMD_EOP | flags) );
 			/* If the buffer has changed, unload and reload map
@@ -336,7 +341,29 @@ ring_reset:
 	 * (meaning that probably the caller really wanted to check
 	 * for completed transmissions).
 	 */
-	if (n == 0 || kring->nr_hwavail < 1) {
+	if (do_lock) {
+		kring->nr_kflags = kring->nkr_num_slots; // filter interrupts
+		j = 1; // force read
+	} else if (kring->nr_hwavail > 0) { // no need to block
+		kring->nr_kflags = kring->nkr_num_slots; // filter interrupts
+		j = 0;
+	} else {
+		struct ixgbe_legacy_tx_desc *txd = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
+
+		// wake me up every half ring (more or less)
+		j = txr->next_to_clean + kring->nkr_num_slots/2;
+		if (j >= kring->nkr_num_slots)
+			j -= kring->nkr_num_slots;
+		// round to the closest with dd set
+		j= (j < kring->nkr_num_slots / 4 || j >= kring->nkr_num_slots*3/4) ?
+			0 : report_frequency;
+		kring->nr_kflags = j; // remember where to look at in the interrupt
+		// now check if we have data ready
+		j = txd[j].upper.fields.status & IXGBE_TXD_STAT_DD;
+	}
+	if (!j) {
+		netmap_skip_txsync++;
+	} else {
 		int delta;
 
 		/*
@@ -362,6 +389,7 @@ ring_reset:
 			/* some tx completed, increment avail */
 			if (delta < 0)
 				delta += kring->nkr_num_slots;
+			netmap_delta[ring_nr] = (netmap_delta[ring_nr] * 15 + delta)/16;
 			txr->next_to_clean = l;
 			kring->nr_hwavail += delta;
 			if (kring->nr_hwavail > lim)
@@ -391,6 +419,8 @@ ring_reset:
  * We must subtract the newly consumed slots (cur - nr_hwcur)
  * from nr_hwavail, make the descriptors available for the next reads,
  * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail.
+ *
+ * do_lock has a special meaning: please refer to txsync.
  */
 static int
 ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -401,6 +431,7 @@ ixgbe_netmap_rxsync(void *a, u_int ring_
 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
 	struct netmap_ring *ring = kring->ring;
 	int j, k, l, n, lim = kring->nkr_num_slots - 1;
+	int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
 
 	k = ring->cur;	/* cache and check value, same as in txsync */
 	n = k - kring->nr_hwcur;
@@ -437,12 +468,14 @@ ixgbe_netmap_rxsync(void *a, u_int ring_
 	if (j > lim)
 		j -= lim + 1;
 
+    if (force_update) {
 	for (n = 0; ; n++) {
 		union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
 		uint32_t staterr = le32toh(curr->wb.upper.status_error);
 
 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
 			break;
+		// XXX add -4 if crcstrip
 		ring->slot[j].len = le16toh(curr->wb.upper.length);
 		bus_dmamap_sync(rxr->ptag,
 			rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
@@ -453,6 +486,8 @@ ixgbe_netmap_rxsync(void *a, u_int ring_
 		rxr->next_to_check = l;
 		kring->nr_hwavail += n;
 	}
+	kring->nr_kflags &= ~NKR_PENDINTR;
+    }
 
 	/*
 	 * Skip past packets that userspace has already processed

Modified: user/luigi/netmap/sys/dev/netmap/netmap.c
==============================================================================
--- user/luigi/netmap/sys/dev/netmap/netmap.c	Fri Jan 20 12:59:12 2012	(r230377)
+++ user/luigi/netmap/sys/dev/netmap/netmap.c	Fri Jan 20 13:10:40 2012	(r230378)
@@ -146,6 +146,17 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, total_
     CTLFLAG_RD, &nm_buf_pool.total_buffers, 0, "total_buffers");
 SYSCTL_INT(_dev_netmap, OID_AUTO, free_buffers,
     CTLFLAG_RD, &nm_buf_pool.free, 0, "free_buffers");
+int netmap_mitigate = 1;
+SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
+int netmap_skip_txsync;
+SYSCTL_INT(_dev_netmap, OID_AUTO, skip_txsync, CTLFLAG_RW, &netmap_skip_txsync, 0, "");
+int netmap_skip_rxsync;
+SYSCTL_INT(_dev_netmap, OID_AUTO, skip_rxsync, CTLFLAG_RW, &netmap_skip_rxsync, 0, "");
+int netmap_delta[8];
+SYSCTL_INT(_dev_netmap, OID_AUTO, delta0, CTLFLAG_RW, &netmap_delta[0], 0, "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, delta1, CTLFLAG_RW, &netmap_delta[1], 0, "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, delta2, CTLFLAG_RW, &netmap_delta[2], 0, "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, delta3, CTLFLAG_RW, &netmap_delta[3], 0, "");
 
 /*
  * Allocate n buffers from the ring, and fill the slot.

Modified: user/luigi/netmap/sys/dev/netmap/netmap_kern.h
==============================================================================
--- user/luigi/netmap/sys/dev/netmap/netmap_kern.h	Fri Jan 20 12:59:12 2012	(r230377)
+++ user/luigi/netmap/sys/dev/netmap/netmap_kern.h	Fri Jan 20 13:10:40 2012	(r230378)
@@ -65,7 +65,8 @@ struct netmap_kring {
 	struct netmap_ring *ring;
 	u_int nr_hwcur;
 	int nr_hwavail;
-	u_int nr_kflags;
+	u_int nr_kflags;	/* private driver flags */
+#define NKR_PENDINTR   0x1     // Pending interrupt.
 	u_int nkr_num_slots;
 
 	int	nkr_hwofs;	/* offset between NIC and netmap ring */
@@ -171,6 +172,9 @@ struct netmap_slot *netmap_reset(struct 
 	enum txrx tx, int n, u_int new_cur);
 int netmap_ring_reinit(struct netmap_kring *);
 
+extern int netmap_mitigate;
+extern int netmap_skip_txsync, netmap_skip_rxsync;
+extern int netmap_delta[8];
 extern u_int netmap_total_buffers;
 extern char *netmap_buffer_base;
 extern int netmap_verbose;	// XXX debugging



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201201201310.q0KDAfEb042963>