From owner-svn-src-all@FreeBSD.ORG  Thu Jul 10 05:36:05 2014
Return-Path: <owner-svn-src-all@FreeBSD.ORG>
Delivered-To: svn-src-all@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org
 [IPv6:2001:1900:2254:206a::19:1])
 (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits))
 (No client certificate requested)
 by hub.freebsd.org (Postfix) with ESMTPS id 4D822AC6;
 Thu, 10 Jul 2014 05:36:05 +0000 (UTC)
Received: from svn.freebsd.org (svn.freebsd.org
 [IPv6:2001:1900:2254:2068::e6a:0])
 (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
 (Client did not present a certificate)
 by mx1.freebsd.org (Postfix) with ESMTPS id 39ED02310;
 Thu, 10 Jul 2014 05:36:05 +0000 (UTC)
Received: from svn.freebsd.org ([127.0.1.70])
 by svn.freebsd.org (8.14.8/8.14.8) with ESMTP id s6A5a5pC065553;
 Thu, 10 Jul 2014 05:36:05 GMT (envelope-from bryanv@svn.freebsd.org)
Received: (from bryanv@localhost)
 by svn.freebsd.org (8.14.8/8.14.8/Submit) id s6A5a49s065550;
 Thu, 10 Jul 2014 05:36:04 GMT (envelope-from bryanv@svn.freebsd.org)
Message-Id: <201407100536.s6A5a49s065550@svn.freebsd.org>
From: Bryan Venteicher <bryanv@FreeBSD.org>
Date: Thu, 10 Jul 2014 05:36:04 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
 svn-src-head@freebsd.org
Subject: svn commit: r268481 - head/sys/dev/virtio/network
X-SVN-Group: head
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
X-BeenThere: svn-src-all@freebsd.org
X-Mailman-Version: 2.1.18
Precedence: list
List-Id: "SVN commit messages for the entire src tree \(except for
 \"user\" and \"projects\"\)" <svn-src-all.freebsd.org>
List-Unsubscribe: <http://lists.freebsd.org/mailman/options/svn-src-all>,
 <mailto:svn-src-all-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/svn-src-all/>
List-Post: <mailto:svn-src-all@freebsd.org>
List-Help: <mailto:svn-src-all-request@freebsd.org?subject=help>
List-Subscribe: <http://lists.freebsd.org/mailman/listinfo/svn-src-all>,
 <mailto:svn-src-all-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Thu, 10 Jul 2014 05:36:05 -0000

Author: bryanv
Date: Thu Jul 10 05:36:04 2014
New Revision: 268481
URL: http://svnweb.freebsd.org/changeset/base/268481

Log:
  Rework when the Tx queue completion interrupt is enabled
  
  The Tx interrupt is now kept disabled in the common case, only
  enabled when the number of free descriptors in the queue falls
  below a threshold. Transmitted frames are cleared from the VQ
  before subsequent transmit, or in the watchdog timer.
  
  This was a very big performance improvement for an experimental
  Netmap bhyve backend.
  
  MFC after:	1 month

Modified:
  head/sys/dev/virtio/network/if_vtnet.c
  head/sys/dev/virtio/network/if_vtnetvar.h

Modified: head/sys/dev/virtio/network/if_vtnet.c
==============================================================================
--- head/sys/dev/virtio/network/if_vtnet.c	Thu Jul 10 05:26:01 2014	(r268480)
+++ head/sys/dev/virtio/network/if_vtnet.c	Thu Jul 10 05:36:04 2014	(r268481)
@@ -128,6 +128,8 @@ static int	vtnet_rxq_eof(struct vtnet_rx
 static void	vtnet_rx_vq_intr(void *);
 static void	vtnet_rxq_tq_intr(void *, int);
 
+static int	vtnet_txq_below_threshold(struct vtnet_txq *);
+static int	vtnet_txq_notify(struct vtnet_txq *);
 static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
 static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
 		    int *, int *, int *);
@@ -149,7 +151,7 @@ static void	vtnet_txq_tq_deferred(void *
 #endif
 static void	vtnet_txq_start(struct vtnet_txq *);
 static void	vtnet_txq_tq_intr(void *, int);
-static void	vtnet_txq_eof(struct vtnet_txq *);
+static int	vtnet_txq_eof(struct vtnet_txq *);
 static void	vtnet_tx_vq_intr(void *);
 static void	vtnet_tx_start_all(struct vtnet_softc *);
 
@@ -206,6 +208,8 @@ static void	vtnet_ifmedia_sts(struct ifn
 static void	vtnet_get_hwaddr(struct vtnet_softc *);
 static void	vtnet_set_hwaddr(struct vtnet_softc *);
 static void	vtnet_vlan_tag_remove(struct mbuf *);
+static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
+static void	vtnet_set_tx_intr_threshold(struct vtnet_softc *);
 
 static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
 		    struct sysctl_oid_list *, struct vtnet_rxq *);
@@ -241,19 +245,6 @@ TUNABLE_INT("hw.vtnet.mq_max_pairs", &vt
 static int vtnet_rx_process_limit = 512;
 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
 
-/*
- * Reducing the number of transmit completed interrupts can improve
- * performance. To do so, the define below keeps the Tx vq interrupt
- * disabled and adds calls to vtnet_txeof() in the start and watchdog
- * paths. The price to pay for this is the m_free'ing of transmitted
- * mbufs may be delayed until the watchdog fires.
- *
- * BMV: Reintroduce this later as a run-time option, if it makes
- * sense after the EVENT_IDX feature is supported.
- *
- * #define VTNET_TX_INTR_MODERATION
- */
-
 static uma_zone_t vtnet_tx_header_zone;
 
 static struct virtio_feature_desc vtnet_feature_desc[] = {
@@ -903,7 +894,6 @@ vtnet_setup_interface(struct vtnet_softc
 {
 	device_t dev;
 	struct ifnet *ifp;
-	int limit;
 
 	dev = sc->vtnet_dev;
 
@@ -1002,11 +992,8 @@ vtnet_setup_interface(struct vtnet_softc
 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
 	}
 
-	limit = vtnet_tunable_int(sc, "rx_process_limit",
-	    vtnet_rx_process_limit);
-	if (limit < 0)
-		limit = INT_MAX;
-	sc->vtnet_rx_process_limit = limit;
+	vtnet_set_rx_process_limit(sc);
+	vtnet_set_tx_intr_threshold(sc);
 
 	return (0);
 }
@@ -1897,6 +1884,44 @@ vtnet_rxq_tq_intr(void *xrxq, int pendin
 	VTNET_RXQ_UNLOCK(rxq);
 }
 
+static int
+vtnet_txq_below_threshold(struct vtnet_txq *txq)
+{
+	struct vtnet_softc *sc;
+	struct virtqueue *vq;
+
+	sc = txq->vtntx_sc;
+	vq = txq->vtntx_vq;
+
+	return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
+}
+
+static int
+vtnet_txq_notify(struct vtnet_txq *txq)
+{
+	struct virtqueue *vq;
+
+	vq = txq->vtntx_vq;
+
+	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+	virtqueue_notify(vq);
+
+	if (vtnet_txq_enable_intr(txq) == 0)
+		return (0);
+
+	/*
+	 * Drain frames that were completed since last checked. If this
+	 * causes the queue to go above the threshold, the caller should
+	 * continue transmitting.
+	 */
+	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
+		virtqueue_disable_intr(vq);
+		return (1);
+	}
+
+	return (0);
+}
+
 static void
 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
 {
@@ -2171,11 +2196,11 @@ vtnet_start_locked(struct vtnet_txq *txq
 	struct vtnet_softc *sc;
 	struct virtqueue *vq;
 	struct mbuf *m0;
-	int enq;
+	int tries, enq;
 
 	sc = txq->vtntx_sc;
 	vq = txq->vtntx_vq;
-	enq = 0;
+	tries = 0;
 
 	VTNET_TXQ_LOCK_ASSERT(txq);
 
@@ -2185,6 +2210,9 @@ vtnet_start_locked(struct vtnet_txq *txq
 
 	vtnet_txq_eof(txq);
 
+again:
+	enq = 0;
+
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
 		if (virtqueue_full(vq))
 			break;
@@ -2203,9 +2231,12 @@ vtnet_start_locked(struct vtnet_txq *txq
 		ETHER_BPF_MTAP(ifp, m0);
 	}
 
-	if (enq > 0) {
-		virtqueue_notify(vq);
-		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+		if (tries++ < VTNET_NOTIFY_RETRIES)
+			goto again;
+
+		txq->vtntx_stats.vtxs_rescheduled++;
+		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
 	}
 }
 
@@ -2232,13 +2263,13 @@ vtnet_txq_mq_start_locked(struct vtnet_t
 	struct virtqueue *vq;
 	struct buf_ring *br;
 	struct ifnet *ifp;
-	int enq, error;
+	int enq, tries, error;
 
 	sc = txq->vtntx_sc;
 	vq = txq->vtntx_vq;
 	br = txq->vtntx_br;
 	ifp = sc->vtnet_ifp;
-	enq = 0;
+	tries = 0;
 	error = 0;
 
 	VTNET_TXQ_LOCK_ASSERT(txq);
@@ -2258,14 +2289,16 @@ vtnet_txq_mq_start_locked(struct vtnet_t
 
 	vtnet_txq_eof(txq);
 
+again:
+	enq = 0;
+
 	while ((m = drbr_peek(ifp, br)) != NULL) {
 		if (virtqueue_full(vq)) {
 			drbr_putback(ifp, br, m);
 			break;
 		}
 
-		error = vtnet_txq_encap(txq, &m);
-		if (error) {
+		if (vtnet_txq_encap(txq, &m) != 0) {
 			if (m != NULL)
 				drbr_putback(ifp, br, m);
 			else
@@ -2278,9 +2311,12 @@ vtnet_txq_mq_start_locked(struct vtnet_t
 		ETHER_BPF_MTAP(ifp, m);
 	}
 
-	if (enq > 0) {
-		virtqueue_notify(vq);
-		txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
+	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
+		if (tries++ < VTNET_NOTIFY_RETRIES)
+			goto again;
+
+		txq->vtntx_stats.vtxs_rescheduled++;
+		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
 	}
 
 	return (0);
@@ -2368,30 +2404,26 @@ vtnet_txq_tq_intr(void *xtxq, int pendin
 	}
 
 	vtnet_txq_eof(txq);
-
 	vtnet_txq_start(txq);
 
-	if (vtnet_txq_enable_intr(txq) != 0) {
-		vtnet_txq_disable_intr(txq);
-		txq->vtntx_stats.vtxs_rescheduled++;
-		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
-	}
-
 	VTNET_TXQ_UNLOCK(txq);
 }
 
-static void
+static int
 vtnet_txq_eof(struct vtnet_txq *txq)
 {
 	struct virtqueue *vq;
 	struct vtnet_tx_header *txhdr;
 	struct mbuf *m;
+	int deq;
 
 	vq = txq->vtntx_vq;
+	deq = 0;
 	VTNET_TXQ_LOCK_ASSERT(txq);
 
 	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
 		m = txhdr->vth_mbuf;
+		deq++;
 
 		txq->vtntx_stats.vtxs_opackets++;
 		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
@@ -2404,6 +2436,8 @@ vtnet_txq_eof(struct vtnet_txq *txq)
 
 	if (virtqueue_empty(vq))
 		txq->vtntx_watchdog = 0;
+
+	return (deq);
 }
 
 static void
@@ -2412,12 +2446,10 @@ vtnet_tx_vq_intr(void *xtxq)
 	struct vtnet_softc *sc;
 	struct vtnet_txq *txq;
 	struct ifnet *ifp;
-	int tries;
 
 	txq = xtxq;
 	sc = txq->vtntx_sc;
 	ifp = sc->vtnet_ifp;
-	tries = 0;
 
 	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
 		/*
@@ -2432,30 +2464,15 @@ vtnet_tx_vq_intr(void *xtxq)
 
 	VTNET_TXQ_LOCK(txq);
 
-again:
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		VTNET_TXQ_UNLOCK(txq);
 		return;
 	}
 
 	vtnet_txq_eof(txq);
-
 	vtnet_txq_start(txq);
 
-	if (vtnet_txq_enable_intr(txq) != 0) {
-		vtnet_txq_disable_intr(txq);
-		/*
-		 * This is an occasional race, so retry a few times
-		 * before scheduling the taskqueue.
-		 */
-		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
-			goto again;
-
-		VTNET_TXQ_UNLOCK(txq);
-		txq->vtntx_stats.vtxs_rescheduled++;
-		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
-	} else
-		VTNET_TXQ_UNLOCK(txq);
+	VTNET_TXQ_UNLOCK(txq);
 }
 
 static void
@@ -2502,21 +2519,31 @@ vtnet_qflush(struct ifnet *ifp)
 static int
 vtnet_watchdog(struct vtnet_txq *txq)
 {
-	struct vtnet_softc *sc;
+	struct ifnet *ifp;
 
-	sc = txq->vtntx_sc;
+	ifp = txq->vtntx_sc->vtnet_ifp;
 
 	VTNET_TXQ_LOCK(txq);
-	if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX)
-		vtnet_txq_eof(txq);
+	if (txq->vtntx_watchdog == 1) {
+		/*
+		 * Only drain completed frames if the watchdog is about to
+		 * expire. If any frames were drained, there may be enough
+		 * free descriptors now available to transmit queued frames.
+		 * In that case, the timer will immediately be decremented
+		 * below, but the timeout is generous enough that it should not
+		 * be a problem.
+		 */
+		if (vtnet_txq_eof(txq) != 0)
+			vtnet_txq_start(txq);
+	}
+
 	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
 		VTNET_TXQ_UNLOCK(txq);
 		return (0);
 	}
 	VTNET_TXQ_UNLOCK(txq);
 
-	if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n",
-	    txq->vtntx_id);
+	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
 	return (1);
 }
 
@@ -3566,6 +3593,50 @@ vtnet_vlan_tag_remove(struct mbuf *m)
 }
 
 static void
+vtnet_set_rx_process_limit(struct vtnet_softc *sc)
+{
+	int limit;
+
+	limit = vtnet_tunable_int(sc, "rx_process_limit",
+	    vtnet_rx_process_limit);
+	if (limit < 0)
+		limit = INT_MAX;
+	sc->vtnet_rx_process_limit = limit;
+}
+
+static void
+vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
+{
+	device_t dev;
+	int size, thresh;
+
+	dev = sc->vtnet_dev;
+	size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
+
+	/*
+	 * The Tx interrupt is disabled until the queue free count falls
+	 * below our threshold. Completed frames are drained from the Tx
+	 * virtqueue before transmitting new frames and in the watchdog
+	 * callout, so the frequency of Tx interrupts is greatly reduced,
+	 * at the cost of not freeing mbufs as quickly as they otherwise
+	 * would be.
+	 *
+	 * N.B. We assume all the Tx queues are the same size.
+	 */
+	thresh = size / 4;
+
+	/*
+	 * Without indirect descriptors, leave enough room for the most
+	 * segments we handle.
+	 */
+	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0 &&
+	    thresh < sc->vtnet_tx_nsegs)
+		thresh = sc->vtnet_tx_nsegs;
+
+	sc->vtnet_tx_intr_thresh = thresh;
+}
+
+static void
 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
 {
@@ -3760,8 +3831,18 @@ vtnet_rxq_disable_intr(struct vtnet_rxq 
 static int
 vtnet_txq_enable_intr(struct vtnet_txq *txq)
 {
+	struct virtqueue *vq;
+
+	vq = txq->vtntx_vq;
+
+	if (vtnet_txq_below_threshold(txq) != 0)
+		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
 
-	return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG));
+	/*
+	 * The free count is above our threshold. Keep the Tx interrupt
+	 * disabled until the queue is fuller.
+	 */
+	return (0);
 }
 
 static void

Modified: head/sys/dev/virtio/network/if_vtnetvar.h
==============================================================================
--- head/sys/dev/virtio/network/if_vtnetvar.h	Thu Jul 10 05:26:01 2014	(r268480)
+++ head/sys/dev/virtio/network/if_vtnetvar.h	Thu Jul 10 05:36:04 2014	(r268481)
@@ -149,6 +149,7 @@ struct vtnet_softc {
 	int			 vtnet_rx_nmbufs;
 	int			 vtnet_rx_clsize;
 	int			 vtnet_rx_new_clsize;
+	int			 vtnet_tx_intr_thresh;
 	int			 vtnet_tx_nsegs;
 	int			 vtnet_if_flags;
 	int			 vtnet_act_vq_pairs;
@@ -183,6 +184,14 @@ struct vtnet_softc {
 #define VTNET_INTR_DISABLE_RETRIES	4
 
 /*
+ * Similarly, additional completed entries can appear in a virtqueue
+ * between when last checked and before notifying the host. Number
+ * of times to retry before scheduling the taskqueue to process the
+ * queue.
+ */
+#define VTNET_NOTIFY_RETRIES		4
+
+/*
  * Fake the media type. The host does not provide us with any real media
  * information.
  */