From owner-svn-src-all@FreeBSD.ORG Thu Jul 10 05:36:05 2014 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 4D822AC6; Thu, 10 Jul 2014 05:36:05 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 39ED02310; Thu, 10 Jul 2014 05:36:05 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.8/8.14.8) with ESMTP id s6A5a5pC065553; Thu, 10 Jul 2014 05:36:05 GMT (envelope-from bryanv@svn.freebsd.org) Received: (from bryanv@localhost) by svn.freebsd.org (8.14.8/8.14.8/Submit) id s6A5a49s065550; Thu, 10 Jul 2014 05:36:04 GMT (envelope-from bryanv@svn.freebsd.org) Message-Id: <201407100536.s6A5a49s065550@svn.freebsd.org> From: Bryan Venteicher Date: Thu, 10 Jul 2014 05:36:04 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r268481 - head/sys/dev/virtio/network X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 10 Jul 2014 05:36:05 -0000 Author: bryanv Date: Thu Jul 10 05:36:04 2014 New Revision: 268481 URL: http://svnweb.freebsd.org/changeset/base/268481 Log: Rework when the Tx queue completion interrupt is enabled The Tx interrupt is now kept disabled in the common case, only enabled when the number of free descriptors in the queue falls below a 
threshold. Transmitted frames are cleared from the VQ before subsequent transmit, or in the watchdog timer. This was a very big performance improvement for an experimental Netmap bhyve backend. MFC after: 1 month Modified: head/sys/dev/virtio/network/if_vtnet.c head/sys/dev/virtio/network/if_vtnetvar.h Modified: head/sys/dev/virtio/network/if_vtnet.c ============================================================================== --- head/sys/dev/virtio/network/if_vtnet.c Thu Jul 10 05:26:01 2014 (r268480) +++ head/sys/dev/virtio/network/if_vtnet.c Thu Jul 10 05:36:04 2014 (r268481) @@ -128,6 +128,8 @@ static int vtnet_rxq_eof(struct vtnet_rx static void vtnet_rx_vq_intr(void *); static void vtnet_rxq_tq_intr(void *, int); +static int vtnet_txq_below_threshold(struct vtnet_txq *); +static int vtnet_txq_notify(struct vtnet_txq *); static void vtnet_txq_free_mbufs(struct vtnet_txq *); static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, int *, int *, int *); @@ -149,7 +151,7 @@ static void vtnet_txq_tq_deferred(void * #endif static void vtnet_txq_start(struct vtnet_txq *); static void vtnet_txq_tq_intr(void *, int); -static void vtnet_txq_eof(struct vtnet_txq *); +static int vtnet_txq_eof(struct vtnet_txq *); static void vtnet_tx_vq_intr(void *); static void vtnet_tx_start_all(struct vtnet_softc *); @@ -206,6 +208,8 @@ static void vtnet_ifmedia_sts(struct ifn static void vtnet_get_hwaddr(struct vtnet_softc *); static void vtnet_set_hwaddr(struct vtnet_softc *); static void vtnet_vlan_tag_remove(struct mbuf *); +static void vtnet_set_rx_process_limit(struct vtnet_softc *); +static void vtnet_set_tx_intr_threshold(struct vtnet_softc *); static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_rxq *); @@ -241,19 +245,6 @@ TUNABLE_INT("hw.vtnet.mq_max_pairs", &vt static int vtnet_rx_process_limit = 512; TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); -/* - * Reducing the number of transmit 
completed interrupts can improve - * performance. To do so, the define below keeps the Tx vq interrupt - * disabled and adds calls to vtnet_txeof() in the start and watchdog - * paths. The price to pay for this is the m_free'ing of transmitted - * mbufs may be delayed until the watchdog fires. - * - * BMV: Reintroduce this later as a run-time option, if it makes - * sense after the EVENT_IDX feature is supported. - * - * #define VTNET_TX_INTR_MODERATION - */ - static uma_zone_t vtnet_tx_header_zone; static struct virtio_feature_desc vtnet_feature_desc[] = { @@ -903,7 +894,6 @@ vtnet_setup_interface(struct vtnet_softc { device_t dev; struct ifnet *ifp; - int limit; dev = sc->vtnet_dev; @@ -1002,11 +992,8 @@ vtnet_setup_interface(struct vtnet_softc vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); } - limit = vtnet_tunable_int(sc, "rx_process_limit", - vtnet_rx_process_limit); - if (limit < 0) - limit = INT_MAX; - sc->vtnet_rx_process_limit = limit; + vtnet_set_rx_process_limit(sc); + vtnet_set_tx_intr_threshold(sc); return (0); } @@ -1897,6 +1884,44 @@ vtnet_rxq_tq_intr(void *xrxq, int pendin VTNET_RXQ_UNLOCK(rxq); } +static int +vtnet_txq_below_threshold(struct vtnet_txq *txq) +{ + struct vtnet_softc *sc; + struct virtqueue *vq; + + sc = txq->vtntx_sc; + vq = txq->vtntx_vq; + + return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh); +} + +static int +vtnet_txq_notify(struct vtnet_txq *txq) +{ + struct virtqueue *vq; + + vq = txq->vtntx_vq; + + txq->vtntx_watchdog = VTNET_TX_TIMEOUT; + virtqueue_notify(vq); + + if (vtnet_txq_enable_intr(txq) == 0) + return (0); + + /* + * Drain frames that were completed since last checked. If this + * causes the queue to go above the threshold, the caller should + * continue transmitting. 
+ */ + if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { + virtqueue_disable_intr(vq); + return (1); + } + + return (0); +} + static void vtnet_txq_free_mbufs(struct vtnet_txq *txq) { @@ -2171,11 +2196,11 @@ vtnet_start_locked(struct vtnet_txq *txq struct vtnet_softc *sc; struct virtqueue *vq; struct mbuf *m0; - int enq; + int tries, enq; sc = txq->vtntx_sc; vq = txq->vtntx_vq; - enq = 0; + tries = 0; VTNET_TXQ_LOCK_ASSERT(txq); @@ -2185,6 +2210,9 @@ vtnet_start_locked(struct vtnet_txq *txq vtnet_txq_eof(txq); +again: + enq = 0; + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (virtqueue_full(vq)) break; @@ -2203,9 +2231,12 @@ vtnet_start_locked(struct vtnet_txq *txq ETHER_BPF_MTAP(ifp, m0); } - if (enq > 0) { - virtqueue_notify(vq); - txq->vtntx_watchdog = VTNET_TX_TIMEOUT; + if (enq > 0 && vtnet_txq_notify(txq) != 0) { + if (tries++ < VTNET_NOTIFY_RETRIES) + goto again; + + txq->vtntx_stats.vtxs_rescheduled++; + taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } } @@ -2232,13 +2263,13 @@ vtnet_txq_mq_start_locked(struct vtnet_t struct virtqueue *vq; struct buf_ring *br; struct ifnet *ifp; - int enq, error; + int enq, tries, error; sc = txq->vtntx_sc; vq = txq->vtntx_vq; br = txq->vtntx_br; ifp = sc->vtnet_ifp; - enq = 0; + tries = 0; error = 0; VTNET_TXQ_LOCK_ASSERT(txq); @@ -2258,14 +2289,16 @@ vtnet_txq_mq_start_locked(struct vtnet_t vtnet_txq_eof(txq); +again: + enq = 0; + while ((m = drbr_peek(ifp, br)) != NULL) { if (virtqueue_full(vq)) { drbr_putback(ifp, br, m); break; } - error = vtnet_txq_encap(txq, &m); - if (error) { + if (vtnet_txq_encap(txq, &m) != 0) { if (m != NULL) drbr_putback(ifp, br, m); else @@ -2278,9 +2311,12 @@ vtnet_txq_mq_start_locked(struct vtnet_t ETHER_BPF_MTAP(ifp, m); } - if (enq > 0) { - virtqueue_notify(vq); - txq->vtntx_watchdog = VTNET_TX_TIMEOUT; + if (enq > 0 && vtnet_txq_notify(txq) != 0) { + if (tries++ < VTNET_NOTIFY_RETRIES) + goto again; + + txq->vtntx_stats.vtxs_rescheduled++; + 
taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); } return (0); @@ -2368,30 +2404,26 @@ vtnet_txq_tq_intr(void *xtxq, int pendin } vtnet_txq_eof(txq); - vtnet_txq_start(txq); - if (vtnet_txq_enable_intr(txq) != 0) { - vtnet_txq_disable_intr(txq); - txq->vtntx_stats.vtxs_rescheduled++; - taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); - } - VTNET_TXQ_UNLOCK(txq); } -static void +static int vtnet_txq_eof(struct vtnet_txq *txq) { struct virtqueue *vq; struct vtnet_tx_header *txhdr; struct mbuf *m; + int deq; vq = txq->vtntx_vq; + deq = 0; VTNET_TXQ_LOCK_ASSERT(txq); while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { m = txhdr->vth_mbuf; + deq++; txq->vtntx_stats.vtxs_opackets++; txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; @@ -2404,6 +2436,8 @@ vtnet_txq_eof(struct vtnet_txq *txq) if (virtqueue_empty(vq)) txq->vtntx_watchdog = 0; + + return (deq); } static void @@ -2412,12 +2446,10 @@ vtnet_tx_vq_intr(void *xtxq) struct vtnet_softc *sc; struct vtnet_txq *txq; struct ifnet *ifp; - int tries; txq = xtxq; sc = txq->vtntx_sc; ifp = sc->vtnet_ifp; - tries = 0; if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { /* @@ -2432,30 +2464,15 @@ vtnet_tx_vq_intr(void *xtxq) VTNET_TXQ_LOCK(txq); -again: if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { VTNET_TXQ_UNLOCK(txq); return; } vtnet_txq_eof(txq); - vtnet_txq_start(txq); - if (vtnet_txq_enable_intr(txq) != 0) { - vtnet_txq_disable_intr(txq); - /* - * This is an occasional race, so retry a few times - * before scheduling the taskqueue. 
- */ - if (tries++ < VTNET_INTR_DISABLE_RETRIES) - goto again; - - VTNET_TXQ_UNLOCK(txq); - txq->vtntx_stats.vtxs_rescheduled++; - taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); - } else - VTNET_TXQ_UNLOCK(txq); + VTNET_TXQ_UNLOCK(txq); } static void @@ -2502,21 +2519,31 @@ vtnet_qflush(struct ifnet *ifp) static int vtnet_watchdog(struct vtnet_txq *txq) { - struct vtnet_softc *sc; + struct ifnet *ifp; - sc = txq->vtntx_sc; + ifp = txq->vtntx_sc->vtnet_ifp; VTNET_TXQ_LOCK(txq); - if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX) - vtnet_txq_eof(txq); + if (txq->vtntx_watchdog == 1) { + /* + * Only drain completed frames if the watchdog is about to + * expire. If any frames were drained, there may be enough + * free descriptors now available to transmit queued frames. + * In that case, the timer will immediately be decremented + * below, but the timeout is generous enough that should not + * be a problem. + */ + if (vtnet_txq_eof(txq) != 0) + vtnet_txq_start(txq); + } + if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { VTNET_TXQ_UNLOCK(txq); return (0); } VTNET_TXQ_UNLOCK(txq); - if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n", - txq->vtntx_id); + if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); return (1); } @@ -3566,6 +3593,50 @@ vtnet_vlan_tag_remove(struct mbuf *m) } static void +vtnet_set_rx_process_limit(struct vtnet_softc *sc) +{ + int limit; + + limit = vtnet_tunable_int(sc, "rx_process_limit", + vtnet_rx_process_limit); + if (limit < 0) + limit = INT_MAX; + sc->vtnet_rx_process_limit = limit; +} + +static void +vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) +{ + device_t dev; + int size, thresh; + + dev = sc->vtnet_dev; + size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); + + /* + * The Tx interrupt is disabled until the queue free count falls + * below our threshold. 
Completed frames are drained from the Tx + * virtqueue before transmitting new frames and in the watchdog + * callout, so the frequency of Tx interrupts is greatly reduced, + * at the cost of not freeing mbufs as quickly as they otherwise + * would be. + * + * N.B. We assume all the Tx queues are the same size. + */ + thresh = size / 4; + + /* + * Without indirect descriptors, leave enough room for the most + * segments we handle. + */ + if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0 && + thresh < sc->vtnet_tx_nsegs) + thresh = sc->vtnet_tx_nsegs; + + sc->vtnet_tx_intr_thresh = thresh; +} + +static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_rxq *rxq) { @@ -3760,8 +3831,18 @@ vtnet_rxq_disable_intr(struct vtnet_rxq static int vtnet_txq_enable_intr(struct vtnet_txq *txq) { + struct virtqueue *vq; + + vq = txq->vtntx_vq; + + if (vtnet_txq_below_threshold(txq) != 0) + return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); - return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG)); + /* + * The free count is above our threshold. Keep the Tx interrupt + * disabled until the queue is fuller. + */ + return (0); } static void Modified: head/sys/dev/virtio/network/if_vtnetvar.h ============================================================================== --- head/sys/dev/virtio/network/if_vtnetvar.h Thu Jul 10 05:26:01 2014 (r268480) +++ head/sys/dev/virtio/network/if_vtnetvar.h Thu Jul 10 05:36:04 2014 (r268481) @@ -149,6 +149,7 @@ struct vtnet_softc { int vtnet_rx_nmbufs; int vtnet_rx_clsize; int vtnet_rx_new_clsize; + int vtnet_tx_intr_thresh; int vtnet_tx_nsegs; int vtnet_if_flags; int vtnet_act_vq_pairs; @@ -183,6 +184,14 @@ struct vtnet_softc { #define VTNET_INTR_DISABLE_RETRIES 4 /* + * Similarly, additional completed entries can appear in a virtqueue + * between when last checked and before notifying the host. 
Number + * of times to retry before scheduling the taskqueue to process the + * queue. + */ +#define VTNET_NOTIFY_RETRIES 4 + +/* * Fake the media type. The host does not provide us with any real media * information. */