From: Luigi Rizzo
Date: Mon, 9 Jun 2014 15:24:46 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject: svn commit: r267282 - stable/10/sys/dev/netmap

Author: luigi
Date: Mon Jun 9 15:24:45 2014
New Revision: 267282
URL: http://svnweb.freebsd.org/changeset/base/267282

Log:
  sync netmap code with the version in HEAD:
  - fix handling of tx mbufs in emulated netmap mode;
  - introduce mbq_lock() and mbq_unlock()
  - rate limit some error messages
  - many whitespace and comment fixes

Modified:
  stable/10/sys/dev/netmap/netmap.c
  stable/10/sys/dev/netmap/netmap_freebsd.c
  stable/10/sys/dev/netmap/netmap_generic.c
  stable/10/sys/dev/netmap/netmap_kern.h
  stable/10/sys/dev/netmap/netmap_mbq.c
  stable/10/sys/dev/netmap/netmap_mbq.h
  stable/10/sys/dev/netmap/netmap_mem2.c
  stable/10/sys/dev/netmap/netmap_pipe.c
  stable/10/sys/dev/netmap/netmap_vale.c

Modified: stable/10/sys/dev/netmap/netmap.c ============================================================================== --- stable/10/sys/dev/netmap/netmap.c Mon Jun 9 15:16:17 2014 (r267281) +++ stable/10/sys/dev/netmap/netmap.c Mon Jun 9 15:24:45 2014 (r267282) @@ -270,6 +270,7 @@ netmap_disable_ring(struct netmap_kring } +/* stop or enable all the rings of na */ static void netmap_set_all_rings(struct ifnet *ifp, int stopped) { @@ -303,6 +304,13 @@ netmap_set_all_rings(struct ifnet *ifp, } +/* + * Convenience function used in drivers. Waits for current txsync()s/rxsync()s + * to finish and prevents any new one from starting. Call this before turning + * netmap mode off, or before removing the hardware rings (e.g., on module + * unload). As a rule of thumb for linux drivers, this should be placed near + * each napi_disable(). + */ void netmap_disable_all_rings(struct ifnet *ifp) { @@ -310,6 +318,11 @@ netmap_disable_all_rings(struct ifnet *i } +/* + * Convenience function used in drivers. Re-enables rxsync and txsync on the + * adapter's rings. In linux drivers, this should be placed near each + * napi_enable(). 
+ */ void netmap_enable_all_rings(struct ifnet *ifp) { @@ -393,6 +406,7 @@ nm_dump_buf(char *p, int len, int lim, c * Fetch configuration from the device, to cope with dynamic * reconfigurations after loading the module. */ +/* call with NMG_LOCK held */ int netmap_update_config(struct netmap_adapter *na) { @@ -447,18 +461,20 @@ netmap_rxsync_compat(struct netmap_kring return na->nm_rxsync(na, kring->ring_id, flags); } +/* kring->nm_sync callback for the host tx ring */ static int netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags) { - (void)flags; + (void)flags; /* unused */ netmap_txsync_to_host(kring->na); return 0; } +/* kring->nm_sync callback for the host rx ring */ static int netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags) { - (void)flags; + (void)flags; /* unused */ netmap_rxsync_from_host(kring->na, NULL, NULL); return 0; } @@ -489,6 +505,7 @@ netmap_rxsync_from_host_compat(struct ne * Note: for compatibility, host krings are created even when not needed. * The tailroom space is currently used by vale ports for allocating leases. */ +/* call with NMG_LOCK held */ int netmap_krings_create(struct netmap_adapter *na, u_int tailroom) { @@ -567,6 +584,7 @@ netmap_krings_create(struct netmap_adapt /* undo the actions performed by netmap_krings_create */ +/* call with NMG_LOCK held */ void netmap_krings_delete(struct netmap_adapter *na) { @@ -586,6 +604,7 @@ netmap_krings_delete(struct netmap_adapt * on the rings connected to the host so we need to purge * them first. */ +/* call with NMG_LOCK held */ static void netmap_hw_krings_delete(struct netmap_adapter *na) { @@ -598,6 +617,12 @@ netmap_hw_krings_delete(struct netmap_ad } +/* create a new netmap_if for a newly registered fd. + * If this is the first registration of the adapter, + * also create the netmap rings and their in-kernel view, + * the netmap krings. + */ +/* call with NMG_LOCK held */ static struct netmap_if* netmap_if_new(const char *ifname, struct netmap_adapter *na) { @@ -608,17 +633,23 @@ netmap_if_new(const char *ifname, struct return NULL; } - if (na->active_fds) + if (na->active_fds) /* already registered */ goto final; + /* create and init the krings arrays. + * Depending on the adapter, this may also create + * the netmap rings themselves + */ if (na->nm_krings_create(na)) goto cleanup; + /* create all missing netmap rings */ if (netmap_mem_rings_create(na)) goto cleanup; final: + /* in all cases, create a new netmap if */ nifp = netmap_mem_if_new(ifname, na); if (nifp == NULL) goto cleanup; @@ -638,8 +669,8 @@ cleanup: /* grab a reference to the memory allocator, if we don't have one already. The * reference is taken from the netmap_adapter registered with the priv. 
- * */ +/* call with NMG_LOCK held */ static int netmap_get_memory_locked(struct netmap_priv_d* p) { @@ -672,6 +703,7 @@ netmap_get_memory_locked(struct netmap_p } +/* call with NMG_LOCK *not* held */ int netmap_get_memory(struct netmap_priv_d* p) { @@ -683,6 +715,7 @@ netmap_get_memory(struct netmap_priv_d* } +/* call with NMG_LOCK held */ static int netmap_have_memory_locked(struct netmap_priv_d* p) { @@ -690,6 +723,7 @@ netmap_have_memory_locked(struct netmap_ } +/* call with NMG_LOCK held */ static void netmap_drop_memory_locked(struct netmap_priv_d* p) { @@ -755,6 +789,7 @@ netmap_do_unregif(struct netmap_priv_d * netmap_mem_if_delete(na, nifp); } +/* call with NMG_LOCK held */ static __inline int nm_tx_si_user(struct netmap_priv_d *priv) { @@ -762,6 +797,7 @@ nm_tx_si_user(struct netmap_priv_d *priv (priv->np_txqlast - priv->np_txqfirst > 1)); } +/* call with NMG_LOCK held */ static __inline int nm_rx_si_user(struct netmap_priv_d *priv) { @@ -771,8 +807,12 @@ nm_rx_si_user(struct netmap_priv_d *priv /* + * Destructor of the netmap_priv_d, called when the fd has + * no active open() and mmap(). Also called in error paths. + * * returns 1 if this is the last instance and we can free priv */ +/* call with NMG_LOCK held */ int netmap_dtor_locked(struct netmap_priv_d *priv) { @@ -805,6 +845,7 @@ netmap_dtor_locked(struct netmap_priv_d } +/* call with NMG_LOCK *not* held */ void netmap_dtor(void *data) { @@ -1009,7 +1050,7 @@ netmap_rxsync_from_host(struct netmap_ad (void)pwait; /* disable unused warnings */ (void)td; - mtx_lock(&q->lock); + mbq_lock(q); /* First part: import newly received packets */ n = mbq_len(q); @@ -1019,7 +1060,7 @@ netmap_rxsync_from_host(struct netmap_ad nm_i = kring->nr_hwtail; stop_i = nm_prev(nm_i, lim); - while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) { + while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) { int len = MBUF_LEN(m); struct netmap_slot *slot = &ring->slot[nm_i]; @@ -1051,7 +1092,7 @@ netmap_rxsync_from_host(struct netmap_ad if (kring->rcur == kring->rtail && td) /* no bufs available */ selrecord(td, &kring->si); - mtx_unlock(&q->lock); + mbq_unlock(q); return ret; } @@ -1194,6 +1235,12 @@ netmap_get_na(struct nmreq *nmr, struct if (*na != NULL) /* valid match in netmap_get_bdg_na() */ goto pipes; + /* + * This must be a hardware na, lookup the name in the system. + * Note that by hardware we actually mean "it shows up in ifconfig". + * This may still be a tap, a veth/epair, or even a + * persistent VALE port. + */ ifp = ifunit_ref(nmr->nr_name); if (ifp == NULL) { return ENXIO; @@ -1212,6 +1259,11 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter_get(ret); pipes: + /* + * If we are opening a pipe whose parent was not in netmap mode, + * we have to allocate the pipe array now. 
+ * XXX get rid of this clumsiness (2014-03-15) + */ error = netmap_pipe_alloc(*na, nmr); out: @@ -1219,7 +1271,7 @@ out: netmap_adapter_put(ret); if (ifp) - if_rele(ifp); + if_rele(ifp); /* allow live unloading of drivers modules */ return error; } @@ -1515,7 +1567,7 @@ netmap_set_ringid(struct netmap_priv_d * if (nm_rx_si_user(priv)) na->rx_si_users++; if (netmap_verbose) { - D("%s: tx [%d,%d) rx [%d,%d) id %d", + D("%s: tx [%d,%d) rx [%d,%d) id %d", NM_IFPNAME(na->ifp), priv->np_txqfirst, priv->np_txqlast, @@ -1555,10 +1607,9 @@ netmap_do_regif(struct netmap_priv_d *pr goto out; } nifp = netmap_if_new(NM_IFPNAME(ifp), na); + + /* Allocate a netmap_if and, if necessary, all the netmap_ring's */ if (nifp == NULL) { /* allocation failed */ - /* we should drop the allocator, but only - * if we were the ones who grabbed it - */ error = ENOMEM; goto out; } @@ -1568,10 +1619,8 @@ netmap_do_regif(struct netmap_priv_d *pr } else { /* Otherwise set the card in netmap mode * and make it use the shared buffers. - * - * do not core lock because the race is harmless here, - * there cannot be any traffic to netmap_transmit() */ + /* cache the allocator info in the na */ na->na_lut = na->nm_mem->pools[NETMAP_BUF_POOL].lut; ND("%p->na_lut == %p", na, na->na_lut); na->na_lut_objtotal = na->nm_mem->pools[NETMAP_BUF_POOL].objtotal; @@ -1585,6 +1634,9 @@ out: *err = error; if (error) { priv->np_na = NULL; + /* we should drop the allocator, but only + * if we were the ones who grabbed it + */ if (need_mem) netmap_drop_memory_locked(priv); } @@ -2008,6 +2060,12 @@ flush_tx: continue; /* only one thread does txsync */ if (nm_kr_tryget(kring)) { + /* either busy or stopped + * XXX if the ring is stopped, sleeping would + * be better. In current code, however, we only + * stop the rings for brief intervals (2014-03-14) + */ + if (netmap_verbose) RD(2, "%p lost race on txring %d, ok", priv, i); @@ -2049,7 +2107,7 @@ flush_tx: */ if (want_rx) { int send_down = 0; /* transparent mode */ - /* two rounds here to for race avoidance */ + /* two rounds here for race avoidance */ do_retry_rx: for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) { int found = 0; @@ -2120,7 +2178,7 @@ do_retry_rx: * Transparent mode: marked bufs on rx rings between * kring->nr_hwcur and ring->head * are passed to the other endpoint. - * + * * In this mode we also scan the sw rxring, which in * turn passes packets up. * @@ -2139,6 +2197,7 @@ do_retry_rx: static int netmap_hw_krings_create(struct netmap_adapter *); +/* default notify callback */ static int netmap_notify(struct netmap_adapter *na, u_int n_ring, enum txrx tx, int flags) @@ -2148,11 +2207,16 @@ netmap_notify(struct netmap_adapter *na, if (tx == NR_TX) { kring = na->tx_rings + n_ring; OS_selwakeup(&kring->si, PI_NET); + /* optimization: avoid a wake up on the global + * queue if nobody has registered for more + * than one ring + */ if (na->tx_si_users > 0) OS_selwakeup(&na->tx_si, PI_NET); } else { kring = na->rx_rings + n_ring; OS_selwakeup(&kring->si, PI_NET); + /* optimization: same as above */ if (na->rx_si_users > 0) OS_selwakeup(&na->rx_si, PI_NET); } @@ -2160,7 +2224,11 @@ netmap_notify(struct netmap_adapter *na, } -// XXX check handling of failures +/* called by all routines that create netmap_adapters. + * Attach na to the ifp (if any) and provide defaults + * for optional callbacks. Defaults assume that we + * are creating an hardware netmap_adapter. 
+ */ int netmap_attach_common(struct netmap_adapter *na) { @@ -2182,6 +2250,10 @@ netmap_attach_common(struct netmap_adapt NETMAP_SET_CAPABLE(ifp); if (na->nm_krings_create == NULL) { + /* we assume that we have been called by a driver, + * since other port types all provide their own + * nm_krings_create + */ na->nm_krings_create = netmap_hw_krings_create; na->nm_krings_delete = netmap_hw_krings_delete; } @@ -2195,10 +2267,11 @@ netmap_attach_common(struct netmap_adapt } +/* standard cleanup, called by all destructors */ void netmap_detach_common(struct netmap_adapter *na) { - if (na->ifp) + if (na->ifp != NULL) WNA(na->ifp) = NULL; /* XXX do we need this? */ if (na->tx_rings) { /* XXX should not happen */ @@ -2255,12 +2328,17 @@ netmap_attach(struct netmap_adapter *arg hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit; #endif /* linux */ - D("success for %s", NM_IFPNAME(ifp)); + D("success for %s tx %d/%d rx %d/%d queues/slots", + NM_IFPNAME(ifp), + hwna->up.num_tx_rings, hwna->up.num_tx_desc, + hwna->up.num_rx_rings, hwna->up.num_rx_desc + ); return 0; fail: D("fail, arg %p ifp %p na %p", arg, ifp, hwna); - netmap_detach(ifp); + if (ifp) + netmap_detach(ifp); return (hwna ? EINVAL : ENOMEM); } @@ -2294,6 +2372,7 @@ NM_DBG(netmap_adapter_put)(struct netmap return 1; } +/* nm_krings_create callback for all hardware native adapters */ int netmap_hw_krings_create(struct netmap_adapter *na) { @@ -2309,8 +2388,7 @@ netmap_hw_krings_create(struct netmap_ad /* - * Free the allocated memory linked to the given ``netmap_adapter`` - * object. + * Called on module unload by the netmap-enabled drivers */ void netmap_detach(struct ifnet *ifp) @@ -2381,7 +2459,7 @@ netmap_transmit(struct ifnet *ifp, struc * not possible on Linux). * Also avoid overflowing the queue. */ - mtx_lock(&q->lock); + mbq_lock(q); space = kring->nr_hwtail - kring->nr_hwcur; if (space < 0) @@ -2398,13 +2476,17 @@ netmap_transmit(struct ifnet *ifp, struc m = NULL; error = 0; } - mtx_unlock(&q->lock); + mbq_unlock(q); done: if (m) m_freem(m); /* unconditionally wake up listeners */ na->nm_notify(na, na->num_rx_rings, NR_RX, 0); + /* this is normally netmap_notify(), but for nics + * connected to a bridge it is netmap_bwrap_intr_notify(), + * that possibly forwards the frames through the switch + */ return (error); } Modified: stable/10/sys/dev/netmap/netmap_freebsd.c ============================================================================== --- stable/10/sys/dev/netmap/netmap_freebsd.c Mon Jun 9 15:16:17 2014 (r267281) +++ stable/10/sys/dev/netmap/netmap_freebsd.c Mon Jun 9 15:24:45 2014 (r267282) @@ -61,7 +61,8 @@ /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ -rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) +rawsum_t +nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) { /* TODO XXX please use the FreeBSD implementation for this. */ uint16_t *words = (uint16_t *)data; @@ -80,7 +81,8 @@ rawsum_t nm_csum_raw(uint8_t *data, size /* Fold a raw checksum: 'cur_sum' is in host byte order, while the * return value is in network byte order. */ -uint16_t nm_csum_fold(rawsum_t cur_sum) +uint16_t +nm_csum_fold(rawsum_t cur_sum) { /* TODO XXX please use the FreeBSD implementation for this. 
*/ while (cur_sum >> 16) @@ -89,7 +91,8 @@ uint16_t nm_csum_fold(rawsum_t cur_sum) return htobe16((~cur_sum) & 0xFFFF); } -uint16_t nm_csum_ipv4(struct nm_iphdr *iph) +uint16_t +nm_csum_ipv4(struct nm_iphdr *iph) { #if 0 return in_cksum_hdr((void *)iph); @@ -98,7 +101,8 @@ uint16_t nm_csum_ipv4(struct nm_iphdr *i #endif } -void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, +void +nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, size_t datalen, uint16_t *check) { #ifdef INET @@ -120,7 +124,8 @@ void nm_csum_tcpudp_ipv4(struct nm_iphdr #endif } -void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, +void +nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, size_t datalen, uint16_t *check) { #ifdef INET6 @@ -143,7 +148,8 @@ void nm_csum_tcpudp_ipv6(struct nm_ipv6h int netmap_catch_rx(struct netmap_adapter *na, int intercept) { - struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na; + struct netmap_generic_adapter *gna = + (struct netmap_generic_adapter *)na; struct ifnet *ifp = na->ifp; if (intercept) { @@ -209,11 +215,29 @@ generic_xmit_frame(struct ifnet *ifp, st { int ret; - m->m_len = m->m_pkthdr.len = 0; + /* + * The mbuf should be a cluster from our special pool, + * so we do not need to do an m_copyback but just copy + * (and eventually, just reference the netmap buffer) + */ - // copy data to the mbuf - m_copyback(m, 0, len, addr); - // inc refcount. We are alone, so we can skip the atomic + if (*m->m_ext.ref_cnt != 1) { + D("invalid refcnt %d for %p", + *m->m_ext.ref_cnt, m); + panic("in generic_xmit_frame"); + } + // XXX the ext_size check is unnecessary if we link the netmap buf + if (m->m_ext.ext_size < len) { + RD(5, "size %d < len %d", m->m_ext.ext_size, len); + len = m->m_ext.ext_size; + } + if (1) { /* XXX seems to have negligible benefits */ + m->m_ext.ext_buf = m->m_data = addr; + } else { + bcopy(addr, m->m_data, len); + } + m->m_len = m->m_pkthdr.len = len; + // inc refcount. 
All ours, we could skip the atomic atomic_fetchadd_int(m->m_ext.ref_cnt, 1); m->m_flags |= M_FLOWID; m->m_pkthdr.flowid = ring_nr; @@ -223,6 +247,14 @@ generic_xmit_frame(struct ifnet *ifp, st } +#if __FreeBSD_version >= 1100005 +struct netmap_adapter * +netmap_getna(if_t ifp) +{ + return (NA((struct ifnet *)ifp)); +} +#endif /* __FreeBSD_version >= 1100005 */ + /* * The following two functions are empty until we have a generic * way to extract the info from the ifp @@ -230,7 +262,7 @@ generic_xmit_frame(struct ifnet *ifp, st int generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) { - D("called"); + D("called, in tx %d rx %d", *tx, *rx); return 0; } @@ -238,13 +270,14 @@ generic_find_num_desc(struct ifnet *ifp, void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) { - D("called"); + D("called, in txq %d rxq %d", *txq, *rxq); *txq = netmap_generic_rings; *rxq = netmap_generic_rings; } -void netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na) +void +netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na) { ND("called"); mit->mit_pending = 0; @@ -252,26 +285,30 @@ void netmap_mitigation_init(struct nm_ge } -void netmap_mitigation_start(struct nm_generic_mit *mit) +void +netmap_mitigation_start(struct nm_generic_mit *mit) { ND("called"); } -void netmap_mitigation_restart(struct nm_generic_mit *mit) +void +netmap_mitigation_restart(struct nm_generic_mit *mit) { ND("called"); } -int netmap_mitigation_active(struct nm_generic_mit *mit) +int +netmap_mitigation_active(struct nm_generic_mit *mit) { ND("called"); return 0; } -void netmap_mitigation_cleanup(struct nm_generic_mit *mit) +void +netmap_mitigation_cleanup(struct nm_generic_mit *mit) { ND("called"); } Modified: stable/10/sys/dev/netmap/netmap_generic.c ============================================================================== --- stable/10/sys/dev/netmap/netmap_generic.c Mon Jun 9 15:16:17 2014 (r267281) +++ stable/10/sys/dev/netmap/netmap_generic.c Mon Jun 9 15:24:45 2014 (r267282) @@ -81,20 +81,26 @@ __FBSDID("$FreeBSD$"); #include #include -#define rtnl_lock() D("rtnl_lock called"); -#define rtnl_unlock() D("rtnl_unlock called"); +#define rtnl_lock() ND("rtnl_lock called") +#define rtnl_unlock() ND("rtnl_unlock called") #define MBUF_TXQ(m) ((m)->m_pkthdr.flowid) #define MBUF_RXQ(m) ((m)->m_pkthdr.flowid) #define smp_mb() /* - * mbuf wrappers + * FreeBSD mbuf allocator/deallocator in emulation mode: + * + * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE + * so that the destructor, if invoked, will not free the packet. + * In principle we should set the destructor only on demand, + * but since there might be a race we better do it on allocation. + * As a consequence, we also need to set the destructor or we + * would leak buffers. 
*/ /* - * we allocate an EXT_PACKET + * mbuf wrappers */ -#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE) /* mbuf destructor, also need to change the type to EXT_EXTREF, * add an M_NOFREE flag, and then clear the flag and @@ -106,6 +112,32 @@ __FBSDID("$FreeBSD$"); (m)->m_ext.ext_type = EXT_EXTREF; \ } while (0) +static void +netmap_default_mbuf_destructor(struct mbuf *m) +{ + /* restore original mbuf */ + m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1; + m->m_ext.ext_arg1 = NULL; + m->m_ext.ext_type = EXT_PACKET; + m->m_ext.ext_free = NULL; + if (*(m->m_ext.ref_cnt) == 0) + *(m->m_ext.ref_cnt) = 1; + uma_zfree(zone_pack, m); +} + +static inline struct mbuf * +netmap_get_mbuf(int len) +{ + struct mbuf *m; + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR | M_NOFREE); + if (m) { + m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save + m->m_ext.ext_free = (void *)netmap_default_mbuf_destructor; + m->m_ext.ext_type = EXT_EXTREF; + ND(5, "create m %p refcnt %d", m, *m->m_ext.ref_cnt); + } + return m; +} #define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1) @@ -223,7 +255,7 @@ generic_netmap_register(struct netmap_ad #endif /* REG_RESET */ if (enable) { /* Enable netmap mode. */ - /* Init the mitigation support. */ + /* Init the mitigation support on all the rx queues. */ gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit), M_DEVBUF, M_NOWAIT | M_ZERO); if (!gna->mit) { @@ -373,15 +405,11 @@ out: static void generic_mbuf_destructor(struct mbuf *m) { - if (netmap_verbose) - D("Tx irq (%p) queue %d", m, MBUF_TXQ(m)); netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL); #ifdef __FreeBSD__ - m->m_ext.ext_type = EXT_PACKET; - m->m_ext.ext_free = NULL; - if (*(m->m_ext.ref_cnt) == 0) - *(m->m_ext.ref_cnt) = 1; - uma_zfree(zone_pack, m); + if (netmap_verbose) + RD(5, "Tx irq (%p) queue %d index %d" , m, MBUF_TXQ(m), (int)(uintptr_t)m->m_ext.ext_arg1); + netmap_default_mbuf_destructor(m); #endif /* __FreeBSD__ */ IFRATE(rate_ctx.new.txirq++); } @@ -471,12 +499,12 @@ generic_set_tx_event(struct netmap_kring e = generic_tx_event_middle(kring, hwcur); m = kring->tx_pool[e]; + ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? GET_MBUF_REFCNT(m) : -2 ); if (m == NULL) { /* This can happen if there is already an event on the netmap slot 'e': There is nothing to do. 
*/ return; } - ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m)); kring->tx_pool[e] = NULL; SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor); @@ -770,6 +798,10 @@ generic_netmap_attach(struct ifnet *ifp) generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc); + if (num_tx_desc == 0 || num_rx_desc == 0) { + D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc); + return EINVAL; + } gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO); if (gna == NULL) { Modified: stable/10/sys/dev/netmap/netmap_kern.h ============================================================================== --- stable/10/sys/dev/netmap/netmap_kern.h Mon Jun 9 15:16:17 2014 (r267281) +++ stable/10/sys/dev/netmap/netmap_kern.h Mon Jun 9 15:24:45 2014 (r267282) @@ -62,6 +62,9 @@ #define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1)) #define NM_ATOMIC_CLEAR(p) atomic_store_rel_int((p), 0) +#if __FreeBSD_version >= 1100005 +struct netmap_adapter *netmap_getna(if_t ifp); +#endif MALLOC_DECLARE(M_NETMAP); @@ -183,9 +186,6 @@ extern NMG_LOCK_T netmap_global_lock; * the next empty buffer as known by the hardware (next_to_check or so). * TX rings: hwcur + hwofs coincides with next_to_send * - * Clients cannot issue concurrent syscall on a ring. The system - * detects this and reports an error using two flags, - * NKR_WBUSY and NKR_RBUSY * For received packets, slot->flags is set to nkr_slot_flags * so we can provide a proper initial value (e.g. set NS_FORWARD * when operating in 'transparent' mode). @@ -208,7 +208,7 @@ extern NMG_LOCK_T netmap_global_lock; * The kring is manipulated by txsync/rxsync and generic netmap function. * * Concurrent rxsync or txsync on the same ring are prevented through - * by nm_kr_lock() which in turn uses nr_busy. This is all we need + * by nm_kr_(try)lock() which in turn uses nr_busy. This is all we need * for NIC rings, and for TX rings attached to the host stack. * * RX rings attached to the host stack use an mbq (rx_queue) on both @@ -440,15 +440,18 @@ struct netmap_adapter { /* * nm_dtor() is the cleanup routine called when destroying * the adapter. + * Called with NMG_LOCK held. * * nm_register() is called on NIOCREGIF and close() to enter * or exit netmap mode on the NIC + * Called with NMG_LOCK held. * * nm_txsync() pushes packets to the underlying hw/switch * * nm_rxsync() collects packets from the underlying hw/switch * * nm_config() returns configuration information from the OS + * Called with NMG_LOCK held. * * nm_krings_create() create and init the krings array * (the array layout must conform to the description @@ -456,13 +459,12 @@ struct netmap_adapter { * * nm_krings_delete() cleanup and delete the kring array * - * nm_notify() is used to act after data have become available. + * nm_notify() is used to act after data have become available + * (or the stopped state of the ring has changed) * For hw devices this is typically a selwakeup(), * but for NIC/host ports attached to a switch (or vice-versa) * we also need to invoke the 'txsync' code downstream. 
*/ - - /* private cleanup */ void (*nm_dtor)(struct netmap_adapter *); int (*nm_register)(struct netmap_adapter *, int onoff); @@ -678,7 +680,7 @@ static inline uint32_t nm_kr_rxspace(struct netmap_kring *k) { int space = k->nr_hwtail - k->nr_hwcur; - if (space < 0) + if (space < 0) space += k->nkr_num_slots; ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail); @@ -827,7 +829,7 @@ nm_txsync_finalize(struct netmap_kring * { /* update ring tail to what the kernel knows */ kring->ring->tail = kring->rtail = kring->nr_hwtail; - + /* note, head/rhead/hwcur might be behind cur/rcur * if no carrier */ @@ -1376,5 +1378,4 @@ void bdg_mismatch_datapath(struct netmap struct netmap_vp_adapter *dst_na, struct nm_bdg_fwd *ft_p, struct netmap_ring *ring, u_int *j, u_int lim, u_int *howmany); - #endif /* _NET_NETMAP_KERN_H_ */ Modified: stable/10/sys/dev/netmap/netmap_mbq.c ============================================================================== --- stable/10/sys/dev/netmap/netmap_mbq.c Mon Jun 9 15:16:17 2014 (r267281) +++ stable/10/sys/dev/netmap/netmap_mbq.c Mon Jun 9 15:24:45 2014 (r267282) @@ -76,9 +76,9 @@ static inline void __mbq_enqueue(struct void mbq_safe_enqueue(struct mbq *q, struct mbuf *m) { - mtx_lock(&q->lock); + mbq_lock(q); __mbq_enqueue(q, m); - mtx_unlock(&q->lock); + mbq_unlock(q); } @@ -110,9 +110,9 @@ struct mbuf *mbq_safe_dequeue(struct mbq { struct mbuf *ret; - mtx_lock(&q->lock); + mbq_lock(q); ret = __mbq_dequeue(q); - mtx_unlock(&q->lock); + mbq_unlock(q); return ret; } Modified: stable/10/sys/dev/netmap/netmap_mbq.h ============================================================================== --- stable/10/sys/dev/netmap/netmap_mbq.h Mon Jun 9 15:16:17 2014 (r267281) +++ stable/10/sys/dev/netmap/netmap_mbq.h Mon Jun 9 15:24:45 2014 (r267282) @@ -62,7 +62,17 @@ void mbq_enqueue(struct mbq *q, struct m struct mbuf *mbq_dequeue(struct mbq *q); void mbq_purge(struct mbq *q); -/* XXX missing mbq_lock() and mbq_unlock */ +static inline void +mbq_lock(struct mbq *q) +{ + mtx_lock_spin(&q->lock); +} + +static inline void +mbq_unlock(struct mbq *q) +{ + mtx_unlock_spin(&q->lock); +} void mbq_safe_init(struct mbq *q); void mbq_safe_destroy(struct mbq *q); Modified: stable/10/sys/dev/netmap/netmap_mem2.c ============================================================================== --- stable/10/sys/dev/netmap/netmap_mem2.c Mon Jun 9 15:16:17 2014 (r267281) +++ stable/10/sys/dev/netmap/netmap_mem2.c Mon Jun 9 15:24:45 2014 (r267282) @@ -992,7 +992,7 @@ netmap_mem_private_new(const char *name, if (p[NETMAP_RING_POOL].num < v) p[NETMAP_RING_POOL].num = v; /* for each pipe we only need the buffers for the 4 "real" rings. - * On the other end, the pipe ring dimension may be different from + * On the other end, the pipe ring dimension may be different from * the parent port ring dimension. As a compromise, we allocate twice the * space actually needed if the pipe rings were the same size as the parent rings */ Modified: stable/10/sys/dev/netmap/netmap_pipe.c ============================================================================== --- stable/10/sys/dev/netmap/netmap_pipe.c Mon Jun 9 15:16:17 2014 (r267281) +++ stable/10/sys/dev/netmap/netmap_pipe.c Mon Jun 9 15:24:45 2014 (r267282) @@ -391,7 +391,7 @@ err: /* netmap_pipe_reg. * * There are two cases on registration (onoff==1) - * + * * 1.a) state is * * usr1 --> e1 --> e2 @@ -403,7 +403,7 @@ err: * usr1 --> e1 --> e2 <-- usr2 * * and we are e2. Drop the ref e1 is holding. 
- * + * * There are two additional cases on unregister (onoff==0) * * 2.a) state is @@ -462,14 +462,14 @@ netmap_pipe_reg(struct netmap_adapter *n * * 1) state is * - * usr1 --> e1 --> e2 + * usr1 --> e1 --> e2 * - * and we are e1 (e2 is not registered, so krings_delete cannot be + * and we are e1 (e2 is not registered, so krings_delete cannot be * called on it); * * 2) state is * - * usr1 --> e1 e2 <-- usr2 + * usr1 --> e1 e2 <-- usr2 * * and we are either e1 or e2. * @@ -519,7 +519,7 @@ netmap_pipe_dtor(struct netmap_adapter * pna->peer_ref = 0; netmap_adapter_put(&pna->peer->up); } - if (pna->role == NR_REG_PIPE_MASTER) + if (pna->role == NR_REG_PIPE_MASTER) netmap_pipe_remove(pna->parent, pna); netmap_adapter_put(pna->parent); free(na->ifp, M_DEVBUF); @@ -587,7 +587,7 @@ netmap_get_pipe_na(struct nmreq *nmr, st error = ENODEV; goto put_out; } - /* we create both master and slave. + /* we create both master and slave. * The endpoint we were asked for holds a reference to * the other one. */ Modified: stable/10/sys/dev/netmap/netmap_vale.c ============================================================================== --- stable/10/sys/dev/netmap/netmap_vale.c Mon Jun 9 15:16:17 2014 (r267281) +++ stable/10/sys/dev/netmap/netmap_vale.c Mon Jun 9 15:24:45 2014 (r267282) @@ -959,6 +959,14 @@ nm_bdg_preflush(struct netmap_vp_adapter ft[ft_i].ft_next = NM_FT_NULL; buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? (void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot); + if (unlikely(buf == NULL)) { + RD(5, "NULL %s buffer pointer from %s slot %d len %d", + (slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT", + kring->name, j, ft[ft_i].ft_len); + buf = ft[ft_i].ft_buf = NMB_VA(0); /* the 'null' buffer */ + ft[ft_i].ft_len = 0; + ft[ft_i].ft_flags = 0; + } __builtin_prefetch(buf); ++ft_i; if (slot->flags & NS_MOREFRAG) { @@ -1064,7 +1072,7 @@ netmap_bdg_learning(char *buf, u_int buf uint64_t smac, dmac; if (buf_len < 14) { - D("invalid buf length %d", buf_len); + RD(5, "invalid buf length %d", buf_len); return NM_BDG_NOPORT; } dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff; @@ -1312,6 +1320,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_in needed = d->bq_len + brddst->bq_len; if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) { + RD(3, "virt_hdr_mismatch, src %d len %d", na->virt_hdr_len, dst_na->virt_hdr_len); /* There is a virtio-net header/offloadings mismatch between * source and destination. The slower mismatch datapath will * be used to cope with all the mismatches. @@ -1412,6 +1421,11 @@ retry: /* round to a multiple of 64 */ copy_len = (copy_len + 63) & ~63; + if (unlikely(copy_len > NETMAP_BUF_SIZE || + copy_len > NETMAP_BUF_SIZE)) { *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
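
The new comments on netmap_disable_all_rings()/netmap_enable_all_rings() in
netmap.c pin down where the convenience functions belong in a driver. An
illustrative fragment, not part of the commit (the foo_* names are
placeholders for a driver's own softc and routines):

/*
 * Illustrative only: quiesce netmap before the hardware rings are
 * destroyed, and re-enable once they have been rebuilt. foo_stop()
 * and foo_init() stand in for the driver's existing routines.
 */
static void
foo_reinit(struct foo_softc *sc)
{
	struct ifnet *ifp = sc->foo_ifp;

	netmap_disable_all_rings(ifp);	/* wait for running txsync/rxsync */
	foo_stop(sc);			/* tear down the hardware rings */
	foo_init(sc);			/* rebuild the hardware rings */
	netmap_enable_all_rings(ifp);	/* new syncs may start again */
}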
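The tx mbuf fix in emulated mode hinges on mbuf ownership: netmap_get_mbuf()
now allocates EXT_PACKET mbuf+clusters but marks them M_NOFREE/EXT_EXTREF and
stashes the original ext_buf in ext_arg1, so the stack's free of a transmitted
mbuf runs a netmap-controlled destructor instead of returning the buffer to
the zone. A condensed restatement of the arming step in
generic_set_tx_event(), using only identifiers that appear in the diff above:

/* illustrative: arm a tx completion event on slot e */
static void
example_arm_tx_event(struct netmap_kring *kring, u_int e)
{
	struct mbuf *m = kring->tx_pool[e];

	if (m == NULL)		/* an event is already pending on 'e' */
		return;
	kring->tx_pool[e] = NULL;
	/* when the driver eventually frees m, generic_mbuf_destructor()
	 * wakes the ring through netmap_generic_irq() and then calls
	 * netmap_default_mbuf_destructor(), which restores ext_buf and
	 * the EXT_PACKET state and returns the mbuf to zone_pack.
	 */
	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
}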
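mbq_lock()/mbq_unlock(), added in netmap_mbq.h above, are thin wrappers around
the spin mutex embedded in struct mbq; the rest of the commit converts the
open-coded mtx_lock(&q->lock)/mtx_unlock(&q->lock) pairs to them. A minimal
sketch of the consumer pattern, assuming a queue set up with mbq_safe_init();
mbq_dequeue() is the lock-free variant and is safe here because the queue lock
is held, exactly as in netmap_rxsync_from_host() above:

/* illustrative drain loop, mirroring the rxsync code path */
static void
example_drain(struct mbq *q)
{
	struct mbuf *m;

	mbq_lock(q);			/* was: mtx_lock(&q->lock) */
	while ((m = mbq_dequeue(q)) != NULL)
		m_freem(m);
	mbq_unlock(q);			/* was: mtx_unlock(&q->lock) */
}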
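On the "rate limit some error messages" bullet: D() logs unconditionally,
while RD(lps, ...) logs at most lps times per second, so a per-packet error
path cannot flood the console. The netmap_bdg_learning() hunk above shows the
pattern; an illustrative fragment with the same identifiers:

	if (buf_len < 14) {
		RD(5, "invalid buf length %d", buf_len);	/* was: D(...) */
		return NM_BDG_NOPORT;
	}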