Date:      Wed, 23 Jan 2013 07:04:28 +0100
From:      Luigi Rizzo <rizzo@iet.unipi.it>
To:        net@freebsd.org
Subject:   [luigi@FreeBSD.org: svn commit: r245836 - head/sys/dev/netmap]
Message-ID:  <20130123060428.GC50751@onelab2.iet.unipi.it>

this new netmap feature might be of interest
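
in short: incoming packets (from the NIC or from the host stack) are
marked NS_FORWARD on arrival; whatever still carries the mark when the
application returns slots to the kernel (poll()/rxsync) is passed to
the other side. a minimal sketch of the client side, not from the
commit (is_bulk() and process_packet() are placeholders for the
application's own logic, and the ring is assumed to be already set up
via NIOCREGIF + mmap):

	/* needs <net/netmap.h> and <net/netmap_user.h> */
	static void
	rx_pass(struct netmap_ring *ring)
	{
		while (ring->avail > 0) {
			u_int i = ring->cur;
			struct netmap_slot *slot = &ring->slot[i];
			char *buf = NETMAP_BUF(ring, slot->buf_idx);

			if (is_bulk(buf, slot->len)) {
				/* consumed here: clear the mark so the
				 * packet is not passed to the host stack */
				slot->flags &= ~NS_FORWARD;
				process_packet(buf, slot->len);
			}
			/* slots still marked NS_FORWARD (arp, ssh, ...)
			 * are forwarded on the next poll()/NIOCRXSYNC */
			ring->cur = NETMAP_RING_NEXT(ring, i);
			ring->avail--;
		}
	}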

cheers
luigi

----- Forwarded message from Luigi Rizzo <luigi@FreeBSD.org> -----

Date: Wed, 23 Jan 2013 05:37:46 +0000 (UTC)
From: Luigi Rizzo <luigi@FreeBSD.org>
Subject: svn commit: r245836 - head/sys/dev/netmap
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
        svn-src-head@freebsd.org

Author: luigi
Date: Wed Jan 23 05:37:45 2013
New Revision: 245836
URL: http://svnweb.freebsd.org/changeset/base/245836

Log:
  Add support for transparent mode while in netmap mode.
  
  When dev.netmap.fwd is set to 1 (or the feature is enabled with a
  per-ring flag), packets are forwarded between the NIC and the host
  stack unless the netmap client clears the NS_FORWARD flag on the
  individual descriptors.
  
  This feature greatly simplifies applications in which some traffic
  (think ARP, control traffic, ssh sessions, ...) must be processed
  by the host stack, while the bulk is handled by the netmap process,
  which simply clears the NS_FORWARD mark on packets that should not
  be forwarded. The default is chosen so that a netmap receiver now
  operates in a mode very similar to bpf.
  
  Of course there is no free lunch: traffic to/from the host stack
  still runs at OS speed (or slower, since there is one extra copy
  in one direction). However, since traffic reaches the user process
  before being reinjected, and reinjection happens in user context,
  you get some form of livelock protection for free.
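
  As an illustration (not part of the diff below), the feature can be
  enabled either globally or per ring; a minimal sketch of the
  per-ring variant, assuming the usual NIOCREGIF + mmap setup and a
  hypothetical ring index ring_id:

	/* globally:  sysctl dev.netmap.fwd=1
	 * per ring:  set NR_FORWARD in the ring flags, e.g.: */
	struct netmap_ring *rxring = NETMAP_RXRING(nifp, ring_id);
	rxring->flags |= NR_FORWARD;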

Modified:
  head/sys/dev/netmap/netmap.c

Modified: head/sys/dev/netmap/netmap.c
==============================================================================
--- head/sys/dev/netmap/netmap.c	Wed Jan 23 03:51:47 2013	(r245835)
+++ head/sys/dev/netmap/netmap.c	Wed Jan 23 05:37:45 2013	(r245836)
@@ -120,10 +120,12 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, no_pen
 
 int netmap_drop = 0;	/* debugging */
 int netmap_flags = 0;	/* debug flags */
+int netmap_fwd = 0;	/* force transparent mode */
 int netmap_copy = 0;	/* debugging, copy content */
 
 SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , "");
 
 #ifdef NM_BRIDGE /* support for netmap bridge */
@@ -647,63 +649,169 @@ netmap_open(struct cdev *dev, int oflags
 
 /*
  * Handlers for synchronization of the queues from/to the host.
- *
- * netmap_sync_to_host() passes packets up. We are called from a
- * system call in user process context, and the only contention
- * can be among multiple user threads erroneously calling
- * this routine concurrently. In principle we should not even
- * need to lock.
+ * Netmap has two operating modes:
+ * - in the default mode, the rings connected to the host stack are
+ *   just another ring pair managed by userspace;
+ * - in transparent mode (XXX to be defined) incoming packets
+ *   (from the host or the NIC) are marked as NS_FORWARD upon
+ *   arrival, and the user application has a chance to reset the
+ *   flag for packets that should be dropped.
+ *   On the RXSYNC or poll(), packets in RX rings between
+ *   kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
+ *   to the other side.
+ * The transfer NIC --> host is relatively easy, just encapsulate
+ * into mbufs and we are done. The host --> NIC side is slightly
+ * harder because there might not be room in the tx ring so it
+ * might take a while before releasing the buffer.
+ */
+
+/*
+ * pass a chain of buffers to the host stack as coming from 'dst'
  */
 static void
-netmap_sync_to_host(struct netmap_adapter *na)
+netmap_send_up(struct ifnet *dst, struct mbuf *head)
 {
-	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
-	struct netmap_ring *ring = kring->ring;
-	struct mbuf *head = NULL, *tail = NULL, *m;
-	u_int k, n, lim = kring->nkr_num_slots - 1;
+	struct mbuf *m;
 
-	k = ring->cur;
-	if (k > lim) {
-		netmap_ring_reinit(kring);
-		return;
+	/* send packets up, outside the lock */
+	while ((m = head) != NULL) {
+		head = head->m_nextpkt;
+		m->m_nextpkt = NULL;
+		if (netmap_verbose & NM_VERB_HOST)
+			D("sending up pkt %p size %d", m, MBUF_LEN(m));
+		NM_SEND_UP(dst, m);
 	}
-	// na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+}
 
-	/* Take packets from hwcur to cur and pass them up.
+struct mbq {
+	struct mbuf *head;
+	struct mbuf *tail;
+	int count;
+};
+
+/*
+ * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
+ * Run from hwcur to cur - reserved
+ */
+static void
+netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
+{
+	/* Take packets from hwcur to cur-reserved and pass them up.
 	 * In case of no buffers we give up. At the end of the loop,
 	 * the queue is drained in all cases.
+	 * XXX handle reserved
 	 */
+	int k = kring->ring->cur - kring->ring->reserved;
+	u_int n, lim = kring->nkr_num_slots - 1;
+	struct mbuf *m, *tail = q->tail;
+
+	if (k < 0)
+		k = k + kring->nkr_num_slots;
 	for (n = kring->nr_hwcur; n != k;) {
-		struct netmap_slot *slot = &ring->slot[n];
+		struct netmap_slot *slot = &kring->ring->slot[n];
 
 		n = (n == lim) ? 0 : n + 1;
+		if ((slot->flags & NS_FORWARD) == 0 && !force)
+			continue;
 		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
 			D("bad pkt at %d len %d", n, slot->len);
 			continue;
 		}
-		m = m_devget(NMB(slot), slot->len, 0, na->ifp, NULL);
+		slot->flags &= ~NS_FORWARD; // XXX needed ?
+		m = m_devget(NMB(slot), slot->len, 0, kring->na->ifp, NULL);
 
 		if (m == NULL)
 			break;
 		if (tail)
 			tail->m_nextpkt = m;
 		else
-			head = m;
+			q->head = m;
 		tail = m;
+		q->count++;
 		m->m_nextpkt = NULL;
 	}
+	q->tail = tail;
+}
+
+/*
+ * called under main lock to send packets from the host to the NIC
+ * The host ring has packets from nr_hwcur to (cur - reserved)
+ * to be sent down. We scan the tx rings, which have just been
+ * flushed so nr_hwcur == cur. Pushing packets down means
+ * increment cur and decrement avail.
+ * XXX to be verified
+ */
+static void
+netmap_sw_to_nic(struct netmap_adapter *na)
+{
+	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
+	struct netmap_kring *k1 = &na->tx_rings[0];
+	int i, howmany, src_lim, dst_lim;
+
+	howmany = kring->nr_hwavail;	/* XXX otherwise cur - reserved - nr_hwcur */
+
+	src_lim = kring->nkr_num_slots;
+	for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
+		ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
+		dst_lim = k1->nkr_num_slots;
+		while (howmany > 0 && k1->ring->avail > 0) {
+			struct netmap_slot *src, *dst, tmp;
+			src = &kring->ring->slot[kring->nr_hwcur];
+			dst = &k1->ring->slot[k1->ring->cur];
+			tmp = *src;
+			src->buf_idx = dst->buf_idx;
+			src->flags = NS_BUF_CHANGED;
+
+			dst->buf_idx = tmp.buf_idx;
+			dst->len = tmp.len;
+			dst->flags = NS_BUF_CHANGED;
+			ND("out len %d buf %d from %d to %d",
+				dst->len, dst->buf_idx,
+				kring->nr_hwcur, k1->ring->cur);
+
+			if (++kring->nr_hwcur >= src_lim)
+				kring->nr_hwcur = 0;
+			howmany--;
+			kring->nr_hwavail--;
+			if (++k1->ring->cur >= dst_lim)
+				k1->ring->cur = 0;
+			k1->ring->avail--;
+		}
+		kring->ring->cur = kring->nr_hwcur; // XXX
+	}
+}
+
+/*
+ * netmap_sync_to_host() passes packets up. We are called from a
+ * system call in user process context, and the only contention
+ * can be among multiple user threads erroneously calling
+ * this routine concurrently.
+ */
+static void
+netmap_sync_to_host(struct netmap_adapter *na)
+{
+	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
+	struct netmap_ring *ring = kring->ring;
+	u_int k, lim = kring->nkr_num_slots - 1;
+	struct mbq q = { NULL, NULL, 0 };
+
+	k = ring->cur;
+	if (k > lim) {
+		netmap_ring_reinit(kring);
+		return;
+	}
+	// na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+
+	/* Take packets from hwcur to cur and pass them up.
+	 * In case of no buffers we give up. At the end of the loop,
+	 * the queue is drained in all cases.
+	 */
+	netmap_grab_packets(kring, &q, 1);
 	kring->nr_hwcur = k;
 	kring->nr_hwavail = ring->avail = lim;
 	// na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
 
-	/* send packets up, outside the lock */
-	while ((m = head) != NULL) {
-		head = head->m_nextpkt;
-		m->m_nextpkt = NULL;
-		if (netmap_verbose & NM_VERB_HOST)
-			D("sending up pkt %p size %d", m, MBUF_LEN(m));
-		NM_SEND_UP(na->ifp, m);
-	}
+	netmap_send_up(na->ifp, q.head);
 }
 
 /*
@@ -1236,7 +1345,8 @@ netmap_poll(struct cdev *dev, int events
 	struct ifnet *ifp;
 	struct netmap_kring *kring;
 	u_int core_lock, i, check_all, want_tx, want_rx, revents = 0;
-	u_int lim_tx, lim_rx;
+	u_int lim_tx, lim_rx, host_forwarded = 0;
+	struct mbq q = { NULL, NULL, 0 };
 	enum {NO_CL, NEED_CL, LOCKED_CL }; /* see below */
 	void *pwait = dev;	/* linux compatibility */
 
@@ -1284,6 +1394,17 @@ netmap_poll(struct cdev *dev, int events
 		return (revents);
 	}
 
+	/* if we are in transparent mode, check also the host rx ring */
+	kring = &na->rx_rings[lim_rx];
+	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
+			&& want_rx
+			&& (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
+		if (kring->ring->avail == 0)
+			netmap_sync_from_host(na, td, dev);
+		if (kring->ring->avail > 0)
+			revents |= want_rx;
+	}
+
 	/*
 	 * check_all is set if the card has more than one queue and
 	 * the client is polling all of them. If true, we sleep on
@@ -1359,6 +1480,7 @@ netmap_poll(struct cdev *dev, int events
 	 * to avoid that the tx rings stall).
 	 */
 	if (priv->np_txpoll || want_tx) {
+flush_tx:
 		for (i = priv->np_qfirst; i < lim_tx; i++) {
 			kring = &na->tx_rings[i];
 			/*
@@ -1411,6 +1533,11 @@ netmap_poll(struct cdev *dev, int events
 			}
 			if (na->separate_locks)
 				na->nm_lock(ifp, NETMAP_RX_LOCK, i);
+			if (netmap_fwd || kring->ring->flags & NR_FORWARD) {
+				ND(10, "forwarding some buffers up %d to %d",
+				    kring->nr_hwcur, kring->ring->cur);
+				netmap_grab_packets(kring, &q, netmap_fwd);
+			}
 
 			if (na->nm_rxsync(ifp, i, 0 /* no lock */))
 				revents |= POLLERR;
@@ -1433,8 +1560,28 @@ netmap_poll(struct cdev *dev, int events
 		if (want_rx)
 			selrecord(td, &na->rx_si);
 	}
+
+	/* forward host to the netmap ring */
+	kring = &na->rx_rings[lim_rx];
+	if (kring->nr_hwavail > 0)
+		ND("host rx %d has %d packets", lim_rx, kring->nr_hwavail);
+	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
+			&& (netmap_fwd || kring->ring->flags & NR_FORWARD)
+			 && kring->nr_hwavail > 0 && !host_forwarded) {
+		if (core_lock == NEED_CL) {
+			na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
+			core_lock = LOCKED_CL;
+		}
+		netmap_sw_to_nic(na);
+		host_forwarded = 1; /* prevent another pass */
+		want_rx = 0;
+		goto flush_tx;
+	}
+
 	if (core_lock == LOCKED_CL)
 		na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
+	if (q.head)
+		netmap_send_up(na->ifp, q.head);
 
 	return (revents);
 }
@@ -1586,7 +1733,8 @@ netmap_start(struct ifnet *ifp, struct m
 		goto done;	/* no space */
 	}
 	if (len > NETMAP_BUF_SIZE) {
-		D("drop packet size %d > %d", len, NETMAP_BUF_SIZE);
+		D("%s from_host, drop packet size %d > %d", ifp->if_xname,
+			len, NETMAP_BUF_SIZE);
 		goto done;	/* too long for us */
 	}
 
@@ -1597,6 +1745,7 @@ netmap_start(struct ifnet *ifp, struct m
 	slot = &kring->ring->slot[i];
 	m_copydata(m, 0, len, NMB(slot));
 	slot->len = len;
+	slot->flags = kring->nkr_slot_flags;
 	kring->nr_hwavail++;
 	if (netmap_verbose  & NM_VERB_HOST)
 		D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);

----- End forwarded message -----


