Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 20 Feb 2020 21:52:36 +0000 (UTC)
From:      Vincenzo Maffione <vmaffione@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r358185 - stable/12/usr.sbin/bhyve
Message-ID:  <202002202152.01KLqajw027340@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: vmaffione
Date: Thu Feb 20 21:52:36 2020
New Revision: 358185
URL: https://svnweb.freebsd.org/changeset/base/358185

Log:
  MFC r357846
  
  bhyve: move virtio-net header processing to pci_virtio_net
  
  This patch cleans up the API between the net frontends (e1000,
  virtio-net) and the net backends (tap and netmap).
  We move the virtio-net header stripping/prepending to the
  virtio-net code, where this functionality belongs.
  In this way, the netbe_send() and netbe_recv() signatures
  can have const struct iov * rather than struct iov *.
  
  Reviewed by:    grehan, bcr, aleksandr.fedorov@itglobal.com
  Differential Revision:  https://reviews.freebsd.org/D23342

Modified:
  stable/12/usr.sbin/bhyve/net_backends.c
  stable/12/usr.sbin/bhyve/net_backends.h
  stable/12/usr.sbin/bhyve/net_utils.c
  stable/12/usr.sbin/bhyve/pci_e82545.c
  stable/12/usr.sbin/bhyve/pci_virtio_net.c

Modified: stable/12/usr.sbin/bhyve/net_backends.c
==============================================================================
--- stable/12/usr.sbin/bhyve/net_backends.c	Thu Feb 20 21:48:36 2020	(r358184)
+++ stable/12/usr.sbin/bhyve/net_backends.c	Thu Feb 20 21:52:36 2020	(r358185)
@@ -99,7 +99,8 @@ struct net_backend {
 	 * vector provided by the caller has 'iovcnt' elements and contains
 	 * the packet to send.
 	 */
-	ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt);
+	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
+	    int iovcnt);
 
 	/*
 	 * Called to receive a packet from the backend. When the function
@@ -108,7 +109,8 @@ struct net_backend {
 	 * The function returns 0 if the backend doesn't have a new packet to
 	 * receive.
 	 */
-	ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
+	ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
+	    int iovcnt);
 
 	/*
 	 * Ask the backend to enable or disable receive operation in the
@@ -238,13 +240,13 @@ error:
  * Called to send a buffer chain out to the tap device
  */
 static ssize_t
-tap_send(struct net_backend *be, struct iovec *iov, int iovcnt)
+tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 	return (writev(be->fd, iov, iovcnt));
 }
 
 static ssize_t
-tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
+tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 	ssize_t ret;
 
@@ -458,7 +460,7 @@ netmap_cleanup(struct net_backend *be)
 }
 
 static ssize_t
-netmap_send(struct net_backend *be, struct iovec *iov,
+netmap_send(struct net_backend *be, const struct iovec *iov,
 	    int iovcnt)
 {
 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
@@ -538,7 +540,7 @@ txsync:
 }
 
 static ssize_t
-netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
+netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
 	struct netmap_slot *slot = NULL;
@@ -749,42 +751,10 @@ netbe_set_cap(struct net_backend *be, uint64_t feature
 	return (ret);
 }
 
-static __inline struct iovec *
-iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
-{
-	struct iovec *riov;
-
-	/* XXX short-cut: assume first segment is >= tlen */
-	assert(iov[0].iov_len >= tlen);
-
-	iov[0].iov_len -= tlen;
-	if (iov[0].iov_len == 0) {
-		assert(*iovcnt > 1);
-		*iovcnt -= 1;
-		riov = &iov[1];
-	} else {
-		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
-		riov = &iov[0];
-	}
-
-	return (riov);
-}
-
 ssize_t
-netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt)
+netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 
-	assert(be != NULL);
-	if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
-		/*
-		 * The frontend uses a virtio-net header, but the backend
-		 * does not. We ignore it (as it must be all zeroes) and
-		 * strip it.
-		 */
-		assert(be->be_vnet_hdr_len == 0);
-		iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
-	}
-
 	return (be->send(be, iov, iovcnt));
 }
 
@@ -794,46 +764,10 @@ netbe_send(struct net_backend *be, struct iovec *iov, 
  * the length of the packet just read. Return -1 in case of errors.
  */
 ssize_t
-netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
+netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
-	/* Length of prepended virtio-net header. */
-	unsigned int hlen = be->fe_vnet_hdr_len;
-	int ret;
 
-	assert(be != NULL);
-
-	if (hlen && hlen != be->be_vnet_hdr_len) {
-		/*
-		 * The frontend uses a virtio-net header, but the backend
-		 * does not. We need to prepend a zeroed header.
-		 */
-		struct virtio_net_rxhdr *vh;
-
-		assert(be->be_vnet_hdr_len == 0);
-
-		/*
-		 * Get a pointer to the rx header, and use the
-		 * data immediately following it for the packet buffer.
-		 */
-		vh = iov[0].iov_base;
-		iov = iov_trim(iov, &iovcnt, hlen);
-
-		/*
-		 * The only valid field in the rx packet header is the
-		 * number of buffers if merged rx bufs were negotiated.
-		 */
-		memset(vh, 0, hlen);
-		if (hlen == VNET_HDR_LEN) {
-			vh->vrh_bufs = 1;
-		}
-	}
-
-	ret = be->recv(be, iov, iovcnt);
-	if (ret > 0) {
-		ret += hlen;
-	}
-
-	return (ret);
+	return (be->recv(be, iov, iovcnt));
 }
 
 /*
@@ -870,4 +804,11 @@ netbe_rx_enable(struct net_backend *be)
 {
 
 	return be->recv_enable(be);
+}
+
+size_t
+netbe_get_vnet_hdr_len(struct net_backend *be)
+{
+
+	return (be->be_vnet_hdr_len);
 }

Modified: stable/12/usr.sbin/bhyve/net_backends.h
==============================================================================
--- stable/12/usr.sbin/bhyve/net_backends.h	Thu Feb 20 21:48:36 2020	(r358184)
+++ stable/12/usr.sbin/bhyve/net_backends.h	Thu Feb 20 21:52:36 2020	(r358185)
@@ -43,8 +43,9 @@ void	netbe_cleanup(net_backend_t *be);
 uint64_t netbe_get_cap(net_backend_t *be);
 int	 netbe_set_cap(net_backend_t *be, uint64_t cap,
              unsigned vnet_hdr_len);
-ssize_t	netbe_send(net_backend_t *be, struct iovec *iov, int iovcnt);
-ssize_t	netbe_recv(net_backend_t *be, struct iovec *iov, int iovcnt);
+size_t	netbe_get_vnet_hdr_len(net_backend_t *be);
+ssize_t	netbe_send(net_backend_t *be, const struct iovec *iov, int iovcnt);
+ssize_t	netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt);
 ssize_t	netbe_rx_discard(net_backend_t *be);
 void	netbe_rx_disable(net_backend_t *be);
 void	netbe_rx_enable(net_backend_t *be);

Modified: stable/12/usr.sbin/bhyve/net_utils.c
==============================================================================
--- stable/12/usr.sbin/bhyve/net_utils.c	Thu Feb 20 21:48:36 2020	(r358184)
+++ stable/12/usr.sbin/bhyve/net_utils.c	Thu Feb 20 21:52:36 2020	(r358185)
@@ -43,21 +43,19 @@ int
 net_parsemac(char *mac_str, uint8_t *mac_addr)
 {
         struct ether_addr *ea;
-        char *tmpstr;
         char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
 
-        tmpstr = strsep(&mac_str,"=");
+	if (mac_str == NULL)
+		return (EINVAL);
 
-        if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
-                ea = ether_aton(mac_str);
+	ea = ether_aton(mac_str);
 
-                if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
-                    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
-			EPRINTLN("Invalid MAC %s", mac_str);
-                        return (EINVAL);
-                } else
-                        memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
-        }
+	if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
+	    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
+		EPRINTLN("Invalid MAC %s", mac_str);
+		return (EINVAL);
+	} else
+		memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
 
         return (0);
 }

Modified: stable/12/usr.sbin/bhyve/pci_e82545.c
==============================================================================
--- stable/12/usr.sbin/bhyve/pci_e82545.c	Thu Feb 20 21:48:36 2020	(r358184)
+++ stable/12/usr.sbin/bhyve/pci_e82545.c	Thu Feb 20 21:52:36 2020	(r358185)
@@ -2328,18 +2328,36 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi,
 	mac_provided = 0;
 	sc->esc_be = NULL;
 	if (opts != NULL) {
-		int err;
+		int err = 0;
 
 		devname = vtopts = strdup(opts);
 		(void) strsep(&vtopts, ",");
 
-		if (vtopts != NULL) {
-			err = net_parsemac(vtopts, sc->esc_mac.octet);
-			if (err != 0) {
-				free(devname);
-				return (err);
+		/*
+		 * Parse the list of options in the form
+		 *     key1=value1,...,keyN=valueN.
+		 */
+		while (vtopts != NULL) {
+			char *value = vtopts;
+			char *key;
+
+			key = strsep(&value, "=");
+			if (value == NULL)
+				break;
+			vtopts = value;
+			(void) strsep(&vtopts, ",");
+
+			if (strcmp(key, "mac") == 0) {
+				err = net_parsemac(value, sc->esc_mac.octet);
+				if (err)
+					break;
+				mac_provided = 1;
 			}
-			mac_provided = 1;
+		}
+
+		if (err) {
+			free(devname);
+			return (err);
 		}
 
 		err = netbe_init(&sc->esc_be, devname, e82545_rx_callback, sc);

Modified: stable/12/usr.sbin/bhyve/pci_virtio_net.c
==============================================================================
--- stable/12/usr.sbin/bhyve/pci_virtio_net.c	Thu Feb 20 21:48:36 2020	(r358184)
+++ stable/12/usr.sbin/bhyve/pci_virtio_net.c	Thu Feb 20 21:52:36 2020	(r358185)
@@ -117,6 +117,9 @@ struct pci_vtnet_softc {
 	pthread_cond_t	tx_cond;
 	int		tx_in_progress;
 
+	size_t		vhdrlen;
+	size_t		be_vhdrlen;
+
 	struct virtio_net_config vsc_config;
 	struct virtio_consts vsc_consts;
 };
@@ -180,6 +183,38 @@ pci_vtnet_reset(void *vsc)
 	pthread_mutex_unlock(&sc->rx_mtx);
 }
 
+static __inline struct iovec *
+iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
+{
+	struct iovec *riov;
+
+	if (iov[0].iov_len < hlen) {
+		/*
+		 * Not enough header space in the first fragment.
+		 * That's not ok for us.
+		 */
+		return NULL;
+	}
+
+	iov[0].iov_len -= hlen;
+	if (iov[0].iov_len == 0) {
+		*iovcnt -= 1;
+		if (*iovcnt == 0) {
+			/*
+			 * Only space for the header. That's not
+			 * enough for us.
+			 */
+			return NULL;
+		}
+		riov = &iov[1];
+	} else {
+		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen);
+		riov = &iov[0];
+	}
+
+	return (riov);
+}
+
 struct virtio_mrg_rxbuf_info {
 	uint16_t idx;
 	uint16_t pad;
@@ -189,31 +224,34 @@ struct virtio_mrg_rxbuf_info {
 static void
 pci_vtnet_rx(struct pci_vtnet_softc *sc)
 {
+	int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen;
 	struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
 	struct iovec iov[VTNET_MAXSEGS + 1];
 	struct vqueue_info *vq;
-	uint32_t cur_iov_bytes;
-	struct iovec *cur_iov;
-	uint16_t cur_iov_len;
+	uint32_t riov_bytes;
+	struct iovec *riov;
+	int riov_len;
 	uint32_t ulen;
 	int n_chains;
 	int len;
 
 	vq = &sc->vsc_queues[VTNET_RXQ];
 	for (;;) {
+		struct virtio_net_rxhdr *hdr;
+
 		/*
 		 * Get a descriptor chain to store the next ingress
 		 * packet. In case of mergeable rx buffers, get as
 		 * many chains as necessary in order to make room
 		 * for a maximum sized LRO packet.
 		 */
-		cur_iov_bytes = 0;
-		cur_iov_len = 0;
-		cur_iov = iov;
+		riov_bytes = 0;
+		riov_len = 0;
+		riov = iov;
 		n_chains = 0;
 		do {
-			int n = vq_getchain(vq, &info[n_chains].idx, cur_iov,
-			    VTNET_MAXSEGS - cur_iov_len, NULL);
+			int n = vq_getchain(vq, &info[n_chains].idx, riov,
+			    VTNET_MAXSEGS - riov_len, NULL);
 
 			if (n == 0) {
 				/*
@@ -239,21 +277,43 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 				vq_kick_disable(vq);
 				continue;
 			}
-			assert(n >= 1 && cur_iov_len + n <= VTNET_MAXSEGS);
-			cur_iov_len += n;
+			assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS);
+			riov_len += n;
 			if (!sc->rx_merge) {
 				n_chains = 1;
 				break;
 			}
-			info[n_chains].len = (uint32_t)count_iov(cur_iov, n);
-			cur_iov_bytes += info[n_chains].len;
-			cur_iov += n;
+			info[n_chains].len = (uint32_t)count_iov(riov, n);
+			riov_bytes += info[n_chains].len;
+			riov += n;
 			n_chains++;
-		} while (cur_iov_bytes < VTNET_MAX_PKT_LEN &&
-			    cur_iov_len < VTNET_MAXSEGS);
+		} while (riov_bytes < VTNET_MAX_PKT_LEN &&
+			    riov_len < VTNET_MAXSEGS);
 
-		len = netbe_recv(sc->vsc_be, iov, cur_iov_len);
+		riov = iov;
+		hdr = riov[0].iov_base;
+		if (prepend_hdr_len > 0) {
+			/*
+			 * The frontend uses a virtio-net header, but the
+			 * backend does not. We need to prepend a zeroed
+			 * header.
+			 */
+			riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len);
+			if (riov == NULL) {
+				/*
+				 * The first collected chain is nonsensical,
+				 * as it is not even enough to store the
+				 * virtio-net header. Just drop it.
+				 */
+				vq_relchain(vq, info[0].idx, 0);
+				vq_retchains(vq, n_chains - 1);
+				continue;
+			}
+			memset(hdr, 0, prepend_hdr_len);
+		}
 
+		len = netbe_recv(sc->vsc_be, riov, riov_len);
+
 		if (len <= 0) {
 			/*
 			 * No more packets (len == 0), or backend errored
@@ -266,18 +326,18 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 			return;
 		}
 
-		ulen = (uint32_t)len; /* avoid too many casts below */
+		ulen = (uint32_t)(len + prepend_hdr_len);
 
-		/* Publish the used buffers to the guest. */
+		/*
+		 * Publish the used buffers to the guest, reporting the
+		 * number of bytes that we wrote.
+		 */
 		if (!sc->rx_merge) {
 			vq_relchain(vq, info[0].idx, ulen);
 		} else {
-			struct virtio_net_rxhdr *hdr = iov[0].iov_base;
 			uint32_t iolen;
 			int i = 0;
 
-			assert(iov[0].iov_len >= sizeof(*hdr));
-
 			do {
 				iolen = info[i].len;
 				if (iolen > ulen) {
@@ -333,6 +393,7 @@ static void
 pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
 {
 	struct iovec iov[VTNET_MAXSEGS + 1];
+	struct iovec *siov = iov;
 	uint16_t idx;
 	ssize_t len;
 	int n;
@@ -344,10 +405,34 @@ pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vq
 	n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
 	assert(n >= 1 && n <= VTNET_MAXSEGS);
 
-	len = netbe_send(sc->vsc_be, iov, n);
+	if (sc->vhdrlen != sc->be_vhdrlen) {
+		/*
+		 * The frontend uses a virtio-net header, but the backend
+		 * does not. We simply strip the header and ignore it, as
+		 * it should be zero-filled.
+		 */
+		siov = iov_trim_hdr(siov, &n, sc->vhdrlen);
+	}
 
-	/* chain is processed, release it and set len */
-	vq_relchain(vq, idx, len > 0 ? len : 0);
+	if (siov == NULL) {
+		/* The chain is nonsensical. Just drop it. */
+		len = 0;
+	} else {
+		len = netbe_send(sc->vsc_be, siov, n);
+		if (len < 0) {
+			/*
+			 * If send failed, report that 0 bytes
+			 * were read.
+			 */
+			len = 0;
+		}
+	}
+
+	/*
+	 * Return the processed chain to the guest, reporting
+	 * the number of bytes that we read.
+	 */
+	vq_relchain(vq, idx, len);
 }
 
 /* Called on TX kick. */
@@ -466,21 +551,40 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *
 	if (opts != NULL) {
 		char *devname;
 		char *vtopts;
-		int err;
+		int err = 0;
 
+		/* Get the device name. */
 		devname = vtopts = strdup(opts);
 		(void) strsep(&vtopts, ",");
 
-		if (vtopts != NULL) {
-			err = net_parsemac(vtopts, sc->vsc_config.mac);
-			if (err != 0) {
-				free(devname);
-				free(sc);
-				return (err);
+		/*
+		 * Parse the list of options in the form
+		 *     key1=value1,...,keyN=valueN.
+		 */
+		while (vtopts != NULL) {
+			char *value = vtopts;
+			char *key;
+
+			key = strsep(&value, "=");
+			if (value == NULL)
+				break;
+			vtopts = value;
+			(void) strsep(&vtopts, ",");
+
+			if (strcmp(key, "mac") == 0) {
+				err = net_parsemac(value, sc->vsc_config.mac);
+				if (err)
+					break;
+				mac_provided = 1;
 			}
-			mac_provided = 1;
 		}
 
+		if (err) {
+			free(devname);
+			free(sc);
+			return (err);
+		}
+
 		err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback,
 		          sc);
 		free(devname);
@@ -520,6 +624,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *
 	sc->resetting = 0;
 
 	sc->rx_merge = 0;
+	sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
 	pthread_mutex_init(&sc->rx_mtx, NULL); 
 
 	/* 
@@ -574,24 +679,25 @@ static void
 pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
 {
 	struct pci_vtnet_softc *sc = vsc;
-	unsigned int rx_vhdrlen;
 
 	sc->vsc_features = negotiated_features;
 
 	if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) {
-		rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
+		sc->vhdrlen = sizeof(struct virtio_net_rxhdr);
 		sc->rx_merge = 1;
 	} else {
 		/*
 		 * Without mergeable rx buffers, virtio-net header is 2
 		 * bytes shorter than sizeof(struct virtio_net_rxhdr).
 		 */
-		rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
+		sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
 		sc->rx_merge = 0;
 	}
 
 	/* Tell the backend to enable some capabilities it has advertised. */
-	netbe_set_cap(sc->vsc_be, negotiated_features, rx_vhdrlen);
+	netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen);
+	sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be);
+	assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen);
 }
 
 static struct pci_devemu pci_de_vnet = {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202002202152.01KLqajw027340>