Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 10 Oct 2017 08:32:03 +0000 (UTC)
From:      Sepherosa Ziehau <sephe@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r324489 - head/sys/dev/hyperv/netvsc
Message-ID:  <201710100832.v9A8W3FV066114@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: sephe
Date: Tue Oct 10 08:32:03 2017
New Revision: 324489
URL: https://svnweb.freebsd.org/changeset/base/324489

Log:
  hyperv/hn: Workaround erroneous hash type observed on WS2016.
  
  Background:
  - UDP 4-tuple hash type is unconditionally enabled in Hyper-V on WS2016,
    which is _not_ affected by NDIS_OBJTYPE_RSS_PARAMS.
  - Non-fragment UDP/IPv4 datagrams' hash type is delivered to VM as
    TCP_IPV4.
  
  Currently this erroneous behavior only applies to WS2016/Windows10.
  
  Force l3/l4 protocol check, if the RXed packet's hash type is TCP_IPV4,
  and the Hyper-V is running on WS2016/Windows10.  If the RXed packet is
  UDP datagram, adjust mbuf hash type to UDP_IPV4.
  
  MFC after:	3 days
  Sponsored by:	Microsoft

Modified:
  head/sys/dev/hyperv/netvsc/hn_nvs.c
  head/sys/dev/hyperv/netvsc/if_hn.c
  head/sys/dev/hyperv/netvsc/if_hnvar.h

Modified: head/sys/dev/hyperv/netvsc/hn_nvs.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/hn_nvs.c	Tue Oct 10 08:23:19 2017	(r324488)
+++ head/sys/dev/hyperv/netvsc/hn_nvs.c	Tue Oct 10 08:32:03 2017	(r324489)
@@ -601,6 +601,11 @@ hn_nvs_attach(struct hn_softc *sc, int mtu)
 {
 	int error;
 
+	if (hyperv_ver_major >= 10) {
+		/* UDP 4-tuple hash is enforced. */
+		sc->hn_caps |= HN_CAP_UDPHASH;
+	}
+
 	/*
 	 * Initialize NVS.
 	 */

Modified: head/sys/dev/hyperv/netvsc/if_hn.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/if_hn.c	Tue Oct 10 08:23:19 2017	(r324488)
+++ head/sys/dev/hyperv/netvsc/if_hn.c	Tue Oct 10 08:32:03 2017	(r324489)
@@ -385,6 +385,7 @@ static void			hn_link_status(struct hn_softc *);
 static int			hn_create_rx_data(struct hn_softc *, int);
 static void			hn_destroy_rx_data(struct hn_softc *);
 static int			hn_check_iplen(const struct mbuf *, int);
+static void			hn_rxpkt_proto(const struct mbuf *, int *, int *);
 static int			hn_set_rxfilter(struct hn_softc *, uint32_t);
 static int			hn_rxfilter_config(struct hn_softc *);
 static int			hn_rss_reconfig(struct hn_softc *);
@@ -399,6 +400,7 @@ static int			hn_tx_ring_create(struct hn_softc *, int)
 static void			hn_tx_ring_destroy(struct hn_tx_ring *);
 static int			hn_create_tx_data(struct hn_softc *, int);
 static void			hn_fixup_tx_data(struct hn_softc *);
+static void			hn_fixup_rx_data(struct hn_softc *);
 static void			hn_destroy_tx_data(struct hn_softc *);
 static void			hn_txdesc_dmamap_destroy(struct hn_txdesc *);
 static void			hn_txdesc_gc(struct hn_tx_ring *,
@@ -2238,9 +2240,10 @@ hn_attach(device_t dev)
 #endif
 
 	/*
-	 * Fixup TX stuffs after synthetic parts are attached.
+	 * Fixup TX/RX stuffs after synthetic parts are attached.
 	 */
 	hn_fixup_tx_data(sc);
+	hn_fixup_rx_data(sc);
 
 	ctx = device_get_sysctl_ctx(dev);
 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
@@ -3378,6 +3381,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int
 	struct mbuf *m_new;
 	int size, do_lro = 0, do_csum = 1, is_vf = 0;
 	int hash_type = M_HASHTYPE_NONE;
+	int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE;
 
 	ifp = hn_ifp;
 	if (rxr->hn_rxvf_ifp != NULL) {
@@ -3477,31 +3481,9 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int
 		    (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
 			do_lro = 1;
 	} else {
-		const struct ether_header *eh;
-		uint16_t etype;
-		int hoff;
-
-		hoff = sizeof(*eh);
-		/* Checked at the beginning of this function. */
-		KASSERT(m_new->m_len >= hoff, ("not ethernet frame"));
-
-		eh = mtod(m_new, struct ether_header *);
-		etype = ntohs(eh->ether_type);
-		if (etype == ETHERTYPE_VLAN) {
-			const struct ether_vlan_header *evl;
-
-			hoff = sizeof(*evl);
-			if (m_new->m_len < hoff)
-				goto skip;
-			evl = mtod(m_new, struct ether_vlan_header *);
-			etype = ntohs(evl->evl_proto);
-		}
-
-		if (etype == ETHERTYPE_IP) {
-			int pr;
-
-			pr = hn_check_iplen(m_new, hoff);
-			if (pr == IPPROTO_TCP) {
+		hn_rxpkt_proto(m_new, &l3proto, &l4proto);
+		if (l3proto == ETHERTYPE_IP) {
+			if (l4proto == IPPROTO_TCP) {
 				if (do_csum &&
 				    (rxr->hn_trust_hcsum &
 				     HN_TRUST_HCSUM_TCP)) {
@@ -3512,7 +3494,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int
 					m_new->m_pkthdr.csum_data = 0xffff;
 				}
 				do_lro = 1;
-			} else if (pr == IPPROTO_UDP) {
+			} else if (l4proto == IPPROTO_UDP) {
 				if (do_csum &&
 				    (rxr->hn_trust_hcsum &
 				     HN_TRUST_HCSUM_UDP)) {
@@ -3522,7 +3504,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int
 					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 					m_new->m_pkthdr.csum_data = 0xffff;
 				}
-			} else if (pr != IPPROTO_DONE && do_csum &&
+			} else if (l4proto != IPPROTO_DONE && do_csum &&
 			    (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
 				rxr->hn_csum_trusted++;
 				m_new->m_pkthdr.csum_flags |=
@@ -3530,7 +3512,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int
 			}
 		}
 	}
-skip:
+
 	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
 		m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
 		    NDIS_VLAN_INFO_ID(info->vlan_info),
@@ -3585,6 +3567,35 @@ skip:
 
 			case NDIS_HASH_TCP_IPV4:
 				hash_type = M_HASHTYPE_RSS_TCP_IPV4;
+				if (rxr->hn_rx_flags & HN_RX_FLAG_UDP_HASH) {
+					int def_htype = M_HASHTYPE_OPAQUE_HASH;
+
+					if (is_vf)
+						def_htype = M_HASHTYPE_NONE;
+
+					/*
+					 * UDP 4-tuple hash is delivered as
+					 * TCP 4-tuple hash.
+					 */
+					if (l3proto == ETHERTYPE_MAX) {
+						hn_rxpkt_proto(m_new,
+						    &l3proto, &l4proto);
+					}
+					if (l3proto == ETHERTYPE_IP) {
+						if (l4proto == IPPROTO_UDP) {
+							hash_type =
+							M_HASHTYPE_RSS_UDP_IPV4;
+							do_lro = 0;
+						} else if (l4proto !=
+						    IPPROTO_TCP) {
+							hash_type = def_htype;
+							do_lro = 0;
+						}
+					} else {
+						hash_type = def_htype;
+						do_lro = 0;
+					}
+				}
 				break;
 
 			case NDIS_HASH_IPV6:
@@ -4835,6 +4846,36 @@ hn_check_iplen(const struct mbuf *m, int hoff)
 	return ip->ip_p;
 }
 
+static void
+hn_rxpkt_proto(const struct mbuf *m_new, int *l3proto, int *l4proto)
+{
+	const struct ether_header *eh;
+	uint16_t etype;
+	int hoff;
+
+	hoff = sizeof(*eh);
+	/* Checked at the beginning of this function. */
+	KASSERT(m_new->m_len >= hoff, ("not ethernet frame"));
+
+	eh = mtod(m_new, const struct ether_header *);
+	etype = ntohs(eh->ether_type);
+	if (etype == ETHERTYPE_VLAN) {
+		const struct ether_vlan_header *evl;
+
+		hoff = sizeof(*evl);
+		if (m_new->m_len < hoff)
+			return;
+		evl = mtod(m_new, const struct ether_vlan_header *);
+		etype = ntohs(evl->evl_proto);
+	}
+	*l3proto = etype;
+
+	if (etype == ETHERTYPE_IP)
+		*l4proto = hn_check_iplen(m_new, hoff);
+	else
+		*l4proto = IPPROTO_DONE;
+}
+
 static int
 hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
 {
@@ -5543,6 +5584,18 @@ hn_fixup_tx_data(struct hn_softc *sc)
 			if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
 		for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
 			sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
+	}
+}
+
+static void
+hn_fixup_rx_data(struct hn_softc *sc)
+{
+
+	if (sc->hn_caps & HN_CAP_UDPHASH) {
+		int i;
+
+		for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+			sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_UDP_HASH;
 	}
 }
 

Modified: head/sys/dev/hyperv/netvsc/if_hnvar.h
==============================================================================
--- head/sys/dev/hyperv/netvsc/if_hnvar.h	Tue Oct 10 08:23:19 2017	(r324488)
+++ head/sys/dev/hyperv/netvsc/if_hnvar.h	Tue Oct 10 08:32:03 2017	(r324489)
@@ -97,6 +97,7 @@ struct hn_rx_ring {
 #define HN_RX_FLAG_ATTACHED	0x0001
 #define HN_RX_FLAG_BR_REF	0x0002
 #define HN_RX_FLAG_XPNT_VF	0x0004
+#define HN_RX_FLAG_UDP_HASH	0x0008
 
 struct hn_tx_ring {
 #ifndef HN_USE_TXDESC_BUFRING
@@ -304,11 +305,12 @@ do {						\
 #define HN_CAP_TSO4			0x0080
 #define HN_CAP_TSO6			0x0100
 #define HN_CAP_HASHVAL			0x0200
+#define HN_CAP_UDPHASH			0x0400
 
 /* Capability description for use with printf(9) %b identifier. */
 #define HN_CAP_BITS				\
 	"\020\1VLAN\2MTU\3IPCS\4TCP4CS\5TCP6CS"	\
-	"\6UDP4CS\7UDP6CS\10TSO4\11TSO6\12HASHVAL"
+	"\6UDP4CS\7UDP6CS\10TSO4\11TSO6\12HASHVAL\13UDPHASH"
 
 #define HN_LINK_FLAG_LINKUP		0x0001
 #define HN_LINK_FLAG_NETCHG		0x0002



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201710100832.v9A8W3FV066114>