Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 19 Feb 2016 02:03:15 +0000 (UTC)
From:      Sepherosa Ziehau <sephe@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r295789 - in stable/10/sys: conf dev/hyperv/include dev/hyperv/netvsc dev/hyperv/vmbus modules/hyperv/vmbus x86/x86
Message-ID:  <201602190203.u1J23FIZ098390@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: sephe
Date: Fri Feb 19 02:03:14 2016
New Revision: 295789
URL: https://svnweb.freebsd.org/changeset/base/295789

Log:
  MFC [Hyper-V]: r293719-r293722, r293869-r293871, r293873-r293875, r293877
  
  r293719 hyperv/hn: Implement LRO
  r293720 hyperv/hn: Implement SIOC[SG]IFMEDIA support
  r293721 hyperv/hn: Avoid mbuf cluster allocation, if the packet is small.
  r293722 hyperv/hn: Removed unused netvsc_init()
  r293869 hyperv/hn: Unbreak LINT-NOIP
  r293870 hyperv: use x86 generic code to do the hypervisor detection
  r293871 hyperv: remove unused vmbus definitions
  r293873 hyperv: implement an event timer
  r293874 hyperv: add interrupt counters
  r293875 hyperv: set receive buffer size according to NVSP protocol version
  r293877 Unbreak `make depend` with sys/modules/hyperv/vmbus after r293870
  
  Approved by:	re (glebius), adrian (mentor)
  Sponsored by:	Microsoft OSTC

Added:
  stable/10/sys/dev/hyperv/vmbus/hv_et.c
     - copied unchanged from r293873, head/sys/dev/hyperv/vmbus/hv_et.c
Modified:
  stable/10/sys/conf/files.amd64
  stable/10/sys/conf/files.i386
  stable/10/sys/dev/hyperv/include/hyperv.h
  stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c
  stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h
  stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
  stable/10/sys/dev/hyperv/netvsc/hv_rndis.h
  stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c
  stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.h
  stable/10/sys/dev/hyperv/vmbus/hv_connection.c
  stable/10/sys/dev/hyperv/vmbus/hv_hv.c
  stable/10/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
  stable/10/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
  stable/10/sys/modules/hyperv/vmbus/Makefile
  stable/10/sys/x86/x86/identcpu.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/conf/files.amd64
==============================================================================
--- stable/10/sys/conf/files.amd64	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/conf/files.amd64	Fri Feb 19 02:03:14 2016	(r295789)
@@ -270,6 +270,7 @@ dev/hyperv/vmbus/hv_channel.c				optiona
 dev/hyperv/vmbus/hv_channel_mgmt.c			optional	hyperv
 dev/hyperv/vmbus/hv_connection.c			optional	hyperv
 dev/hyperv/vmbus/hv_hv.c				optional	hyperv
+dev/hyperv/vmbus/hv_et.c				optional	hyperv
 dev/hyperv/vmbus/hv_ring_buffer.c			optional	hyperv
 dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c			optional	hyperv
 dev/kbd/kbd.c			optional	atkbd | sc | ukbd | vt

Modified: stable/10/sys/conf/files.i386
==============================================================================
--- stable/10/sys/conf/files.i386	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/conf/files.i386	Fri Feb 19 02:03:14 2016	(r295789)
@@ -248,6 +248,7 @@ dev/hyperv/vmbus/hv_channel.c				optiona
 dev/hyperv/vmbus/hv_channel_mgmt.c			optional	hyperv
 dev/hyperv/vmbus/hv_connection.c			optional	hyperv
 dev/hyperv/vmbus/hv_hv.c				optional	hyperv
+dev/hyperv/vmbus/hv_et.c				optional	hyperv
 dev/hyperv/vmbus/hv_ring_buffer.c			optional	hyperv
 dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c			optional	hyperv
 dev/ichwd/ichwd.c		optional ichwd

Modified: stable/10/sys/dev/hyperv/include/hyperv.h
==============================================================================
--- stable/10/sys/dev/hyperv/include/hyperv.h	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/dev/hyperv/include/hyperv.h	Fri Feb 19 02:03:14 2016	(r295789)
@@ -335,11 +335,6 @@ typedef enum {
 	HV_CHANNEL_MESSAGE_INITIATED_CONTACT		= 14,
 	HV_CHANNEL_MESSAGE_VERSION_RESPONSE		= 15,
 	HV_CHANNEL_MESSAGE_UNLOAD			= 16,
-
-#ifdef	HV_VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD
-	HV_CHANNEL_MESSAGE_VIEW_RANGE_ADD		= 17,
-	HV_CHANNEL_MESSAGE_VIEW_RANGE_REMOVE		= 18,
-#endif
 	HV_CHANNEL_MESSAGE_COUNT
 } hv_vmbus_channel_msg_type;
 

Modified: stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.c	Fri Feb 19 02:03:14 2016	(r295789)
@@ -641,6 +641,12 @@ hv_nv_connect_to_vsp(struct hv_device *d
 	/* sema_wait(&NetVscChannel->channel_init_sema); */
 
 	/* Post the big receive buffer to NetVSP */
+	if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
+		net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+	else
+		net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+	net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
+
 	ret = hv_nv_init_rx_buffer_with_net_vsp(device);
 	if (ret == 0)
 		ret = hv_nv_init_send_buffer_with_net_vsp(device);
@@ -675,9 +681,6 @@ hv_nv_on_device_add(struct hv_device *de
 		goto cleanup;
 
 	/* Initialize the NetVSC channel extension */
-	net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
-
-	net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
 
 	sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");
 
@@ -918,6 +921,7 @@ hv_nv_on_receive(netvsc_dev *net_dev, st
 	 */
 	hv_nv_on_receive_completion(device, vm_xfer_page_pkt->d.transaction_id,
 	    status);
+	hv_rf_receive_rollup(net_dev);
 }
 
 /*

Modified: stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h	Fri Feb 19 02:03:14 2016	(r295789)
@@ -44,6 +44,12 @@
 #include <sys/malloc.h>
 #include <sys/sx.h>
 
+#include <netinet/in.h>
+#include <netinet/tcp_lro.h>
+
+#include <net/if.h>
+#include <net/if_media.h>
+
 #include <dev/hyperv/include/hyperv.h>
 
 MALLOC_DECLARE(M_NETVSC);
@@ -851,7 +857,7 @@ typedef struct nvsp_msg_ {
 #define NETVSC_SEND_BUFFER_SIZE			(1024*1024*15)   /* 15M */
 #define NETVSC_SEND_BUFFER_ID			0xface
 
-
+#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY	(1024*1024*15) /* 15MB */
 #define NETVSC_RECEIVE_BUFFER_SIZE		(1024*1024*16) /* 16MB */
 
 #define NETVSC_RECEIVE_BUFFER_ID		0xcafe
@@ -984,6 +990,7 @@ typedef struct {
 typedef struct hn_softc {
 	struct ifnet    *hn_ifp;
 	struct arpcom   arpcom;
+	struct ifmedia	hn_media;
 	device_t        hn_dev;
 	uint8_t         hn_unit;
 	int             hn_carrier;
@@ -994,6 +1001,18 @@ typedef struct hn_softc {
 	int             temp_unusable;
 	struct hv_device  *hn_dev_obj;
 	netvsc_dev  	*net_dev;
+
+	struct lro_ctrl	hn_lro;
+	int		hn_lro_hiwat;
+
+	/* Trust tcp segments verification on host side */
+	int		hn_trust_hosttcp;
+
+	u_long		hn_csum_ip;
+	u_long		hn_csum_tcp;
+	u_long		hn_csum_trusted;
+	u_long		hn_lro_tried;
+	u_long		hn_small_pkts;
 } hn_softc_t;
 
 

Modified: stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c	Fri Feb 19 02:03:14 2016	(r295789)
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
+#include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_arp.h>
@@ -138,13 +139,14 @@ __FBSDID("$FreeBSD$");
     CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP|		\
     CSUM_IP6_TSO|CSUM_IP6_ISCSI)
 
-/*
- * Data types
- */
-
-struct hv_netvsc_driver_context {
-	uint32_t		drv_inited;
-};
+/* XXX move to netinet/tcp_lro.h */
+#define HN_LRO_HIWAT_MAX				65535
+#define HN_LRO_HIWAT_DEF				HN_LRO_HIWAT_MAX
+/* YYY 2*MTU is a bit rough, but should be good enough. */
+#define HN_LRO_HIWAT_MTULIM(ifp)			(2 * (ifp)->if_mtu)
+/*
+ * An LRO high watermark is valid only when it lies inside
+ * [HN_LRO_HIWAT_MTULIM(ifp), HN_LRO_HIWAT_MAX].  Both bounds must hold,
+ * so the two tests are ANDed; ORing them would accept every value.
+ * The lower bound is compared as an int so that a negative 'hiwat' is
+ * rejected even if if_mtu has an unsigned type (NOTE(review): callers
+ * pass 'hiwat' as int; if_mtu's type is not visible here).
+ */
+#define HN_LRO_HIWAT_ISVALID(sc, hiwat)			\
+    ((hiwat) >= (int)HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) &&	\
+     (hiwat) <= HN_LRO_HIWAT_MAX)
 
 /*
  * Be aware that this sleepable mutex will exhibit WITNESS errors when
@@ -168,9 +170,9 @@ struct hv_netvsc_driver_context {
 
 int hv_promisc_mode = 0;    /* normal mode by default */
 
-/* The one and only one */
-static struct hv_netvsc_driver_context g_netvsc_drv;
-
+/* Trust tcp segments verification on host side. */
+static int hn_trust_hosttcp = 0;
+TUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp);
 
 /*
  * Forward declarations
@@ -181,6 +183,21 @@ static void hn_ifinit(void *xsc);
 static int  hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
 static int  hn_start_locked(struct ifnet *ifp);
 static void hn_start(struct ifnet *ifp);
+static int hn_ifmedia_upd(struct ifnet *ifp);
+static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
+#ifdef HN_LRO_HIWAT
+static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS);
+#endif
+static int hn_check_iplen(const struct mbuf *, int);
+
+static __inline void
+hn_set_lro_hiwat(struct hn_softc *sc, int hiwat)
+{
+	sc->hn_lro_hiwat = hiwat;
+#ifdef HN_LRO_HIWAT
+	sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
+#endif
+}
 
 /*
  * NetVsc get message transport protocol type 
@@ -238,35 +255,27 @@ static uint32_t get_transport_proto_type
 	return (ret_val);
 }
 
-/*
- * NetVsc driver initialization
- * Note:  Filter init is no longer required
- */
 static int
-netvsc_drv_init(void)
+hn_ifmedia_upd(struct ifnet *ifp __unused)
 {
-	return (0);
+
+	return EOPNOTSUPP;
 }
 
-/*
- * NetVsc global initialization entry point
- */
 static void
-netvsc_init(void)
+hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
-	if (bootverbose)
-		printf("Netvsc initializing... ");
+	struct hn_softc *sc = ifp->if_softc;
 
-	/*
-	 * XXXKYS: cleanup initialization
-	 */
-	if (!cold && !g_netvsc_drv.drv_inited) {
-		g_netvsc_drv.drv_inited = 1;
-		netvsc_drv_init();
-		if (bootverbose)
-			printf("done!\n");
-	} else if (bootverbose)
-		printf("Already initialized!\n");
+	ifmr->ifm_status = IFM_AVALID;
+	ifmr->ifm_active = IFM_ETHER;
+
+	if (!sc->hn_carrier) {
+		ifmr->ifm_active |= IFM_NONE;
+		return;
+	}
+	ifmr->ifm_status |= IFM_ACTIVE;
+	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
 }
 
 /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
@@ -310,10 +319,10 @@ netvsc_attach(device_t dev)
 	hn_softc_t *sc;
 	int unit = device_get_unit(dev);
 	struct ifnet *ifp;
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
 	int ret;
 
-	netvsc_init();
-
 	sc = device_get_softc(dev);
 	if (sc == NULL) {
 		return (ENOMEM);
@@ -322,6 +331,8 @@ netvsc_attach(device_t dev)
 	bzero(sc, sizeof(hn_softc_t));
 	sc->hn_unit = unit;
 	sc->hn_dev = dev;
+	sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF;
+	sc->hn_trust_hosttcp = hn_trust_hosttcp;
 
 	NV_LOCK_INIT(sc, "NetVSCLock");
 
@@ -344,14 +355,22 @@ netvsc_attach(device_t dev)
 	ifp->if_snd.ifq_drv_maxlen = 511;
 	IFQ_SET_READY(&ifp->if_snd);
 
+	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
+	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
+	/* XXX ifmedia_set really should do this for us */
+	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
+
 	/*
 	 * Tell upper layers that we support full VLAN capability.
 	 */
 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
 	ifp->if_capabilities |=
-	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO;
+	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
+	    IFCAP_LRO;
 	ifp->if_capenable |=
-	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO;
+	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
+	    IFCAP_LRO;
 	/*
 	 * Only enable UDP checksum offloading when it is on 2012R2 or
 	 * later. UDP checksum offloading doesn't work on earlier
@@ -372,8 +391,63 @@ netvsc_attach(device_t dev)
 		sc->hn_carrier = 1;
 	}
 
+#if defined(INET) || defined(INET6)
+	tcp_lro_init(&sc->hn_lro);
+	/* Driver private LRO settings */
+	sc->hn_lro.ifp = ifp;
+#ifdef HN_LRO_HIWAT
+	sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
+#endif
+#endif	/* INET || INET6 */
+
 	ether_ifattach(ifp, device_info.mac_addr);
 
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued",
+	    CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed",
+	    CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried",
+	    CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries");
+#ifdef HN_LRO_HIWAT
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat",
+	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl,
+	    "I", "LRO high watermark");
+#endif
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp",
+	    CTLFLAG_RW, &sc->hn_trust_hosttcp, 0,
+	    "Trust tcp segement verification on host side, "
+	    "when csum info is missing");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip",
+	    CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp",
+	    CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted",
+	    CTLFLAG_RW, &sc->hn_csum_trusted,
+	    "# of TCP segements that we trust host's csum verification");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "small_pkts",
+	    CTLFLAG_RW, &sc->hn_small_pkts, "# of small packets received");
+
+	if (unit == 0) {
+		struct sysctl_ctx_list *dc_ctx;
+		struct sysctl_oid_list *dc_child;
+		devclass_t dc;
+
+		/*
+		 * Add sysctl nodes for devclass
+		 */
+		dc = device_get_devclass(dev);
+		dc_ctx = devclass_get_sysctl_ctx(dc);
+		dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc));
+
+		SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp",
+		    CTLFLAG_RD, &hn_trust_hosttcp, 0,
+		    "Trust tcp segement verification on host side, "
+		    "when csum info is missing (global setting)");
+	}
+
 	return (0);
 }
 
@@ -383,6 +457,7 @@ netvsc_attach(device_t dev)
 static int
 netvsc_detach(device_t dev)
 {
+	struct hn_softc *sc = device_get_softc(dev);
 	struct hv_device *hv_device = vmbus_get_devctx(dev); 
 
 	if (bootverbose)
@@ -401,6 +476,11 @@ netvsc_detach(device_t dev)
 
 	hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL);
 
+	ifmedia_removeall(&sc->hn_media);
+#if defined(INET) || defined(INET6)
+	tcp_lro_free(&sc->hn_lro);
+#endif
+
 	return (0);
 }
 
@@ -887,7 +967,7 @@ netvsc_recv(struct hv_device *device_ctx
 	struct mbuf *m_new;
 	struct ifnet *ifp;
 	device_t dev = device_ctx->device;
-	int size;
+	int size, do_lro = 0;
 
 	if (sc == NULL) {
 		return (0); /* TODO: KYS how can this be! */
@@ -906,40 +986,44 @@ netvsc_recv(struct hv_device *device_ctx
 	 */
 	if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) {
 		return (0);
-	}
-
-	/*
-	 * Get an mbuf with a cluster.  For packets 2K or less,
-	 * get a standard 2K cluster.  For anything larger, get a
-	 * 4K cluster.  Any buffers larger than 4K can cause problems
-	 * if looped around to the Hyper-V TX channel, so avoid them.
-	 */
-	size = MCLBYTES;
-
-	if (packet->tot_data_buf_len > MCLBYTES) {
-		/* 4096 */
-		size = MJUMPAGESIZE;
-	}
+	} else if (packet->tot_data_buf_len <= MHLEN) {
+		m_new = m_gethdr(M_NOWAIT, MT_DATA);
+		if (m_new == NULL)
+			return (0);
+		memcpy(mtod(m_new, void *), packet->data,
+		    packet->tot_data_buf_len);
+		m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len;
+		sc->hn_small_pkts++;
+	} else {
+		/*
+		 * Get an mbuf with a cluster.  For packets 2K or less,
+		 * get a standard 2K cluster.  For anything larger, get a
+		 * 4K cluster.  Any buffers larger than 4K can cause problems
+		 * if looped around to the Hyper-V TX channel, so avoid them.
+		 */
+		size = MCLBYTES;
+		if (packet->tot_data_buf_len > MCLBYTES) {
+			/* 4096 */
+			size = MJUMPAGESIZE;
+		}
 
-	m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size);
+		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
+		if (m_new == NULL) {
+			device_printf(dev, "alloc mbuf failed.\n");
+			return (0);
+		}
 
-	if (m_new == NULL) {
-		device_printf(dev, "alloc mbuf failed.\n");
-		return (0);
+		hv_m_append(m_new, packet->tot_data_buf_len, packet->data);
 	}
-
-	hv_m_append(m_new, packet->tot_data_buf_len,
-			packet->data);
-
 	m_new->m_pkthdr.rcvif = ifp;
 
 	/* receive side checksum offload */
-	m_new->m_pkthdr.csum_flags = 0;
 	if (NULL != csum_info) {
 		/* IP csum offload */
 		if (csum_info->receive.ip_csum_succeeded) {
 			m_new->m_pkthdr.csum_flags |=
 			    (CSUM_IP_CHECKED | CSUM_IP_VALID);
+			sc->hn_csum_ip++;
 		}
 
 		/* TCP csum offload */
@@ -947,9 +1031,50 @@ netvsc_recv(struct hv_device *device_ctx
 			m_new->m_pkthdr.csum_flags |=
 			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 			m_new->m_pkthdr.csum_data = 0xffff;
+			sc->hn_csum_tcp++;
 		}
-	}
 
+		if (csum_info->receive.ip_csum_succeeded &&
+		    csum_info->receive.tcp_csum_succeeded)
+			do_lro = 1;
+	} else {
+		const struct ether_header *eh;
+		uint16_t etype;
+		int hoff;
+
+		hoff = sizeof(*eh);
+		if (m_new->m_len < hoff)
+			goto skip;
+		eh = mtod(m_new, struct ether_header *);
+		etype = ntohs(eh->ether_type);
+		if (etype == ETHERTYPE_VLAN) {
+			const struct ether_vlan_header *evl;
+
+			hoff = sizeof(*evl);
+			if (m_new->m_len < hoff)
+				goto skip;
+			evl = mtod(m_new, struct ether_vlan_header *);
+			etype = ntohs(evl->evl_proto);
+		}
+
+		if (etype == ETHERTYPE_IP) {
+			int pr;
+
+			pr = hn_check_iplen(m_new, hoff);
+			if (pr == IPPROTO_TCP) {
+				if (sc->hn_trust_hosttcp) {
+					sc->hn_csum_trusted++;
+					m_new->m_pkthdr.csum_flags |=
+					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
+					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+					m_new->m_pkthdr.csum_data = 0xffff;
+				}
+				/* Rely on SW csum verification though... */
+				do_lro = 1;
+			}
+		}
+	}
+skip:
 	if ((packet->vlan_tci != 0) &&
 	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) {
 		m_new->m_pkthdr.ether_vtag = packet->vlan_tci;
@@ -963,12 +1088,41 @@ netvsc_recv(struct hv_device *device_ctx
 
 	ifp->if_ipackets++;
 
+	if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
+#if defined(INET) || defined(INET6)
+		struct lro_ctrl *lro = &sc->hn_lro;
+
+		if (lro->lro_cnt) {
+			sc->hn_lro_tried++;
+			if (tcp_lro_rx(lro, m_new, 0) == 0) {
+				/* DONE! */
+				return 0;
+			}
+		}
+#endif
+	}
+
 	/* We're not holding the lock here, so don't release it */
 	(*ifp->if_input)(ifp, m_new);
 
 	return (0);
 }
 
+void
+netvsc_recv_rollup(struct hv_device *device_ctx)
+{
+#if defined(INET) || defined(INET6)
+	hn_softc_t *sc = device_get_softc(device_ctx->device);
+	struct lro_ctrl *lro = &sc->hn_lro;
+	struct lro_entry *queued;
+
+	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
+		SLIST_REMOVE_HEAD(&lro->lro_active, next);
+		tcp_lro_flush(lro, queued);
+	}
+#endif
+}
+
 /*
  * Rules for using sc->temp_unusable:
  * 1.  sc->temp_unusable can only be read or written while holding NV_LOCK()
@@ -1024,7 +1178,13 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, 
 
 		/* Obtain and record requested MTU */
 		ifp->if_mtu = ifr->ifr_mtu;
- 		
+		/*
+		 * Make sure that LRO high watermark is still valid,
+		 * after MTU change (the 2*MTU limit).
+		 */
+		if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat))
+			hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp));
+
 		do {
 			NV_LOCK(sc);
 			if (!sc->temp_unusable) {
@@ -1149,6 +1309,8 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, 
 				ifp->if_capenable |= IFCAP_RXCSUM;
 			}
 		}
+		if (mask & IFCAP_LRO)
+			ifp->if_capenable ^= IFCAP_LRO;
 
 		if (mask & IFCAP_TSO4) {
 			ifp->if_capenable ^= IFCAP_TSO4;
@@ -1173,10 +1335,11 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, 
 			error = 0;
 		}
 #endif
-		/* FALLTHROUGH */
+		error = EINVAL;
+		break;
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
-		error = EINVAL;
+		error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
 		break;
 	default:
 		error = ether_ioctl(ifp, cmd, data);
@@ -1294,6 +1457,102 @@ hn_watchdog(struct ifnet *ifp)
 }
 #endif
 
+#ifdef HN_LRO_HIWAT
+static int
+hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int hiwat, error;
+
+	hiwat = sc->hn_lro_hiwat;
+	error = sysctl_handle_int(oidp, &hiwat, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	if (!HN_LRO_HIWAT_ISVALID(sc, hiwat))
+		return EINVAL;
+
+	if (sc->hn_lro_hiwat != hiwat)
+		hn_set_lro_hiwat(sc, hiwat);
+	return 0;
+}
+#endif	/* HN_LRO_HIWAT */
+
+static int
+hn_check_iplen(const struct mbuf *m, int hoff)
+{
+	const struct ip *ip;
+	int len, iphlen, iplen;
+	const struct tcphdr *th;
+	int thoff;				/* TCP data offset */
+
+	len = hoff + sizeof(struct ip);
+
+	/* The packet must be at least the size of an IP header. */
+	if (m->m_pkthdr.len < len)
+		return IPPROTO_DONE;
+
+	/* The fixed IP header must reside completely in the first mbuf. */
+	if (m->m_len < len)
+		return IPPROTO_DONE;
+
+	ip = mtodo(m, hoff);
+
+	/* Bound check the packet's stated IP header length. */
+	iphlen = ip->ip_hl << 2;
+	if (iphlen < sizeof(struct ip))		/* minimum header length */
+		return IPPROTO_DONE;
+
+	/* The full IP header must reside completely in the one mbuf. */
+	if (m->m_len < hoff + iphlen)
+		return IPPROTO_DONE;
+
+	iplen = ntohs(ip->ip_len);
+
+	/*
+	 * Check that the amount of data in the buffers is
+	 * at least as much as the IP header would have us expect.
+	 */
+	if (m->m_pkthdr.len < hoff + iplen)
+		return IPPROTO_DONE;
+
+	/*
+	 * Ignore IP fragments.
+	 */
+	if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
+		return IPPROTO_DONE;
+
+	/*
+	 * The TCP/IP or UDP/IP header must be entirely contained within
+	 * the first fragment of a packet.
+	 */
+	switch (ip->ip_p) {
+	case IPPROTO_TCP:
+		if (iplen < iphlen + sizeof(struct tcphdr))
+			return IPPROTO_DONE;
+		if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
+			return IPPROTO_DONE;
+		th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
+		thoff = th->th_off << 2;
+		if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
+			return IPPROTO_DONE;
+		if (m->m_len < hoff + iphlen + thoff)
+			return IPPROTO_DONE;
+		break;
+	case IPPROTO_UDP:
+		if (iplen < iphlen + sizeof(struct udphdr))
+			return IPPROTO_DONE;
+		if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
+			return IPPROTO_DONE;
+		break;
+	default:
+		if (iplen < iphlen)
+			return IPPROTO_DONE;
+		break;
+	}
+	return ip->ip_p;
+}
+
 static device_method_t netvsc_methods[] = {
         /* Device interface */
         DEVMETHOD(device_probe,         netvsc_probe),
@@ -1315,6 +1574,3 @@ static devclass_t netvsc_devclass;
 DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0);
 MODULE_VERSION(hn, 1);
 MODULE_DEPEND(hn, vmbus, 1, 1, 1);
-SYSINIT(netvsc_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1, netvsc_init,
-     NULL);
-

Modified: stable/10/sys/dev/hyperv/netvsc/hv_rndis.h
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_rndis.h	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/dev/hyperv/netvsc/hv_rndis.h	Fri Feb 19 02:03:14 2016	(r295789)
@@ -1049,6 +1049,7 @@ typedef struct rndismp_rx_bufs_info_ {
 int netvsc_recv(struct hv_device *device_ctx, 
     netvsc_packet *packet, 
     rndis_tcp_ip_csum_info *csum_info);
+void netvsc_recv_rollup(struct hv_device *device_ctx);
 
 void* hv_set_rppi_data(rndis_msg *rndis_mesg,
     uint32_t rppi_size,

Modified: stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.c	Fri Feb 19 02:03:14 2016	(r295789)
@@ -963,3 +963,14 @@ hv_rf_on_send_request_halt_completion(vo
 	request->halt_complete_flag = 1;
 }
 
+/*
+ * RNDIS filter when "all" reception is done
+ */
+void
+hv_rf_receive_rollup(netvsc_dev *net_dev)
+{
+	rndis_device *rndis_dev;
+
+	rndis_dev = (rndis_device *)net_dev->extension;
+	netvsc_recv_rollup(rndis_dev->net_dev->dev);
+}

Modified: stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.h
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.h	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/dev/hyperv/netvsc/hv_rndis_filter.h	Fri Feb 19 02:03:14 2016	(r295789)
@@ -98,6 +98,7 @@ typedef struct rndis_device_ {
 
 int hv_rf_on_receive(netvsc_dev *net_dev,
     struct hv_device *device, netvsc_packet *pkt);
+void hv_rf_receive_rollup(netvsc_dev *net_dev);
 int hv_rf_on_device_add(struct hv_device *device, void *additl_info);
 int hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel);
 int hv_rf_on_open(struct hv_device *device);

Modified: stable/10/sys/dev/hyperv/vmbus/hv_connection.c
==============================================================================
--- stable/10/sys/dev/hyperv/vmbus/hv_connection.c	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/dev/hyperv/vmbus/hv_connection.c	Fri Feb 19 02:03:14 2016	(r295789)
@@ -254,7 +254,7 @@ hv_vmbus_connect(void) {
 
 	hv_vmbus_protocal_version = version;
 	if (bootverbose)
-		printf("VMBUS: Portocal Version: %d.%d\n",
+		printf("VMBUS: Protocol Version: %d.%d\n",
 		    version >> 16, version & 0xFFFF);
 
 	sema_destroy(&msg_info->wait_sema);
@@ -426,12 +426,6 @@ VmbusProcessChannelEvent(uint32_t relid)
 	// mtx_unlock(&channel->inbound_lock);
 }
 
-#ifdef HV_DEBUG_INTR
-extern uint32_t hv_intr_count;
-extern uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
-extern uint32_t hv_vmbus_intr_cpu[MAXCPU];
-#endif
-
 /**
  * Handler for events
  */
@@ -452,17 +446,6 @@ hv_vmbus_on_events(void *arg) 
 	KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: "
 	    "cpu out of range!"));
 
-#ifdef HV_DEBUG_INTR
-	int i;
-	hv_vmbus_swintr_event_cpu[cpu]++;
-	if (hv_intr_count % 10000 == 0) {
-                printf("VMBUS: Total interrupt %d\n", hv_intr_count);
-                for (i = 0; i < mp_ncpus; i++)
-                        printf("VMBUS: hw cpu[%d]: %d, event sw intr cpu[%d]: %d\n",
-			    i, hv_vmbus_intr_cpu[i], i, hv_vmbus_swintr_event_cpu[i]);
-        }
-#endif
-
 	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
 	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
 		maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;

Copied: stable/10/sys/dev/hyperv/vmbus/hv_et.c (from r293873, head/sys/dev/hyperv/vmbus/hv_et.c)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/10/sys/dev/hyperv/vmbus/hv_et.c	Fri Feb 19 02:03:14 2016	(r295789, copy of r293873, head/sys/dev/hyperv/vmbus/hv_et.c)
@@ -0,0 +1,131 @@
+/*-
+ * Copyright (c) 2015 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/smp.h>
+#include <sys/time.h>
+#include <sys/timeet.h>
+
+#include "hv_vmbus_priv.h"
+
+#define HV_TIMER_FREQUENCY		(10 * 1000 * 1000LL) /* 100ns period */
+#define HV_MAX_DELTA_TICKS		0xffffffffLL
+#define HV_MIN_DELTA_TICKS		1LL
+
+static struct eventtimer et;
+static uint64_t periodticks[MAXCPU];
+
+static inline uint64_t
+sbintime2tick(sbintime_t time)
+{
+	struct timespec val;
+
+	val = sbttots(time);
+	return val.tv_sec * HV_TIMER_FREQUENCY + val.tv_nsec / 100;
+}
+
+static int
+hv_et_start(struct eventtimer *et, sbintime_t firsttime, sbintime_t periodtime)
+{
+	union hv_timer_config timer_cfg;
+	uint64_t current;
+
+	timer_cfg.as_uint64 = 0;
+	timer_cfg.auto_enable = 1;
+	timer_cfg.sintx = HV_VMBUS_MESSAGE_SINT;
+
+	periodticks[curcpu] = sbintime2tick(periodtime);
+	if (firsttime == 0)
+		firsttime = periodtime;
+
+	current = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	current += sbintime2tick(firsttime);
+
+	wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
+	wrmsr(HV_X64_MSR_STIMER0_COUNT, current);
+
+	return (0);
+}
+
+static int
+hv_et_stop(struct eventtimer *et)
+{
+	wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0);
+	wrmsr(HV_X64_MSR_STIMER0_COUNT, 0);
+
+	return (0);
+}
+
+/*
+ * Hyper-V synthetic timer (STIMER0) interrupt handler.
+ *
+ * If hv_et_start() left a non-zero period in periodticks[] for the
+ * current CPU, rewrite the STIMER0 config with the periodic bit set,
+ * load that period into the STIMER0 count register and clear the
+ * pending request.  Then, if the event timer is active, invoke its
+ * event callback with 'frame' installed as the thread's interrupt frame.
+ */
+void
+hv_et_intr(struct trapframe *frame)
+{
+	union hv_timer_config timer_cfg;
+	struct trapframe *oldframe;
+	struct thread *td;
+
+	if (periodticks[curcpu] != 0) {
+		/*
+		 * hv_et_start() already stored the period in timer ticks;
+		 * running it through sbintime2tick() again would treat the
+		 * tick count as an sbintime_t and yield a wrong period.
+		 */
+		uint64_t tick = periodticks[curcpu];
+		timer_cfg.as_uint64 = rdmsr(HV_X64_MSR_STIMER0_CONFIG);
+		timer_cfg.enable = 0;
+		timer_cfg.auto_enable = 1;
+		timer_cfg.periodic = 1;
+		periodticks[curcpu] = 0;
+
+		wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
+		wrmsr(HV_X64_MSR_STIMER0_COUNT, tick);
+	}
+
+	if (et.et_active) {
+		/* Save and restore the previous interrupt frame around the callback. */
+		td = curthread;
+		td->td_intr_nesting_level++;
+		oldframe = td->td_intr_frame;
+		td->td_intr_frame = frame;
+		et.et_event_cb(&et, et.et_arg);
+		td->td_intr_frame = oldframe;
+		td->td_intr_nesting_level--;
+	}
+}
+
+void
+hv_et_init(void)
+{
+	et.et_name = "HyperV";
+	et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU | ET_FLAGS_PERIODIC;
+	et.et_quality = 1000;
+	et.et_frequency = HV_TIMER_FREQUENCY;
+	et.et_min_period = (1LL << 32) / HV_TIMER_FREQUENCY;
+	et.et_max_period = HV_MAX_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY);
+	et.et_start = hv_et_start;
+	et.et_stop = hv_et_stop;
+	et.et_priv = &et;
+	et_register(&et);
+}
+

Modified: stable/10/sys/dev/hyperv/vmbus/hv_hv.c
==============================================================================
--- stable/10/sys/dev/hyperv/vmbus/hv_hv.c	Fri Feb 19 01:57:51 2016	(r295788)
+++ stable/10/sys/dev/hyperv/vmbus/hv_hv.c	Fri Feb 19 02:03:14 2016	(r295789)
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/pcpu.h>
 #include <sys/timetc.h>
 #include <machine/bus.h>
+#include <machine/md_var.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
@@ -44,23 +45,11 @@ __FBSDID("$FreeBSD$");
 
 #include "hv_vmbus_priv.h"
 
-#define HV_X64_MSR_GUEST_OS_ID		0x40000000
-
-#define HV_X64_CPUID_MIN		0x40000005
-#define HV_X64_CPUID_MAX		0x4000ffff
-#define HV_X64_MSR_TIME_REF_COUNT	0x40000020
-
 #define HV_NANOSECONDS_PER_SEC		1000000000L
 
 
 static u_int hv_get_timecount(struct timecounter *tc);
 
-static inline void do_cpuid_inline(unsigned int op, unsigned int *eax,
-	unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
-	__asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx),
-			     "=d" (*edx) : "0" (op), "c" (ecx));
-}
-
 /**
  * Globals
  */
@@ -86,27 +75,10 @@ hv_get_timecount(struct timecounter *tc)
 int
 hv_vmbus_query_hypervisor_presence(void) 
 {
-	u_int regs[4];
-	int hyper_v_detected = 0;
-
-	/*
-	 * When Xen is detected and native Xen PV support is enabled,
-	 * ignore Xen's HyperV emulation.
-	 */
-	if (vm_guest == VM_GUEST_XEN)
+	if (vm_guest != VM_GUEST_HV)
 		return (0);
 
-	do_cpuid(1, regs);
-	if (regs[2] & 0x80000000) { /* if(a hypervisor is detected) */
-		/* make sure this really is Hyper-V */
-		/* we look at the CPUID info */
-		do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs);
-		hyper_v_detected =
-				regs[0] >= HV_X64_CPUID_MIN &&
-				regs[0] <= HV_X64_CPUID_MAX &&
-				!memcmp("Microsoft Hv", &regs[1], 12);
-	}
-	return (hyper_v_detected);
+	return (hv_high >= HV_X64_CPUID_MIN && hv_high <= HV_X64_CPUID_MAX);
 }
 
 /**
@@ -115,10 +87,7 @@ hv_vmbus_query_hypervisor_presence(void)
 static int
 hv_vmbus_get_hypervisor_version(void) 
 {
-	unsigned int eax;
-	unsigned int ebx;
-	unsigned int ecx;
-	unsigned int edx;
+	u_int regs[4];
 	unsigned int maxLeaf;
 	unsigned int op;
 
@@ -127,28 +96,16 @@ hv_vmbus_get_hypervisor_version(void) 
 	 * Viridian is present
 	 * Query id and revision.
 	 */
-	eax = 0;
-	ebx = 0;
-	ecx = 0;
-	edx = 0;
 	op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION;
-	do_cpuid_inline(op, &eax, &ebx, &ecx, &edx);
+	do_cpuid(op, regs);
 
-	maxLeaf = eax;
-	eax = 0;
-	ebx = 0;
-	ecx = 0;
-	edx = 0;
+	maxLeaf = regs[0];
 	op = HV_CPU_ID_FUNCTION_HV_INTERFACE;
-	do_cpuid_inline(op, &eax, &ebx, &ecx, &edx);
+	do_cpuid(op, regs);
 
 	if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) {
-	    eax = 0;
-	    ebx = 0;
-	    ecx = 0;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201602190203.u1J23FIZ098390>