Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 17 Sep 2010 22:05:27 +0000 (UTC)
From:      Andre Oppermann <andre@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r212803 - head/sys/netinet
Message-ID:  <201009172205.o8HM5RPG043265@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: andre
Date: Fri Sep 17 22:05:27 2010
New Revision: 212803
URL: http://svn.freebsd.org/changeset/base/212803

Log:
  Rearrange the TSO code to make it more readable and to clearly
  separate the decision logic, of whether we can do TSO, and the
  calculation of the burst length into two distinct parts.
  
  Change the way the TSO burst length calculation is done. While
  TSO could do bursts of 65535 bytes that can't be represented in
  ip_len together with the IP and TCP header. Account for that and
  use IP_MAXPACKET instead of TCP_MAXWIN as base constant (both
  have the same value of 64K). When more data is available prevent
  less than MSS sized segments from being sent during the current
  TSO burst.
  
  Add two more KASSERTs to ensure the integrity of the packets.
  
  Tested by:	Ben Wilber <ben-at-desync com>
  MFC after:	10 days

Modified:
  head/sys/netinet/tcp_output.c

Modified: head/sys/netinet/tcp_output.c
==============================================================================
--- head/sys/netinet/tcp_output.c	Fri Sep 17 21:53:56 2010	(r212802)
+++ head/sys/netinet/tcp_output.c	Fri Sep 17 22:05:27 2010	(r212803)
@@ -465,9 +465,8 @@ after_sack_rexmit:
 	}
 
 	/*
-	 * Truncate to the maximum segment length or enable TCP Segmentation
-	 * Offloading (if supported by hardware) and ensure that FIN is removed
-	 * if the length no longer contains the last data byte.
+	 * Decide if we can use TCP Segmentation Offloading (if supported by
+	 * hardware).
 	 *
 	 * TSO may only be used if we are in a pure bulk sending state.  The
 	 * presence of TCP-MD5, SACK retransmits, SACK advertizements and
@@ -475,10 +474,6 @@ after_sack_rexmit:
 	 * (except for the sequence number) for all generated packets.  This
 	 * makes it impossible to transmit any options which vary per generated
 	 * segment or packet.
-	 *
-	 * The length of TSO bursts is limited to TCP_MAXWIN.  That limit and
-	 * removal of FIN (if not already catched here) are handled later after
-	 * the exact length of the TCP options are known.
 	 */
 #ifdef IPSEC
 	/*
@@ -487,22 +482,15 @@ after_sack_rexmit:
 	 */
 	ipsec_optlen = ipsec_hdrsiz_tcp(tp);
 #endif
-	if (len > tp->t_maxseg) {
-		if ((tp->t_flags & TF_TSO) && V_tcp_do_tso &&
-		    ((tp->t_flags & TF_SIGNATURE) == 0) &&
-		    tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
-		    tp->t_inpcb->inp_options == NULL &&
-		    tp->t_inpcb->in6p_options == NULL
+	if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
+	    ((tp->t_flags & TF_SIGNATURE) == 0) &&
+	    tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
 #ifdef IPSEC
-		    && ipsec_optlen == 0
+	    ipsec_optlen == 0 &&
 #endif
-		    ) {
-			tso = 1;
-		} else {
-			len = tp->t_maxseg;
-			sendalot = 1;
-		}
-	}
+	    tp->t_inpcb->inp_options == NULL &&
+	    tp->t_inpcb->in6p_options == NULL)
+		tso = 1;
 
 	if (sack_rxmit) {
 		if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
@@ -732,28 +720,53 @@ send:
 	 * bump the packet length beyond the t_maxopd length.
 	 * Clear the FIN bit because we cut off the tail of
 	 * the segment.
-	 *
-	 * When doing TSO limit a burst to TCP_MAXWIN minus the
-	 * IP, TCP and Options length to keep ip->ip_len from
-	 * overflowing.  Prevent the last segment from being
-	 * fractional thus making them all equal sized and set
-	 * the flag to continue sending.  TSO is disabled when
-	 * IP options or IPSEC are present.
 	 */
 	if (len + optlen + ipoptlen > tp->t_maxopd) {
 		flags &= ~TH_FIN;
+
 		if (tso) {
-			if (len > TCP_MAXWIN - hdrlen - optlen) {
-				len = TCP_MAXWIN - hdrlen - optlen;
-				len = len - (len % (tp->t_maxopd - optlen));
+			KASSERT(ipoptlen == 0,
+			    ("%s: TSO can't do IP options", __func__));
+
+			/*
+			 * Limit a burst to IP_MAXPACKET minus IP,
+			 * TCP and options length to keep ip->ip_len
+			 * from overflowing.
+			 */
+			if (len > IP_MAXPACKET - hdrlen) {
+				len = IP_MAXPACKET - hdrlen;
+				sendalot = 1;
+			}
+
+			/*
+			 * Prevent the last segment from being
+			 * fractional unless the send sockbuf can
+			 * be emptied.
+			 */
+			if (sendalot && off + len < so->so_snd.sb_cc) {
+				len -= len % (tp->t_maxopd - optlen);
 				sendalot = 1;
-			} else if (tp->t_flags & TF_NEEDFIN)
+			}
+
+			/*
+			 * Send the FIN in a separate segment
+			 * after the bulk sending is done.
+			 * We don't trust the TSO implementations
+			 * to clear the FIN flag on all but the
+			 * last segment.
+			 */
+			if (tp->t_flags & TF_NEEDFIN)
 				sendalot = 1;
+
 		} else {
 			len = tp->t_maxopd - optlen - ipoptlen;
 			sendalot = 1;
 		}
-	}
+	} else
+		tso = 0;
+
+	KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET,
+	    ("%s: len > IP_MAXPACKET", __func__));
 
 /*#ifdef DIAGNOSTIC*/
 #ifdef INET6
@@ -1068,6 +1081,9 @@ send:
 		m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
 	}
 
+	KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL),
+	    ("%s: mbuf chain shorter than expected", __func__));
+
 	/*
 	 * In transmit state, time the transmission and arrange for
 	 * the retransmit.  In persist state, just set snd_max.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201009172205.o8HM5RPG043265>