Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 15 Jan 2009 06:44:22 +0000 (UTC)
From:      Lawrence Stewart <lstewart@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r187289 - in head: . sys/netinet sys/sys
Message-ID:  <200901150644.n0F6iMk9067257@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: lstewart
Date: Thu Jan 15 06:44:22 2009
New Revision: 187289
URL: http://svn.freebsd.org/changeset/base/187289

Log:
  Add TCP Appropriate Byte Counting (RFC 3465) support to kernel.
  
  The new behaviour is on by default. Setting the net.inet.tcp.rfc3465
  sysctl to 0 disables it and restores the previous behaviour.
  
  The patch changes struct tcpcb in sys/netinet/tcp_var.h which breaks
  the ABI. Bump __FreeBSD_version to 800061 accordingly. User space tools
  that rely on the size of struct tcpcb (e.g. sockstat) need to be recompiled.
  
  Reviewed by:	rpaulo, gnn
  Approved by:	gnn, kmacy (mentors)
  Sponsored by:	FreeBSD Foundation

Modified:
  head/UPDATING
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_var.h
  head/sys/netinet/vinet.h
  head/sys/sys/param.h

Modified: head/UPDATING
==============================================================================
--- head/UPDATING	Thu Jan 15 05:04:31 2009	(r187288)
+++ head/UPDATING	Thu Jan 15 06:44:22 2009	(r187289)
@@ -22,6 +22,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.
 	to maximize performance.  (To disable malloc debugging, run
 	ln -s aj /etc/malloc.conf.)
 
+20090115:
+	TCP Appropriate Byte Counting (RFC 3465) support added to kernel.
+	New field in struct tcpcb breaks ABI, so bump __FreeBSD_version to
+	800061. User space tools that rely on the size of struct tcpcb in
+	tcp_var.h (e.g. sockstat) need to be recompiled.
+
 20081225:
 	ng_tty(4) module updated to match the new TTY subsystem.
 	Due to API change, user-level applications must be updated.

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c	Thu Jan 15 05:04:31 2009	(r187288)
+++ head/sys/netinet/tcp_input.c	Thu Jan 15 06:44:22 2009	(r187289)
@@ -117,6 +117,8 @@ int	tcp_insecure_rst;
 int	tcp_do_autorcvbuf;
 int	tcp_autorcvbuf_inc;
 int	tcp_autorcvbuf_max;
+int	tcp_do_rfc3465;
+int	tcp_abc_l_var;
 #endif
 
 SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_tcp, TCPCTL_STATS, stats,
@@ -144,6 +146,13 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_inet
     tcp_do_rfc3390, 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
+    tcp_do_rfc3465, 0,
+    "Enable RFC 3465 (Appropriate Byte Counting)");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_RW,
+    tcp_abc_l_var, 2,
+    "Cap the max cwnd increment during slow-start to this number of segments");
+
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_ecn, OID_AUTO, enable,
     CTLFLAG_RW, tcp_do_ecn, 0, "TCP ECN support");
@@ -2293,20 +2302,59 @@ process_ACK:
 
 		/*
 		 * When new data is acked, open the congestion window.
-		 * If the window gives us less than ssthresh packets
-		 * in flight, open exponentially (maxseg per packet).
-		 * Otherwise open linearly: maxseg per window
-		 * (maxseg^2 / cwnd per packet).
-		 * If cwnd > maxseg^2, fix the cwnd increment at 1 byte
-		 * to avoid capping cwnd (as suggested in RFC2581).
+		 * Method depends on which congestion control state we're
+		 * in (slow start or cong avoid) and if ABC (RFC 3465) is
+		 * enabled.
+		 *
+		 * slow start: cwnd <= ssthresh
+		 * cong avoid: cwnd > ssthresh
+		 *
+		 * slow start and ABC (RFC 3465):
+		 *   Grow cwnd exponentially by the amount of data
+		 *   ACKed capping the max increment per ACK to
+		 *   (abc_l_var * maxseg) bytes.
+		 *
+		 * slow start without ABC (RFC 2581):
+		 *   Grow cwnd exponentially by maxseg per ACK.
+		 *
+		 * cong avoid and ABC (RFC 3465):
+		 *   Grow cwnd linearly by maxseg per RTT for each
+		 *   cwnd worth of ACKed data.
+		 *
+		 * cong avoid without ABC (RFC 2581):
+		 *   Grow cwnd linearly by approximately maxseg per RTT using
+		 *   maxseg^2 / cwnd per ACK as the increment.
+		 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
+		 *   avoid capping cwnd.
 		 */
 		if ((!V_tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) ||
 		    !IN_FASTRECOVERY(tp)) {
 			u_int cw = tp->snd_cwnd;
 			u_int incr = tp->t_maxseg;
-			if (cw > tp->snd_ssthresh)
-				incr = max((incr * incr / cw), 1);
-			tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
+			/* In congestion avoidance? */
+			if (cw > tp->snd_ssthresh) {
+				if (V_tcp_do_rfc3465) {
+					tp->t_bytes_acked += acked;
+					if (tp->t_bytes_acked >= tp->snd_cwnd)
+						tp->t_bytes_acked -= cw;
+					else
+						incr = 0;
+				}
+				else
+					incr = max((incr * incr / cw), 1);
+			/*
+			 * In slow-start with ABC enabled and no RTO in sight?
+			 * (Must not use abc_l_var > 1 if slow starting after an
+			 * RTO. On RTO, snd_nxt = snd_una, so the snd_nxt ==
+			 * snd_max check is sufficient to handle this).
+			 */
+			} else if (V_tcp_do_rfc3465 &&
+			    tp->snd_nxt == tp->snd_max)
+				incr = min(acked,
+				    V_tcp_abc_l_var * tp->t_maxseg);
+			/* ABC is on by default, so incr is frequently 0. */
+			if (incr > 0)
+				tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
 		}
 		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > so->so_snd.sb_cc) {
@@ -2328,8 +2376,10 @@ process_ACK:
 			tp->snd_recover = th->th_ack - 1;
 		if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
 		    IN_FASTRECOVERY(tp) &&
-		    SEQ_GEQ(th->th_ack, tp->snd_recover))
+		    SEQ_GEQ(th->th_ack, tp->snd_recover)) {
 			EXIT_FASTRECOVERY(tp);
+			tp->t_bytes_acked = 0;
+		}
 		tp->snd_una = th->th_ack;
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (SEQ_GT(tp->snd_una, tp->snd_recover))

Modified: head/sys/netinet/tcp_subr.c
==============================================================================
--- head/sys/netinet/tcp_subr.c	Thu Jan 15 05:04:31 2009	(r187288)
+++ head/sys/netinet/tcp_subr.c	Thu Jan 15 06:44:22 2009	(r187289)
@@ -316,6 +316,8 @@ tcp_init(void)
 	V_tcp_do_autorcvbuf = 1;
 	V_tcp_autorcvbuf_inc = 16*1024;
 	V_tcp_autorcvbuf_max = 256*1024;
+	V_tcp_do_rfc3465 = 1;
+	V_tcp_abc_l_var = 2;
 
 	V_tcp_mssdflt = TCP_MSS;
 #ifdef INET6

Modified: head/sys/netinet/tcp_timer.c
==============================================================================
--- head/sys/netinet/tcp_timer.c	Thu Jan 15 05:04:31 2009	(r187288)
+++ head/sys/netinet/tcp_timer.c	Thu Jan 15 06:44:22 2009	(r187289)
@@ -587,6 +587,7 @@ tcp_timer_rexmt(void * xtp)
 		tp->t_dupacks = 0;
 	}
 	EXIT_FASTRECOVERY(tp);
+	tp->t_bytes_acked = 0;
 	(void) tcp_output(tp);
 
 out:

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h	Thu Jan 15 05:04:31 2009	(r187288)
+++ head/sys/netinet/tcp_var.h	Thu Jan 15 06:44:22 2009	(r187289)
@@ -189,6 +189,7 @@ struct tcpcb {
 	void	*t_pspare[3];		/* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */
 	struct toe_usrreqs *t_tu;       /* offload operations vector */
 	void	*t_toe;			/* TOE pcb pointer */
+	int	t_bytes_acked;		/* # bytes acked during current RTT */
 };
 
 /*

Modified: head/sys/netinet/vinet.h
==============================================================================
--- head/sys/netinet/vinet.h	Thu Jan 15 05:04:31 2009	(r187288)
+++ head/sys/netinet/vinet.h	Thu Jan 15 06:44:22 2009	(r187289)
@@ -127,6 +127,8 @@ struct vnet_inet {
 	int	_drop_synfin;
 	int	_tcp_do_rfc3042;
 	int	_tcp_do_rfc3390;
+	int	_tcp_do_rfc3465;
+	int	_tcp_abc_l_var;
 	int	_tcp_do_ecn;
 	int	_tcp_ecn_maxretries;
 	int	_tcp_insecure_rst;
@@ -291,6 +293,7 @@ extern struct vnet_inet vnet_inet_0;
 #define	V_subnetsarelocal	VNET_INET(subnetsarelocal)
 #define	V_tcb			VNET_INET(tcb)
 #define	V_tcbinfo		VNET_INET(tcbinfo)
+#define	V_tcp_abc_l_var		VNET_INET(tcp_abc_l_var)
 #define	V_tcp_autorcvbuf_inc	VNET_INET(tcp_autorcvbuf_inc)
 #define	V_tcp_autorcvbuf_max	VNET_INET(tcp_autorcvbuf_max)
 #define	V_tcp_autosndbuf_inc	VNET_INET(tcp_autosndbuf_inc)
@@ -303,6 +306,7 @@ extern struct vnet_inet vnet_inet_0;
 #define	V_tcp_do_rfc1323	VNET_INET(tcp_do_rfc1323)
 #define	V_tcp_do_rfc3042	VNET_INET(tcp_do_rfc3042)
 #define	V_tcp_do_rfc3390	VNET_INET(tcp_do_rfc3390)
+#define	V_tcp_do_rfc3465	VNET_INET(tcp_do_rfc3465)
 #define	V_tcp_do_sack		VNET_INET(tcp_do_sack)
 #define	V_tcp_do_tso		VNET_INET(tcp_do_tso)
 #define	V_tcp_ecn_maxretries	VNET_INET(tcp_ecn_maxretries)

Modified: head/sys/sys/param.h
==============================================================================
--- head/sys/sys/param.h	Thu Jan 15 05:04:31 2009	(r187288)
+++ head/sys/sys/param.h	Thu Jan 15 06:44:22 2009	(r187289)
@@ -57,7 +57,7 @@
  *		is created, otherwise 1.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 800060	/* Master, propagated to newvers */
+#define __FreeBSD_version 800061	/* Master, propagated to newvers */
 
 #ifndef LOCORE
 #include <sys/types.h>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200901150644.n0F6iMk9067257>