Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 4 Dec 2020 11:29:27 +0000 (UTC)
From:      Richard Scheffenegger <rscheff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r368327 - head/sys/netinet
Message-ID:  <202012041129.0B4BTRPF098802@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rscheff
Date: Fri Dec  4 11:29:27 2020
New Revision: 368327
URL: https://svnweb.freebsd.org/changeset/base/368327

Log:
  Add TCP feature Proportional Rate Reduction (PRR) - RFC6937
  
  PRR improves loss recovery and avoids RTOs in a wide range
  of scenarios (ACK thinning) over regular SACK loss recovery.
  
  PRR is disabled by default; enable it with net.inet.tcp.do_prr = 1.
  Performance may be impeded by token bucket rate policers at
  the bottleneck, in which case net.inet.tcp.do_prr_conservative = 1
  should be set in addition.
  
  Submitted by:	Aris Angelogiannopoulos
  Sponsored by:	NetApp, Inc.
  Differential Revision:	https://reviews.freebsd.org/D18892

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c	Fri Dec  4 04:39:48 2020	(r368326)
+++ head/sys/netinet/tcp_input.c	Fri Dec  4 11:29:27 2020	(r368327)
@@ -153,6 +153,16 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFL
     &VNET_NAME(drop_synfin), 0,
     "Drop TCP packets with SYN+FIN set");
 
+VNET_DEFINE(int, tcp_do_prr_conservative) = 0;	/* off by default */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(tcp_do_prr_conservative), 0,
+    "Do conservative Proportional Rate Reduction");
+
+VNET_DEFINE(int, tcp_do_prr) = 0;	/* off by default, as the commit log states */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(tcp_do_prr), 1,
+    "Enable Proportional Rate Reduction per RFC 6937");
+
 VNET_DEFINE(int, tcp_do_newcwv) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_newcwv), 0,
@@ -2554,7 +2564,55 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 				     IN_FASTRECOVERY(tp->t_flags)) {
 					cc_ack_received(tp, th, nsegs,
 					    CC_DUPACK);
-					if ((tp->t_flags & TF_SACK_PERMIT) &&
+					if (V_tcp_do_prr &&
+					    IN_FASTRECOVERY(tp->t_flags) &&
+					    (tp->t_flags & TF_SACK_PERMIT)) {
+						long snd_cnt = 0, limit = 0;
+						long del_data = 0, pipe = 0;
+						/*
+						 * In a duplicate ACK del_data is only the
+						 * diff_in_sack. If no SACK is used del_data
+						 * will be 0. Pipe is the amount of data we
+						 * estimate to be in the network.
+						 */
+						del_data = tp->sackhint.delivered_data;
+						pipe = (tp->snd_nxt - tp->snd_fack) +
+							tp->sackhint.sack_bytes_rexmit;
+						tp->sackhint.prr_delivered += del_data;
+						if (pipe > tp->snd_ssthresh) {
+							snd_cnt = (tp->sackhint.prr_delivered *
+							    tp->snd_ssthresh /
+							    tp->sackhint.recover_fs) +
+							    1 - tp->sackhint.sack_bytes_rexmit;
+						} else {
+							if (V_tcp_do_prr_conservative)
+								limit = tp->sackhint.prr_delivered -
+									tp->sackhint.sack_bytes_rexmit;
+							else
+								if ((tp->sackhint.prr_delivered -
+								    tp->sackhint.sack_bytes_rexmit) >
+								    del_data)
+									limit = tp->sackhint.prr_delivered -
+									    tp->sackhint.sack_bytes_rexmit +
+									    maxseg;
+								else
+									limit = del_data + maxseg;
+							if ((tp->snd_ssthresh - pipe) < limit)
+								snd_cnt = tp->snd_ssthresh - pipe;
+							else
+								snd_cnt = limit;
+						}
+						snd_cnt = max((snd_cnt / maxseg), 0);
+						/*
+						 * Send snd_cnt new data into the network in
+						 * response to this ACK. If there is going
+						 * to be a SACK retransmission, adjust snd_cwnd
+						 * accordingly.
+						 */
+						tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
+						    tp->sackhint.sack_bytes_rexmit +
+						    (snd_cnt * maxseg);
+					} else if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    IN_FASTRECOVERY(tp->t_flags)) {
 						int awnd;
 
@@ -2583,13 +2641,14 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 					tcp_seq onxt = tp->snd_nxt;
 
 					/*
-					 * If we're doing sack, check to
-					 * see if we're already in sack
+					 * If we're doing sack, or prr, check
+					 * to see if we're already in sack
 					 * recovery. If we're not doing sack,
 					 * check to see if we're in newreno
 					 * recovery.
 					 */
-					if (tp->t_flags & TF_SACK_PERMIT) {
+					if (V_tcp_do_prr ||
+					    (tp->t_flags & TF_SACK_PERMIT)) {
 						if (IN_FASTRECOVERY(tp->t_flags)) {
 							tp->t_dupacks = 0;
 							break;
@@ -2607,6 +2666,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 					    CC_DUPACK);
 					tcp_timer_activate(tp, TT_REXMT, 0);
 					tp->t_rtttime = 0;
+					if (V_tcp_do_prr) {
+					    /*
+					     * snd_ssthresh is already updated by
+					     * cc_cong_signal.
+					     */
+					    tp->sackhint.prr_delivered = 0;
+					    tp->sackhint.sack_bytes_rexmit = 0;
+					    if (!(tp->sackhint.recover_fs = tp->snd_nxt - tp->snd_una))
+						tp->sackhint.recover_fs = 1;
+					}
 					if (tp->t_flags & TF_SACK_PERMIT) {
 						TCPSTAT_INC(
 						    tcps_sack_recovery_episode);
@@ -2713,7 +2782,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 		if (IN_FASTRECOVERY(tp->t_flags)) {
 			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
 				if (tp->t_flags & TF_SACK_PERMIT)
-					tcp_sack_partialack(tp, th);
+					if (V_tcp_do_prr)
+						tcp_prr_partialack(tp, th);
+					else
+						tcp_sack_partialack(tp, th);
 				else
 					tcp_newreno_partial_ack(tp, th);
 			} else
@@ -3837,6 +3909,91 @@ tcp_mssopt(struct in_conninfo *inc)
 		mss = max(maxmtu, thcmtu) - min_protoh;
 
 	return (mss);
+}
+
+/*
+ * Handle a partial ACK received during fast recovery when Proportional
+ * Rate Reduction (PRR) is in use.
+ *
+ * tp is the connection's control block and th the header of the ACK being
+ * processed. The function adds the data this ACK reports as delivered to
+ * sackhint.prr_delivered, derives how many segments may be sent in
+ * response, recomputes snd_cwnd from that, sets TF_ACKNOW and calls
+ * tcp_output(). The inpcb write lock is asserted on entry.
+ */
+void
+tcp_prr_partialack(struct tcpcb *tp, struct tcphdr *th)
+{
+	long snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
+	int maxseg = tcp_maxseg(tp);
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+
+	tcp_timer_activate(tp, TT_REXMT, 0);
+	tp->t_rtttime = 0;
+	/*
+	 * Compute the amount of data that this ACK is indicating
+	 * (del_data) and an estimate of how many bytes are in the
+	 * network.
+	 */
+	if (SEQ_GEQ(th->th_ack, tp->snd_una))
+		del_data = BYTES_THIS_ACK(tp, th);
+	del_data += tp->sackhint.delivered_data;
+	pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit;
+	tp->sackhint.prr_delivered += del_data;
+	/*
+	 * Proportional Rate Reduction
+	 */
+	if (pipe > tp->snd_ssthresh) {
+		/*
+		 * recover_fs is initialised at recovery entry only when PRR
+		 * is enabled at that time, so it may still be zero if the
+		 * sysctl was switched on in the middle of a recovery
+		 * episode. Make sure the division below cannot trap.
+		 */
+		if (tp->sackhint.recover_fs == 0) {
+			tp->sackhint.recover_fs = tp->snd_nxt - tp->snd_una;
+			if (tp->sackhint.recover_fs == 0)
+				tp->sackhint.recover_fs = 1;
+		}
+		/*
+		 * Widen to 64 bits before multiplying: prr_delivered is a
+		 * 32-bit byte counter and its product with snd_ssthresh
+		 * can exceed 32 bits. Signed arithmetic also lets snd_cnt
+		 * go negative when sack_bytes_rexmit exceeds the
+		 * proportional share.
+		 */
+		snd_cnt = ((int64_t)tp->sackhint.prr_delivered *
+		    tp->snd_ssthresh / tp->sackhint.recover_fs) -
+		    tp->sackhint.sack_bytes_rexmit;
+	} else {
+		if (V_tcp_do_prr_conservative)
+			limit = tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit;
+		else
+			if ((tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit) > del_data)
+				limit = tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit + maxseg;
+			else
+				limit = del_data + maxseg;
+		snd_cnt = min((tp->snd_ssthresh - pipe), limit);
+	}
+	/*
+	 * Clamp a negative byte count to zero before converting it to
+	 * whole segments. Done by hand because the kernel's max() takes
+	 * u_int arguments and would turn a negative value into a very
+	 * large one.
+	 */
+	if (snd_cnt < 0)
+		snd_cnt = 0;
+	snd_cnt /= maxseg;
+	/*
+	 * Send snd_cnt new data into the network in response to this ack.
+	 * If there is going to be a SACK retransmission, adjust snd_cwnd
+	 * accordingly.
+	 */
+	tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
+		tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg);
+	tp->t_flags |= TF_ACKNOW;
+	(void) tcp_output(tp);
 }
 
 /*

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h	Fri Dec  4 04:39:48 2020	(r368326)
+++ head/sys/netinet/tcp_var.h	Fri Dec  4 11:29:27 2020	(r368327)
@@ -113,8 +113,9 @@ struct sackhint {
 	int32_t		sacked_bytes;	/* Total sacked bytes reported by the
 					 * receiver via sack option
 					 */
-	uint32_t	_pad1[1];	/* TBD */
-	uint64_t	_pad[1];	/* TBD */
+	uint32_t	recover_fs;	/* Flight Size at the start of Loss recovery */
+	uint32_t	prr_delivered;	/* Total bytes delivered using PRR */
+	uint32_t	_pad[1];	/* TBD */
 };
 
 #define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
@@ -866,6 +867,8 @@ VNET_DECLARE(int, tcp_sendspace);
 VNET_DECLARE(struct inpcbhead, tcb);
 VNET_DECLARE(struct inpcbinfo, tcbinfo);
 
+#define	V_tcp_do_prr			VNET(tcp_do_prr)
+#define	V_tcp_do_prr_conservative	VNET(tcp_do_prr_conservative)
 #define	V_tcp_do_newcwv			VNET(tcp_do_newcwv)
 #define	V_drop_synfin			VNET(drop_synfin)
 #define	V_path_mtu_discovery		VNET(path_mtu_discovery)
@@ -1051,6 +1054,7 @@ void	 tcp_clean_dsack_blocks(struct tcpcb *tp);
 void	 tcp_clean_sackreport(struct tcpcb *tp);
 void	 tcp_sack_adjust(struct tcpcb *tp);
 struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
+void	 tcp_prr_partialack(struct tcpcb *, struct tcphdr *);
 void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void	 tcp_free_sackholes(struct tcpcb *tp);
 int	 tcp_newreno(struct tcpcb *, struct tcphdr *);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202012041129.0B4BTRPF098802>