Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 7 Jun 2018 18:18:14 +0000 (UTC)
From:      Randall Stewart <rrs@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r334804 - in head/sys: kern modules/tcp modules/tcp/rack netinet netinet/tcp_stacks sys
Message-ID:  <201806071818.w57IIENp080093@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rrs
Date: Thu Jun  7 18:18:13 2018
New Revision: 334804
URL: https://svnweb.freebsd.org/changeset/base/334804

Log:
  This commit brings in a new refactored TCP stack called Rack.
  Rack includes the following features:
   - A different SACK processing scheme (the old sack structures are not used).
   - RACK (Recent acknowledgment) where counting dup-acks is no longer done
          instead time is used to know when to retransmit. (see the I-D)
   - TLP (Tail Loss Probe) where we will probe for tail-losses to attempt
          to try not to take a retransmit time-out. (see the I-D)
   - Burst mitigation using TCPHPTS
   - PRR (partial rate reduction) see the RFC.
  
  Once built into your kernel, you can select this stack either by
  a socket option, giving the stack name "rack", or by setting
  the global sysctl so the default is rack.
  
  Note that any connection that does not support SACK will be kicked
  back to the "default" base FreeBSD stack (currently known as "default").
  
  To build this into your kernel you will need to enable in your
  kernel:
     makeoptions WITH_EXTRA_TCP_STACKS=1
     options TCPHPTS
  
  Sponsored by:	Netflix Inc.
  Differential Revision:		https://reviews.freebsd.org/D15525

Added:
  head/sys/modules/tcp/rack/
  head/sys/modules/tcp/rack/Makefile   (contents, props changed)
  head/sys/netinet/tcp_stacks/rack.c   (contents, props changed)
  head/sys/netinet/tcp_stacks/rack_bbr_common.h   (contents, props changed)
  head/sys/netinet/tcp_stacks/sack_filter.c   (contents, props changed)
  head/sys/netinet/tcp_stacks/sack_filter.h   (contents, props changed)
  head/sys/netinet/tcp_stacks/tcp_rack.h   (contents, props changed)
Modified:
  head/sys/kern/uipc_sockbuf.c
  head/sys/modules/tcp/Makefile
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_log_buf.h
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_stacks/fastpath.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_timer.h
  head/sys/netinet/tcp_var.h
  head/sys/sys/mbuf.h
  head/sys/sys/queue.h
  head/sys/sys/sockbuf.h
  head/sys/sys/time.h

Modified: head/sys/kern/uipc_sockbuf.c
==============================================================================
--- head/sys/kern/uipc_sockbuf.c	Thu Jun  7 18:06:01 2018	(r334803)
+++ head/sys/kern/uipc_sockbuf.c	Thu Jun  7 18:18:13 2018	(r334804)
@@ -1283,6 +1283,55 @@ sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_i
 	return (ret);
 }
 
+struct mbuf *
+sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
+{
+	struct mbuf *m;
+
+	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
+	if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
+		*moff = off;
+		if (sb->sb_sndptr == NULL) {
+			sb->sb_sndptr = sb->sb_mb;
+			sb->sb_sndptroff = 0;
+		}
+		return (sb->sb_mb);
+	} else {
+		m = sb->sb_sndptr;
+		off -= sb->sb_sndptroff;
+	}
+	*moff = off;
+	return (m);
+}
+
+void
+sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
+{
+	/*
+	 * A small copy was done, advance forward the sb_sbsndptr to cover
+	 * it.
+	 */
+	struct mbuf *m;
+
+	if (mb != sb->sb_sndptr) {
+		/* Did not copyout at the same mbuf */
+		return;
+	}
+	m = mb;
+	while (m && (len > 0)) {
+		if (len >= m->m_len) {
+			len -= m->m_len;
+			if (m->m_next) {
+				sb->sb_sndptroff += m->m_len;
+				sb->sb_sndptr = m->m_next;
+			}
+			m = m->m_next;
+		} else {
+			len = 0;
+		}
+	}
+}
+
 /*
  * Return the first mbuf and the mbuf data offset for the provided
  * send offset without changing the "sb_sndptroff" field.

Modified: head/sys/modules/tcp/Makefile
==============================================================================
--- head/sys/modules/tcp/Makefile	Thu Jun  7 18:06:01 2018	(r334803)
+++ head/sys/modules/tcp/Makefile	Thu Jun  7 18:18:13 2018	(r334804)
@@ -7,10 +7,12 @@ SYSDIR?=${SRCTOP}/sys
 
 SUBDIR=	\
 	${_tcp_fastpath} \
+        ${_tcp_rack} \
 	${_tcpmd5} \
 
 .if ${MK_EXTRA_TCP_STACKS} != "no" || defined(ALL_MODULES)
 _tcp_fastpath=	fastpath
+_tcp_rack= 	rack
 .endif
 
 .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \

Added: head/sys/modules/tcp/rack/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/modules/tcp/rack/Makefile	Thu Jun  7 18:18:13 2018	(r334804)
@@ -0,0 +1,24 @@
+#
+# $FreeBSD$
+#
+
+.PATH: ${.CURDIR}/../../../netinet/tcp_stacks
+
+STACKNAME=	rack
+KMOD=	tcp_${STACKNAME}
+SRCS=	rack.c sack_filter.c
+
+SRCS+=	opt_inet.h opt_inet6.h opt_ipsec.h
+SRCS+=	opt_tcpdebug.h
+SRCS+=	opt_kern_tls.h
+
+#
+# Enable full debugging
+#
+#CFLAGS += -g
+
+CFLAGS+=	-DMODNAME=${KMOD}
+CFLAGS+=	-DSTACKNAME=${STACKNAME}
+CFLAGS+=	-DSTACKALIAS=rack_18q21
+
+.include <bsd.kmod.mk>

Modified: head/sys/netinet/tcp.h
==============================================================================
--- head/sys/netinet/tcp.h	Thu Jun  7 18:06:01 2018	(r334803)
+++ head/sys/netinet/tcp.h	Thu Jun  7 18:18:13 2018	(r334804)
@@ -176,6 +176,7 @@ struct tcphdr {
 				   device */
 #define	TCP_CONGESTION	64	/* get/set congestion control algorithm */
 #define	TCP_CCALGOOPT	65	/* get/set cc algorithm specific options */
+#define TCP_DELACK  	72	/* socket option for delayed ack */
 #define	TCP_KEEPINIT	128	/* N, time to establish connection */
 #define	TCP_KEEPIDLE	256	/* L,N,X start keeplives after this period */
 #define	TCP_KEEPINTVL	512	/* L,N interval between keepalives */
@@ -184,6 +185,61 @@ struct tcphdr {
 #define	TCP_PCAP_OUT	2048	/* number of output packets to keep */
 #define	TCP_PCAP_IN	4096	/* number of input packets to keep */
 #define TCP_FUNCTION_BLK 8192	/* Set the tcp function pointers to the specified stack */
+/* Options for Rack and BBR */
+#define TCP_RACK_PROP	      1051 /* RACK proportional rate reduction (bool) */
+#define TCP_RACK_TLP_REDUCE   1052 /* RACK TLP cwnd reduction (bool) */
+#define TCP_RACK_PACE_REDUCE  1053 /* RACK Pacing reduction factor (divisor) */
+#define TCP_RACK_PACE_MAX_SEG 1054 /* Max segments in a pace */
+#define TCP_RACK_PACE_ALWAYS  1055 /* Use the always pace method */
+#define TCP_RACK_PROP_RATE    1056 /* The proportional reduction rate */
+#define TCP_RACK_PRR_SENDALOT 1057 /* Allow PRR to send more than one seg */
+#define TCP_RACK_MIN_TO       1058 /* Minimum time between rack t-o's in ms */
+#define TCP_RACK_EARLY_RECOV  1059 /* Should recovery happen early (bool) */
+#define TCP_RACK_EARLY_SEG    1060 /* If early recovery max segments */
+#define TCP_RACK_REORD_THRESH 1061 /* RACK reorder threshold (shift amount) */
+#define TCP_RACK_REORD_FADE   1062 /* Does reordering fade after ms time */
+#define TCP_RACK_TLP_THRESH   1063 /* RACK TLP theshold i.e. srtt+(srtt/N) */
+#define TCP_RACK_PKT_DELAY    1064 /* RACK added ms i.e. rack-rtt + reord + N */
+#define TCP_RACK_TLP_INC_VAR  1065 /* Does TLP include rtt variance in t-o */
+#define TCP_RACK_SESS_CWV     1066 /* Enable RFC7611 cwnd validation on sess */
+#define TCP_BBR_IWINTSO	      1067 /* Initial TSO window for BBRs first sends */
+#define TCP_BBR_RECFORCE      1068 /* Enter recovery force out a segment disregard pacer */
+#define TCP_BBR_STARTUP_PG    1069 /* Startup pacing gain */
+#define TCP_BBR_DRAIN_PG      1070 /* Drain pacing gain */
+#define TCP_BBR_RWND_IS_APP   1071 /* Rwnd limited is considered app limited */
+#define TCP_BBR_PROBE_RTT_INT 1072 /* How long in useconds between probe-rtt */
+#define TCP_BBR_ONE_RETRAN    1073 /* Is only one segment allowed out during retran */
+#define TCP_BBR_STARTUP_LOSS_EXIT 1074	/* Do we exit a loss during startup if not 20% incr */
+#define TCP_BBR_USE_LOWGAIN   1075 /* lower the gain in PROBE_BW enable */
+#define TCP_BBR_LOWGAIN_THRESH 1076 /* How many cycles do we stay in lowgain */
+#define TCP_BBR_LOWGAIN_HALF  1077 /* Do we halfstep lowgain down */
+#define TCP_BBR_LOWGAIN_FD    1078 /* Do we force a drain when lowgain in place */
+#define TCP_BBR_USEDEL_RATE   1079 /* Enable use of delivery rate for loss recovery */
+#define TCP_BBR_MIN_RTO       1080 /* Min RTO in milliseconds */
+#define TCP_BBR_MAX_RTO	      1081 /* Max RTO in milliseconds */
+#define TCP_BBR_REC_OVER_HPTS 1082 /* Recovery override htps settings 0/1/3 */
+#define TCP_BBR_UNLIMITED     1083 /* Does BBR, in non-recovery not use cwnd */
+#define TCP_BBR_DRAIN_INC_EXTRA 1084 /* Does the 3/4 drain target include the extra gain */
+#define TCP_BBR_STARTUP_EXIT_EPOCH 1085 /* what epoch gets us out of startup */
+#define TCP_BBR_PACE_PER_SEC   1086
+#define TCP_BBR_PACE_DEL_TAR   1087
+#define TCP_BBR_PACE_SEG_MAX   1088
+#define TCP_BBR_PACE_SEG_MIN   1089
+#define TCP_BBR_PACE_CROSS     1090
+#define TCP_RACK_IDLE_REDUCE_HIGH 1092  /* Reduce the highest cwnd seen to IW on idle */
+#define TCP_RACK_IDLE_REDUCE_HIGH 1092  /* Reduce the highest cwnd seen to IW on idle */
+#define TCP_RACK_MIN_PACE      1093 	/* Do we enforce rack min pace time */
+#define TCP_RACK_MIN_PACE_SEG  1094	/* If so what is the seg threshould */
+#define TCP_RACK_TLP_USE       1095
+#define TCP_BBR_ACK_COMP_ALG   1096 	/* Not used */
+#define TCP_BBR_EXTRA_GAIN     1097
+#define TCP_BBR_RACK_RTT_USE   1098	/* what RTT should we use 0, 1, or 2? */
+#define TCP_BBR_RETRAN_WTSO    1099
+#define TCP_DATA_AFTER_CLOSE   1100
+#define TCP_BBR_PROBE_RTT_GAIN 1101
+#define TCP_BBR_PROBE_RTT_LEN  1102
+
+
 /* Start of reserved space for third-party user-settable options. */
 #define	TCP_VENDOR	SO_VENDOR
 

Modified: head/sys/netinet/tcp_log_buf.h
==============================================================================
--- head/sys/netinet/tcp_log_buf.h	Thu Jun  7 18:06:01 2018	(r334803)
+++ head/sys/netinet/tcp_log_buf.h	Thu Jun  7 18:18:13 2018	(r334804)
@@ -94,7 +94,7 @@ struct tcp_log_bbr {
 	uint16_t flex7;
 	uint8_t bbr_state;
 	uint8_t bbr_substate;
-	uint8_t inpacer;
+	uint8_t inhpts;
 	uint8_t ininput;
 	uint8_t use_lt_bw;
 	uint8_t flex8;

Modified: head/sys/netinet/tcp_output.c
==============================================================================
--- head/sys/netinet/tcp_output.c	Thu Jun  7 18:06:01 2018	(r334803)
+++ head/sys/netinet/tcp_output.c	Thu Jun  7 18:18:13 2018	(r334804)
@@ -143,18 +143,13 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat
 	    tcp_timer_active((tp), TT_PERSIST),				\
 	    ("neither rexmt nor persist timer is set"))
 
-#ifdef TCP_HHOOK
-static void inline	hhook_run_tcp_est_out(struct tcpcb *tp,
-			    struct tcphdr *th, struct tcpopt *to,
-			    uint32_t len, int tso);
-#endif
 static void inline	cc_after_idle(struct tcpcb *tp);
 
 #ifdef TCP_HHOOK
 /*
  * Wrapper for the TCP established output helper hook.
  */
-static void inline
+void
 hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
     struct tcpopt *to, uint32_t len, int tso)
 {
@@ -1849,6 +1844,144 @@ tcp_addoptions(struct tcpopt *to, u_char *optp)
 
 	KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
 	return (optlen);
+}
+
+/*
+ * This is a copy of m_copym(), taking the TSO segment size/limit
+ * constraints into account, and advancing the sndptr as it goes.
+ */
+struct mbuf *
+tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
+    int32_t seglimit, int32_t segsize, struct sockbuf *sb)
+{
+	struct mbuf *n, **np;
+	struct mbuf *top;
+	int32_t off = off0;
+	int32_t len = *plen;
+	int32_t fragsize;
+	int32_t len_cp = 0;
+	int32_t *pkthdrlen;
+	uint32_t mlen, frags;
+	bool copyhdr;
+
+
+	KASSERT(off >= 0, ("tcp_m_copym, negative off %d", off));
+	KASSERT(len >= 0, ("tcp_m_copym, negative len %d", len));
+	if (off == 0 && m->m_flags & M_PKTHDR)
+		copyhdr = true;
+	else
+		copyhdr = false;
+	while (off > 0) {
+		KASSERT(m != NULL, ("tcp_m_copym, offset > size of mbuf chain"));
+		if (off < m->m_len)
+			break;
+		off -= m->m_len;
+		if ((sb) && (m == sb->sb_sndptr)) {
+			sb->sb_sndptroff += m->m_len;
+			sb->sb_sndptr = m->m_next;
+		}
+		m = m->m_next;
+	}
+	np = &top;
+	top = NULL;
+	pkthdrlen = NULL;
+	while (len > 0) {
+		if (m == NULL) {
+			KASSERT(len == M_COPYALL,
+			    ("tcp_m_copym, length > size of mbuf chain"));
+			*plen = len_cp;
+			if (pkthdrlen != NULL)
+				*pkthdrlen = len_cp;
+			break;
+		}
+		mlen = min(len, m->m_len - off);
+		if (seglimit) {
+			/*
+			 * For M_NOMAP mbufs, add 3 segments
+			 * + 1 in case we are crossing page boundaries
+			 * + 2 in case the TLS hdr/trailer are used
+			 * It is cheaper to just add the segments
+			 * than it is to take the cache miss to look
+			 * at the mbuf ext_pgs state in detail.
+			 */
+			if (m->m_flags & M_NOMAP) {
+				fragsize = min(segsize, PAGE_SIZE);
+				frags = 3;
+			} else {
+				fragsize = segsize;
+				frags = 0;
+			}
+
+			/* Break if we really can't fit anymore. */
+			if ((frags + 1) >= seglimit) {
+				*plen =	len_cp;
+				if (pkthdrlen != NULL)
+					*pkthdrlen = len_cp;
+				break;
+			}
+
+			/*
+			 * Reduce size if you can't copy the whole
+			 * mbuf. If we can't copy the whole mbuf, also
+			 * adjust len so the loop will end after this
+			 * mbuf.
+			 */
+			if ((frags + howmany(mlen, fragsize)) >= seglimit) {
+				mlen = (seglimit - frags - 1) * fragsize;
+				len = mlen;
+				*plen = len_cp + len;
+				if (pkthdrlen != NULL)
+					*pkthdrlen = *plen;
+			}
+			frags += howmany(mlen, fragsize);
+			if (frags == 0)
+				frags++;
+			seglimit -= frags;
+			KASSERT(seglimit > 0,
+			    ("%s: seglimit went too low", __func__));
+		}
+		if (copyhdr)
+			n = m_gethdr(M_NOWAIT, m->m_type);
+		else
+			n = m_get(M_NOWAIT, m->m_type);
+		*np = n;
+		if (n == NULL)
+			goto nospace;
+		if (copyhdr) {
+			if (!m_dup_pkthdr(n, m, M_NOWAIT))
+				goto nospace;
+			if (len == M_COPYALL)
+				n->m_pkthdr.len -= off0;
+			else
+				n->m_pkthdr.len = len;
+			pkthdrlen = &n->m_pkthdr.len;
+			copyhdr = false;
+		}
+		n->m_len = mlen;
+		len_cp += n->m_len;
+		if (m->m_flags & M_EXT) {
+			n->m_data = m->m_data + off;
+			mb_dupcl(n, m);
+		} else
+			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
+			    (u_int)n->m_len);
+
+		if (sb && (sb->sb_sndptr == m) &&
+		    ((n->m_len + off) >= m->m_len) && m->m_next) {
+			sb->sb_sndptroff += m->m_len;
+			sb->sb_sndptr = m->m_next;
+		}
+		off = 0;
+		if (len != M_COPYALL) {
+			len -= n->m_len;
+		}
+		m = m->m_next;
+		np = &n->m_next;
+	}
+	return (top);
+nospace:
+	m_freem(top);
+	return (NULL);
 }
 
 void

Modified: head/sys/netinet/tcp_stacks/fastpath.c
==============================================================================
--- head/sys/netinet/tcp_stacks/fastpath.c	Thu Jun  7 18:06:01 2018	(r334803)
+++ head/sys/netinet/tcp_stacks/fastpath.c	Thu Jun  7 18:18:13 2018	(r334804)
@@ -2392,7 +2392,7 @@ struct tcp_function_block __tcp_fastack = {
 static int
 tcp_addfastpaths(module_t mod, int type, void *data)
 {
-	int err=0;
+	int err = 0;
 
 	switch (type) {
 	case MOD_LOAD:

Added: head/sys/netinet/tcp_stacks/rack.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/netinet/tcp_stacks/rack.c	Thu Jun  7 18:18:13 2018	(r334804)
@@ -0,0 +1,9164 @@
+/*-
+ * Copyright (c) 2016-2018
+ *	Netflix Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#ifdef TCP_HHOOK
+#include <sys/hhook.h>
+#endif
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h>		/* for proc0 declaration */
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#ifdef NETFLIX_STATS
+#include <sys/stats.h>
+#endif
+#include <sys/refcount.h>
+#include <sys/queue.h>
+#include <sys/smp.h>
+#include <sys/kthread.h>
+#include <sys/kern_prefetch.h>
+
+#include <vm/uma.h>
+
+#include <net/route.h>
+#include <net/vnet.h>
+
+#define TCPSTATES		/* for logging */
+
+#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>	/* required for icmp_var.h */
+#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/ip6_var.h>
+#define	TCPOUTFLAGS
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_log_buf.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_hpts.h>
+#include <netinet/tcpip.h>
+#include <netinet/cc/cc.h>
+#ifdef NETFLIX_CWV
+#include <netinet/tcp_newcwv.h>
+#endif
+#include <netinet/tcp_fastopen.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif				/* TCPDEBUG */
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
+
+#include <netipsec/ipsec_support.h>
+
+#if defined(IPSEC) || defined(IPSEC_SUPPORT)
+#include <netipsec/ipsec.h>
+#include <netipsec/ipsec6.h>
+#endif				/* IPSEC */
+
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <machine/in_cksum.h>
+
+#ifdef MAC
+#include <security/mac/mac_framework.h>
+#endif
+#include "sack_filter.h"
+#include "tcp_rack.h"
+#include "rack_bbr_common.h"
+
+uma_zone_t rack_zone;
+uma_zone_t rack_pcb_zone;
+
+#ifndef TICKS2SBT
+#define	TICKS2SBT(__t)	(tick_sbt * ((sbintime_t)(__t)))
+#endif
+
+struct sysctl_ctx_list rack_sysctl_ctx;
+struct sysctl_oid *rack_sysctl_root;
+
+#ifndef TCPHPTS
+fatal error missing option TCPHSTS in the build;
+#endif
+
+#define CUM_ACKED 1
+#define SACKED 2
+
+/*
+ * The RACK module incorporates a number of
+ * TCP ideas that have been put out into the IETF
+ * over the last few years:
+ * - Matt Mathis's Rate Halving which slowly drops
+ *    the congestion window so that the ack clock can
+ *    be maintained during a recovery.
+ * - Yuchung Cheng's RACK TCP (for which it is named) that
+ *    will stop us using the number of dup acks and instead
+ *    use time as the gauge of when we retransmit.
+ * - Reorder Detection of RFC4737 and the Tail-Loss probe draft
+ *    of Dukkipati et al.
+ * RACK depends on SACK, so if an endpoint arrives that
+ * cannot do SACK the state machine below will shuttle the
+ * connection back to using the "default" TCP stack that is
+ * in FreeBSD.
+ *
+ * To implement RACK the original TCP stack was first decomposed
+ * into a functional state machine with individual states
+ * for each of the possible TCP connection states. The do_segment
+ * function's role in life is to mandate the connection supports SACK
+ * initially and then assure that the RACK state matches the connection
+ * state before calling the state's do_segment function. Each
+ * state is simplified due to the fact that the original do_segment
+ * has been decomposed and we *know* what state we are in (no
+ * switches on the state) and all tests for SACK are gone. This
+ * greatly simplifies what each state does.
+ *
+ * TCP output is also over-written with a new version since it
+ * must maintain the new rack scoreboard.
+ *
+ */
+static int32_t rack_precache = 1;
+static int32_t rack_tlp_thresh = 1;
+static int32_t rack_reorder_thresh = 2;
+static int32_t rack_reorder_fade = 60000;	/* 0 - never fade, def 60,000
+						 * - 60 seconds */
+static int32_t rack_pkt_delay = 1;
+static int32_t rack_inc_var = 0;/* For TLP */
+static int32_t rack_reduce_largest_on_idle = 0;
+static int32_t rack_min_pace_time = 0;
+static int32_t rack_min_pace_time_seg_req=6;
+static int32_t rack_early_recovery = 1;
+static int32_t rack_early_recovery_max_seg = 6;
+static int32_t rack_send_a_lot_in_prr = 1;
+static int32_t rack_min_to = 1;	/* Number of ms minimum timeout */
+static int32_t rack_tlp_in_recovery = 1;	/* Can we do TLP in recovery? */
+static int32_t rack_verbose_logging = 0;
+static int32_t rack_ignore_data_after_close = 1;
+/*
+ * Currently regular tcp has a rto_min of 30ms
+ * the backoff goes 12 times so that ends up
+ * being a total of 122.850 seconds before a
+ * connection is killed.
+ */
+static int32_t rack_tlp_min = 10;
+static int32_t rack_rto_min = 30;	/* 30ms same as main freebsd */
+static int32_t rack_rto_max = 30000;	/* 30 seconds */
+static const int32_t rack_free_cache = 2;
+static int32_t rack_hptsi_segments = 40;
+static int32_t rack_rate_sample_method = USE_RTT_LOW;
+static int32_t rack_pace_every_seg = 1;
+static int32_t rack_delayed_ack_time = 200;	/* 200ms */
+static int32_t rack_slot_reduction = 4;
+static int32_t rack_lower_cwnd_at_tlp = 0;
+static int32_t rack_use_proportional_reduce = 0;
+static int32_t rack_proportional_rate = 10;
+static int32_t rack_tlp_max_resend = 2;
+static int32_t rack_limited_retran = 0;
+static int32_t rack_always_send_oldest = 0;
+static int32_t rack_sack_block_limit = 128;
+static int32_t rack_use_sack_filter = 1;
+static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
+
+/* Rack specific counters */
+counter_u64_t rack_badfr;
+counter_u64_t rack_badfr_bytes;
+counter_u64_t rack_rtm_prr_retran;
+counter_u64_t rack_rtm_prr_newdata;
+counter_u64_t rack_timestamp_mismatch;
+counter_u64_t rack_reorder_seen;
+counter_u64_t rack_paced_segments;
+counter_u64_t rack_unpaced_segments;
+counter_u64_t rack_saw_enobuf;
+counter_u64_t rack_saw_enetunreach;
+
+/* Tail loss probe counters */
+counter_u64_t rack_tlp_tot;
+counter_u64_t rack_tlp_newdata;
+counter_u64_t rack_tlp_retran;
+counter_u64_t rack_tlp_retran_bytes;
+counter_u64_t rack_tlp_retran_fail;
+counter_u64_t rack_to_tot;
+counter_u64_t rack_to_arm_rack;
+counter_u64_t rack_to_arm_tlp;
+counter_u64_t rack_to_alloc;
+counter_u64_t rack_to_alloc_hard;
+counter_u64_t rack_to_alloc_emerg;
+
+counter_u64_t rack_sack_proc_all;
+counter_u64_t rack_sack_proc_short;
+counter_u64_t rack_sack_proc_restart;
+counter_u64_t rack_runt_sacks;
+counter_u64_t rack_used_tlpmethod;
+counter_u64_t rack_used_tlpmethod2;
+counter_u64_t rack_enter_tlp_calc;
+counter_u64_t rack_input_idle_reduces;
+counter_u64_t rack_tlp_does_nada;
+
+/* Temp CPU counters */
+counter_u64_t rack_find_high;
+
+counter_u64_t rack_progress_drops;
+counter_u64_t rack_out_size[TCP_MSS_ACCT_SIZE];
+counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
+
+static void
+rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick,  int event, int line);
+
+static int
+rack_process_ack(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t * ti_locked,
+    uint32_t tiwin, int32_t tlen, int32_t * ofia, int32_t thflags, int32_t * ret_val);
+static int
+rack_process_data(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
+    int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static void
+rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack,
+    struct tcphdr *th, uint16_t nsegs, uint16_t type, int32_t recovery);
+static struct rack_sendmap *rack_alloc(struct tcp_rack *rack);
+static struct rack_sendmap *
+rack_check_recovery_mode(struct tcpcb *tp,
+    uint32_t tsused);
+static void
+rack_cong_signal(struct tcpcb *tp, struct tcphdr *th,
+    uint32_t type);
+static void rack_counter_destroy(void);
+static int
+rack_ctloutput(struct socket *so, struct sockopt *sopt,
+    struct inpcb *inp, struct tcpcb *tp);
+static int32_t rack_ctor(void *mem, int32_t size, void *arg, int32_t how);
+static void
+rack_do_segment(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
+    uint8_t iptos, int32_t ti_locked);
+static void rack_dtor(void *mem, int32_t size, void *arg);
+static void
+rack_earlier_retran(struct tcpcb *tp, struct rack_sendmap *rsm,
+    uint32_t t, uint32_t cts);
+static struct rack_sendmap *
+rack_find_high_nonack(struct tcp_rack *rack,
+    struct rack_sendmap *rsm);
+static struct rack_sendmap *rack_find_lowest_rsm(struct tcp_rack *rack);
+static void rack_free(struct tcp_rack *rack, struct rack_sendmap *rsm);
+static void rack_fini(struct tcpcb *tp, int32_t tcb_is_purged);
+static int
+rack_get_sockopt(struct socket *so, struct sockopt *sopt,
+    struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack);
+static int32_t rack_handoff_ok(struct tcpcb *tp);
+static int32_t rack_init(struct tcpcb *tp);
+static void rack_init_sysctls(void);
+static void
+rack_log_ack(struct tcpcb *tp, struct tcpopt *to,
+    struct tcphdr *th);
+static void
+rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
+    uint32_t seq_out, uint8_t th_flags, int32_t err, uint32_t ts,
+    uint8_t pass, struct rack_sendmap *hintrsm);
+static void
+rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack,
+    struct rack_sendmap *rsm);
+static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num);
+static int32_t rack_output(struct tcpcb *tp);
+static void
+rack_hpts_do_segment(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
+    uint8_t iptos, int32_t ti_locked, int32_t nxt_pkt, struct timeval *tv);
+
+static uint32_t
+rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack,
+    struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm,
+    uint32_t cts);
+static void rack_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+static void rack_remxt_tmr(struct tcpcb *tp);
+static int
+rack_set_sockopt(struct socket *so, struct sockopt *sopt,
+    struct inpcb *inp, struct tcpcb *tp, struct tcp_rack *rack);
+static void rack_set_state(struct tcpcb *tp, struct tcp_rack *rack);
+static int32_t rack_stopall(struct tcpcb *tp);
+static void
+rack_timer_activate(struct tcpcb *tp, uint32_t timer_type,
+    uint32_t delta);
+static int32_t rack_timer_active(struct tcpcb *tp, uint32_t timer_type);
+static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line);
+static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type);
+static uint32_t
+rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack,
+    struct rack_sendmap *rsm, uint32_t ts, int32_t * lenp);
+static void
+rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,
+    struct rack_sendmap *rsm, uint32_t ts);
+static int
+rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack,
+    struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type);
+static int32_t tcp_addrack(module_t mod, int32_t type, void *data);
+static void
+rack_challenge_ack(struct mbuf *m, struct tcphdr *th,
+    struct tcpcb *tp, int32_t * ti_locked, int32_t * ret_val);
+static int
+rack_do_close_wait(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
+    int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static int
+rack_do_closing(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
+    int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static void rack_do_drop(struct mbuf *m, struct tcpcb *tp, int32_t * ti_locked);
+static void
+rack_do_dropafterack(struct mbuf *m, struct tcpcb *tp,
+    struct tcphdr *th, int32_t * ti_locked, int32_t thflags, int32_t tlen, int32_t * ret_val);
+static void
+rack_do_dropwithreset(struct mbuf *m, struct tcpcb *tp,
+    struct tcphdr *th, int32_t * ti_locked, int32_t rstreason, int32_t tlen);
+static int
+rack_do_established(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
+    int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static int
+rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
+    int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t nxt_pkt);
+static int
+rack_do_fin_wait_1(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
+    int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static int
+rack_do_fin_wait_2(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
+    int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static int
+rack_do_lastack(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
+    int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static int
+rack_do_syn_recv(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
+    int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static int
+rack_do_syn_sent(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
+    int32_t tlen, int32_t * ti_locked, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
+static int
+rack_drop_checks(struct tcpopt *to, struct mbuf *m,
+    struct tcphdr *th, struct tcpcb *tp, int32_t * tlenp, int32_t * ti_locked, int32_t * thf,
+    int32_t * drop_hdrlen, int32_t * ret_val);
+static int
+rack_process_rst(struct mbuf *m, struct tcphdr *th,
+    struct socket *so, struct tcpcb *tp, int32_t * ti_locked);
+struct rack_sendmap *
+tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack,
+    uint32_t tsused);
+static void tcp_rack_xmit_timer(struct tcp_rack *rack, int32_t rtt);
+static void
+     tcp_rack_partialack(struct tcpcb *tp, struct tcphdr *th);
+
+static int
+rack_ts_check(struct mbuf *m, struct tcphdr *th,
+    struct tcpcb *tp, int32_t * ti_locked, int32_t tlen, int32_t thflags, int32_t * ret_val);
+
+int32_t rack_clear_counter=0;
+
+
+/*
+ * Sysctl handler that lets an administrator zero every RACK statistics
+ * counter in one shot.  Reading the OID always returns 0 (the trigger
+ * variable is cleared on the way out); writing the value 1 zeroes all
+ * of the counter_u64 KPIs below.  Any other written value is a no-op.
+ *
+ * Returns 0 on success or the errno from SYSCTL_OUT/SYSCTL_IN.
+ *
+ * NOTE(review): the counters are zeroed one at a time with no lock
+ * held, so a snapshot taken concurrently may mix old and new values --
+ * presumably acceptable for debug statistics; confirm.
+ */
+static int
+sysctl_rack_clear(SYSCTL_HANDLER_ARGS)
+{
+	uint32_t stat;
+	int32_t error;
+
+	/* Report the (always-zero) trigger value to the reader. */
+	error = SYSCTL_OUT(req, &rack_clear_counter, sizeof(uint32_t));
+	if (error || req->newptr == NULL)
+		return (error);
+
+	/* Fetch the value the administrator wrote. */
+	error = SYSCTL_IN(req, &stat, sizeof(uint32_t));
+	if (error)
+		return (error);
+	if (stat == 1) {
+#ifdef INVARIANTS
+		printf("Clearing RACK counters\n");
+#endif
+		counter_u64_zero(rack_badfr);
+		counter_u64_zero(rack_badfr_bytes);
+		counter_u64_zero(rack_rtm_prr_retran);
+		counter_u64_zero(rack_rtm_prr_newdata);
+		counter_u64_zero(rack_timestamp_mismatch);
+		counter_u64_zero(rack_reorder_seen);
+		counter_u64_zero(rack_tlp_tot);
+		counter_u64_zero(rack_tlp_newdata);
+		counter_u64_zero(rack_tlp_retran);
+		counter_u64_zero(rack_tlp_retran_bytes);
+		counter_u64_zero(rack_tlp_retran_fail);
+		counter_u64_zero(rack_to_tot);
+		counter_u64_zero(rack_to_arm_rack);
+		counter_u64_zero(rack_to_arm_tlp);
+		counter_u64_zero(rack_paced_segments);
+		counter_u64_zero(rack_unpaced_segments);
+		counter_u64_zero(rack_saw_enobuf);
+		counter_u64_zero(rack_saw_enetunreach);
+		counter_u64_zero(rack_to_alloc_hard);
+		counter_u64_zero(rack_to_alloc_emerg);
+		counter_u64_zero(rack_sack_proc_all);
+		counter_u64_zero(rack_sack_proc_short);
+		counter_u64_zero(rack_sack_proc_restart);
+		counter_u64_zero(rack_to_alloc);
+		counter_u64_zero(rack_find_high);
+		counter_u64_zero(rack_runt_sacks);
+		counter_u64_zero(rack_used_tlpmethod);
+		counter_u64_zero(rack_used_tlpmethod2);
+		counter_u64_zero(rack_enter_tlp_calc);
+		counter_u64_zero(rack_progress_drops);
+		counter_u64_zero(rack_tlp_does_nada);
+	}
+	/* Always reset the trigger so the next read shows 0. */
+	rack_clear_counter = 0;
+	return (0);
+}
+
+
+
+static void
+rack_init_sysctls()
+{
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "rate_sample_method", CTLFLAG_RW,
+	    &rack_rate_sample_method , USE_RTT_LOW,
+	    "What method should we use for rate sampling 0=high, 1=low ");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "data_after_close", CTLFLAG_RW,
+	    &rack_ignore_data_after_close, 0,
+	    "Do we hold off sending a RST until all pending data is ack'd");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "tlpmethod", CTLFLAG_RW,
+	    &rack_tlp_threshold_use, TLP_USE_TWO_ONE,
+	    "What method do we do for TLP time calc 0=no-de-ack-comp, 1=ID, 2=2.1, 3=2.2");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "min_pace_time", CTLFLAG_RW,
+	    &rack_min_pace_time, 0,
+	    "Should we enforce a minimum pace time of 1ms");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "min_pace_segs", CTLFLAG_RW,
+	    &rack_min_pace_time_seg_req, 6,
+	    "How many segments have to be in the len to enforce min-pace-time");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "idle_reduce_high", CTLFLAG_RW,
+	    &rack_reduce_largest_on_idle, 0,
+	    "Should we reduce the largest cwnd seen to IW on idle reduction");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "bb_verbose", CTLFLAG_RW,
+	    &rack_verbose_logging, 0,
+	    "Should RACK black box logging be verbose");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "sackfiltering", CTLFLAG_RW,
+	    &rack_use_sack_filter, 1,
+	    "Do we use sack filtering?");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "delayed_ack", CTLFLAG_RW,
+	    &rack_delayed_ack_time, 200,
+	    "Delayed ack time (200ms)");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "tlpminto", CTLFLAG_RW,
+	    &rack_tlp_min, 10,
+	    "TLP minimum timeout per the specification (10ms)");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "precache", CTLFLAG_RW,
+	    &rack_precache, 0,
+	    "Where should we precache the mcopy (0 is not at all)");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "sblklimit", CTLFLAG_RW,
+	    &rack_sack_block_limit, 128,
+	    "When do we start paying attention to small sack blocks");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "send_oldest", CTLFLAG_RW,
+	    &rack_always_send_oldest, 1,
+	    "Should we always send the oldest TLP and RACK-TLP");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "rack_tlp_in_recovery", CTLFLAG_RW,
+	    &rack_tlp_in_recovery, 1,
+	    "Can we do a TLP during recovery?");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "rack_tlimit", CTLFLAG_RW,
+	    &rack_limited_retran, 0,
+	    "How many times can a rack timeout drive out sends");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "minrto", CTLFLAG_RW,
+	    &rack_rto_min, 0,
+	    "Minimum RTO in ms -- set with caution below 1000 due to TLP");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "maxrto", CTLFLAG_RW,
+	    &rack_rto_max, 0,
+	    "Maxiumum RTO in ms -- should be at least as large as min_rto");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "tlp_retry", CTLFLAG_RW,
+	    &rack_tlp_max_resend, 2,
+	    "How many times does TLP retry a single segment or multiple with no ACK");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "recovery_loss_prop", CTLFLAG_RW,
+	    &rack_use_proportional_reduce, 0,
+	    "Should we proportionaly reduce cwnd based on the number of losses ");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "recovery_prop", CTLFLAG_RW,
+	    &rack_proportional_rate, 10,
+	    "What percent reduction per loss");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "tlp_cwnd_flag", CTLFLAG_RW,
+	    &rack_lower_cwnd_at_tlp, 0,
+	    "When a TLP completes a retran should we enter recovery?");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "hptsi_reduces", CTLFLAG_RW,
+	    &rack_slot_reduction, 4,
+	    "When setting a slot should we reduce by divisor");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "hptsi_every_seg", CTLFLAG_RW,
+	    &rack_pace_every_seg, 1,
+	    "Should we pace out every segment hptsi");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "hptsi_seg_max", CTLFLAG_RW,
+	    &rack_hptsi_segments, 6,
+	    "Should we pace out only a limited size of segments");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "prr_sendalot", CTLFLAG_RW,
+	    &rack_send_a_lot_in_prr, 1,
+	    "Send a lot in prr");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "minto", CTLFLAG_RW,
+	    &rack_min_to, 1,
+	    "Minimum rack timeout in milliseconds");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "earlyrecoveryseg", CTLFLAG_RW,
+	    &rack_early_recovery_max_seg, 6,
+	    "Max segments in early recovery");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "earlyrecovery", CTLFLAG_RW,
+	    &rack_early_recovery, 1,
+	    "Do we do early recovery with rack");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "reorder_thresh", CTLFLAG_RW,
+	    &rack_reorder_thresh, 2,
+	    "What factor for rack will be added when seeing reordering (shift right)");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "rtt_tlp_thresh", CTLFLAG_RW,
+	    &rack_tlp_thresh, 1,
+	    "what divisor for TLP rtt/retran will be added (1=rtt, 2=1/2 rtt etc)");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "reorder_fade", CTLFLAG_RW,
+	    &rack_reorder_fade, 0,
+	    "Does reorder detection fade, if so how many ms (0 means never)");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "pktdelay", CTLFLAG_RW,
+	    &rack_pkt_delay, 1,
+	    "Extra RACK time (in ms) besides reordering thresh");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "inc_var", CTLFLAG_RW,
+	    &rack_inc_var, 0,
+	    "Should rack add to the TLP timer the variance in rtt calculation");
+	rack_badfr = counter_u64_alloc(M_WAITOK);
+	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO, "badfr", CTLFLAG_RD,

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201806071818.w57IIENp080093>