Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 21 Mar 2018 20:59:30 +0000 (UTC)
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r331322 - head/sys/netinet
Message-ID:  <201803212059.w2LKxUiO084366@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: glebius
Date: Wed Mar 21 20:59:30 2018
New Revision: 331322
URL: https://svnweb.freebsd.org/changeset/base/331322

Log:
  The net.inet.tcp.nolocaltimewait=1 optimization prevents local TCP connections
  from entering the TIME_WAIT state. However, it omits sending the ACK for the
  FIN, which results in RST. This becomes a bigger deal if the sysctl
  net.inet.tcp.blackhole is 2. In this case RST isn't send, so the other side of
  the connection (also local) keeps retransmitting FINs.
  
  To fix that in tcp_twstart() we will not call tcp_close() immediately. Instead
  we will allocate a tcptw on stack and proceed to the end of the function all
  the way to tcp_twrespond(), to generate the correct ACK, then we will drop the
  last PCB reference.
  
  While here, make a few tiny improvements:
  - use bools for boolean variable
  - staticize nolocaltimewait
  - remove pointless acquisiton of socket lock
  
  Reported by:	jtl
  Reviewed by:	jtl
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D14697

Modified:
  head/sys/netinet/tcp_timewait.c
  head/sys/netinet/tcp_usrreq.c

Modified: head/sys/netinet/tcp_timewait.c
==============================================================================
--- head/sys/netinet/tcp_timewait.c	Wed Mar 21 20:36:57 2018	(r331321)
+++ head/sys/netinet/tcp_timewait.c	Wed Mar 21 20:59:30 2018	(r331322)
@@ -172,7 +172,7 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE
     &maxtcptw, 0, sysctl_maxtcptw, "IU",
     "Maximum number of compressed TCP TIME_WAIT entries");
 
-VNET_DEFINE(int, nolocaltimewait) = 0;
+static VNET_DEFINE(int, nolocaltimewait) = 0;
 #define	V_nolocaltimewait	VNET(nolocaltimewait)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(nolocaltimewait), 0,
@@ -225,12 +225,12 @@ tcp_tw_destroy(void)
 void
 tcp_twstart(struct tcpcb *tp)
 {
-	struct tcptw *tw;
+	struct tcptw twlocal, *tw;
 	struct inpcb *inp = tp->t_inpcb;
-	int acknow;
 	struct socket *so;
+	bool acknow, local;
 #ifdef INET6
-	int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
+	bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
 #endif
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@@ -241,33 +241,29 @@ tcp_twstart(struct tcpcb *tp)
 	    "(inp->inp_flags & INP_DROPPED) != 0"));
 
 	if (V_nolocaltimewait) {
-		int error = 0;
 #ifdef INET6
 		if (isipv6)
-			error = in6_localaddr(&inp->in6p_faddr);
+			local = in6_localaddr(&inp->in6p_faddr);
 #endif
 #if defined(INET6) && defined(INET)
 		else
 #endif
 #ifdef INET
-			error = in_localip(inp->inp_faddr);
+			local = in_localip(inp->inp_faddr);
 #endif
-		if (error) {
-			tp = tcp_close(tp);
-			if (tp != NULL)
-				INP_WUNLOCK(inp);
-			return;
-		}
-	}
+	} else
+		local = false;
 
-
 	/*
 	 * For use only by DTrace.  We do not reference the state
 	 * after this point so modifying it in place is not a problem.
 	 */
 	tcp_state_change(tp, TCPS_TIME_WAIT);
 
-	tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
+	if (local)
+		tw = &twlocal;
+	else
+		tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
 	if (tw == NULL) {
 		/*
 		 * Reached limit on total number of TIMEWAIT connections
@@ -286,11 +282,10 @@ tcp_twstart(struct tcpcb *tp)
 		}
 	}
 	/*
-	 * The tcptw will hold a reference on its inpcb until tcp_twclose
-	 * is called
+	 * For !local case the tcptw will hold a reference on its inpcb
+	 * until tcp_twclose is called.
 	 */
 	tw->tw_inpcb = inp;
-	in_pcbref(inp);	/* Reference from tw */
 
 	/*
 	 * Recover last window size sent.
@@ -337,16 +332,19 @@ tcp_twstart(struct tcpcb *tp)
 	tcp_discardcb(tp);
 	so = inp->inp_socket;
 	soisdisconnected(so);
-	tw->tw_cred = crhold(so->so_cred);
-	SOCK_LOCK(so);
 	tw->tw_so_options = so->so_options;
-	SOCK_UNLOCK(so);
+	inp->inp_flags |= INP_TIMEWAIT;
 	if (acknow)
 		tcp_twrespond(tw, TH_ACK);
-	inp->inp_ppcb = tw;
-	inp->inp_flags |= INP_TIMEWAIT;
-	TCPSTATES_INC(TCPS_TIME_WAIT);
-	tcp_tw_2msl_reset(tw, 0);
+	if (local)
+		in_pcbdrop(inp);
+	else {
+		in_pcbref(inp);	/* Reference from tw */
+		tw->tw_cred = crhold(so->so_cred);
+		inp->inp_ppcb = tw;
+		TCPSTATES_INC(TCPS_TIME_WAIT);
+		tcp_tw_2msl_reset(tw, 0);
+	}
 
 	/*
 	 * If the inpcb owns the sole reference to the socket, then we can

Modified: head/sys/netinet/tcp_usrreq.c
==============================================================================
--- head/sys/netinet/tcp_usrreq.c	Wed Mar 21 20:36:57 2018	(r331321)
+++ head/sys/netinet/tcp_usrreq.c	Wed Mar 21 20:59:30 2018	(r331322)
@@ -198,15 +198,18 @@ tcp_detach(struct socket *so, struct inpcb *inp)
 		 * XXXRW: Would it be cleaner to free the tcptw here?
 		 *
 		 * Astute question indeed, from twtcp perspective there are
-		 * three cases to consider:
+		 * four cases to consider:
 		 *
 		 * #1 tcp_detach is called at tcptw creation time by
 		 *  tcp_twstart, then do not discard the newly created tcptw
 		 *  and leave inpcb present until timewait ends
-		 * #2 tcp_detach is called at timewait end (or reuse) by
+		 * #2 tcp_detach is called at tcptw creation time by
+		 *  tcp_twstart, but connection is local and tw will be
+		 *  discarded immediately
+		 * #3 tcp_detach is called at timewait end (or reuse) by
 		 *  tcp_twclose, then the tcptw has already been discarded
 		 *  (or reused) and inpcb is freed here
-		 * #3 tcp_detach is called() after timewait ends (or reuse)
+		 * #4 tcp_detach is called() after timewait ends (or reuse)
 		 *  (e.g. by soclose), then tcptw has already been discarded
 		 *  (or reused) and inpcb is freed here
 		 *



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201803212059.w2LKxUiO084366>