Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 27 Jul 2016 07:51:58 +0000 (UTC)
From:      Julien Charbon <jch@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r303371 - stable/10/sys/netinet
Message-ID:  <201607270751.u6R7pwEu001011@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jch
Date: Wed Jul 27 07:51:58 2016
New Revision: 303371
URL: https://svnweb.freebsd.org/changeset/base/303371

Log:
  MFC r271119, r272081:
  
  r271119:
  
  In tcp_input(), don't acquire the pcbinfo global write lock for SYN
  packets targeting a listening socket.  Permit to reduce TCP input
  processing starvation in context of high SYN load (e.g. short-lived TCP
  connections or SYN flood).
  
  Submitted by:	Julien Charbon <jcharbon@verisign.com>
  Reviewed by:	adrian, hiren, jhb, Mike Bentkofsky
  
  r272081:
  
  Catch up with r271119.

Modified:
  stable/10/sys/netinet/tcp_input.c
  stable/10/sys/netinet/tcp_syncache.c
  stable/10/sys/netinet/toecore.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/netinet/tcp_input.c
==============================================================================
--- stable/10/sys/netinet/tcp_input.c	Wed Jul 27 07:36:54 2016	(r303370)
+++ stable/10/sys/netinet/tcp_input.c	Wed Jul 27 07:51:58 2016	(r303371)
@@ -751,12 +751,12 @@ tcp_input(struct mbuf *m, int off0)
 
 	/*
 	 * Locate pcb for segment; if we're likely to add or remove a
-	 * connection then first acquire pcbinfo lock.  There are two cases
+	 * connection then first acquire pcbinfo lock.  There are three cases
 	 * where we might discover later we need a write lock despite the
-	 * flags: ACKs moving a connection out of the syncache, and ACKs for
-	 * a connection in TIMEWAIT.
+	 * flags: ACKs moving a connection out of the syncache, ACKs for a
+	 * connection in TIMEWAIT and SYNs not targeting a listening socket.
 	 */
-	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
+	if ((thflags & (TH_FIN | TH_RST)) != 0) {
 		INP_INFO_WLOCK(&V_tcbinfo);
 		ti_locked = TI_WLOCKED;
 	} else
@@ -983,10 +983,12 @@ relocked:
 	 * now be in TIMEWAIT.
 	 */
 #ifdef INVARIANTS
-	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
+	if ((thflags & (TH_FIN | TH_RST)) != 0)
 		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 #endif
-	if (tp->t_state != TCPS_ESTABLISHED) {
+	if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
+	      (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
+	       !(tp->t_flags & TF_FASTOPEN)))) {
 		if (ti_locked == TI_UNLOCKED) {
 			if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
 				in_pcbref(inp);
@@ -1027,17 +1029,13 @@ relocked:
 	/*
 	 * When the socket is accepting connections (the INPCB is in LISTEN
 	 * state) we look into the SYN cache if this is a new connection
-	 * attempt or the completion of a previous one.  Because listen
-	 * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
-	 * held in this case.
+	 * attempt or the completion of a previous one.
 	 */
 	if (so->so_options & SO_ACCEPTCONN) {
 		struct in_conninfo inc;
 
 		KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
 		    "tp not listening", __func__));
-		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-
 		bzero(&inc, sizeof(inc));
 #ifdef INET6
 		if (isipv6) {
@@ -1060,6 +1058,8 @@ relocked:
 		 * socket appended to the listen queue in SYN_RECEIVED state.
 		 */
 		if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+
+			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 			/*
 			 * Parse the TCP options here because
 			 * syncookies need access to the reflected
@@ -1348,8 +1348,12 @@ new_tfo_socket:
 #endif
 		/*
 		 * Entry added to syncache and mbuf consumed.
-		 * Everything already unlocked by syncache_add().
+		 * Only the listen socket is unlocked by syncache_add().
 		 */
+		if (ti_locked == TI_WLOCKED) {
+			INP_INFO_WUNLOCK(&V_tcbinfo);
+			ti_locked = TI_UNLOCKED;
+		}
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 		return;
 	} else if (tp->t_state == TCPS_LISTEN) {

Modified: stable/10/sys/netinet/tcp_syncache.c
==============================================================================
--- stable/10/sys/netinet/tcp_syncache.c	Wed Jul 27 07:36:54 2016	(r303370)
+++ stable/10/sys/netinet/tcp_syncache.c	Wed Jul 27 07:51:58 2016	(r303371)
@@ -1107,7 +1107,7 @@ syncache_tfo_expand(struct syncache *sc,
 	 * Global TCP locks are held because we manipulate the PCB lists
 	 * and create a new socket.
 	 */
-	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 
 	pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
 	*lsop = syncache_socket(sc, *lsop, m);
@@ -1175,7 +1175,6 @@ syncache_add(struct in_conninfo *inc, st
 	int tfo_response_cookie_valid = 0;
 #endif
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);			/* listen socket */
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
 	    ("%s: unexpected tcp flags", __func__));
@@ -1229,21 +1228,15 @@ syncache_add(struct in_conninfo *inc, st
 #ifdef MAC
 	if (mac_syncache_init(&maclabel) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
 		goto done;
 	} else
 		mac_syncache_create(maclabel, inp);
 #endif
 #ifdef TCP_RFC7413
-	if (!tfo_cookie_valid) {
-		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_tcbinfo);
-	}
-#else
-	INP_WUNLOCK(inp);
-	INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (!tfo_cookie_valid)
 #endif
-	
+		INP_WUNLOCK(inp);
+
 	/*
 	 * Remember the IP options, if any.
 	 */
@@ -1272,10 +1265,8 @@ syncache_add(struct in_conninfo *inc, st
 	SCH_LOCK_ASSERT(sch);
 	if (sc != NULL) {
 #ifdef TCP_RFC7413
-		if (tfo_cookie_valid) {
+		if (tfo_cookie_valid)
 			INP_WUNLOCK(inp);
-			INP_INFO_WUNLOCK(&V_tcbinfo);
-		}
 #endif
 		TCPSTAT_INC(tcps_sc_dupsyn);
 		if (ipopts) {

Modified: stable/10/sys/netinet/toecore.c
==============================================================================
--- stable/10/sys/netinet/toecore.c	Wed Jul 27 07:36:54 2016	(r303370)
+++ stable/10/sys/netinet/toecore.c	Wed Jul 27 07:51:58 2016	(r303371)
@@ -328,7 +328,6 @@ toe_syncache_add(struct in_conninfo *inc
 {
 	struct socket *lso = inp->inp_socket;
 
-	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201607270751.u6R7pwEu001011>