Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 19 Sep 2018 18:49:38 +0000 (UTC)
From:      "Bjoern A. Zeeb" <bz@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r338804 - head/sys/netinet6
Message-ID:  <201809191849.w8JIncmT028823@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bz
Date: Wed Sep 19 18:49:37 2018
New Revision: 338804
URL: https://svnweb.freebsd.org/changeset/base/338804

Log:
  Update udp6_output() inp locking to avoid concurrency issues with
  route cache updates.
  
  Bring over locking changes applied to udp_output() for the route cache
  in r297225 and fixed in r306559 which achieve multiple things:
  (1) acquire an exclusive inp lock earlier depending on the expected
      conditions; we add a comment explaining this in udp6,
  (2) having acquired the exclusive lock earlier eliminates a slight
      possible chance for a race condition which was present in v4 for
      multiple years as well and is now gone, and
  (3) only pass the inp_route6 to ip6_output() if we are holding an
      exclusive inp lock, so that possible route cache updates in case
      of routing table generation number changes can happen safely.
  In addition this change (as the legacy IP counterpart) decomposes the
  tracking of inp and pcbinfo lock and adds extra assertions, that the
  two together are acquired correctly.
  
  PR:		230950
  Reviewed by:	karels, markj
  Approved by:	re (gjb)
  Pointyhat to:	bz (for completely missing this bit)
  Differential Revision:	https://reviews.freebsd.org/D17230

Modified:
  head/sys/netinet6/udp6_usrreq.c

Modified: head/sys/netinet6/udp6_usrreq.c
==============================================================================
--- head/sys/netinet6/udp6_usrreq.c	Wed Sep 19 16:37:43 2018	(r338803)
+++ head/sys/netinet6/udp6_usrreq.c	Wed Sep 19 18:49:37 2018	(r338804)
@@ -698,7 +698,7 @@ udp6_output(struct socket *so, int flags_arg, struct m
 	u_int32_t ulen, plen;
 	uint16_t cscov;
 	u_short fport;
-	uint8_t nxt, unlock_udbinfo;
+	uint8_t nxt, unlock_inp, unlock_udbinfo;
 
 	/* addr6 has been validated in udp6_send(). */
 	sin6 = (struct sockaddr_in6 *)addr6;
@@ -734,7 +734,22 @@ udp6_output(struct socket *so, int flags_arg, struct m
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
-	INP_RLOCK(inp);
+	/*
+	 * In the following cases we want a write lock on the inp for either
+	 * local operations or for possible route cache updates in the IPv6
+	 * output path:
+	 * - on connected sockets (sin6 is NULL) for route cache updates,
+	 * - when we are not bound to an address and source port (it is
+	 *   in6_pcbsetport() which will require the write lock).
+	 */
+	if (sin6 == NULL || (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
+	    inp->inp_lport == 0)) {
+		INP_WLOCK(inp);
+		unlock_inp = UH_WLOCKED;
+	} else {
+		INP_RLOCK(inp);
+		unlock_inp = UH_RLOCKED;
+	}
 	nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
 	    IPPROTO_UDP : IPPROTO_UDPLITE;
 
@@ -758,7 +773,10 @@ udp6_output(struct socket *so, int flags_arg, struct m
 			 * potential race in which the factors causing us to
 			 * select the UDPv4 output routine are invalidated?
 			 */
-			INP_RUNLOCK(inp);
+			if (unlock_inp == UH_WLOCKED)
+				INP_WUNLOCK(inp);
+			else
+				INP_RUNLOCK(inp);
 			if (sin6)
 				in6_sin6_2_sin_in_sock((struct sockaddr *)sin6);
 			pru = inetsw[ip_protox[nxt]].pr_usrreqs;
@@ -772,7 +790,10 @@ udp6_output(struct socket *so, int flags_arg, struct m
 	if (control) {
 		if ((error = ip6_setpktopts(control, &opt,
 		    inp->in6p_outputopts, td->td_ucred, nxt)) != 0) {
-			INP_RUNLOCK(inp);
+			if (unlock_inp == UH_WLOCKED)
+				INP_WUNLOCK(inp);
+			else
+				INP_RUNLOCK(inp);
 			ip6_clearpktopts(&opt, -1);
 			if (control)
 				m_freem(control);
@@ -786,12 +807,6 @@ udp6_output(struct socket *so, int flags_arg, struct m
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	if (sin6 != NULL &&
 	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) {
-		INP_RUNLOCK(inp);
-		/*
-		 * XXX there is a short window here which could lead to a race;
-		 * should we re-check that what got us here is still valid?
-		 */
-		INP_WLOCK(inp);
 		INP_HASH_WLOCK(pcbinfo);
 		unlock_udbinfo = UH_WLOCKED;
 	} else if (sin6 != NULL &&
@@ -972,9 +987,10 @@ udp6_output(struct socket *so, int flags_arg, struct m
 		UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
 	else
 		UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
-	error = ip6_output(m, optp, &inp->inp_route6, flags,
+	error = ip6_output(m, optp,
+	    (unlock_inp == UH_WLOCKED) ? &inp->inp_route6 : NULL, flags,
 	    inp->in6p_moptions, NULL, inp);
-	if (unlock_udbinfo == UH_WLOCKED)
+	if (unlock_inp == UH_WLOCKED)
 		INP_WUNLOCK(inp);
 	else
 		INP_RUNLOCK(inp);
@@ -987,12 +1003,20 @@ udp6_output(struct socket *so, int flags_arg, struct m
 
 release:
 	if (unlock_udbinfo == UH_WLOCKED) {
+		KASSERT(unlock_inp == UH_WLOCKED, ("%s: excl udbinfo lock, "
+		    "non-excl inp lock: pcbinfo %p %#x inp %p %#x",
+		    __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp));
 		INP_HASH_WUNLOCK(pcbinfo);
 		INP_WUNLOCK(inp);
 	} else if (unlock_udbinfo == UH_RLOCKED) {
+		KASSERT(unlock_inp == UH_RLOCKED, ("%s: non-excl udbinfo lock, "
+		    "excl inp lock: pcbinfo %p %#x inp %p %#x",
+		    __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp));
 		INP_HASH_RUNLOCK_ET(pcbinfo, et);
 		INP_RUNLOCK(inp);
-	} else
+	} else if (unlock_inp == UH_WLOCKED)
+		INP_WUNLOCK(inp);
+	else
 		INP_RUNLOCK(inp);
 	if (control) {
 		ip6_clearpktopts(&opt, -1);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201809191849.w8JIncmT028823>