Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 28 Jul 2015 15:42:20 +0300
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        Ermal =?iso-8859-1?Q?Lu=E7i?= <eri@FreeBSD.org>
Cc:        Olivier =?iso-8859-1?Q?Cochard-Labb=E9?= <olivier@cochard.me>, src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r285051 - head/sys/netinet
Message-ID:  <20150728124220.GW72729@FreeBSD.org>
In-Reply-To: <201507021810.t62IAgCc003272@repo.freebsd.org>
References:  <201507021810.t62IAgCc003272@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--apg+fY3UKMMABzWO
Content-Type: text/plain; charset=iso-8859-1
Content-Disposition: inline
Content-Transfer-Encoding: 8bit

  Ermal,

  see comments inlined,

On Thu, Jul 02, 2015 at 06:10:42PM +0000, Ermal Luçi wrote:
E> Author: eri
E> Date: Thu Jul  2 18:10:41 2015
E> New Revision: 285051
E> URL: https://svnweb.freebsd.org/changeset/base/285051
E> 
E> Log:
E>   Avoid doing multiple route lookups for the same destination IP during forwarding
E>   
E>   ip_forward() does a route lookup for testing this packet can be sent to a known destination,
E>   it also can do another route lookup if it detects that an ICMP redirect is needed,
E>   it forgets all of this and handovers to ip_output() to do the same lookup yet again.
E>   
E>   This optimisation just does one route lookup during the forwarding path and handovers that to be considered by ip_output().
E>   
E>   Differential Revision:	https://reviews.freebsd.org/D2964
E>   Approved by:	ae, gnn(mentor)
E>   MFC after:	1 week
E> 
E> Modified:
E>   head/sys/netinet/ip_input.c
E> 
E> Modified: head/sys/netinet/ip_input.c
E> ==============================================================================
E> --- head/sys/netinet/ip_input.c	Thu Jul  2 17:30:59 2015	(r285050)
E> +++ head/sys/netinet/ip_input.c	Thu Jul  2 18:10:41 2015	(r285051)
E> @@ -897,6 +897,7 @@ ip_forward(struct mbuf *m, int srcrt)
E>  	struct ip *ip = mtod(m, struct ip *);
E>  	struct in_ifaddr *ia;
E>  	struct mbuf *mcopy;
E> +	struct sockaddr_in *sin;
E>  	struct in_addr dest;
E>  	struct route ro;
E>  	int error, type = 0, code = 0, mtu = 0;
E> @@ -925,7 +926,22 @@ ip_forward(struct mbuf *m, int srcrt)
E>  	}
E>  #endif
E>  
E> -	ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
E> +	bzero(&ro, sizeof(ro));
E> +	sin = (struct sockaddr_in *)&ro.ro_dst;
E> +	sin->sin_family = AF_INET;
E> +	sin->sin_len = sizeof(*sin);
E> +	sin->sin_addr = ip->ip_dst;
E> +#ifdef RADIX_MPATH
E> +	rtalloc_mpath_fib(&ro,
E> +	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
E> +	    M_GETFIB(m));
E> +#else
E> +	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
E> +#endif
E> +	if (ro.ro_rt != NULL) {
E> +		ia = ifatoia(ro.ro_rt->rt_ifa);
E> +		ifa_ref(&ia->ia_ifa);
E> +	}
E>  #ifndef IPSEC
E>  	/*
E>  	 * 'ia' may be NULL if there is no route for this destination.
E> @@ -934,6 +950,7 @@ ip_forward(struct mbuf *m, int srcrt)
E>  	 */
E>  	if (!srcrt && ia == NULL) {
E>  		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
E> +		RO_RTFREE(&ro);
E>  		return;
E>  	}

Here the ifa reference is leaked upon return.


But don't hurry with fixing that :) Actually you don't need to ifa_ref()
in this function. You acquired a reference on rtentry in in_rtalloc_ign()
and hold it until RO_RTFREE(). And the rtentry itself always holds a
reference on the ifa. So, there is no reason to put extra reference on
the ifa.

The ip_output() was already improved in r262747. And ip_forward() can
also be. The only place that touches ia after RO_RTFREE() is EMSGSIZE
handling, this can be moved up before RO_RTFREE().

Here is suggested patch. Ermal and Oliver, can you please test/benchmark
it?

-- 
Totus tuus, Glebius.

--apg+fY3UKMMABzWO
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="ip_forward.diff"

Index: ip_input.c
===================================================================
--- ip_input.c	(revision 285945)
+++ ip_input.c	(working copy)
@@ -938,10 +938,9 @@ ip_forward(struct mbuf *m, int srcrt)
 #else
 	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
 #endif
-	if (ro.ro_rt != NULL) {
+	if (ro.ro_rt != NULL)
 		ia = ifatoia(ro.ro_rt->rt_ifa);
-		ifa_ref(&ia->ia_ifa);
-	} else
+	else
 		ia = NULL;
 #ifndef IPSEC
 	/*
@@ -1031,9 +1030,33 @@ ip_forward(struct mbuf *m, int srcrt)
 	}
 
 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
-
-	if (error == EMSGSIZE && ro.ro_rt)
-		mtu = ro.ro_rt->rt_mtu;
+	if (error == EMSGSIZE) {
+		if (ro.ro_rt != NULL)
+			mtu = ro.ro_rt->rt_mtu;
+#ifdef IPSEC
+		/* 
+		 * If IPsec is configured for this path,
+		 * override any possibly mtu value set by ip_output.
+		 */ 
+		mtu = ip_ipsec_mtu(mcopy, mtu);
+#endif /* IPSEC */
+		/*
+		 * If the MTU was set before make sure we are below the
+		 * interface MTU.
+		 * If the MTU wasn't set before use the interface mtu or
+		 * fall back to the next smaller mtu step compared to the
+		 * current packet size.
+		 */
+		if (mtu != 0) {
+			if (ia != NULL)
+				mtu = min(mtu, ia->ia_ifp->if_mtu);
+		} else {
+			if (ia != NULL)
+				mtu = ia->ia_ifp->if_mtu;
+			else
+				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
+		}
+	}
 	RO_RTFREE(&ro);
 
 	if (error)
@@ -1045,16 +1068,11 @@ ip_forward(struct mbuf *m, int srcrt)
 		else {
 			if (mcopy)
 				m_freem(mcopy);
-			if (ia != NULL)
-				ifa_free(&ia->ia_ifa);
 			return;
 		}
 	}
-	if (mcopy == NULL) {
-		if (ia != NULL)
-			ifa_free(&ia->ia_ifa);
+	if (mcopy == NULL)
 		return;
-	}
 
 	switch (error) {
 
@@ -1074,30 +1092,6 @@ ip_forward(struct mbuf *m, int srcrt)
 	case EMSGSIZE:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_NEEDFRAG;
-
-#ifdef IPSEC
-		/* 
-		 * If IPsec is configured for this path,
-		 * override any possibly mtu value set by ip_output.
-		 */ 
-		mtu = ip_ipsec_mtu(mcopy, mtu);
-#endif /* IPSEC */
-		/*
-		 * If the MTU was set before make sure we are below the
-		 * interface MTU.
-		 * If the MTU wasn't set before use the interface mtu or
-		 * fall back to the next smaller mtu step compared to the
-		 * current packet size.
-		 */
-		if (mtu != 0) {
-			if (ia != NULL)
-				mtu = min(mtu, ia->ia_ifp->if_mtu);
-		} else {
-			if (ia != NULL)
-				mtu = ia->ia_ifp->if_mtu;
-			else
-				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
-		}
 		IPSTAT_INC(ips_cantfrag);
 		break;
 
@@ -1104,12 +1098,8 @@ ip_forward(struct mbuf *m, int srcrt)
 	case ENOBUFS:
 	case EACCES:			/* ipfw denied packet */
 		m_freem(mcopy);
-		if (ia != NULL)
-			ifa_free(&ia->ia_ifa);
 		return;
 	}
-	if (ia != NULL)
-		ifa_free(&ia->ia_ifa);
 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
 }
 

--apg+fY3UKMMABzWO--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20150728124220.GW72729>