Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 17 Apr 2012 14:26:56 +0000 (UTC)
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r234382 - projects/pf/head/sys/contrib/pf/net
Message-ID:  <201204171426.q3HEQu0I085966@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: glebius
Date: Tue Apr 17 14:26:55 2012
New Revision: 234382
URL: http://svn.freebsd.org/changeset/base/234382

Log:
  Fixes to pf_route(), pf_route6().
  
  These functions are very special, they may call into send side of IP
  stack, for example they call ifp->if_output(), icmp_error(), icmp6_error(),
  ip6_output(), and they also can perform route lookup. Even more, they
  can call pf_test() or pf_test6() recursively! Hence, they need dropping
  any pf(4) locks before.
  
  Fortunately, pf_route() and pf_route6() are called only at the
  very end of packet processing, so we can drop the pf(4) locks
  with no need re-acquire them, thus:
  
  o Drop locks in pf_route(), pf_route6() in all error cases.
  o Drop locks in pf_route(), pf_route6() as soon as we get
    information from the state, or if there is no state, or
    if we don't need state.
  
  More cleanups and fixes to pf_route():
  - We don't need to check for m_len, pf_test() already did this.
  - To do route lookup we don't need to allocate 'struct route'
    on stack, we can just have a sockaddr and rtentry pointer.
  - We need rtentry for a quite short time, so it is better to
    avoid its unlocking and refcounting.
  - In case if new interface (after routing rule applied) is
    a loopback one, then put M_SKIP_FIREWALL on mbuf, otherwise
    we may end up with infinite processing. [1]
  - Update the copy-paste from ip_output() bringing in TSO and
    SCTP support.
  - Reduce number of byte order swaps.
  
  More cleanups and fixes to pf_route6():
  - We don't need to check for m_len, pf_test() already did this.
  - We don't need struct route_in6 on stack, a sockaddr_in6
    would be enough.
  - In case if new interface (after routing rule applied) is
    a loopback one, then put M_SKIP_FIREWALL on mbuf, otherwise
    we may end up with infinite processing. [1]
  
  Discussed with:		eri [1]

Modified:
  projects/pf/head/sys/contrib/pf/net/pf.c

Modified: projects/pf/head/sys/contrib/pf/net/pf.c
==============================================================================
--- projects/pf/head/sys/contrib/pf/net/pf.c	Tue Apr 17 13:44:40 2012	(r234381)
+++ projects/pf/head/sys/contrib/pf/net/pf.c	Tue Apr 17 14:26:55 2012	(r234382)
@@ -5112,18 +5112,13 @@ pf_route(struct mbuf **m, struct pf_rule
     struct pf_state *s, struct pf_pdesc *pd)
 {
 	struct mbuf		*m0, *m1;
-	struct route		 iproute;
-	struct route		*ro = NULL;
-	struct sockaddr_in	*dst;
+	struct sockaddr_in	dst;
 	struct ip		*ip;
 	struct ifnet		*ifp = NULL;
 	struct pf_addr		 naddr;
 	struct pf_src_node	*sn = NULL;
 	int			 error = 0;
 	int sw_csum;
-#ifdef IPSEC
-	struct m_tag		*mtag;
-#endif /* IPSEC */
 
 	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
 	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
@@ -5132,78 +5127,84 @@ pf_route(struct mbuf **m, struct pf_rule
 	if (pd->pf_mtag->routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
-		goto bad;
+		goto bad_locked;
 	}
 
 	if (r->rt == PF_DUPTO) {
-		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL)
+		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			PF_UNLOCK();
 			return;
+		}
 	} else {
-		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
+		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			PF_UNLOCK();
 			return;
+		}
 		m0 = *m;
 	}
 
-	if (m0->m_len < sizeof(struct ip)) {
-		DPFPRINTF(PF_DEBUG_URGENT,
-		    ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
-		goto bad;
-	}
-
 	ip = mtod(m0, struct ip *);
 
-	ro = &iproute;
-	bzero((caddr_t)ro, sizeof(*ro));
-	dst = satosin(&ro->ro_dst);
-	dst->sin_family = AF_INET;
-	dst->sin_len = sizeof(*dst);
-	dst->sin_addr = ip->ip_dst;
+	bzero(&dst, sizeof(dst));
+	dst.sin_family = AF_INET;
+	dst.sin_len = sizeof(dst);
+	dst.sin_addr = ip->ip_dst;
 
 	if (r->rt == PF_FASTROUTE) {
-		in_rtalloc_ign(ro, 0, M_GETFIB(m0));
-		if (ro->ro_rt == 0) {
+		struct rtentry *rt;
+
+		if (s)
+			PF_STATE_UNLOCK(s);
+		PF_UNLOCK();
+		rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0));
+		if (rt == NULL) {
+			RTFREE_LOCKED(rt);
 			KMOD_IPSTAT_INC(ips_noroute);
+			error = EHOSTUNREACH;
 			goto bad;
 		}
 
-		ifp = ro->ro_rt->rt_ifp;
-		ro->ro_rt->rt_use++;
+		ifp = rt->rt_ifp;
+		rt->rt_rmx.rmx_pksent++;
 
-		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
-			dst = satosin(ro->ro_rt->rt_gateway);
+		if (rt->rt_flags & RTF_GATEWAY)
+			bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst));
+		RTFREE_LOCKED(rt);
 	} else {
 		if (TAILQ_EMPTY(&r->rpool.list)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
-			goto bad;
+			goto bad_locked;
 		}
 		if (s == NULL) {
 			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
 			    &naddr, NULL, &sn);
 			if (!PF_AZERO(&naddr, AF_INET))
-				dst->sin_addr.s_addr = naddr.v4.s_addr;
+				dst.sin_addr.s_addr = naddr.v4.s_addr;
 			ifp = r->rpool.cur->kif ?
 			    r->rpool.cur->kif->pfik_ifp : NULL;
+			PF_UNLOCK();
 		} else {
 			if (!PF_AZERO(&s->rt_addr, AF_INET))
-				dst->sin_addr.s_addr =
+				dst.sin_addr.s_addr =
 				    s->rt_addr.v4.s_addr;
 			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
+			PF_STATE_UNLOCK(s);
+			PF_UNLOCK();
 		}
 	}
 	if (ifp == NULL)
 		goto bad;
 
 	if (oifp != ifp) {
-		PF_UNLOCK();
-		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) {
-			PF_LOCK();
+		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
-		} else if (m0 == NULL) {
-			PF_LOCK();
+		else if (m0 == NULL)
 			goto done;
-		}
-		PF_LOCK();
 		if (m0->m_len < sizeof(struct ip)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
@@ -5212,82 +5213,67 @@ pf_route(struct mbuf **m, struct pf_rule
 		ip = mtod(m0, struct ip *);
 	}
 
-	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
+	if (ifp->if_flags & IFF_LOOPBACK)
+		m0->m_flags |= M_SKIP_FIREWALL;
+
+	/* Back to host byte order. */
+	ip->ip_len = ntohs(ip->ip_len);
+	ip->ip_off = ntohs(ip->ip_off);
+
+	/* Copied from FreeBSD 10.0-CURRENT ip_output. */
 	m0->m_pkthdr.csum_flags |= CSUM_IP;
 	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
 	if (sw_csum & CSUM_DELAY_DATA) {
-		/*
-		 * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least)
-		 */
-		NTOHS(ip->ip_len);
-		NTOHS(ip->ip_off);	/* XXX: needed? */
 		in_delayed_cksum(m0);
-		HTONS(ip->ip_len);
-		HTONS(ip->ip_off);
 		sw_csum &= ~CSUM_DELAY_DATA;
 	}
+#ifdef SCTP
+	if (sw_csum & CSUM_SCTP) {
+		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
+		sw_csum &= ~CSUM_SCTP;
+	}
+#endif
 	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
 
-	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
-	    (ifp->if_hwassist & CSUM_FRAGMENT &&
-	    ((ip->ip_off & htons(IP_DF)) == 0))) {
-		/*
-		 * ip->ip_len = htons(ip->ip_len);
-		 * ip->ip_off = htons(ip->ip_off);
-		 */
+        /*
+	 * If small enough for interface, or the interface will take
+	 * care of the fragmentation for us, we can just send directly.
+	 */
+	if (ip->ip_len <= ifp->if_mtu ||
+	    (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
+	    ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
+		ip->ip_len = htons(ip->ip_len);
+		ip->ip_off = htons(ip->ip_off);
 		ip->ip_sum = 0;
-		if (sw_csum & CSUM_DELAY_IP) {
-			/* From KAME */
-			if (ip->ip_v == IPVERSION &&
-			    (ip->ip_hl << 2) == sizeof(*ip)) {
-				ip->ip_sum = in_cksum_hdr(ip);
-			} else {
-				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
-			}
-		}
-		PF_UNLOCK();
-		error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro);
-		PF_LOCK();
+		if (sw_csum & CSUM_DELAY_IP)
+			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
+		m0->m_flags &= ~(M_PROTOFLAGS);
+		error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
 		goto done;
 	}
 
-	/*
-	 * Too large for interface; fragment if possible.
-	 * Must be able to put at least 8 bytes per fragment.
-	 */
-	if (ip->ip_off & htons(IP_DF)) {
+	/* Balk when DF bit is set or the interface didn't support TSO. */
+	if ((ip->ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
+		error = EMSGSIZE; 
 		KMOD_IPSTAT_INC(ips_cantfrag);
 		if (r->rt != PF_DUPTO) {
-			/* icmp_error() expects host byte ordering */
-			NTOHS(ip->ip_len);
-			NTOHS(ip->ip_off);
-			PF_UNLOCK();
 			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
 			    ifp->if_mtu);
-			PF_LOCK();
 			goto done;
 		} else
 			goto bad;
 	}
 
-	m1 = m0;
-	/*
-	 * XXX: is cheaper + less error prone than own function
-	 */
-	NTOHS(ip->ip_len);
-	NTOHS(ip->ip_off);
 	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
 	if (error)
 		goto bad;
 
-	for (m0 = m1; m0; m0 = m1) {
+	for (; m0; m0 = m1) {
 		m1 = m0->m_nextpkt;
-		m0->m_nextpkt = 0;
+		m0->m_nextpkt = NULL;
 		if (error == 0) {
-			PF_UNLOCK();
-			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
-			    NULL);
-			PF_LOCK();
+			m0->m_flags &= ~(M_PROTOFLAGS);
+			error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
 		} else
 			m_freem(m0);
 	}
@@ -5298,10 +5284,12 @@ pf_route(struct mbuf **m, struct pf_rule
 done:
 	if (r->rt != PF_DUPTO)
 		*m = NULL;
-	if (ro == &iproute && ro->ro_rt)
-		RTFREE(ro->ro_rt);
 	return;
 
+bad_locked:
+	if (s)
+		PF_STATE_UNLOCK(s);
+	PF_UNLOCK();
 bad:
 	m_freem(m0);
 	goto done;
@@ -5314,9 +5302,7 @@ pf_route6(struct mbuf **m, struct pf_rul
     struct pf_state *s, struct pf_pdesc *pd)
 {
 	struct mbuf		*m0;
-	struct route_in6	 ip6route;
-	struct route_in6	*ro;
-	struct sockaddr_in6	*dst;
+	struct sockaddr_in6	dst;
 	struct ip6_hdr		*ip6;
 	struct ifnet		*ifp = NULL;
 	struct pf_addr		 naddr;
@@ -5329,36 +5315,39 @@ pf_route6(struct mbuf **m, struct pf_rul
 	if (pd->pf_mtag->routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
-		goto bad;
+		goto bad_locked;
 	}
 
 	if (r->rt == PF_DUPTO) {
-		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL)
+		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			PF_UNLOCK();
 			return;
+		}
 	} else {
-		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
+		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
+			if (s)
+				PF_STATE_UNLOCK(s);
+			PF_UNLOCK();
 			return;
+		}
 		m0 = *m;
 	}
 
-	if (m0->m_len < sizeof(struct ip6_hdr)) {
-		DPFPRINTF(PF_DEBUG_URGENT,
-		    ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", __func__));
-		goto bad;
-	}
 	ip6 = mtod(m0, struct ip6_hdr *);
 
-	ro = &ip6route;
-	bzero((caddr_t)ro, sizeof(*ro));
-	dst = (struct sockaddr_in6 *)&ro->ro_dst;
-	dst->sin6_family = AF_INET6;
-	dst->sin6_len = sizeof(*dst);
-	dst->sin6_addr = ip6->ip6_dst;
+	bzero(&dst, sizeof(dst));
+	dst.sin6_family = AF_INET6;
+	dst.sin6_len = sizeof(dst);
+	dst.sin6_addr = ip6->ip6_dst;
 
 	/* Cheat. XXX why only in the v6 case??? */
 	if (r->rt == PF_FASTROUTE) {
-		m0->m_flags |= M_SKIP_FIREWALL;
+		if (s)
+			PF_STATE_UNLOCK(s);
 		PF_UNLOCK();
+		m0->m_flags |= M_SKIP_FIREWALL;
 		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 		return;
 	}
@@ -5366,34 +5355,34 @@ pf_route6(struct mbuf **m, struct pf_rul
 	if (TAILQ_EMPTY(&r->rpool.list)) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
-		goto bad;
+		goto bad_locked;
 	}
 	if (s == NULL) {
 		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
 		    &naddr, NULL, &sn);
 		if (!PF_AZERO(&naddr, AF_INET6))
-			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
+			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
 			    &naddr, AF_INET6);
 		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
 	} else {
 		if (!PF_AZERO(&s->rt_addr, AF_INET6))
-			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
+			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
 			    &s->rt_addr, AF_INET6);
 		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
 	}
+
+	if (s)
+		PF_STATE_UNLOCK(s);
+	PF_UNLOCK();
+
 	if (ifp == NULL)
 		goto bad;
 
 	if (oifp != ifp) {
-		PF_UNLOCK();
-		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) {
-			PF_LOCK();
+		if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
-		} else if (m0 == NULL) {
-			PF_LOCK();
+		else if (m0 == NULL)
 			goto done;
-		}
-		PF_LOCK();
 		if (m0->m_len < sizeof(struct ip6_hdr)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("%s: m0->m_len < sizeof(struct ip6_hdr)\n",
@@ -5403,23 +5392,22 @@ pf_route6(struct mbuf **m, struct pf_rul
 		ip6 = mtod(m0, struct ip6_hdr *);
 	}
 
+	if (ifp->if_flags & IFF_LOOPBACK)
+		m0->m_flags |= M_SKIP_FIREWALL;
+
 	/*
 	 * If the packet is too large for the outgoing interface,
 	 * send back an icmp6 error.
 	 */
-	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
-		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
-	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
-		PF_UNLOCK();
-		nd6_output(ifp, ifp, m0, dst, NULL);
-		PF_LOCK();
-	} else {
+	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
+		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
+	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu)
+		nd6_output(ifp, ifp, m0, &dst, NULL);
+	else {
 		in6_ifstat_inc(ifp, ifs6_in_toobig);
-		if (r->rt != PF_DUPTO) {
-			PF_UNLOCK();
+		if (r->rt != PF_DUPTO)
 			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
-			PF_LOCK();
-		} else
+		else
 			goto bad;
 	}
 
@@ -5428,6 +5416,10 @@ done:
 		*m = NULL;
 	return;
 
+bad_locked:
+	if (s)
+		PF_STATE_UNLOCK(s);
+	PF_UNLOCK();
 bad:
 	m_freem(m0);
 	goto done;
@@ -5916,9 +5908,11 @@ done:
 		action = PF_PASS;
 		break;
 	default:
-		/* pf_route can free the mbuf causing *m0 to become NULL */
-		if (r->rt)
+		/* pf_route() returns unlocked. */
+		if (r->rt) {
 			pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
+			return (action);
+		}
 		break;
 	}
 	if (s)
@@ -6297,9 +6291,11 @@ done:
 		action = PF_PASS;
 		break;
 	default:
-		/* pf_route6 can free the mbuf causing *m0 to become NULL */
-		if (r->rt)
+		/* pf_route6() returns unlocked. */
+		if (r->rt) {
 			pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
+			return (action);
+		}
 		break;
 	}
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204171426.q3HEQu0I085966>