Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 26 Jul 2006 11:35:14 -0700
From:      Julian Elischer <julian@elischer.org>
To:        FreeBSD Net <freebsd-net@freebsd.org>
Subject:   [patch] RFC: allow divert from layer 2 ipfw (e.g. bridge) 
Message-ID:  <44C7B5E2.5080001@elischer.org>

next in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------010107020001000001000102
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit


This code is running on quite a few systems, but in a very limited 
environment that may not test all possibilities.

Does anyone have comments or suggestions as to changes that I might make
for checkin into generic FreeBSD? It was originally written for 4.x but 
with 6.x in mind.
It is now running on 6.1 and seems to be ok so far.

Certainly I am interested in hearing from Robert and Luigi, and I am 
particularly interested in
what people think about how this will handle locking/SMP difficulties.


--------------010107020001000001000102
Content-Type: text/plain; x-mac-type="0"; x-mac-creator="0";
	name="link_divert.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="link_divert.patch"

Only in ./sys/i386/compile: MESSAGING_GATEWAY.i386
Only in ./sys/i386/conf: MESSAGING_GATEWAY.i386
diff -upr ../src/sys/net/bridge.c ./sys/net/bridge.c
--- ../src/sys/net/bridge.c	Thu May 25 13:03:05 2006
+++ ./sys/net/bridge.c	Fri Jun  9 11:19:58 2006
@@ -88,6 +88,7 @@
  *  - loop detection is still not very robust.
  */
 
+#include "opt_ipdivert.h"
 #include <sys/param.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
@@ -109,12 +110,15 @@
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
+#include <netinet/ip_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 
 #include <net/route.h>
 #include <netinet/ip_fw.h>
+#include <netinet/ip_divert.h>
 #include <netinet/ip_dummynet.h>
+#include <netinet/ip_var.h>
 #include <net/bridge.h>
 
 /*--------------------*/
@@ -1107,7 +1111,30 @@ bdg_forward(struct mbuf *m0, struct ifne
 
 	if (i == 0) /* a PASS rule.  */
 	    goto forward;
-	if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) {
+
+	/* To get here it is either a dummynet thing or a divert/tee */
+	if ((i & IP_FW_DUMMYNET) == 0) {
+#ifdef IPDIVERT
+		struct mbuf *clone = NULL;
+
+		/* Deliver packet to divert input routine */
+		/* Clone packet if we're doing a 'tee' */
+		if ((i & IP_FW_TEE) != 0) {
+			clone = m_dup(m0, M_DONTWAIT);
+			if (clone) {
+				if (clone->m_pkthdr.rcvif) {
+					ip_divert_enqueue_ptr(clone);
+				} else {
+					ip_divert_ptr(clone, 0);
+				}
+			}
+			goto forward;
+		} else {
+			ip_divert_enqueue_ptr(m0);
+			return (NULL);
+		}
+#endif
+	} else if (DUMMYNET_LOADED) {
 	    /*
 	     * Pass the pkt to dummynet, which consumes it.
 	     * If shared, make a copy and keep the original.
Only in ./sys/net: bridge.c~
diff -upr ../src/sys/net/if_ethersubr.c ./sys/net/if_ethersubr.c
--- ../src/sys/net/if_ethersubr.c	Thu May 25 13:03:19 2006
+++ ./sys/net/if_ethersubr.c	Fri Jun  9 11:31:01 2006
@@ -34,6 +34,7 @@
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipx.h"
+#include "opt_ipdivert.h"
 #include "opt_bdg.h"
 #include "opt_mac.h"
 #include "opt_netgraph.h"
@@ -67,8 +68,10 @@
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet/ip_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip_fw.h>
+#include <netinet/ip_divert.h>
 #include <netinet/ip_dummynet.h>
 #endif
 #ifdef INET6
@@ -377,6 +380,19 @@ ether_output_frame_pre_netgraph(struct i
 			return (0);
 	}
 
+	if (BDG_ACTIVE(ifp)) {
+		/*
+		 * Beware, the bridge code notices the null rcvif and
+		 * uses that identify that it's being called from
+		 * ether_output as opposd to ether_input.  Yech.
+		 */
+		m->m_pkthdr.rcvif = NULL;
+		m = bdg_forward_ptr(m, ifp);
+		if (m != NULL)
+			m_freem(m);
+		return (0);
+	}
+
 	return ether_output_frame(ifp, m);
 }
 
@@ -396,18 +412,33 @@ ether_output_frame(struct ifnet *ifp, st
 #endif
 	int error;
 
-	if (rule == NULL && BDG_ACTIVE(ifp)) {
-		/*
-		 * Beware, the bridge code notices the null rcvif and
-		 * uses that identify that it's being called from
-		 * ether_output as opposd to ether_input.  Yech.
-		 */
-		m->m_pkthdr.rcvif = NULL;
-		m = bdg_forward_ptr(m, ifp);
-		if (m != NULL)
-			m_freem(m);
-		return (0);
+#ifdef IPDIVERT
+	/*
+	 * It's either a dummynet thing or a divert (but not both).
+	 */
+	if ((IP_FW_DUMMYNET) == 0) {
+		struct mbuf *clone = NULL;
+
+		/* Deliver packet to divert input routine */
+		/* Clone packet if we're doing a 'tee' */
+		if ((IP_FW_TEE) != 0) {
+			clone = m_dup(m, M_DONTWAIT);
+			if (clone) {
+				if (clone->m_pkthdr.rcvif) {
+					ip_divert_enqueue_ptr(clone);
+				} else {
+					ip_divert_ptr(clone, 0);
+				}
+			}
+			return (1);
+		} else {
+			ip_divert_enqueue_ptr(m);
+			m = NULL;
+			return (0);
+		}
 	}
+#endif
+
 #if defined(INET) || defined(INET6)
 	if (IPFW_LOADED && ether_ipfw != 0) {
 		if (ether_ipfw_chk(&m, ifp, &rule, 0) == 0) {
@@ -499,6 +530,33 @@ ether_ipfw_chk(struct mbuf **m0, struct 
 	if (i == IP_FW_PASS) /* a PASS rule.  */
 		return 1;
 
+#ifdef IPDIVERT
+	/*
+	 * It's either a dummynet thing or a divert (but not both).
+	 */
+	if ((i & IP_FW_DUMMYNET) == 0) {
+		struct mbuf *clone = NULL;
+
+		/* Deliver packet to divert input routine */
+		/* Clone packet if we're doing a 'tee' */
+		if ((i & IP_FW_TEE) != 0) {
+			clone = m_dup(*m0, M_DONTWAIT);
+			if (clone) {
+				if (clone->m_pkthdr.rcvif) {
+					ip_divert_enqueue_ptr(clone);
+				} else {
+					ip_divert_ptr(clone, 0);
+				}
+			}
+			return (1);
+		} else {
+			ip_divert_ptr(*m0, (*m0)->m_pkthdr.rcvif?1:0);
+			*m0 = NULL;
+			return (0);
+		}
+	}
+#endif
+
 	if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) {
 		/*
 		 * Pass the pkt to dummynet, which consumes it.
@@ -656,6 +714,11 @@ ether_demux(struct ifnet *ifp, struct mb
 #if defined(INET) || defined(INET6)
 	struct ip_fw *rule = ip_dn_claim_rule(m);
 #endif
+	/* Discard packet if interface is not up */
+	if ((ifp->if_flags & IFF_UP) == 0) {
+		m_freem(m);
+		return;
+	}
 
 	KASSERT(ifp != NULL, ("ether_demux: NULL interface pointer"));
 
@@ -667,6 +730,17 @@ ether_demux(struct ifnet *ifp, struct mb
 		goto post_stats;
 #endif
 
+
+#ifdef DEV_CARP
+pre_stats:
+#endif
+	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+		if (bcmp(etherbroadcastaddr, eh->ether_dhost,
+		    sizeof(etherbroadcastaddr)) == 0)
+			m->m_flags |= M_BCAST;
+		else
+			m->m_flags |= M_MCAST;
+    } else {
 	if (!(BDG_ACTIVE(ifp)) && !(ifp->if_bridge) &&
 	    !((ether_type == ETHERTYPE_VLAN || m->m_flags & M_VLANTAG) &&
 	    ifp->if_nvlans > 0)) {
@@ -711,22 +785,7 @@ ether_demux(struct ifnet *ifp, struct mb
 			}
 		}
 	}
-
-#ifdef DEV_CARP
-pre_stats:
-#endif
-	/* Discard packet if interface is not up */
-	if ((ifp->if_flags & IFF_UP) == 0) {
-		m_freem(m);
-		return;
-	}
-	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
-		if (bcmp(etherbroadcastaddr, eh->ether_dhost,
-		    sizeof(etherbroadcastaddr)) == 0)
-			m->m_flags |= M_BCAST;
-		else
-			m->m_flags |= M_MCAST;
-	}
+    }
 	if (m->m_flags & (M_BCAST|M_MCAST))
 		ifp->if_imcasts++;
 
Only in ./sys/net: if_ethersubr.c~
diff -upr ../src/sys/net/netisr.h ./sys/net/netisr.h
--- ../src/sys/net/netisr.h	Thu Jan  6 17:45:35 2005
+++ ./sys/net/netisr.h	Thu Jun  8 13:51:56 2006
@@ -50,6 +50,7 @@
  */
 #define	NETISR_POLL	0		/* polling callback, must be first */
 #define	NETISR_IP	2		/* same as AF_INET */
+#define	NETISR_DIVERT	3		/* For diverting level2 packets. */
 #define	NETISR_ROUTE	14		/* routing socket */
 #define	NETISR_AARP	15		/* Appletalk ARP */
 #define	NETISR_ATALK2	16		/* Appletalk phase 2 */
diff -upr ../src/sys/netinet/ip_divert.c ./sys/netinet/ip_divert.c
--- ../src/sys/netinet/ip_divert.c	Wed Nov 16 02:31:22 2005
+++ ./sys/netinet/ip_divert.c	Fri Jun  9 12:04:13 2006
@@ -61,7 +61,9 @@
 #include <vm/uma.h>
 
 #include <net/if.h>
+#include <net/if_var.h>
 #include <net/route.h>
+#include <net/netisr.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
@@ -110,6 +112,9 @@
 static struct inpcbhead divcb;
 static struct inpcbinfo divcbinfo;
 
+static void	divertintr(struct mbuf *m);
+static struct	ifqueue divertintrq;
+static int	div_intrqmax = IFQ_MAXLEN;	/* was 50 */
 static u_long	div_sendspace = DIVSNDQ;	/* XXX sysctl ? */
 static u_long	div_recvspace = DIVRCVQ;	/* XXX sysctl ? */
 
@@ -132,6 +137,9 @@ div_init(void)
 	divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
+	divertintrq.ifq_maxlen = div_intrqmax;
+	mtx_init(&divertintrq.ifq_mtx, "div_inq", NULL, MTX_DEF);
+	netisr_register(NETISR_DIVERT, divertintr, &divertintrq, 0);
 }
 
 /*
@@ -262,6 +268,53 @@ divert_packet(struct mbuf *m, int incomi
         }
 }
 
+void divert_enqueue(struct mbuf *m);
+/*
+ * enqueue a packet for processing after netisr has been activated
+ */
+void
+divert_enqueue(struct mbuf *m)
+{
+	struct socket *sa;
+	struct inpcb *inp;
+	u_int16_t nport;
+	struct m_tag *mtag;
+
+	mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
+	if (mtag == NULL) {
+		printf("%s: no divert tag\n", __func__);
+		m_freem(m);
+		return;
+	}
+
+	/* XXX Just checking if we even have a listenner.
+	 * probably not safe to scan this list like this.
+	 * as it could be in the middle of being fiddled.
+	 */
+	sa = NULL;
+	nport = htons((u_int16_t)divert_info(mtag));
+	LIST_FOREACH(inp, &divcb, inp_list) {
+		if (inp->inp_lport == nport) {
+			sa = inp->inp_socket;
+			break;
+		}
+	}
+	if (sa == NULL) {
+		m_freem(m);
+	} else {
+		netisr_queue(NETISR_DIVERT, m);
+	}
+}
+
+static void
+divertintr(struct mbuf *m)
+{
+		if (m == 0 || (m->m_flags & M_PKTHDR) == 0)
+			panic("divertintr");
+
+		divert_packet(m, 1);
+}
+
 /*
  * Deliver packet back into the IP processing machinery.
  *
@@ -674,6 +740,7 @@ div_modevent(module_t mod, int type, voi
 		 */
 		err = pf_proto_register(PF_INET, &div_protosw);
 		ip_divert_ptr = divert_packet;
+		ip_divert_enqueue_ptr = divert_enqueue;
 		break;
 	case MOD_QUIESCE:
 		/*
@@ -703,7 +770,9 @@ div_modevent(module_t mod, int type, voi
 			break;
 		}
 		ip_divert_ptr = NULL;
+		ip_divert_enqueue_ptr = NULL;
 		err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
+		netisr_unregister(NETISR_DIVERT);
 		INP_INFO_WUNLOCK(&divcbinfo);
 		INP_INFO_LOCK_DESTROY(&divcbinfo);
 		uma_zdestroy(divcbinfo.ipi_zone);
Only in ./sys/netinet: ip_divert.c~
diff -upr ../src/sys/netinet/ip_divert.h ./sys/netinet/ip_divert.h
--- ../src/sys/netinet/ip_divert.h	Tue Oct 19 14:14:57 2004
+++ ./sys/netinet/ip_divert.h	Fri Jun  9 11:21:06 2006
@@ -80,8 +80,10 @@ divert_find_info(struct mbuf *m)
 	return mtag ? divert_info(mtag) : 0;
 }
 
+typedef	void ip_divert_packet_enqueue_t(struct mbuf *m);
 typedef	void ip_divert_packet_t(struct mbuf *m, int incoming);
 extern	ip_divert_packet_t *ip_divert_ptr;
+extern	ip_divert_packet_enqueue_t *ip_divert_enqueue_ptr;
 
 extern	void div_init(void);
 extern	void div_input(struct mbuf *, int);
Only in ./sys/netinet: ip_divert.h~
diff -upr ../src/sys/netinet/ip_fw2.c ./sys/netinet/ip_fw2.c
--- ../src/sys/netinet/ip_fw2.c	Fri Jun  9 12:08:46 2006
+++ ./sys/netinet/ip_fw2.c	Thu Jun  8 13:51:56 2006
@@ -3046,8 +3046,10 @@ check_body:
 			case O_TEE: {
 				struct divert_tag *dt;
 
+#if 0
 				if (args->eh) /* not on layer 2 */
 					break;
+#endif
 				mtag = m_tag_get(PACKET_TAG_DIVERT,
 						sizeof(struct divert_tag),
 						M_NOWAIT);
diff -upr ../src/sys/netinet/ip_fw_pfil.c ./sys/netinet/ip_fw_pfil.c
--- ../src/sys/netinet/ip_fw_pfil.c	Sat Feb 11 00:19:37 2006
+++ ./sys/netinet/ip_fw_pfil.c	Fri Jun  9 12:06:31 2006
@@ -71,6 +71,7 @@ ip_dn_ruledel_t	*ip_dn_ruledel_ptr = NUL
 
 /* Divert hooks. */
 ip_divert_packet_t *ip_divert_ptr = NULL;
+ip_divert_packet_enqueue_t *ip_divert_enqueue_ptr = NULL;
 
 /* ng_ipfw hooks. */
 ng_ipfw_input_t *ng_ipfw_input_p = NULL;
Only in ./sys/netinet: ip_fw_pfil.c~
--- sys/net/bridge.c.orig	Tue Jun 13 13:29:27 2006
+++ sys/net/bridge.c	Tue Jun 13 13:31:54 2006
@@ -854,8 +854,16 @@ bridge_in(struct ifnet *ifp, struct mbuf
 	else
 	    dst = BDG_DROP;
     } else {
-	if (dst == ifp)
-	    dst = BDG_DROP;
+	/*
+	 * This is so that we can use a "half bridge" and not have
+	 * packets discarded just because the destination is out the same
+	 * interface. We only use this when we are firewalling it so the
+	 * packet will get clobbered by the firewall anyhow before we send it.
+	 */
+	if (ifp2sc[ifp->if_index].cluster->ports > 1) {
+	    if (dst == ifp)
+		dst = BDG_DROP;
+	}
     }
     DPRINTF(("%s: %6D ->%6D ty 0x%04x dst %s\n", __func__,
 	eh->ether_shost, ".",

--------------010107020001000001000102--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?44C7B5E2.5080001>