From owner-freebsd-net@FreeBSD.ORG Wed Jul 26 18:35:15 2006 Return-Path: X-Original-To: freebsd-net@freebsd.org Delivered-To: freebsd-net@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id EA6AB16A4DF for ; Wed, 26 Jul 2006 18:35:15 +0000 (UTC) (envelope-from prvs=julian=355ecf7dd@elischer.org) Received: from a50.ironport.com (a50.ironport.com [63.251.108.112]) by mx1.FreeBSD.org (Postfix) with ESMTP id 3729B43D55 for ; Wed, 26 Jul 2006 18:35:15 +0000 (GMT) (envelope-from prvs=julian=355ecf7dd@elischer.org) Received: from unknown (HELO [10.251.18.229]) ([10.251.18.229]) by a50.ironport.com with ESMTP; 26 Jul 2006 11:35:14 -0700 Message-ID: <44C7B5E2.5080001@elischer.org> Date: Wed, 26 Jul 2006 11:35:14 -0700 From: Julian Elischer User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.7.13) Gecko/20060414 X-Accept-Language: en-us, en MIME-Version: 1.0 To: FreeBSD Net Content-Type: multipart/mixed; boundary="------------010107020001000001000102" Subject: [patch] RFC: allow divert from layer 2 ipfw (e.g. bridge) X-BeenThere: freebsd-net@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Networking and TCP/IP with FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 26 Jul 2006 18:35:16 -0000 This is a multi-part message in MIME format. --------------010107020001000001000102 Content-Type: text/plain; charset=us-ascii; format=flowed Content-Transfer-Encoding: 7bit This code is running on quite a few systems but in a very limited environment that may not test all possibilities.. Does anyone have comments or suggestions as to changes that I might make for checkin into generic FreeBSD? It was originally written for 4.x but with 6.x in mind. It is now running on 6.1 and seems to be ok so far. 
Certainly I am interested in hearing from Robert and Luigi and I am particularly interested in what people think on how this will handle locking/SMP difficulties. --------------010107020001000001000102 Content-Type: text/plain; x-mac-type="0"; x-mac-creator="0"; name="link_divert.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="link_divert.patch" Only in ./sys/i386/compile: MESSAGING_GATEWAY.i386 Only in ./sys/i386/conf: MESSAGING_GATEWAY.i386 diff -upr ../src/sys/net/bridge.c ./sys/net/bridge.c --- ../src/sys/net/bridge.c Thu May 25 13:03:05 2006 +++ ./sys/net/bridge.c Fri Jun 9 11:19:58 2006 @@ -88,6 +88,7 @@ * - loop detection is still not very robust. */ +#include "opt_ipdivert.h" #include #include #include @@ -109,12 +110,15 @@ #include #include #include +#include #include #include #include #include +#include #include +#include #include /*--------------------*/ @@ -1107,7 +1111,30 @@ bdg_forward(struct mbuf *m0, struct ifne if (i == 0) /* a PASS rule. */ goto forward; - if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) { + + /* To get here it is either a dummynet thing or a divert/tee */ + if ((i & IP_FW_DUMMYNET) == 0) { +#ifdef IPDIVERT + struct mbuf *clone = NULL; + + /* Deliver packet to divert input routine */ + /* Clone packet if we're doing a 'tee' */ + if ((i & IP_FW_TEE) != 0) { + clone = m_dup(m0, M_DONTWAIT); + if (clone) { + if (clone->m_pkthdr.rcvif) { + ip_divert_enqueue_ptr(clone); + } else { + ip_divert_ptr(clone, 0); + } + } + goto forward; + } else { + ip_divert_enqueue_ptr(m0); + return (NULL); + } +#endif + } else if (DUMMYNET_LOADED) { /* * Pass the pkt to dummynet, which consumes it. * If shared, make a copy and keep the original. 
Only in ./sys/net: bridge.c~ diff -upr ../src/sys/net/if_ethersubr.c ./sys/net/if_ethersubr.c --- ../src/sys/net/if_ethersubr.c Thu May 25 13:03:19 2006 +++ ./sys/net/if_ethersubr.c Fri Jun 9 11:31:01 2006 @@ -34,6 +34,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipx.h" +#include "opt_ipdivert.h" #include "opt_bdg.h" #include "opt_mac.h" #include "opt_netgraph.h" @@ -67,8 +68,10 @@ #if defined(INET) || defined(INET6) #include #include +#include #include #include +#include #include #endif #ifdef INET6 @@ -377,6 +380,19 @@ ether_output_frame_pre_netgraph(struct i return (0); } + if (BDG_ACTIVE(ifp)) { + /* + * Beware, the bridge code notices the null rcvif and + * uses that identify that it's being called from + * ether_output as opposd to ether_input. Yech. + */ + m->m_pkthdr.rcvif = NULL; + m = bdg_forward_ptr(m, ifp); + if (m != NULL) + m_freem(m); + return (0); + } + return ether_output_frame(ifp, m); } @@ -396,18 +412,33 @@ ether_output_frame(struct ifnet *ifp, st #endif int error; - if (rule == NULL && BDG_ACTIVE(ifp)) { - /* - * Beware, the bridge code notices the null rcvif and - * uses that identify that it's being called from - * ether_output as opposd to ether_input. Yech. - */ - m->m_pkthdr.rcvif = NULL; - m = bdg_forward_ptr(m, ifp); - if (m != NULL) - m_freem(m); - return (0); +#ifdef IPDIVERT + /* + * It's either a dummynet thing or a divert (but not both). 
+ */ + if ((IP_FW_DUMMYNET) == 0) { + struct mbuf *clone = NULL; + + /* Deliver packet to divert input routine */ + /* Clone packet if we're doing a 'tee' */ + if ((IP_FW_TEE) != 0) { + clone = m_dup(m, M_DONTWAIT); + if (clone) { + if (clone->m_pkthdr.rcvif) { + ip_divert_enqueue_ptr(clone); + } else { + ip_divert_ptr(clone, 0); + } + } + return (1); + } else { + ip_divert_enqueue_ptr(m); + m = NULL; + return (0); + } } +#endif + #if defined(INET) || defined(INET6) if (IPFW_LOADED && ether_ipfw != 0) { if (ether_ipfw_chk(&m, ifp, &rule, 0) == 0) { @@ -499,6 +530,33 @@ ether_ipfw_chk(struct mbuf **m0, struct if (i == IP_FW_PASS) /* a PASS rule. */ return 1; +#ifdef IPDIVERT + /* + * It's either a dummynet thing or a divert (but not both). + */ + if ((i & IP_FW_DUMMYNET) == 0) { + struct mbuf *clone = NULL; + + /* Deliver packet to divert input routine */ + /* Clone packet if we're doing a 'tee' */ + if ((i & IP_FW_TEE) != 0) { + clone = m_dup(*m0, M_DONTWAIT); + if (clone) { + if (clone->m_pkthdr.rcvif) { + ip_divert_enqueue_ptr(clone); + } else { + ip_divert_ptr(clone, 0); + } + } + return (1); + } else { + ip_divert_ptr(*m0, (*m0)->m_pkthdr.rcvif?1:0); + *m0 = NULL; + return (0); + } + } +#endif + if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) { /* * Pass the pkt to dummynet, which consumes it. 
@@ -656,6 +714,11 @@ ether_demux(struct ifnet *ifp, struct mb #if defined(INET) || defined(INET6) struct ip_fw *rule = ip_dn_claim_rule(m); #endif + /* Discard packet if interface is not up */ + if ((ifp->if_flags & IFF_UP) == 0) { + m_freem(m); + return; + } KASSERT(ifp != NULL, ("ether_demux: NULL interface pointer")); @@ -667,6 +730,17 @@ ether_demux(struct ifnet *ifp, struct mb goto post_stats; #endif + +#ifdef DEV_CARP +pre_stats: +#endif + if (ETHER_IS_MULTICAST(eh->ether_dhost)) { + if (bcmp(etherbroadcastaddr, eh->ether_dhost, + sizeof(etherbroadcastaddr)) == 0) + m->m_flags |= M_BCAST; + else + m->m_flags |= M_MCAST; + } else { if (!(BDG_ACTIVE(ifp)) && !(ifp->if_bridge) && !((ether_type == ETHERTYPE_VLAN || m->m_flags & M_VLANTAG) && ifp->if_nvlans > 0)) { @@ -711,22 +785,7 @@ ether_demux(struct ifnet *ifp, struct mb } } } - -#ifdef DEV_CARP -pre_stats: -#endif - /* Discard packet if interface is not up */ - if ((ifp->if_flags & IFF_UP) == 0) { - m_freem(m); - return; - } - if (ETHER_IS_MULTICAST(eh->ether_dhost)) { - if (bcmp(etherbroadcastaddr, eh->ether_dhost, - sizeof(etherbroadcastaddr)) == 0) - m->m_flags |= M_BCAST; - else - m->m_flags |= M_MCAST; - } + } if (m->m_flags & (M_BCAST|M_MCAST)) ifp->if_imcasts++; Only in ./sys/net: if_ethersubr.c~ diff -upr ../src/sys/net/netisr.h ./sys/net/netisr.h --- ../src/sys/net/netisr.h Thu Jan 6 17:45:35 2005 +++ ./sys/net/netisr.h Thu Jun 8 13:51:56 2006 @@ -50,6 +50,7 @@ */ #define NETISR_POLL 0 /* polling callback, must be first */ #define NETISR_IP 2 /* same as AF_INET */ +#define NETISR_DIVERT 3 /* For diverting level2 packets. 
*/ #define NETISR_ROUTE 14 /* routing socket */ #define NETISR_AARP 15 /* Appletalk ARP */ #define NETISR_ATALK2 16 /* Appletalk phase 2 */ diff -upr ../src/sys/netinet/ip_divert.c ./sys/netinet/ip_divert.c --- ../src/sys/netinet/ip_divert.c Wed Nov 16 02:31:22 2005 +++ ./sys/netinet/ip_divert.c Fri Jun 9 12:04:13 2006 @@ -61,7 +61,9 @@ #include #include +#include #include +#include #include #include @@ -110,6 +112,9 @@ static struct inpcbhead divcb; static struct inpcbinfo divcbinfo; +static void divertintr(struct mbuf *m); +static struct ifqueue divertintrq; +static int div_intrqmax = IFQ_MAXLEN; /* was 50 */ static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ @@ -132,6 +137,9 @@ div_init(void) divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(divcbinfo.ipi_zone, maxsockets); + divertintrq.ifq_maxlen = div_intrqmax; + mtx_init(&divertintrq.ifq_mtx, "div_inq", NULL, MTX_DEF); + netisr_register(NETISR_DIVERT, divertintr, &divertintrq, 0); } /* @@ -262,6 +268,53 @@ divert_packet(struct mbuf *m, int incomi } } +void divert_enqueue(struct mbuf *m); +/* + * enqueue a packet for processing after netisr has been activated + */ +void +divert_enqueue(struct mbuf *m) +{ + struct socket *sa; + struct inpcb *inp; + u_int16_t nport; + struct m_tag *mtag; + + mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); + if (mtag == NULL) { + printf("%s: no divert tag\n", __func__); + m_freem(m); + return; + } + + /* XXX Just checking if we even have a listenner. + * probably not safe to scan this list like this. + * as it could be in the middle of being fiddled. 
+ */ + sa = NULL; + nport = htons((u_int16_t)divert_info(mtag)); + LIST_FOREACH(inp, &divcb, inp_list) { + if (inp->inp_lport == nport) { + sa = inp->inp_socket; + break; + } + } + if (sa == NULL) { + m_freem(m); + } else { + netisr_queue(NETISR_DIVERT, m); + } +} + +static void +divertintr(struct mbuf *m) +{ + if (m == 0 || (m->m_flags & M_PKTHDR) == 0) + panic("divertintr"); + + divert_packet(m, 1); +} + /* * Deliver packet back into the IP processing machinery. * @@ -674,6 +740,7 @@ div_modevent(module_t mod, int type, voi */ err = pf_proto_register(PF_INET, &div_protosw); ip_divert_ptr = divert_packet; + ip_divert_enqueue_ptr = divert_enqueue; break; case MOD_QUIESCE: /* @@ -703,7 +770,9 @@ div_modevent(module_t mod, int type, voi break; } ip_divert_ptr = NULL; + ip_divert_enqueue_ptr = NULL; err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW); + netisr_unregister(NETISR_DIVERT); INP_INFO_WUNLOCK(&divcbinfo); INP_INFO_LOCK_DESTROY(&divcbinfo); uma_zdestroy(divcbinfo.ipi_zone); Only in ./sys/netinet: ip_divert.c~ diff -upr ../src/sys/netinet/ip_divert.h ./sys/netinet/ip_divert.h --- ../src/sys/netinet/ip_divert.h Tue Oct 19 14:14:57 2004 +++ ./sys/netinet/ip_divert.h Fri Jun 9 11:21:06 2006 @@ -80,8 +80,10 @@ divert_find_info(struct mbuf *m) return mtag ? 
divert_info(mtag) : 0; } +typedef void ip_divert_packet_enqueue_t(struct mbuf *m); typedef void ip_divert_packet_t(struct mbuf *m, int incoming); extern ip_divert_packet_t *ip_divert_ptr; +extern ip_divert_packet_enqueue_t *ip_divert_enqueue_ptr; extern void div_init(void); extern void div_input(struct mbuf *, int); Only in ./sys/netinet: ip_divert.h~ diff -upr ../src/sys/netinet/ip_fw2.c ./sys/netinet/ip_fw2.c --- ../src/sys/netinet/ip_fw2.c Fri Jun 9 12:08:46 2006 +++ ./sys/netinet/ip_fw2.c Thu Jun 8 13:51:56 2006 @@ -3046,8 +3046,10 @@ check_body: case O_TEE: { struct divert_tag *dt; +#if 0 if (args->eh) /* not on layer 2 */ break; +#endif mtag = m_tag_get(PACKET_TAG_DIVERT, sizeof(struct divert_tag), M_NOWAIT); diff -upr ../src/sys/netinet/ip_fw_pfil.c ./sys/netinet/ip_fw_pfil.c --- ../src/sys/netinet/ip_fw_pfil.c Sat Feb 11 00:19:37 2006 +++ ./sys/netinet/ip_fw_pfil.c Fri Jun 9 12:06:31 2006 @@ -71,6 +71,7 @@ ip_dn_ruledel_t *ip_dn_ruledel_ptr = NUL /* Divert hooks. */ ip_divert_packet_t *ip_divert_ptr = NULL; +ip_divert_packet_enqueue_t *ip_divert_enqueue_ptr = NULL; /* ng_ipfw hooks. */ ng_ipfw_input_t *ng_ipfw_input_p = NULL; Only in ./sys/netinet: ip_fw_pfil.c~ --- sys/net/bridge.c.orig Tue Jun 13 13:29:27 2006 +++ sys/net/bridge.c Tue Jun 13 13:31:54 2006 @@ -854,8 +854,16 @@ bridge_in(struct ifnet *ifp, struct mbuf else dst = BDG_DROP; } else { - if (dst == ifp) - dst = BDG_DROP; + /* + * This is so that we can use a "half bridge" and not have + * packets discarded just because the destination is out the same + * interface. We only use this when we are firewalling it so the + * packet will get clobbered by the firewall anyhow before we send it. + */ + if (ifp2sc[ifp->if_index].cluster->ports > 1) { + if (dst == ifp) + dst = BDG_DROP; + } } DPRINTF(("%s: %6D ->%6D ty 0x%04x dst %s\n", __func__, eh->ether_shost, ".", --------------010107020001000001000102--