From owner-svn-src-user@FreeBSD.ORG Mon Mar 15 18:27:51 2010 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id DA6231065670; Mon, 15 Mar 2010 18:27:51 +0000 (UTC) (envelope-from luigi@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id C79AA8FC0A; Mon, 15 Mar 2010 18:27:51 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o2FIRpi2041112; Mon, 15 Mar 2010 18:27:51 GMT (envelope-from luigi@svn.freebsd.org) Received: (from luigi@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o2FIRp27041092; Mon, 15 Mar 2010 18:27:51 GMT (envelope-from luigi@svn.freebsd.org) Message-Id: <201003151827.o2FIRp27041092@svn.freebsd.org> From: Luigi Rizzo Date: Mon, 15 Mar 2010 18:27:51 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r205182 - in user/luigi/ipfw3-r8: sbin/ipfw sys/conf sys/net sys/netinet sys/netinet/ipfw X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 15 Mar 2010 18:27:52 -0000 Author: luigi Date: Mon Mar 15 18:27:51 2010 New Revision: 205182 URL: http://svn.freebsd.org/changeset/base/205182 Log: almost completed merge of ipfw3 from HEAD into my working tree Modified: user/luigi/ipfw3-r8/sbin/ipfw/dummynet.c user/luigi/ipfw3-r8/sbin/ipfw/ipfw2.c user/luigi/ipfw3-r8/sbin/ipfw/ipfw2.h user/luigi/ipfw3-r8/sys/conf/files user/luigi/ipfw3-r8/sys/net/if_bridge.c user/luigi/ipfw3-r8/sys/net/if_ethersubr.c user/luigi/ipfw3-r8/sys/net/radix.c 
user/luigi/ipfw3-r8/sys/net/radix.h user/luigi/ipfw3-r8/sys/net/route.c user/luigi/ipfw3-r8/sys/netinet/ip_divert.c user/luigi/ipfw3-r8/sys/netinet/ip_divert.h user/luigi/ipfw3-r8/sys/netinet/ip_fw.h user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dn_io.c user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dn_private.h user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dummynet.c user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw2.c user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw_dynamic.c user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw_log.c user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw_table.c user/luigi/ipfw3-r8/sys/netinet/raw_ip.c Modified: user/luigi/ipfw3-r8/sbin/ipfw/dummynet.c ============================================================================== --- user/luigi/ipfw3-r8/sbin/ipfw/dummynet.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sbin/ipfw/dummynet.c Mon Mar 15 18:27:51 2010 (r205182) @@ -141,7 +141,8 @@ print_mask(struct ipfw_flow_id *id) { if (!IS_IP6_FLOW_ID(id)) { printf(" " - "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", + "mask: %s 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", + id->extra ? "queue," : "", id->proto, id->src_ip, id->src_port, id->dst_ip, id->dst_port); @@ -151,7 +152,8 @@ print_mask(struct ipfw_flow_id *id) "Tot_pkt/bytes Pkt/Byte Drp\n"); } else { char buf[255]; - printf("\n mask: proto: 0x%02x, flow_id: 0x%08x, ", + printf("\n mask: %sproto: 0x%02x, flow_id: 0x%08x, ", + id->extra ? "queue," : "", id->proto, id->flow_id6); inet_ntop(AF_INET6, &(id->src_ip6), buf, sizeof(buf)); printf("%s/0x%04x -> ", buf, id->src_port); @@ -175,7 +177,8 @@ list_flow(struct dn_flow *ni) pe = getprotobynumber(id->proto); /* XXX: Should check for IPv4 flows */ - printf("%3u ", (ni->oid.id) & 0xff); + printf("%3u%c", (ni->oid.id) & 0xff, + id->extra ? 
'*' : ' '); if (!IS_IP6_FLOW_ID(id)) { if (pe) printf("%-4s ", pe->p_name); @@ -910,6 +913,7 @@ ipfw_config_pipe(int ac, char **av) case TOK_ALL: /* * special case, all bits significant + * except 'extra' (the queue number) */ mask->dst_ip = ~0; mask->src_ip = ~0; @@ -922,6 +926,11 @@ ipfw_config_pipe(int ac, char **av) *flags |= DN_HAVE_MASK; goto end_mask; + case TOK_QUEUE: + mask->extra = ~0; + *flags |= DN_HAVE_MASK; + goto end_mask; + case TOK_DSTIP: mask->addr_type = 4; p32 = &mask->dst_ip; @@ -992,7 +1001,7 @@ ipfw_config_pipe(int ac, char **av) if (a > 0xFF) errx(EX_DATAERR, "proto mask must be 8 bit"); - fs->flow_mask.proto = (uint8_t)a; + mask->proto = (uint8_t)a; } if (a != 0) *flags |= DN_HAVE_MASK; Modified: user/luigi/ipfw3-r8/sbin/ipfw/ipfw2.c ============================================================================== --- user/luigi/ipfw3-r8/sbin/ipfw/ipfw2.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sbin/ipfw/ipfw2.c Mon Mar 15 18:27:51 2010 (r205182) @@ -231,7 +231,7 @@ static struct _s_x rule_action_params[] */ static int lookup_key[] = { TOK_DSTIP, TOK_SRCIP, TOK_DSTPORT, TOK_SRCPORT, - TOK_UID, TOK_JAIL, -1 }; + TOK_UID, TOK_JAIL, TOK_DSCP, -1 }; static struct _s_x rule_options[] = { { "tagged", TOK_TAGGED }, @@ -258,6 +258,7 @@ static struct _s_x rule_options[] = { { "iplen", TOK_IPLEN }, { "ipid", TOK_IPID }, { "ipprecedence", TOK_IPPRECEDENCE }, + { "dscp", TOK_DSCP }, { "iptos", TOK_IPTOS }, { "ipttl", TOK_IPTTL }, { "ipversion", TOK_IPVER }, @@ -3519,7 +3520,7 @@ read_options: char *p; int j; - if (av[0] && av[1]) + if (!av[0] || !av[1]) errx(EX_USAGE, "format: lookup argument tablenum"); cmd->opcode = O_IP_DST_LOOKUP; cmd->len |= F_INSN_SIZE(ipfw_insn) + 2; Modified: user/luigi/ipfw3-r8/sbin/ipfw/ipfw2.h ============================================================================== --- user/luigi/ipfw3-r8/sbin/ipfw/ipfw2.h Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sbin/ipfw/ipfw2.h Mon Mar 15 18:27:51 
2010 (r205182) @@ -125,6 +125,7 @@ enum tokens { TOK_IPLEN, TOK_IPID, TOK_IPPRECEDENCE, + TOK_DSCP, TOK_IPTOS, TOK_IPTTL, TOK_IPVER, Modified: user/luigi/ipfw3-r8/sys/conf/files ============================================================================== --- user/luigi/ipfw3-r8/sys/conf/files Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/conf/files Mon Mar 15 18:27:51 2010 (r205182) @@ -2474,13 +2474,24 @@ netinet/in_proto.c optional inet \ compile-with "${NORMAL_C} -I$S/contrib/pf" netinet/in_rmx.c optional inet netinet/ip_divert.c optional inet ipdivert ipfirewall +netinet/ipfw/dn_heap.c optional inet dummynet +netinet/ipfw/dn_sched_fifo.c optional inet dummynet +netinet/ipfw/dn_sched_rr.c optional inet dummynet +netinet/ipfw/dn_sched_wf2q.c optional inet dummynet +netinet/ipfw/dn_sched_qfq.c optional inet dummynet netinet/ipfw/ip_dummynet.c optional inet dummynet +netinet/ipfw/ip_dn_io.c optional inet dummynet +netinet/ipfw/ip_dn_glue.c optional inet dummynet netinet/ip_ecn.c optional inet | inet6 netinet/ip_encap.c optional inet | inet6 netinet/ip_fastfwd.c optional inet netinet/ipfw/ip_fw2.c optional inet ipfirewall \ compile-with "${NORMAL_C} -I$S/contrib/pf" +netinet/ipfw/ip_fw_dynamic.c optional inet ipfirewall +netinet/ipfw/ip_fw_log.c optional inet ipfirewall netinet/ipfw/ip_fw_pfil.c optional inet ipfirewall +netinet/ipfw/ip_fw_sockopt.c optional inet ipfirewall +netinet/ipfw/ip_fw_table.c optional inet ipfirewall netinet/ipfw/ip_fw_nat.c optional inet ipfirewall_nat netinet/ip_icmp.c optional inet netinet/ip_input.c optional inet Modified: user/luigi/ipfw3-r8/sys/net/if_bridge.c ============================================================================== --- user/luigi/ipfw3-r8/sys/net/if_bridge.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/net/if_bridge.c Mon Mar 15 18:27:51 2010 (r205182) @@ -134,7 +134,7 @@ __FBSDID("$FreeBSD$"); #include #include -#include +#include /* * Size of the route hash table. 
Must be a power of two. @@ -3038,20 +3038,28 @@ bridge_pfil(struct mbuf **mp, struct ifn goto bad; } - if (V_ip_fw_chk_ptr && pfil_ipfw != 0 && dir == PFIL_OUT && ifp != NULL) { - struct dn_pkt_tag *dn_tag; + /* XXX this section is also in if_ethersubr.c */ + // XXX PFIL_OUT or DIR_OUT ? + if (V_ip_fw_chk_ptr && pfil_ipfw != 0 && + dir == PFIL_OUT && ifp != NULL) { + struct m_tag *mtag; error = -1; - dn_tag = ip_dn_claim_tag(*mp); - if (dn_tag != NULL) { - if (dn_tag->rule != NULL && V_fw_one_pass) - /* packet already partially processed */ + /* fetch the start point from existing tags, if any */ + mtag = m_tag_locate(*mp, MTAG_IPFW_RULE, 0, NULL); + if (mtag == NULL) { + args.rule.slot = 0; + } else { + struct ipfw_rule_ref *r; + + /* XXX can we free the tag after use ? */ + mtag->m_tag_id = PACKET_TAG_NONE; + r = (struct ipfw_rule_ref *)(mtag + 1); + /* packet already partially processed ? */ + if (r->info & IPFW_ONEPASS) goto ipfwpass; - args.rule = dn_tag->rule; /* matching rule to restart */ - args.rule_id = dn_tag->rule_id; - args.chain_id = dn_tag->chain_id; - } else - args.rule = NULL; + args.rule = *r; + } args.m = *mp; args.oif = ifp; @@ -3077,7 +3085,7 @@ bridge_pfil(struct mbuf **mp, struct ifn * packet will return to us via bridge_dummynet(). 
*/ args.oif = ifp; - ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args); + ip_dn_io_ptr(mp, DIR_FWD | PROTO_IFB, &args); return (error); } Modified: user/luigi/ipfw3-r8/sys/net/if_ethersubr.c ============================================================================== --- user/luigi/ipfw3-r8/sys/net/if_ethersubr.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/net/if_ethersubr.c Mon Mar 15 18:27:51 2010 (r205182) @@ -70,9 +70,9 @@ #include #include #include -#include -#include #include +#include +#include #endif #ifdef INET6 #include @@ -466,19 +466,23 @@ ether_ipfw_chk(struct mbuf **m0, struct struct mbuf *m; int i; struct ip_fw_args args; - struct dn_pkt_tag *dn_tag; - - dn_tag = ip_dn_claim_tag(*m0); + struct m_tag *mtag; - if (dn_tag != NULL) { - if (dn_tag->rule != NULL && V_fw_one_pass) + /* fetch start point from rule, if any */ + mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL); + if (mtag == NULL) { + args.rule.slot = 0; + } else { /* dummynet packet, already partially processed */ + struct ipfw_rule_ref *r; + + /* XXX can we free it after use ? */ + mtag->m_tag_id = PACKET_TAG_NONE; + r = (struct ipfw_rule_ref *)(mtag + 1); + if (r->info & IPFW_ONEPASS) return (1); - args.rule = dn_tag->rule; /* matching rule to restart */ - args.rule_id = dn_tag->rule_id; - args.chain_id = dn_tag->chain_id; - } else - args.rule = NULL; + args.rule = *r; + } /* * I need some amt of data to be contiguous, and in case others need @@ -529,6 +533,7 @@ ether_ipfw_chk(struct mbuf **m0, struct return 1; if (ip_dn_io_ptr && (i == IP_FW_DUMMYNET)) { + int dir; /* * Pass the pkt to dummynet, which consumes it. * If shared, make a copy and keep the original. @@ -544,7 +549,8 @@ ether_ipfw_chk(struct mbuf **m0, struct */ *m0 = NULL ; } - ip_dn_io_ptr(&m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args); + dir = PROTO_LAYER2 | (dst ? 
DIR_OUT : DIR_IN); + ip_dn_io_ptr(&m, dir, &args); return 0; } /* Modified: user/luigi/ipfw3-r8/sys/net/radix.c ============================================================================== --- user/luigi/ipfw3-r8/sys/net/radix.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/net/radix.c Mon Mar 15 18:27:51 2010 (r205182) @@ -33,7 +33,6 @@ /* * Routines to build and maintain radix trees for routing lookups. */ -#ifndef _RADIX_H_ #include #ifdef _KERNEL #include @@ -41,20 +40,21 @@ #include #include #include -#include -#else -#include -#endif #include #include -#endif - #include "opt_mpath.h" - #ifdef RADIX_MPATH #include #endif - +#else /* !_KERNEL */ +#include +#include +#include +#define log(x, arg...) fprintf(stderr, ## arg) +#define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1) +#define min(a, b) ((a) < (b) ? (a) : (b) ) +#include +#endif /* !_KERNEL */ static int rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f, void *w); @@ -72,6 +72,8 @@ static struct radix_node_head *mask_rnhe /* * Work area -- the following point to 3 buffers of size max_keylen, * allocated in this order in a block of memory malloc'ed by rn_init. + * rn_zeros, rn_ones are set in rn_init and used in readonly afterwards. + * addmask_key is used in rn_addmask in rw mode and not thread-safe. */ static char *rn_zeros, *rn_ones, *addmask_key; @@ -135,8 +137,9 @@ static int rn_satisfies_leaf(char *trial * To make the assumption more explicit, we use the LEN() macro to access * this field. It is safe to pass an expression with side effects * to LEN() as the argument is evaluated only once. + * We cast the result to int as this is the dominant usage. 
*/ -#define LEN(x) (*(const u_char *)(x)) +#define LEN(x) ( (int) (*(const u_char *)(x)) ) /* * XXX THIS NEEDS TO BE FIXED @@ -197,7 +200,7 @@ rn_refines(m_arg, n_arg) { register caddr_t m = m_arg, n = n_arg; register caddr_t lim, lim2 = lim = n + LEN(n); - int longer = LEN(n++) - (int)LEN(m++); + int longer = LEN(n++) - LEN(m++); int masks_are_equal = 1; if (longer > 0) @@ -250,10 +253,10 @@ rn_satisfies_leaf(trial, leaf, skip) char *cplim; int length = min(LEN(cp), LEN(cp2)); - if (cp3 == 0) + if (cp3 == NULL) cp3 = rn_ones; else - length = min(length, *(u_char *)cp3); + length = min(length, LEN(cp3)); cplim = cp + length; cp3 += skip; cp2 += skip; for (cp += skip; cp < cplim; cp++, cp2++, cp3++) if ((*cp ^ *cp2) & *cp3) @@ -424,7 +427,7 @@ rn_insert(v_arg, head, dupentry, nodes) { caddr_t v = v_arg; struct radix_node *top = head->rnh_treetop; - int head_off = top->rn_offset, vlen = (int)LEN(v); + int head_off = top->rn_offset, vlen = LEN(v); register struct radix_node *t = rn_search(v_arg, top); register caddr_t cp = v + head_off; register int b; @@ -933,7 +936,7 @@ on1: if (m) log(LOG_ERR, "rn_delete: Orphaned Mask %p at %p\n", - (void *)m, (void *)x); + m, x); } } /* @@ -1158,17 +1161,28 @@ rn_inithead(head, off) return (1); } +#if 0 +int rn_detachhead(void **head) +{ + struct radix_node_head *rnh; + + KASSERT((head != NULL && *head != NULL), + ("%s: head already freed", __func__)); + rnh = *head; + + /* Free nodes. 
*/ + Free(rnh); + + *head = NULL; + return (1); +} +#endif // XXX void -rn_init() +rn_init(int maxk) { char *cp, *cplim; -#ifdef _KERNEL - struct domain *dom; - for (dom = domains; dom; dom = dom->dom_next) - if (dom->dom_maxrtkey > max_keylen) - max_keylen = dom->dom_maxrtkey; -#endif + max_keylen = maxk; if (max_keylen == 0) { log(LOG_ERR, "rn_init: radix functions require max_keylen be set\n"); Modified: user/luigi/ipfw3-r8/sys/net/radix.h ============================================================================== --- user/luigi/ipfw3-r8/sys/net/radix.h Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/net/radix.h Mon Mar 15 18:27:51 2010 (r205182) @@ -160,8 +160,9 @@ struct radix_node_head { #define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED) #endif /* _KERNEL */ -void rn_init(void); +void rn_init(int); int rn_inithead(void **, int); +int rn_detachhead(void **); int rn_refines(void *, void *); struct radix_node *rn_addmask(void *, int, int), Modified: user/luigi/ipfw3-r8/sys/net/route.c ============================================================================== --- user/luigi/ipfw3-r8/sys/net/route.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/net/route.c Mon Mar 15 18:27:51 2010 (r205182) @@ -169,13 +169,20 @@ rt_tables_get_rnh(int table, int fam) static void route_init(void) { + struct domain *dom; + int max_keylen = 0; /* whack the tunable ints into line. 
*/ if (rt_numfibs > RT_MAXFIBS) rt_numfibs = RT_MAXFIBS; if (rt_numfibs == 0) rt_numfibs = 1; - rn_init(); /* initialize all zeroes, all ones, mask table */ + + for (dom = domains; dom; dom = dom->dom_next) + if (dom->dom_maxrtkey > max_keylen) + max_keylen = dom->dom_maxrtkey; + + rn_init(max_keylen); /* init all zeroes, all ones, mask table */ } SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); Modified: user/luigi/ipfw3-r8/sys/netinet/ip_divert.c ============================================================================== --- user/luigi/ipfw3-r8/sys/netinet/ip_divert.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/netinet/ip_divert.c Mon Mar 15 18:27:51 2010 (r205182) @@ -32,14 +32,10 @@ __FBSDID("$FreeBSD$"); #if !defined(KLD_MODULE) #include "opt_inet.h" -#include "opt_ipfw.h" #include "opt_sctp.h" #ifndef INET #error "IPDIVERT requires INET." #endif -#ifndef IPFIREWALL -#error "IPDIVERT requires IPFIREWALL" -#endif #endif #include @@ -72,9 +68,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include -#include #ifdef SCTP #include #endif @@ -92,27 +86,29 @@ __FBSDID("$FreeBSD$"); #define DIVRCVQ (65536 + 100) /* - * Divert sockets work in conjunction with ipfw, see the divert(4) - * manpage for features. - * Internally, packets selected by ipfw in ip_input() or ip_output(), - * and never diverted before, are passed to the input queue of the - * divert socket with a given 'divert_port' number (as specified in - * the matching ipfw rule), and they are tagged with a 16 bit cookie - * (representing the rule number of the matching ipfw rule), which - * is passed to process reading from the socket. + * Divert sockets work in conjunction with ipfw or other packet filters, + * see the divert(4) manpage for features. 
+ * Packets are selected by the packet filter and tagged with an + * MTAG_IPFW_RULE tag carrying the 'divert port' number (as set by + * the packet filter) and information on the matching filter rule for + * subsequent reinjection. The divert_port is used to put the packet + * on the corresponding divert socket, while the rule number is passed + * up (at least partially) as the sin_port in the struct sockaddr. * - * Packets written to the divert socket are again tagged with a cookie - * (usually the same as above) and a destination address. - * If the destination address is INADDR_ANY then the packet is - * treated as outgoing and sent to ip_output(), otherwise it is - * treated as incoming and sent to ip_input(). - * In both cases, the packet is tagged with the cookie. + * Packets written to the divert socket carry in sin_addr a + * destination address, and in sin_port the number of the filter rule + * after which to continue processing. + * If the destination address is INADDR_ANY, the packet is treated as + * outgoing and sent to ip_output(); otherwise it is treated as + * incoming and sent to ip_input(). + * Further, sin_zero carries some information on the interface, + * which can be used in the reinject -- see comments in the code. * * On reinjection, processing in ip_input() and ip_output() * will be exactly the same as for the original packet, except that - * ipfw processing will start at the rule number after the one - * written in the cookie (so, tagging a packet with a cookie of 0 - * will cause it to be effectively considered as a standard packet). + * packet filter processing will start at the rule number after the one + * written in the sin_port (ipfw does not allow a rule #0, so sin_port=0 + * will apply the entire ruleset to the packet). */ /* Internal variables. */ @@ -193,7 +189,7 @@ div_destroy(void) * IPPROTO_DIVERT is not in the real IP protocol number space; this * function should never be called. Just in case, drop any packets. 
*/ -void +static void div_input(struct mbuf *m, int off) { @@ -217,9 +213,8 @@ divert_packet(struct mbuf *m, int incomi struct sockaddr_in divsrc; struct m_tag *mtag; - mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); + mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); if (mtag == NULL) { - printf("%s: no divert tag\n", __func__); m_freem(m); return; } @@ -244,14 +239,15 @@ divert_packet(struct mbuf *m, int incomi ip->ip_len = htons(ip->ip_len); } #endif + bzero(&divsrc, sizeof(divsrc)); + divsrc.sin_len = sizeof(divsrc); + divsrc.sin_family = AF_INET; + /* record matching rule, in host format */ + divsrc.sin_port = ((struct ipfw_rule_ref *)(mtag+1))->rulenum; /* * Record receive interface address, if any. * But only for incoming packets. */ - bzero(&divsrc, sizeof(divsrc)); - divsrc.sin_len = sizeof(divsrc); - divsrc.sin_family = AF_INET; - divsrc.sin_port = divert_cookie(mtag); /* record matching rule */ if (incoming) { struct ifaddr *ifa; struct ifnet *ifp; @@ -299,7 +295,7 @@ divert_packet(struct mbuf *m, int incomi /* Put packet on socket queue, if any */ sa = NULL; - nport = htons((u_int16_t)divert_info(mtag)); + nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info)); INP_INFO_RLOCK(&V_divcbinfo); LIST_FOREACH(inp, &V_divcb, inp_list) { /* XXX why does only one socket match? 
*/ @@ -338,7 +334,7 @@ div_output(struct socket *so, struct mbu struct mbuf *control) { struct m_tag *mtag; - struct divert_tag *dt; + struct ipfw_rule_ref *dt; int error = 0; struct mbuf *options; @@ -353,23 +349,31 @@ div_output(struct socket *so, struct mbu if (control) m_freem(control); /* XXX */ - if ((mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL)) == NULL) { - mtag = m_tag_get(PACKET_TAG_DIVERT, sizeof(struct divert_tag), - M_NOWAIT | M_ZERO); + mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); + if (mtag == NULL) { + /* this should be normal */ + mtag = m_tag_alloc(MTAG_IPFW_RULE, 0, + sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO); if (mtag == NULL) { error = ENOBUFS; goto cantsend; } - dt = (struct divert_tag *)(mtag+1); m_tag_prepend(m, mtag); - } else - dt = (struct divert_tag *)(mtag+1); + } + dt = (struct ipfw_rule_ref *)(mtag+1); /* Loopback avoidance and state recovery */ if (sin) { int i; - dt->cookie = sin->sin_port; + /* set the starting point. We provide a non-zero slot, + * but a non_matching chain_id to skip that info and use + * the rulenum/rule_id. + */ + dt->slot = 1; /* dummy, chain_id is invalid */ + dt->chain_id = 0; + dt->rulenum = sin->sin_port+1; /* host format ? */ + dt->rule_id = 0; /* * Find receive interface with the given name, stuffed * (if it exists) in the sin_zero[] field. @@ -387,7 +391,7 @@ div_output(struct socket *so, struct mbu struct ip *const ip = mtod(m, struct ip *); struct inpcb *inp; - dt->info |= IP_FW_DIVERT_OUTPUT_FLAG; + dt->info |= IPFW_IS_DIVERT | IPFW_INFO_OUT; INP_INFO_WLOCK(&V_divcbinfo); inp = sotoinpcb(so); INP_RLOCK(inp); @@ -453,7 +457,7 @@ div_output(struct socket *so, struct mbu m_freem(options); } } else { - dt->info |= IP_FW_DIVERT_LOOPBACK_FLAG; + dt->info |= IPFW_IS_DIVERT | IPFW_INFO_IN; if (m->m_pkthdr.rcvif == NULL) { /* * No luck with the name, check by IP address. 
@@ -587,7 +591,7 @@ div_send(struct socket *so, int flags, s return div_output(so, m, (struct sockaddr_in *)nam, control); } -void +static void div_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct in_addr faddr; @@ -800,5 +804,5 @@ static moduledata_t ipdivertmod = { }; DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); -MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); +MODULE_DEPEND(ipdivert, ipfw, 2, 2, 2); MODULE_VERSION(ipdivert, 1); Modified: user/luigi/ipfw3-r8/sys/netinet/ip_divert.h ============================================================================== --- user/luigi/ipfw3-r8/sys/netinet/ip_divert.h Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/netinet/ip_divert.h Mon Mar 15 18:27:51 2010 (r205182) @@ -36,53 +36,20 @@ #define _NETINET_IP_DIVERT_H_ /* - * Sysctl declaration. + * divert has no custom kernel-userland API. + * + * All communication occurs through a sockaddr_in socket where + * + * kernel-->userland + * sin_port = matching rule, host format; + * sin_addr = IN: first address of the incoming interface; + * OUT: INADDR_ANY + * sin_zero = if fits, the interface name (max 7 bytes + NUL) + * + * userland->kernel + * sin_port = restart-rule - 1, host order + * (we restart at sin_port + 1) + * sin_addr = IN: address of the incoming interface; + * OUT: INADDR_ANY */ -#ifdef SYSCTL_DECL -SYSCTL_DECL(_net_inet_divert); -#endif - -/* - * Divert socket definitions. - */ -struct divert_tag { - u_int32_t info; /* port & flags */ - u_int16_t cookie; /* ipfw rule number */ -}; - -/* - * Return the divert cookie associated with the mbuf; if any. - */ -static __inline u_int16_t -divert_cookie(struct m_tag *mtag) -{ - return ((struct divert_tag *)(mtag+1))->cookie; -} -static __inline u_int16_t -divert_find_cookie(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); - return mtag ? divert_cookie(mtag) : 0; -} - -/* - * Return the divert info associated with the mbuf; if any. 
- */ -static __inline u_int32_t -divert_info(struct m_tag *mtag) -{ - return ((struct divert_tag *)(mtag+1))->info; -} -static __inline u_int32_t -divert_find_info(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); - return mtag ? divert_info(mtag) : 0; -} - -typedef void ip_divert_packet_t(struct mbuf *m, int incoming); -extern ip_divert_packet_t *ip_divert_ptr; - -extern void div_input(struct mbuf *, int); -extern void div_ctlinput(int, struct sockaddr *, void *); #endif /* _NETINET_IP_DIVERT_H_ */ Modified: user/luigi/ipfw3-r8/sys/netinet/ip_fw.h ============================================================================== --- user/luigi/ipfw3-r8/sys/netinet/ip_fw.h Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/netinet/ip_fw.h Mon Mar 15 18:27:51 2010 (r205182) @@ -487,24 +487,27 @@ struct ip_fw { #define RULESIZE(rule) (sizeof(struct ip_fw) + \ ((struct ip_fw *)(rule))->cmd_len * 4 - 4) -#if 1 // moved to in.h +#if 1 // should be moved to in.h /* * This structure is used as a flow mask and a flow id for various * parts of the code. + * addr_type is used in userland and kernel to mark the address type. + * fib is used in the kernel to record the fib in use. + * _flags is used in the kernel to store tcp flags for dynamic rules. */ struct ipfw_flow_id { uint32_t dst_ip; uint32_t src_ip; uint16_t dst_port; uint16_t src_port; - uint8_t fib; - uint8_t proto; - uint8_t flags; /* protocol-specific flags */ - uint8_t addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */ + uint8_t fib; + uint8_t proto; + uint8_t _flags; /* protocol-specific flags */ + uint8_t addr_type; /* 4=ip4, 6=ip6, 1=ether ? 
*/ struct in6_addr dst_ip6; struct in6_addr src_ip6; uint32_t flow_id6; - uint32_t frag_id6; + uint32_t extra; /* queue/pipe or frag_id */ }; #endif Modified: user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dn_io.c ============================================================================== --- user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dn_io.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dn_io.c Mon Mar 15 18:27:51 2010 (r205182) @@ -113,6 +113,10 @@ SYSCTL_INT(_net_inet_ip_dummynet, OID_AU CTLFLAG_RW, &dn_cfg.io_fast, 0, "Enable fast dummynet io."); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dn_cfg.debug, 0, "Dummynet debug level"); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, + CTLFLAG_RW, &dn_cfg.expire, 0, "Expire empty queues/pipes"); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle, + CTLFLAG_RD, &dn_cfg.expire_cycle, 0, "Expire cycle for queues/pipes"); /* RED parameters */ SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, @@ -546,8 +550,11 @@ dummynet_task(void *context, int pending transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time); } } - dn_drain_scheduler(); - dn_drain_queue(); + if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) { + dn_cfg.expire_cycle = 0; + dn_drain_scheduler(); + dn_drain_queue(); + } DN_BH_WUNLOCK(); dn_reschedule(); Modified: user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dn_private.h ============================================================================== --- user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dn_private.h Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dn_private.h Mon Mar 15 18:27:51 2010 (r205182) @@ -141,10 +141,14 @@ struct dn_parms { struct dn_alg_head schedlist; /* list of algorithms */ /* Store the fs/sch to scan when draining. The value is the - * bucket number of the hash table + * bucket number of the hash table. 
Expire can be disabled + * with net.inet.ip.dummynet.expire=0, or it happens every + * expire ticks. **/ int drain_fs; int drain_sch; + uint32_t expire; + uint32_t expire_cycle; /* tick count */ /* if the upper half is busy doing something long, * can set the busy flag and we will enqueue packets in Modified: user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dummynet.c ============================================================================== --- user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dummynet.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_dummynet.c Mon Mar 15 18:27:51 2010 (r205182) @@ -122,6 +122,12 @@ ipdn_bound_var(int *v, int dflt, int lo, } /*---- flow_id mask, hash and compare functions ---*/ +/* + * The flow_id includes the 5-tuple, the queue/pipe number + * which we store in the extra area in host order, + * and for ipv6 also the flow_id6. + * XXX see if we want the tos byte (can store in 'flags') + */ static struct ipfw_flow_id * flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) { @@ -130,7 +136,7 @@ flow_id_mask(struct ipfw_flow_id *mask, id->dst_port &= mask->dst_port; id->src_port &= mask->src_port; id->proto &= mask->proto; - id->flags = 0; /* we don't care about this one */ + id->extra &= mask->extra; if (is_v6) { APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); APPLY_MASK(&id->src_ip6, &mask->src_ip6); @@ -151,7 +157,7 @@ flow_id_or(struct ipfw_flow_id *src, str dst->dst_port |= src->dst_port; dst->src_port |= src->src_port; dst->proto |= src->proto; - dst->flags = 0; /* we don't care about this one */ + dst->extra |= src->extra; if (is_v6) { #define OR_MASK(_d, _s) \ (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ @@ -172,7 +178,7 @@ flow_id_or(struct ipfw_flow_id *src, str static int nonzero_mask(struct ipfw_flow_id *m) { - if (m->dst_port || m->src_port || m->proto) + if (m->dst_port || m->src_port || m->proto || m->extra) return 1; if (IS_IP6_FLOW_ID(m)) { return @@ -208,10 +214,12 @@ 
flow_id_hash(struct ipfw_flow_id *id) (s[0] << 16) ^ (s[1] << 16) ^ (s[2] << 16) ^ (s[3] << 16) ^ (id->dst_port << 1) ^ (id->src_port) ^ + (id->extra) ^ (id->proto ) ^ (id->flow_id6); } else { i = (id->dst_ip) ^ (id->dst_ip >> 15) ^ (id->src_ip << 1) ^ (id->src_ip >> 16) ^ + (id->extra) ^ (id->dst_port << 1) ^ (id->src_port) ^ (id->proto); } return i; @@ -223,29 +231,26 @@ flow_id_cmp(struct ipfw_flow_id *id1, st { int is_v6 = IS_IP6_FLOW_ID(id1); - if (is_v6 != IS_IP6_FLOW_ID(id2)) - return 1; /* a ipv4 and a ipv6 flow */ - - if (!is_v6 && id1->dst_ip == id2->dst_ip && - id1->src_ip == id2->src_ip && - id1->dst_port == id2->dst_port && - id1->src_port == id2->src_port && - id1->proto == id2->proto && - id1->flags == id2->flags) - return 0; - - if (is_v6 && + if (!is_v6) { + if (IS_IP6_FLOW_ID(id2)) + return 1; /* different address families */ + + return (id1->dst_ip == id2->dst_ip && + id1->src_ip == id2->src_ip && + id1->dst_port == id2->dst_port && + id1->src_port == id2->src_port && + id1->proto == id2->proto && + id1->extra == id2->extra) ? 0 : 1; + } + /* the ipv6 case */ + return ( !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) && !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) && id1->dst_port == id2->dst_port && id1->src_port == id2->src_port && id1->proto == id2->proto && - id1->flags == id2->flags && - id1->flow_id6 == id2->flow_id6) - return 0; - - /* Masks differ */ - return 1; + id1->extra == id2->extra && + id1->flow_id6 == id2->flow_id6) ? 0 : 1; } /*--------- end of flow-id mask, hash and compare ---------*/ @@ -2111,10 +2116,13 @@ ip_dn_init(void) if (bootverbose) printf("DUMMYNET with IPv6 initialized (100131)\n"); - /* init defaults here, MSVC does not accept initializers */ + /* Set defaults here. MSVC does not accept initializers, + * and this is also useful for vimages + */ /* queue limits */ dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. 
*/ dn_cfg.byte_limit = 1024 * 1024; + dn_cfg.expire = 1; /* RED parameters */ dn_cfg.red_lookup_depth = 256; /* default lookup table depth */ Modified: user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw2.c ============================================================================== --- user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw2.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw2.c Mon Mar 15 18:27:51 2010 (r205182) @@ -886,10 +886,13 @@ ipfw_chk(struct ip_fw_args *args) * ulp is NULL if not found. */ void *ulp = NULL; /* upper layer protocol pointer. */ + /* XXX ipv6 variables */ int is_ipv6 = 0; - u_int16_t ext_hd = 0; /* bits vector for extension header filtering */ + uint8_t icmp6_type = 0; + uint16_t ext_hd = 0; /* bits vector for extension header filtering */ /* end of ipv6 variables */ + int is_ipv4 = 0; int done = 0; /* flag to exit the outer loop */ @@ -941,14 +944,15 @@ do { \ switch (proto) { case IPPROTO_ICMPV6: PULLUP_TO(hlen, ulp, struct icmp6_hdr); - args->f_id.flags = ICMP6(ulp)->icmp6_type; + icmp6_type = ICMP6(ulp)->icmp6_type; break; case IPPROTO_TCP: PULLUP_TO(hlen, ulp, struct tcphdr); dst_port = TCP(ulp)->th_dport; src_port = TCP(ulp)->th_sport; - args->f_id.flags = TCP(ulp)->th_flags; + /* save flags for dynamic rules */ + args->f_id._flags = TCP(ulp)->th_flags; break; case IPPROTO_SCTP: @@ -1012,7 +1016,7 @@ do { \ return (IP_FW_DENY); break; } - args->f_id.frag_id6 = + args->f_id.extra = ntohl(((struct ip6_frag *)ulp)->ip6f_ident); ulp = NULL; break; @@ -1115,7 +1119,8 @@ do { \ PULLUP_TO(hlen, ulp, struct tcphdr); dst_port = TCP(ulp)->th_dport; src_port = TCP(ulp)->th_sport; - args->f_id.flags = TCP(ulp)->th_flags; + /* save flags for dynamic rules */ + args->f_id._flags = TCP(ulp)->th_flags; break; case IPPROTO_UDP: @@ -1126,7 +1131,7 @@ do { \ case IPPROTO_ICMP: PULLUP_TO(hlen, ulp, struct icmphdr); - args->f_id.flags = ICMP(ulp)->icmp_type; + //args->f_id.flags = ICMP(ulp)->icmp_type; break; default: @@ 
-1362,6 +1367,8 @@ do { \ key = dst_ip.s_addr; else if (v == 1) key = src_ip.s_addr; + else if (v == 6) /* dscp */ + key = (ip->ip_tos >> 2) & 0x3f; else if (offset != 0) break; else if (proto != IPPROTO_TCP && @@ -2034,7 +2041,7 @@ do { \ if (hlen > 0 && is_ipv6 && ((offset & IP6F_OFF_MASK) == 0) && (proto != IPPROTO_ICMPV6 || - (is_icmp6_query(args->f_id.flags) == 1)) && + (is_icmp6_query(icmp6_type) == 1)) && !(m->m_flags & (M_BCAST|M_MCAST)) && !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) { send_reject6( @@ -2392,7 +2399,7 @@ vnet_ipfw_uninit(const void *unused) IPFW_WLOCK(chain); ipfw_dyn_uninit(0); /* run the callout_drain */ - ipfw_destroy_tables(chain); + ipfw_flush_tables(chain); reap = NULL; for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; Modified: user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw_dynamic.c ============================================================================== --- user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw_dynamic.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw_dynamic.c Mon Mar 15 18:27:51 2010 (r205182) @@ -476,7 +476,7 @@ next: V_ipfw_dyn_v[i] = q; } if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ - u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); + u_char flags = pkt->_flags & (TH_FIN|TH_SYN|TH_RST); #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) Modified: user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw_log.c ============================================================================== --- user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw_log.c Mon Mar 15 18:26:09 2010 (r205181) +++ user/luigi/ipfw3-r8/sys/netinet/ipfw/ip_fw_log.c Mon Mar 15 18:27:51 2010 (r205182) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***