Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 24 Apr 2013 20:46:01 +0400
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        Luigi Rizzo <rizzo@iet.unipi.it>
Cc:        freebsd-ipfw@freebsd.org, luigi@freebsd.org
Subject:   Re: [patch] ipfw interface tracking and opcode rewriting
Message-ID:  <51780C49.7000204@FreeBSD.org>
In-Reply-To: <20130424162349.GA8439@onelab2.iet.unipi.it>
References:  <517801D3.5040502@FreeBSD.org> <20130424162349.GA8439@onelab2.iet.unipi.it>

next in thread | previous in thread | raw e-mail | index | archive | help
On 24.04.2013 20:23, Luigi Rizzo wrote:
> On Wed, Apr 24, 2013 at 08:01:23PM +0400, Alexander V. Chernikov wrote:
>> Hello list!
>>
>> Currently ipfw uses strncmp() function to do interface matching which is
>> quite slow.
>> Additionally, the ipfw_insn_if opcode is quite big, and given that struct
>> ip_fw occupies 48 bytes
>> (without the first instruction), there is a good chance that part of the
>> interface name will be on the second cache line on amd64.
>>
>> Pure synthetic testing (ipfw with 1 and 2 'ipfw count ip from any to any
>> recv ifaceX') shows about 3.8% performance loss (190kpps out of 5.1
>> mpps) for each rule,
>> while indexed version shows about 2.0% and 1.2% for first and second rule.
>>
>> Additionally, our production (8.3-based firewalls with old strncmp)
>> shows about 40% kernel time spent in strncmp on 1-2mpps (each packet
>> traverses 5-6 such rules).
>>
>> Here is the patch which does the following:
>> 1) adds interface tracking for ipfw. Every interface is tracked
>> regardless of its usage in the ruleset. This simplifies locking and
>> makes it easier to port such functionality to userland.
>> 2) adds a general opcode rewriting system permitting the kernel to
>> algorithmically (stateless) or statefully (involving external data)
>> rewrite user-supplied opcodes with possible size change.
>> This can be used to deprecate opcodes which are now superseded by newer
>> ones while keeping ABI (and we currently have such opcodes).
>> 3) Store (and track) the interface index for non-wildcard interfaces inside
>> the opcode.
>>
>> If there are no objections I would like to commit (a possibly updated
>> version) in the middle of the next week.
> hmmm.... this is quite a large change, and from the description it
> is a bit unclear to me how the "opcode rewriting" thing relates to
> the use of strings vs index for name matching.
Sorry, I haven't described this explicitly.
Index matching is done by storing the interface index in the p.glob field of 
the ipfw_insn_if instruction.
>
> Additionally, i wonder if there isn't a better way to replace strncmp
> with some two 64-bit comparisons (the name is 16 bytes) by making
> sure that the fields are zero-padded and suitably aligned.
> At this point, on the machines you care about (able to sustain
> 1+ Mpps) the two comparison should have the same cost as
> the index comparison, without the need to track update in the names.
Well, actually I'm thinking of the next 2 steps:
1) making kernel rule header more compact (20 bytes instead of 48) and 
making it invisible for userland.
This involves rule counters to be stored separately (and possibly as 
pcpu-based ones).
2) since ruleset is now nearly readonly and more or less compact we can 
try to store it in
contiguous address space to optimize cache line usage.


>
> note, my comment is only about the strncmp() part, i guess the
> opcode rewriting is orthogonal and may have other applications,
> but i am not completely sure about it.
>
> cheers
> luigi
>
> understand if
> While in principle
> i do not think the strncmp is a major performance killer. even if it is,
> i believe it
>
>> Index: sys/netinet/ip_fw.h
>> ===================================================================
>> --- sys/netinet/ip_fw.h	(revision 248704)
>> +++ sys/netinet/ip_fw.h	(working copy)
>> @@ -341,6 +341,7 @@ typedef struct	_ipfw_insn_if {
>>   	union {
>>   		struct in_addr ip;
>>   		int glob;
>> +		unsigned int if_idx;	/* Interface index (kernel) */
>>   	} p;
>>   	char name[IFNAMSIZ];
>>   } ipfw_insn_if;
>> @@ -495,6 +496,8 @@ typedef struct _ipfw_insn_icmp6 {
>>    *	queue(3) macros for portability and readability.
>>    */
>>   
>> +#define	IP_FW_RULE_REWRITTEN	0x01	/* Rule is modified by rewriter */
>> +
>>   struct ip_fw {
>>   	struct ip_fw	*x_next;	/* linked list of rules		*/
>>   	struct ip_fw	*next_rule;	/* ptr to next [skipto] rule	*/
>> @@ -505,7 +508,7 @@ struct ip_fw {
>>   	uint16_t	rulenum;	/* rule number			*/
>>   	uint8_t	set;		/* rule set (0..31)		*/
>>   #define	RESVD_SET	31	/* set for default and persistent rules */
>> -	uint8_t		_pad;		/* padding			*/
>> +	uint8_t		flags;		/* padding			*/
>>   	uint32_t	id;		/* rule id */
>>   
>>   	/* These fields are present in all rules.			*/
>> Index: sys/modules/ipfw/Makefile
>> ===================================================================
>> --- sys/modules/ipfw/Makefile	(revision 248704)
>> +++ sys/modules/ipfw/Makefile	(working copy)
>> @@ -8,6 +8,7 @@ KMOD=	ipfw
>>   SRCS=	ip_fw2.c ip_fw_pfil.c
>>   SRCS+=	ip_fw_dynamic.c ip_fw_log.c
>>   SRCS+=	ip_fw_sockopt.c ip_fw_table.c
>> +SRCS+=	ip_fw_iface.c ip_fw_rewrite.c
>>   SRCS+=	opt_inet.h opt_inet6.h opt_ipdivert.h opt_ipfw.h opt_ipsec.h
>>   
>>   CFLAGS+= -DIPFIREWALL
>> Index: sys/netpfil/ipfw/ip_fw2.c
>> ===================================================================
>> --- sys/netpfil/ipfw/ip_fw2.c	(revision 248704)
>> +++ sys/netpfil/ipfw/ip_fw2.c	(working copy)
>> @@ -353,17 +353,17 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd,
>>   	if (ifp == NULL)	/* no iface with this packet, match fails */
>>   		return 0;
>>   	/* Check by name or by IP address */
>> -	if (cmd->name[0] != '\0') { /* match by name */
>> -		if (cmd->name[0] == '\1') /* use tablearg to match */
>> +	if (cmd->o.arg1 != 0) { /* match by name */
>> +		if (cmd->o.arg1 == 1) /* use tablearg to match */
>>   			return ipfw_lookup_table_extended(chain, cmd->p.glob,
>>   				ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE);
>>   		/* Check name */
>> -		if (cmd->p.glob) {
>> +		if (cmd->p.if_idx) {
>> +			if (ifp->if_index == cmd->p.if_idx)
>> +				return (1);
>> +		} else {
>>   			if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
>>   				return(1);
>> -		} else {
>> -			if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
>> -				return(1);
>>   		}
>>   	} else {
>>   #ifdef __FreeBSD__	/* and OSX too ? */
>> @@ -2667,6 +2667,8 @@ vnet_ipfw_init(const void *unused)
>>   
>>   	IPFW_LOCK_INIT(chain);
>>   	ipfw_dyn_init(chain);
>> +	ipfw_ifhash_init(chain);
>> +	ipfw_rewrite_init(chain);
>>   
>>   	/* First set up some values that are compile time options */
>>   	V_ipfw_vnet_ready = 1;		/* Open for business */
>> @@ -2708,6 +2710,7 @@ vnet_ipfw_uninit(const void *unused)
>>   	(void)ipfw_attach_hooks(0 /* detach */);
>>   	V_ip_fw_ctl_ptr = NULL;
>>   	IPFW_UH_WLOCK(chain);
>> +	ipfw_ifhash_detach(chain); /* detach eventhandlers */
>>   	IPFW_UH_WUNLOCK(chain);
>>   	IPFW_UH_WLOCK(chain);
>>   
>> @@ -2722,9 +2725,14 @@ vnet_ipfw_uninit(const void *unused)
>>   		rule = chain->map[i];
>>   		rule->x_next = reap;
>>   		reap = rule;
>> +		/* Clear rewrites if any */
>> +		if (rule->flags & IP_FW_RULE_REWRITTEN)
>> +			ipfw_relocate_rewrite(chain, rule->cmd, NULL);
>>   	}
>>   	if (chain->map)
>>   		free(chain->map, M_IPFW);
>> +	ipfw_rewrite_free(chain);
>> +	ipfw_ifhash_free(chain);
>>   	IPFW_WUNLOCK(chain);
>>   	IPFW_UH_WUNLOCK(chain);
>>   	if (reap != NULL)
>> Index: sys/netpfil/ipfw/ip_fw_private.h
>> ===================================================================
>> --- sys/netpfil/ipfw/ip_fw_private.h	(revision 248704)
>> +++ sys/netpfil/ipfw/ip_fw_private.h	(working copy)
>> @@ -212,6 +212,13 @@ VNET_DECLARE(int, autoinc_step);
>>   VNET_DECLARE(unsigned int, fw_tables_max);
>>   #define V_fw_tables_max		VNET(fw_tables_max)
>>   
>> +
>> +#define CMDSIZE(rule)  (((struct ip_fw *)(rule))->cmd_len * sizeof(uint32_t))
>> +
>> +
>> +struct ip_fw_if_data;
>> +struct ip_fw_rw_data;
>> +
>>   struct ip_fw_chain {
>>   	struct ip_fw	*rules;		/* list of rules */
>>   	struct ip_fw	*reap;		/* list of rules to reap */
>> @@ -232,8 +239,42 @@ struct ip_fw_chain {
>>   #endif
>>   	uint32_t	id;		/* ruleset id */
>>   	uint32_t	gencnt;		/* generation count */
>> +	struct ip_fw_if_data	*if_data;	/* Interface tracking data */
>> +	struct ip_fw_rw_data	*rewrite_data;	/* Rule rewrite data */
>>   };
>>   
>> +/* ip_fw_rewrite.c */
>> +struct ip_fw_rw_info {
>> +	void	*sptr;	/* State created by ipfw_prepare_rewrite() */
>> +	int	count;	/* Number of opcodes requesting rewrite */
>> +	int	states;	/* Number of opcodes with stateful rewrite */
>> +	int	lendiff;	/* Difference with oridinal rule len (insns) */
>> +};
>> +
>> +void ipfw_rewrite_init(struct ip_fw_chain *chain);
>> +void ipfw_rewrite_free(struct ip_fw_chain *chain);
>> +int ipfw_rewrite_len(struct ip_fw_chain *chain);
>> +void *ipfw_prepare_rewrite(struct ip_fw_chain *chain, ipfw_insn *cmd,
>> +    int cmd_len, struct ip_fw_rw_info *rwi);
>> +void ipfw_perform_rewrite(struct ip_fw_chain *chain, ipfw_insn *kcmd,
>> +    void *state);
>> +void ipfw_relocate_rewrite(struct ip_fw_chain *chain, ipfw_insn *old,
>> +    ipfw_insn *new);
>> +int ipfw_export_rewrite(struct ip_fw_chain *chain, ipfw_insn *kcmd,
>> +    ipfw_insn *target);
>> +
>> +void ipfw_check_rewrite(struct ip_fw_chain *chain, ipfw_insn *insn,
>> +    struct ip_fw_rw_info *rwi);
>> +void ipfw_update_rewrite(struct ip_fw_chain *chain, ipfw_insn *insn,
>> +    void *state, uintptr_t val);
>> +
>> +
>> +/* ip_fw_iface.c */
>> +void ipfw_ifhash_init(struct ip_fw_chain *chain);
>> +void ipfw_ifhash_free(struct ip_fw_chain *chain);
>> +void ipfw_ifhash_detach(struct ip_fw_chain *chain);
>> +
>> +
>>   struct sockopt;	/* used by tcp_var.h */
>>   
>>   /* Macro for working with various counters */
>> @@ -295,7 +336,8 @@ struct sockopt;	/* used by tcp_var.h */
>>   
>>   /* In ip_fw_sockopt.c */
>>   int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
>> -int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
>> +int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule,
>> +    struct ip_fw_rw_info *rwi);
>>   int ipfw_ctl(struct sockopt *sopt);
>>   int ipfw_chk(struct ip_fw_args *args);
>>   void ipfw_reap_rules(struct ip_fw *head);
>> Index: sys/netpfil/ipfw/ip_fw_sockopt.c
>> ===================================================================
>> --- sys/netpfil/ipfw/ip_fw_sockopt.c	(revision 248971)
>> +++ sys/netpfil/ipfw/ip_fw_sockopt.c	(working copy)
>> @@ -73,6 +73,8 @@ MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct
>>    * static variables followed by global ones (none in this file)
>>    */
>>   
>> +static void ipfw_export_header(struct ip_fw *krule, struct ip_fw *dst);
>> +
>>   /*
>>    * Find the smallest rule >= key, id.
>>    * We could use bsearch but it is so simple that we code it directly
>> @@ -153,7 +155,8 @@ swap_map(struct ip_fw_chain *chain, struct ip_fw *
>>    * Must be called without IPFW_UH held
>>    */
>>   int
>> -ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
>> +ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule,
>> +    struct ip_fw_rw_info *rwi)
>>   {
>>   	struct ip_fw *rule;
>>   	int i, l, insert_before;
>> @@ -163,7 +166,8 @@ int
>>   		return (EINVAL);
>>   
>>   	l = RULESIZE(input_rule);
>> -	rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
>> +	rule = malloc(l + rwi->lendiff * sizeof(uint32_t), M_IPFW,
>> +	    M_WAITOK | M_ZERO);
>>   	/* get_map returns with IPFW_UH_WLOCK if successful */
>>   	map = get_map(chain, 1, 0 /* not locked */);
>>   	if (map == NULL) {
>> @@ -171,7 +175,15 @@ int
>>   		return ENOSPC;
>>   	}
>>   
>> -	bcopy(input_rule, rule, l);
>> +	if (rwi->sptr == NULL)
>> +		bcopy(input_rule, rule, l);
>> +	else {
>> +		/* Copy header and first instruction */
>> +		bcopy(input_rule, rule, sizeof(struct ip_fw));
>> +		rule->flags |= IP_FW_RULE_REWRITTEN;
>> +		ipfw_perform_rewrite(chain, rule->cmd, rwi->sptr);
>> +	}
>> +
>>   	/* clear fields not settable from userland */
>>   	rule->x_next = NULL;
>>   	rule->next_rule = NULL;
>> @@ -366,6 +378,14 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg)
>>   			rule = chain->map[i];
>>   			if (keep_rule(rule, cmd, new_set, num))
>>   				map[ofs++] = rule;
>> +			else {
>> +				/* Clear rewrites if any */
>> +				if (rule->flags & IP_FW_RULE_REWRITTEN) {
>> +					printf("Moving rule %p to clear list\n", rule);
>> +					ipfw_relocate_rewrite(chain,
>> +					    rule->cmd, NULL);
>> +				}
>> +			}
>>   		}
>>   		/* 3. copy the final part of the map */
>>   		bcopy(chain->map + end, map + ofs,
>> @@ -384,6 +404,7 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg)
>>   				ipfw_expire_dyn_rules(chain, rule, RESVD_SET);
>>   			rule->x_next = chain->reap;
>>   			chain->reap = rule;
>> +			printf("Adding rule %p to reap list\n", rule);
>>   		}
>>   		break;
>>   
>> @@ -517,7 +538,8 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t ar
>>    * Rules are simple, so this mostly need to check rule sizes.
>>    */
>>   static int
>> -check_ipfw_struct(struct ip_fw *rule, int size)
>> +check_ipfw_struct(struct ip_fw_chain *chain, struct ip_fw *rule, int size,
>> +    struct ip_fw_rw_info *rwi)
>>   {
>>   	int l, cmdlen = 0;
>>   	int have_action=0;
>> @@ -696,6 +718,7 @@ static int
>>   		case O_VIA:
>>   			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
>>   				goto bad_size;
>> +			ipfw_check_rewrite(chain, cmd, rwi);
>>   			break;
>>   
>>   		case O_ALTQ:
>> @@ -868,6 +891,13 @@ int convert_rule_to_8(struct ip_fw *rule);
>>   #endif
>>   
>>   
>> +static void
>> +ipfw_export_header(struct ip_fw *krule, struct ip_fw *dst)
>> +{
>> +
>> +	memcpy(dst, krule, sizeof(struct ip_fw) - sizeof(ipfw_insn));
>> +}
>> +
>>   /*
>>    * Copy the static and dynamic rules to the supplied buffer
>>    * and return the amount of space actually used.
>> @@ -887,11 +917,28 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf
>>   		rule = chain->map[i];
>>   
>>   		if (is7) {
>> -		    /* Convert rule to FreeBSd 7.2 format */
>> -		    l = RULESIZE7(rule);
>> +		    /* Convert rule to FreeBSD 7.2 format */
>> +			if (rule->flags & IP_FW_RULE_REWRITTEN)
>> +				l = ipfw_export_rewrite(chain, rule->cmd, NULL);
>> +			else
>> +				l = CMDSIZE(rule);
>> +
>> +			/*
>> +			 * Add header length.
>> +			 * v.8 rule header is 4 bytes bigger.
>> +			 */
>> +			l += sizeof(struct ip_fw7) - sizeof(ipfw_insn);
>> +
>>   		    if (bp + l + sizeof(uint32_t) <= ep) {
>>   			int error;
>>   			bcopy(rule, bp, l + sizeof(uint32_t));
>> +
>> +			if (rule->flags & IP_FW_RULE_REWRITTEN) {
>> +				ipfw_export_rewrite(chain, rule->cmd, dst->cmd);
>> +				ipfw_export_header(rule, dst);
>> +			} else
>> +				bcopy(rule, bp, l + sizeof(uint32_t));
>> +
>>   			error = convert_rule_to_7((struct ip_fw *) bp);
>>   			if (error)
>>   				return 0; /*XXX correct? */
>> @@ -910,14 +957,23 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf
>>   		    continue; /* go to next rule */
>>   		}
>>   
>> -		/* normal mode, don't touch rules */
>> -		l = RULESIZE(rule);
>> +		if (rule->flags & IP_FW_RULE_REWRITTEN)
>> +			l = ipfw_export_rewrite(chain, rule->cmd, NULL);
>> +		else
>> +			l = CMDSIZE(rule);
>> +		/* Add header length */
>> +		l += sizeof(struct ip_fw) - sizeof(ipfw_insn);
>> +
>>   		if (bp + l > ep) { /* should not happen */
>>   			printf("overflow dumping static rules\n");
>>   			break;
>>   		}
>>   		dst = (struct ip_fw *)bp;
>> -		bcopy(rule, dst, l);
>> +		if (rule->flags & IP_FW_RULE_REWRITTEN) {
>> +			ipfw_export_rewrite(chain, rule->cmd, dst->cmd);
>> +			ipfw_export_header(rule, dst);
>> +		} else
>> +			bcopy(rule, dst, l);
>>   		/*
>>   		 * XXX HACK. Store the disable mask in the "next"
>>   		 * pointer in a wild attempt to keep the ABI the same.
>> @@ -949,6 +1005,7 @@ ipfw_ctl(struct sockopt *sopt)
>>   	uint32_t opt;
>>   	char xbuf[128];
>>   	ip_fw3_opheader *op3 = NULL;
>> +	struct ip_fw_rw_info rwi;
>>   
>>   	error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
>>   	if (error)
>> @@ -998,7 +1055,7 @@ ipfw_ctl(struct sockopt *sopt)
>>   		for (;;) {
>>   			int len = 0, want;
>>   
>> -			size = chain->static_len;
>> +			size = chain->static_len + ipfw_rewrite_len(chain);
>>   			size += ipfw_dyn_len();
>>   			if (size >= sopt->sopt_valsize)
>>   				break;
>> @@ -1027,6 +1084,8 @@ ipfw_ctl(struct sockopt *sopt)
>>   		error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
>>   			sizeof(struct ip_fw7) );
>>   
>> +		memset(&rwi, 0, sizeof(rwi));
>> +
>>   		/*
>>   		 * If the size of commands equals RULESIZE7 then we assume
>>   		 * a FreeBSD7.2 binary is talking to us (set is7=1).
>> @@ -1042,15 +1101,21 @@ ipfw_ctl(struct sockopt *sopt)
>>   		    if (error)
>>   			return error;
>>   		    if (error == 0)
>> -			error = check_ipfw_struct(rule, RULESIZE(rule));
>> +			error = check_ipfw_struct(chain, rule, RULESIZE(rule),
>> +			    &rwi);
>>   		} else {
>>   		    is7 = 0;
>>   		if (error == 0)
>> -			error = check_ipfw_struct(rule, sopt->sopt_valsize);
>> +			error = check_ipfw_struct(chain, rule,
>> +			    sopt->sopt_valsize, &rwi);
>>   		}
>>   		if (error == 0) {
>> +			/* Prepare rewrite, if needed */
>> +			if (rwi.count > 0)
>> +				rwi.sptr = ipfw_prepare_rewrite(chain,
>> +				    rule->cmd, rule->cmd_len, &rwi);
>>   			/* locking is done within ipfw_add_rule() */
>> -			error = ipfw_add_rule(chain, rule);
>> +			error = ipfw_add_rule(chain, rule, &rwi);
>>   			size = RULESIZE(rule);
>>   			if (!error && sopt->sopt_dir == SOPT_GET) {
>>   				if (is7) {
>> @@ -1350,7 +1415,7 @@ convert_rule_to_7(struct ip_fw *rule)
>>   	bcopy(rule, tmp, RULE_MAXSIZE);
>>   
>>   	/* Copy fields */
>> -	rule7->_pad = tmp->_pad;
>> +	rule7->_pad = 0;
>>   	rule7->set = tmp->set;
>>   	rule7->rulenum = tmp->rulenum;
>>   	rule7->cmd_len = tmp->cmd_len;
>> @@ -1423,7 +1488,7 @@ convert_rule_to_8(struct ip_fw *rule)
>>   		}
>>   	}
>>   
>> -	rule->_pad = tmp->_pad;
>> +	rule->flags = 0;
>>   	rule->set = tmp->set;
>>   	rule->rulenum = tmp->rulenum;
>>   	rule->cmd_len = tmp->cmd_len;
>> --- /dev/null	2013-04-24 17:20:19.000000000 +0400
>> +++ sys/netpfil/ipfw/ip_fw_rewrite.c	2013-04-24 17:19:15.278097243 +0400
>> @@ -0,0 +1,835 @@
>> +/*-
>> + * Copyright (c) 2013 Yandex LLC.
>> + * Author: Alexander V. Chernikov <melifaro@yandex-team.ru>
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + * 1. Redistributions of source code must retain the above copyright
>> + *    notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + *    notice, this list of conditions and the following disclaimer in the
>> + *    documentation and/or other materials provided with the distribution.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
>> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
>> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
>> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
>> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
>> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
>> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
>> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
>> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>> + * SUCH DAMAGE.
>> + */
>> +
>> +#include <sys/cdefs.h>
>> +__FBSDID("$FreeBSD$");
>> +
>> +/*
>> + * Rule opcode rewriting system for ipfw.
>> + * The system permits automatic algorithmic (stateless) or stateful
>> + * (requiring access to/modification of external data) rewriting of opcodes.
>> + * Modification is done by calling special per-opcode dependent
>> + * callbacks. Saving unmodified user-supplied rules, size recalculation,
>> + * rule export and relocation are handled by the subsystem.
>> + * Writing an opcode modifier requires adding it to the rewrites[] array
>> + * and filling in the appropriate callbacks (at least the 'convert' one).
>> + */
>> +
>> +#include "opt_ipfw.h"
>> +
>> +#include <sys/param.h>
>> +#include <sys/systm.h>
>> +#include <sys/malloc.h>
>> +#include <sys/kernel.h>
>> +#include <sys/lock.h>
>> +#include <sys/rwlock.h>
>> +#include <sys/fnv_hash.h>
>> +#include <sys/socket.h>
>> +#include <net/if.h>
>> +
>> +#include <netinet/in.h>
>> +#include <netinet/ip_var.h> /* hooks */
>> +#include <netinet/ip_fw.h>
>> +
>> +#include <netpfil/ipfw/ip_fw_private.h>
>> +#include <netpfil/ipfw/ip_fw_iface.h>
>> +
>> +#define	NO_REWRITE		0
>> +#define	STATELESS_REWRITE	1
>> +#define	STATEFUL_REWRITE	2
>> +
>> +struct ip_fw_rewrite {
>> +	uint32_t opcode;
>> +
>> +	/*
>> +	 * Checks if given opcode needs to be changed. Called (indirectly)
>> +	 * from check_ipfw_struct() without holding any locks. Function should
>> +	 * quickly check if given opcode needs to be rewritten and set @len to
>> +	 * size difference (in bytes) between new (altered) opcode size and
>> +	 * old one. Note that @len should be aligned to u32.
>> +	 *
>> +	 * Params:
>> +	 * @chain - pointer to current ipfw chain
>> +	 * @insn - given ipfw_insn
>> +	 * @len - pointer to length diff (in insns)
>> +	 *
>> +	 * Returns:
>> +	 * NO_REWRITE - no need to convert
>> +	 * STATELESS_REWRITE - (algoritmic) conversion required.
>> +	 * STATEFUL_REWRITE - stateful conversion required.
>> +	 *
>> +	 * Callback is OPTIONAL, defaults to STATELESS_REWRITE if not set.
>> +	 */
>> +	int (*check)(struct ip_fw_chain *, ipfw_insn *, int *);
>> +
>> +	/*
>> +	 * Prepares state for given opcode if needed. Called without
>> +	 * holding any locks permitting to allocate any amount of memory.
>> +	 * Note that result (and actual state usage) has to be consistent
>> +	 * with *check (and other) callbacks.
>> +	 *
>> +	 * Params:
>> +	 * @chain - pointer to current ifpw chain
>> +	 * @insn - given ipfw_instn
>> +	 * @pstate - pointer to pointer to state
>> +	 *
>> +	 * Returns:
>> +	 * NO_REWRITE - no need to convert
>> +	 * STATELESS_REWRITE - (algoritmic) conversion can be done.
>> +	 * STATEFUL_REWRITE - stateful conversion required, state is saved to
>> +	 * given pointer.
>> +	 *
>> +	 * Callback is OPTIONAL, defaults to STATELESS_REWRITE if not set.
>> +	 */
>> +	int (*prepare)(struct ip_fw_chain *, ipfw_insn *insn, void **);
>> +
>> +	/*
>> +	 * Performs opcode conversion. Called with chain WLOCK held.
>> +	 * Note that opcode copy is handled automatically if
>> +	 * NO_REWRITE is returned. @len has to be filled otherwise.
>> +	 *
>> +	 * Params:
>> +	 * @chain - pointer to current ifpw chain
>> +	 * @_old - userland ipfw_instn
>> +	 * @_new - kernel ipfw_insn
>> +	 * @state - pointer to state saved
>> +	 * @len - pointer to opcode length (in instructions)
>> +	 *
>> +	 * Returns:
>> +	 * NO_REWRITE - no need to convert
>> +	 * STATELESS_REWRITE - (algoritmic) conversion is  done.
>> +	 * STATEFUL_REWRITE - stateful conversion is done, state is consumed.
>> +	 *
>> +	 * Callback is MANDATORY.
>> + 	 */
>> +	int (*convert)(struct ip_fw_chain *, ipfw_insn *, ipfw_insn *, void *, int *);
>> +
>> +	/*
>> +	 * Performs state cleanup (rule deletion). Called with chain WLOCK held.
>> +	 * State hint can be provided.
>> +	 *
>> +	 * Params:
>> +	 * @chain - pointer to current ifpw chain
>> +	 * @insn - kernel ipfw_insn
>> +	 * @state - pointer to state hint
>> +	 *
>> +	 * Callback is OPTIONAL.
>> +	 */
>> +	void (*clear)(struct ip_fw_chain *, ipfw_insn *, void *);
>> +
>> +	/*
>> +	 * Performs opcode-dependent update.
>> +	 * Flag/argument can be provided.
>> +	 *
>> +	 * Params:
>> +	 * @chain - pointer to current ifpw chain
>> +	 * @insn - kernel ipfw_insn
>> +	 * @state - pointer to opcode-dependent data
>> +	 * @val - opcode-dependent value
>> +	 *
>> +	 * Callback is OPTIONAL.
>> +	 */
>> +	void (*update)(struct ip_fw_chain *, ipfw_insn *, void *, uintptr_t);
>> +
>> +	/*
>> +	 * Dispatches memory relocation of given opcode. Called with WLOCK held.
>> +	 * Actual copy is already done at the moment of call.
>> +	 *
>> +	 * Params:
>> +	 * @chain - pointer to current ipfw chain
>> +	 * @_old - kernel ipfw_insn
>> +	 * @_new - new kernel ipfw_insn
>> +	 *
>> +	 * Callback is OPTIONAL.
>> +	 */
>> +	void (*move)(struct ip_fw_chain *, ipfw_insn *, ipfw_insn *);
>> +};
>> +
>> +/* Opcode callbacks */
>> +static int
>> +convertable_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn);
>> +
>> +static void move_insn_if(struct ip_fw_chain *chain, ipfw_insn *_old, ipfw_insn *_new);
>> +static void update_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn,
>> +    void *_iface_mask, uintptr_t new_id);
>> +static void clear_insn_if(struct ip_fw_chain *chain, ipfw_insn *_src, void *data);
>> +static int convert_insn_if(struct ip_fw_chain *chain, ipfw_insn *_old, ipfw_insn *_new,
>> +    void *state, int *len);
>> +static int prepare_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, void **pstate);
>> +static int check_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, int *len);
>> +
>> +
>> +/* Note opcodes MUST be in ascending order */
>> +struct ip_fw_rewrite rewrites[] = {
>> +	{
>> +		O_RECV,
>> +		check_insn_if,
>> +		prepare_insn_if,
>> +		convert_insn_if,
>> +		clear_insn_if,
>> +		update_insn_if,
>> +		move_insn_if,
>> +	},
>> +	{
>> +		O_XMIT,
>> +		check_insn_if,
>> +		prepare_insn_if,
>> +		convert_insn_if,
>> +		clear_insn_if,
>> +		update_insn_if,
>> +		move_insn_if,
>> +	},
>> +	{
>> +		O_VIA,
>> +		check_insn_if,
>> +		prepare_insn_if,
>> +		convert_insn_if,
>> +		clear_insn_if,
>> +		update_insn_if,
>> +		move_insn_if,
>> +	},
>> +};
>> +
>> +struct rewrite_rule_ptr {
>> +	TAILQ_ENTRY(rewrite_rule_ptr)	next;
>> +	int		cmd_klen;	/* Kernel opcodes len (insns) */
>> +	int		cmd_len;	/* Original opcodes len (insns) */
>> +	ipfw_insn	*kcmd;	/* Kernel rule version */
>> +	void		**states;	/* opcode states */
>> +	int		states_count;	/* number of states */
>> +	ipfw_insn	cmd[1];	/* Original opcodes */
>> +};
>> +TAILQ_HEAD(rewrite_rule_head, rewrite_rule_ptr);
>> +
>> +struct ip_fw_rw_data {
>> +	struct rewrite_rule_head	*hash;
>> +	size_t		hsize;
>> +	int		lendiff;	/* sizeof(kern) - sizeof(user) */
>> +};
>> +
>> +#define	DEFAULT_HASH_SIZE	32
>> +#define	PTR_HASH_PRIME		31
>> +
>> +static struct ip_fw_rewrite *ipfw_find_rewrite(uint32_t opcode);
>> +
>> +void
>> +ipfw_rewrite_init(struct ip_fw_chain *chain)
>> +{
>> +	struct ip_fw_rw_data *rwd;
>> +	struct rewrite_rule_head *rh;
>> +	int i;
>> +
>> +	rwd = malloc(sizeof(struct ip_fw_rw_data), M_IPFW, M_WAITOK | M_ZERO);
>> +
>> +	rwd->hsize = DEFAULT_HASH_SIZE;
>> +	rwd->hash = malloc(sizeof(struct rewrite_rule_head) * rwd->hsize,
>> +	    M_IPFW, M_WAITOK | M_ZERO);
>> +
>> +	for (i = 0, rh = rwd->hash; i < rwd->hsize; i++, rh++)
>> +		TAILQ_INIT(rh);
>> +
>> +	chain->rewrite_data = rwd;
>> +}
>> +
>> +void
>> +ipfw_rewrite_free(struct ip_fw_chain *chain)
>> +{
>> +	struct ip_fw_rw_data *rwd;
>> +
>> +	rwd = chain->rewrite_data;
>> +	chain->rewrite_data = NULL;
>> +
>> +	/* Assume every rule to be already removed */
>> +	free(rwd->hash, M_IPFW);
>> +	free(rwd, M_IPFW);
>> +}
>> +
>> +int
>> +ipfw_rewrite_len(struct ip_fw_chain *chain)
>> +{
>> +	struct ip_fw_rw_data *rwd;
>> +
>> +	rwd = chain->rewrite_data;
>> +
>> +	return (rwd->lendiff);
>> +}
>> +
>> +/*
>> + * Prepares given rule for modification:
>> + * allocates memory for rule and number of states reported
>> + * by 'check' callbacks. Calls 'prepare' callback for
>> + * every opcode in rule.
>> + *
>> + * Returns state to be passed to ipfw_store_rule.
>> + */
>> +void *
>> +ipfw_prepare_rewrite(struct ip_fw_chain *chain, ipfw_insn *ucmd,
>> +    int cmd_len, struct ip_fw_rw_info *rwi)
>> +{
>> +	int i, l, cmdlen, size, states_count;
>> +	struct rewrite_rule_ptr *rptr;
>> +	ipfw_insn *cmd;
>> +	struct ip_fw_rewrite *rewrite;
>> +	void **pstate;
>> +
>> +	/*
>> +	 * Allocate memory for rule header, opcodes and state array.
>> +	 */
>> +	size = sizeof(struct rewrite_rule_ptr) +
>> +	    (cmd_len - 1) * sizeof(uint32_t);
>> +
>> +	size = roundup(size, sizeof(void *));
>> +
>> +	rptr = malloc(size + rwi->states * sizeof(void *), M_IPFW,
>> +	    M_WAITOK | M_ZERO);
>> +
>> +	/* Save original opcodes */
>> +	memcpy(rptr->cmd, ucmd, cmd_len * sizeof(uint32_t));
>> +	rptr->cmd_len = cmd_len;
>> +	rptr->cmd_klen = rptr->cmd_len + rwi->lendiff;
>> +
>> +	rptr->states = (void **)((char *)rptr + size);
>> +	rptr->states_count = rwi->states;
>> +	pstate = rptr->states;
>> +	states_count = rptr->states_count;
>> +
>> +	CTR4(KTR_NET, "Prepare rule rewrite: cmd %p len %d klen %d rptr %p",
>> +	    ucmd, rptr->cmd_len, rptr->cmd_klen, rptr);
>> +
>> +	for (l = cmd_len, cmd = ucmd ;
>> +			l > 0 ; l -= cmdlen, cmd += cmdlen) {
>> +		cmdlen = F_LEN(cmd);
>> +
>> +		if ((rewrite = ipfw_find_rewrite(cmd->opcode)) == NULL)
>> +			continue;
>> +
>> +		if (rewrite->prepare == NULL)
>> +			continue;
>> +
>> +		i = rewrite->prepare(chain, cmd, pstate);
>> +
>> +		if (i == STATEFUL_REWRITE) {
>> +			CTR3(KTR_NET, "New stateful rewrite %p val %p count %d",
>> +			    pstate, *pstate, states_count);
>> +			pstate++;
>> +			states_count--;
>> +
>> +			KASSERT(states_count >= 0,
>> +			    ("prepare_rewrite state overflow"));
>> +		}
>> +	}
>> +
>> +	return ((void *)rptr);
>> +}
>> +
>> +static int
>> +hash_ptr(struct ip_fw_rw_data *rwd, ipfw_insn *cmd)
>> +{
>> +	return (uintptr_t)cmd % PTR_HASH_PRIME;
>> +}
>> +
>> +/*
>> + * Fills in kernel rule with modified opcodes. Updates old rule state
>> + * with new kernel pointer. Actual rewriting and header copy is done
>> + * in ipfw_run_rewrite().
>> + */
>> +void
>> +ipfw_perform_rewrite(struct ip_fw_chain *chain, ipfw_insn *kcmd, void *state)
>> +{
>> +	struct rewrite_rule_ptr *rptr;
>> +	struct rewrite_rule_head *rh;
>> +	struct ip_fw_rw_data *rwd;
>> +	struct ip_fw_rewrite *rewrite;
>> +	ipfw_insn *ucmd;
>> +	void **pstate;
>> +	int i, l, ucmdlen, kcmdlen, states_count;
>> +
>> +	rwd = chain->rewrite_data;
>> +
>> +	rptr = (struct rewrite_rule_ptr *)state;
>> +	rptr->kcmd = kcmd;
>> +	pstate = rptr->states;
>> +	states_count = rptr->states_count;
>> +
>> +	CTR3(KTR_NET, "Linking kcmd %p to orig %p idx %d",
>> +	    kcmd, rptr, hash_ptr(rwd, kcmd));
>> +
>> +	rh = &rwd->hash[hash_ptr(rwd, kcmd)];
>> +	TAILQ_INSERT_TAIL(rh, rptr, next);
>> +
>> +	ucmd = rptr->cmd;
>> +	
>> +	for (l = rptr->cmd_len; l > 0 ;
>> +	    l -= ucmdlen, ucmd += ucmdlen, kcmd += kcmdlen) {
>> +		ucmdlen = F_LEN(ucmd);
>> +
>> +		if ((rewrite = ipfw_find_rewrite(ucmd->opcode)) == NULL) {
>> +			/* No conversion required, copy as is */
>> +			kcmdlen = ucmdlen;
>> +			memcpy(kcmd, ucmd, ucmdlen * sizeof(ipfw_insn));
>> +			continue;
>> +		}
>> +
>> +		i = rewrite->convert(chain, ucmd, kcmd, *pstate, &kcmdlen);
>> +		CTR2("RW for %d st %p returned %d", ucmd->cmd, *pstate, i);
>> +
>> +		if (i == NO_REWRITE) {
>> +			kcmdlen = ucmdlen;
>> +			memcpy(kcmd, ucmd, ucmdlen * sizeof(ipfw_insn));
>> +		} else if (i == STATEFUL_REWRITE) {
>> +			pstate++;
>> +			states_count--;
>> +
>> +			KASSERT(states_count >= 0, ("rewrite state overflow"));
>> +		}
>> +	}
>> +
>> +	/* Save size difference */
>> +	rwd->lendiff += rptr->cmd_klen - rptr->cmd_len;
>> +	CTR2(KTR_NET, "old len: %d, new: %d", rptr->cmd_len, rptr->cmd_klen);
>> +}
>> +
>> +/*
>> + * Handle rule moving to new place (or deletion).
>> + * Updates kernel rule pointer and runs opcode callbacks via
>> + * ipfw_move_rewrite() or clears state via ipfw_clear_rewrite()
>> + * in the latter case.
>> + */
>> +void
>> +ipfw_relocate_rewrite(struct ip_fw_chain *chain, ipfw_insn *old, ipfw_insn *new)
>> +{
>> +	struct rewrite_rule_ptr *rptr;
>> +	struct rewrite_rule_head *rh;
>> +	struct ip_fw_rw_data *rwd;
>> +	struct ip_fw_rewrite *rewrite;
>> +	int l, cmdlen;
>> +
>> +	rwd = chain->rewrite_data;
>> +
>> +	rh = &rwd->hash[hash_ptr(rwd, old)];
>> +
>> +	TAILQ_FOREACH(rptr, rh, next) {
>> +		if (rptr->kcmd == old)
>> +			break;
>> +	}
>> +
>> +	CTR3(KTR_NET, "Moving %p idx %p to %p", rptr, hash_ptr(rwd, old), new);
>> +
>> +	KASSERT(rptr != NULL, ("ipfw_relocate_rewrite: old rule not found"));
>> +
>> +	TAILQ_REMOVE(rh, rptr, next);
>> +
>> +	if (new == NULL) {
>> +		/* Clear states (if any) and delete original rule */
>> +		for (l = rptr->cmd_klen; l > 0; l -= cmdlen, old += cmdlen) {
>> +			cmdlen = F_LEN(old);
>> +	
>> +			if ((rewrite = ipfw_find_rewrite(old->opcode)) == NULL)
>> +				continue;
>> +	
>> +			if (rewrite->clear == NULL)
>> +				continue;
>> +	
>> +			CTR1(KTR_NET, "clear-state for opcode %u", old->opcode);
>> +			rewrite->clear(chain, old, NULL);
>> +		}
>> +
>> +		/* Update size difference */
>> +		rwd->lendiff -= rptr->cmd_klen - rptr->cmd_len;
>> +		free(rptr, M_IPFW);
>> +	} else {
>> +		/* Put to new slot */
>> +		rptr->kcmd = new;
>> +		rh = &rwd->hash[hash_ptr(rwd, new)];
>> +		TAILQ_INSERT_TAIL(rh, rptr, next);
>> +
>> +		/* Update instructions pointers */
>> +		for (l = rptr->cmd_klen; l > 0 ;
>> +		    l -= cmdlen, old += cmdlen, new += cmdlen) {
>> +			cmdlen = F_LEN(old);
>> +	
>> +			if ((rewrite = ipfw_find_rewrite(old->opcode)) == NULL)
>> +				continue;
>> +	
>> +			if (rewrite->move == NULL)
>> +				continue;
>> +	
>> +			rewrite->move(chain, old, new);
>> +		}
>> +	}
>> +}
>> +
>> +/*
>> + * Exports the original (userland) form of a rewritten rule. Returns the
>> + * userland opcode length in bytes (used in initial size-checking
>> + * calculations); if @target is not NULL the saved opcodes are copied there.
>> + */
>> +int
>> +ipfw_export_rewrite(struct ip_fw_chain *chain, ipfw_insn *kcmd, ipfw_insn *target)
>> +{
>> +	struct rewrite_rule_ptr *rptr;
>> +	struct rewrite_rule_head *rh;
>> +	struct ip_fw_rw_data *rwd;
>> +
>> +	rwd = chain->rewrite_data;
>> +
>> +	KASSERT(rwd != NULL, ("ipfw_export_rewrite: rewrite not initialized"));
>> +
>> +	/* Locate the saved user rule by its kernel opcode pointer. */
>> +	rh = &rwd->hash[hash_ptr(rwd, kcmd)];
>> +
>> +	TAILQ_FOREACH(rptr, rh, next) {
>> +		if (rptr->kcmd == kcmd)
>> +			break;
>> +	}
>> +
>> +	KASSERT(rptr != NULL, ("ipfw_export_rewrite:  kcmd not found"));
>> +
>> +	/* cmd_len counts instruction words; it is converted to bytes here. */
>> +	if (target != NULL)
>> +		memcpy(target, rptr->cmd, rptr->cmd_len * sizeof(uint32_t));
>> +
>> +	return (rptr->cmd_len * sizeof(uint32_t));
>> +}
>> +
>> +/*
>> + * bsearch() comparator: orders a uint32_t opcode key against a table entry.
>> + */
>> +static int
>> +rewrite_comp(const void *_key, const void *_member)
>> +{
>> +	const struct ip_fw_rewrite *entry;
>> +	uint32_t wanted;
>> +
>> +	wanted = *(const uint32_t *)_key;
>> +	entry = (const struct ip_fw_rewrite *)_member;
>> +
>> +	/* Negative, zero or positive: key sorts before, at or after entry. */
>> +	if (wanted == entry->opcode)
>> +		return (0);
>> +	if (wanted < entry->opcode)
>> +		return (-1);
>> +	return (1);
>> +}
>> +
>> +
>> +static struct ip_fw_rewrite *
>> +ipfw_find_rewrite(uint32_t opcode)
>> +{
>> +	size_t nrewrites;
>> +
>> +	/*
>> +	 * Binary-search rewrites[] (which bsearch() needs sorted by opcode).
>> +	 * Returns the matching entry or NULL if @opcode has no rewrite.
>> +	 */
>> +	nrewrites = sizeof(rewrites) / sizeof(struct ip_fw_rewrite);
>> +	return ((struct ip_fw_rewrite *)bsearch(&opcode, rewrites,
>> +	    nrewrites, sizeof(struct ip_fw_rewrite), rewrite_comp));
>> +}
>> +
>> +
>> +/*
>> + * Classifies @insn for rewriting and accounts for it in @rwi:
>> + * ->count is bumped for every stateless or stateful rewrite, ->states
>> + * for stateful ones, and ->lendiff by the length delta the check reports.
>> + */
>> +void
>> +ipfw_check_rewrite(struct ip_fw_chain *chain, ipfw_insn *insn,
>> +    struct ip_fw_rw_info *rwi)
>> +{
>> +	struct ip_fw_rewrite *rw;
>> +	int verdict, delta;
>> +
>> +	delta = 0;
>> +	rw = ipfw_find_rewrite(insn->opcode);
>> +
>> +	if (rw == NULL)
>> +		verdict = NO_REWRITE;
>> +	else if (rw->check != NULL)
>> +		verdict = rw->check(chain, insn, &delta);
>> +	else
>> +		verdict = STATELESS_REWRITE;	/* no callback: plain rewrite */
>> +
>> +	if (delta != 0)
>> +		rwi->lendiff += delta;
>> +
>> +	if (verdict == STATEFUL_REWRITE)
>> +		rwi->states++;
>> +	if (verdict == STATELESS_REWRITE || verdict == STATEFUL_REWRITE)
>> +		rwi->count++;
>> +
>> +	if (verdict != NO_REWRITE)
>> +		CTR4(KTR_NET, "opcode %d: count=%d states=%d len=%d",
>> +		    insn->opcode, rwi->count, rwi->states, rwi->lendiff);
>> +}
>> +
>> +/*
>> + * Invoke the opcode-specific 'update' callback for @insn, if one exists.
>> + * @state and @val are passed through to the callback unchanged.
>> + */
>> +void
>> +ipfw_update_rewrite(struct ip_fw_chain *chain, ipfw_insn *insn,
>> +    void *state, uintptr_t val)
>> +{
>> +	struct ip_fw_rewrite *rw;
>> +
>> +	rw = ipfw_find_rewrite(insn->opcode);
>> +	/* Opcodes without a rewrite entry or callback are left untouched. */
>> +	if (rw == NULL || rw->update == NULL)
>> +		return;
>> +
>> +	rw->update(chain, insn, state, val);
>> +}
>> +
>> +/*******************************************************************
>> + * 								    *
>> + *          O_RECV | O_VIA | O_XMIT rewrite handling.               *
>> + * 								    *
>> + *******************************************************************/
>> +/*
>> + * Converts ipfw_insn_if to a more compact form. Currently the instruction
>> + * is used to specify
>> + * 1) interface name ( ->name[0] != ('\0' | '\1') AND p.glob == 0)
>> + * 2) interface pattern ( ->name[0] != ('\0' | '\1') AND p.glob != 0)
>> + * 3) eXtended table number ( ->name[0] == '\1')
>> + * 4) interface address ( ->name[0] == '\0')
>> + *
>> + * We want to save iface index in case 1 (and to eliminate interface name at all).
>> + * Given that, we do the following:
>> + *
>> + * p.glob is now p.if_idx (u_int) (glob if zero, iface index otherwise)
>> + * o.arg1 works like ->name[0], so:
>> + *
>> + * 1) interface name (o.arg1 == 2, p.if_idx contains index)
>> + * 2) interface pattern (o.arg1 == 2, p.if_idx == 0)
>> + * 3) eXtended table number (o.arg1 == 1)
>> + * 4) interface address (o.arg1 == 0)
>> + */
>> +
>> +static int
>> +convertable_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn)
>> +{
>> +	const char first = ((ipfw_insn_if *)insn)->name[0];
>> +
>> +	/*
>> +	 * name[0] of '\0' (interface address, case 4) or '\1' (extended
>> +	 * table, case 3) means there is no interface name to convert.
>> +	 */
>> +	return ((first != '\0' && first != '\1') ? 1 : 0);
>> +}
>> +
>> +static int
>> +check_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, int *len_diff)
>> +{
>> +	ipfw_insn_if *ifcmd;
>> +
>> +	/* The rewritten instruction keeps its size. */
>> +	*len_diff = 0;
>> +	ifcmd = (ipfw_insn_if *)insn;
>> +
>> +	/*
>> +	 * Only a plain interface name (case 1) needs state. Addresses,
>> +	 * extended tables and glob patterns are rewritten statelessly.
>> +	 */
>> +	if (convertable_insn_if(chain, insn) != 0 && ifcmd->p.glob == 0)
>> +		return (STATEFUL_REWRITE);
>> +	return (STATELESS_REWRITE);
>> +}
>> +
>> +static int
>> +prepare_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, void **pstate)
>> +{
>> +	ipfw_insn_if *ifcmd = (ipfw_insn_if *)insn;
>> +	struct ipfw_insn_ptr *link;
>> +	struct iface_mask *mask;
>> +
>> +	/*
>> +	 * Addresses and extended tables (cases 3, 4) as well as glob
>> +	 * patterns (case 2) are rewritten without any state.
>> +	 */
>> +	if (convertable_insn_if(chain, insn) == 0 || ifcmd->p.glob != 0)
>> +		return (STATELESS_REWRITE);
>> +
>> +	/*
>> +	 * Interface name (case 1): preallocate the interface entry and the
>> +	 * instruction link consumed later by the convert callback.
>> +	 */
>> +	link = malloc(sizeof(struct ipfw_insn_ptr), M_IPFW,
>> +	    M_WAITOK | M_ZERO);
>> +	mask = malloc(sizeof(struct iface_mask), M_IPFW, M_WAITOK | M_ZERO);
>> +
>> +	TAILQ_INIT(&mask->instructions);
>> +	TAILQ_INSERT_TAIL(&mask->instructions, link, next);
>> +	CTR3(KTR_NET, "pstate %p, val %p insns %p", pstate, mask, insn);
>> +	*pstate = mask;
>> +	return (STATEFUL_REWRITE);
>> +}
>> +
>> +static int
>> +convert_insn_if(struct ip_fw_chain *chain, ipfw_insn *_old, ipfw_insn *_new,
>> +    void *state, int *len)
>> +{
>> +	struct iface_mask *ifm, *ifm2;
>> +	struct ipfw_insn_ptr *insn_ptr;
>> +	ipfw_insn_if *cmd_old = (ipfw_insn_if *)_old;
>> +	ipfw_insn_if *cmd_new = (ipfw_insn_if *)_new;
>> +
>> +	/* The kernel form has a fixed length; report it on every path */
>> +	*len = F_INSN_SIZE(ipfw_insn_if);
>> +	memcpy(cmd_new, cmd_old, sizeof(ipfw_insn_if));
>> +
>> +	if (convertable_insn_if(chain, _old) == 0) {
>> +		/*
>> +		 * case (3, eX table): o.arg1 = 1
>> +		 * case (4, ifaddr): o.arg1 = 0
>> +		 */
>> +
>> +		cmd_new->o.arg1 = (cmd_old->name[0] == '\1') ? 1 : 0;
>> +
>> +		return (STATELESS_REWRITE);
>> +	}
>> +
>> +	/*
>> +	 * Prepare instruction for altering (cmd_new was already copied
>> +	 * from cmd_old above, so only the changed fields are set here).
>> +	 * case (1, ifname): o.arg1 = 2; p.if_idx == interface index
>> +	 * case (2, glob): o.arg1 = 2; p.if_idx = 0
>> +	 */
>> +	cmd_new->o.arg1 = 2;
>> +
>> +	if (cmd_old->p.glob) {
>> +		/* Interface mask (2). Copy as is and set index */
>> +		cmd_new->p.if_idx = 0;
>> +		return (STATELESS_REWRITE);
>> +	}
>> +
>> +	/* Interface name. */
>> +	ifm = (struct iface_mask *)state;
>> +	insn_ptr = TAILQ_FIRST(&ifm->instructions);
>> +	/* The link preallocated by the prepare step now tracks _new. */
>> +	insn_ptr->insn = _new;
>> +
>> +	if ((ifm2 = ipfw_search_ifname(chain, cmd_old->name)) != NULL) {
>> +		/* Interface found, link entry here */
>> +		TAILQ_INSERT_TAIL(&ifm2->instructions, insn_ptr, next);
>> +		ifm2->refcount++;
>> +		cmd_new->p.if_idx = ifm2->idx;
>> +		if (ifm2->flags & IPFW_IFLAG_FAKE)
>> +			cmd_new->p.if_idx |= IPFW_FAKE_IDX;
>> +		/* The preallocated entry is unused; ifm2 holds the link now. */
>> +		free(ifm, M_IPFW);
>> +		return (STATEFUL_REWRITE);
>> +	}
>> +
>> +	/* Interface not found, add and mark as unexistent */
>> +	strlcpy(ifm->name, cmd_old->name, IFNAMSIZ);
>> +	ifm->flags |= IPFW_IFLAG_FAKE;
>> +	ifm->refcount++;
>> +	ipfw_add_ifname(chain, ifm);
>> +	cmd_new->p.if_idx = ifm->idx | IPFW_FAKE_IDX;
>> +	/* Add instruction back (add_ifname reinits list) */
>> +	TAILQ_INSERT_TAIL(&ifm->instructions, insn_ptr, next);
>> +
>> +	return (STATEFUL_REWRITE);
>> +}
>> +
>> +static void
>> +clear_insn_if(struct ip_fw_chain *chain, ipfw_insn *_src, void *data)
>> +{
>> +	struct iface_mask *ifm;
>> +	ipfw_insn_if *cmd;
>> +	struct ipfw_insn_ptr *insn_ptr = (struct ipfw_insn_ptr *)data;
>> +
>> +	/* Drops the link that ties kernel instruction _src to its interface. */
>> +	cmd = (ipfw_insn_if *)_src;
>> +	/* Only interface names carry state; globs (if_idx == 0) have none. */
>> +	if (cmd->o.arg1 != 2 || cmd->p.if_idx == 0)
>> +		return;
>> +
>> +	ifm = ipfw_search_ifindex(chain, cmd->p.if_idx);
>> +	KASSERT(ifm != NULL, ("no ifp found for index %u", cmd->p.if_idx));
>> +	/* A caller may supply the link in @data; otherwise search for it. */
>> +	if (insn_ptr == NULL) {
>> +		TAILQ_FOREACH(insn_ptr, &ifm->instructions, next) {
>> +			if (insn_ptr->insn == _src)
>> +				break;
>> +		}
>> +
>> +		KASSERT(insn_ptr != NULL, ("no insns found"));
>> +	}
>> +
>> +	/* Remove instruction from interface */
>> +	TAILQ_REMOVE(&ifm->instructions, insn_ptr, next);
>> +	ifm->refcount--;
>> +
>> +	free(insn_ptr, M_IPFW);
>> +}
>> +
>> +static void
>> +update_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, void *_iface_mask,
>> +    uintptr_t new_id)
>> +{
>> +	ipfw_insn_if *cmd;
>> +
>> +	/* Rewrites the interface index stored in a kernel instruction. */
>> +	IPFW_WLOCK_ASSERT(chain);
>> +
>> +	/* Callers pass a fake index already or-ed with IPFW_FAKE_IDX. */
>> +	cmd = (ipfw_insn_if *)insn;
>> +
>> +	CTR2(KTR_NET, "updating insn: ifi %u -> %u",
>> +	    cmd->p.if_idx, (uint32_t)new_id);
>> +
>> +	cmd->p.if_idx = (uint32_t)new_id;
>> +}
>> +
>> +static void
>> +move_insn_if(struct ip_fw_chain *chain, ipfw_insn *_old, ipfw_insn *_new)
>> +{
>> +	struct iface_mask *ifm;
>> +	ipfw_insn_if *cmd;
>> +	struct ipfw_insn_ptr *insn_ptr;
>> +
>> +	/* Repoints the interface's instruction link from _old to _new. */
>> +	cmd = (ipfw_insn_if *)_old;
>> +	/* Only interface names carry state; globs (if_idx == 0) have none. */
>> +	if (cmd->o.arg1 != 2 || cmd->p.if_idx == 0)
>> +		return;
>> +
>> +	ifm = ipfw_search_ifindex(chain, cmd->p.if_idx);
>> +	KASSERT(ifm != NULL, ("no ifp found for index %u", cmd->p.if_idx));
>> +
>> +	TAILQ_FOREACH(insn_ptr, &ifm->instructions, next) {
>> +		if (insn_ptr->insn == _old)
>> +			break;
>> +	}
>> +
>> +	KASSERT(insn_ptr != NULL, ("no insns found"));
>> +
>> +	insn_ptr->insn = _new;
>> +}
>> +
>> +
>> --- /dev/null	2013-04-24 17:20:19.000000000 +0400
>> +++ sys/netpfil/ipfw/ip_fw_iface.c	2013-04-24 17:18:35.546357594 +0400
>> @@ -0,0 +1,467 @@
>> +/*-
>> + * Copyright (c) 2013 Yandex LLC.
>> + * Author: Alexander V. Chernikov <melifaro@yandex-team.ru>
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + * 1. Redistributions of source code must retain the above copyright
>> + *    notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + *    notice, this list of conditions and the following disclaimer in the
>> + *    documentation and/or other materials provided with the distribution.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
>> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
>> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
>> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
>> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
>> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
>> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
>> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
>> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>> + * SUCH DAMAGE.
>> + */
>> +
>> +#include <sys/cdefs.h>
>> +__FBSDID("$FreeBSD$");
>> +
>> +/*
>> + * Interface tracking for ipfw.
>> + */
>> +
>> +#include "opt_ipfw.h"
>> +
>> +#include <sys/param.h>
>> +#include <sys/systm.h>
>> +#include <sys/malloc.h>
>> +#include <sys/kernel.h>
>> +#include <sys/lock.h>
>> +#include <sys/rwlock.h>
>> +#include <sys/fnv_hash.h>
>> +#include <sys/socket.h>
>> +#include <net/if.h>
>> +#include <net/vnet.h>
>> +
>> +#include <netinet/in.h>
>> +#include <netinet/ip_var.h> /* hooks */
>> +#include <netinet/ip_fw.h>
>> +
>> +#include <netpfil/ipfw/ip_fw_private.h>
>> +#include <netpfil/ipfw/ip_fw_iface.h>
>> +
>> +#define	IPFW_IFHASH_IDX(idx, hsize)	((idx) % (hsize))
>> +#define	IPFW_IFHASH_NAME(name, hsize)	(fnv_32_str(name, FNV1_32_INIT) % (hsize))
>> +
>> +TAILQ_HEAD(iface_mask_head, iface_mask);
>> +
>> +struct ip_fw_if_data {
>> +	struct iface_mask_head *masks; /* Interface name hash */
>> +	size_t masks_count, masks_hsize;	/* hsize: number of buckets */
>> +	struct iface_mask_head *real_ifaces; /* 'Real' interface index hash */
>> +	size_t real_count, real_hsize;	/* hsize: number of buckets */
>> +	struct iface_mask_head *fake_ifaces; /* Nonexistent interface index hash */
>> +	size_t fake_count, fake_hsize;	/* hsize: number of buckets */
>> +	eventhandler_tag arrival, departure;	/* ifnet event handler tags */
>> +	u_short fake_idx;	/* Last fake index handed out */
>> +};
>> +
>> +static void ipfw_ifhash_init_int(struct iface_mask_head **phash, size_t hsize);
>> +static void ipfw_ifnet_init(struct ip_fw_chain *chain, struct iface_mask *ifm);
>> +
>> +/*
>> + * Mappings:
>> + * 'iface_mask' -> idx
>> + * if_index -> iface_mask
>> + * fake_index -> iface_mask
>> + *
>> + * List of masks
>> + *
>> + */
>> +static void ifnet_arrival(void *arg, struct ifnet *ifp);
>> +static void ifnet_departure(void *arg, struct ifnet *ifp);
>> +
>> +/*
>> + * Look up a tracked interface by exact name; NULL if there is none.
>> + * Called with either UH or chain readlock held.
>> + */
>> +struct iface_mask *
>> +ipfw_search_ifname(struct ip_fw_chain *chain, char *name)
>> +{
>> +	struct ip_fw_if_data *ifd = chain->if_data;
>> +	struct iface_mask_head *bucket;
>> +	struct iface_mask *cur;
>> +
>> +	/* Hash the name to pick a bucket, then scan its chain. */
>> +	bucket = &ifd->masks[IPFW_IFHASH_NAME(name, ifd->masks_hsize)];
>> +
>> +	for (cur = TAILQ_FIRST(bucket); cur != NULL;
>> +	    cur = TAILQ_NEXT(cur, name_next)) {
>> +		if (strcmp(name, cur->name) != 0)
>> +			continue;
>> +		/* Exact match. */
>> +		return (cur);
>> +	}
>> +
>> +	return (NULL);
>> +}
>> +
>> +/*
>> + * Look up a tracked interface by index. An index with IPFW_FAKE_IDX set
>> + * is searched (with the flag stripped) in the fake hash, others in the
>> + * real one. Called with either UH or chain readlock held.
>> + */
>> +struct iface_mask *
>> +ipfw_search_ifindex(struct ip_fw_chain *chain, uint32_t idx)
>> +{
>> +	struct ip_fw_if_data *ifd = chain->if_data;
>> +	struct iface_mask_head *table;
>> +	struct iface_mask *cur;
>> +	size_t hsize;
>> +
>> +	if ((idx & IPFW_FAKE_IDX) != 0) {
>> +		idx &= ~IPFW_FAKE_IDX;
>> +		table = ifd->fake_ifaces;
>> +		hsize = ifd->fake_hsize;
>> +	} else {
>> +		table = ifd->real_ifaces;
>> +		hsize = ifd->real_hsize;
>> +	}
>> +
>> +	/* Entries store the bare index, so compare without the flag. */
>> +	TAILQ_FOREACH(cur, &table[IPFW_IFHASH_IDX(idx, hsize)], idx_next) {
>> +		if (cur->idx == idx)
>> +			return (cur);
>> +	}
>> +
>> +	return (NULL);
>> +}
>> +
>> +void
>> +ipfw_add_ifname(struct ip_fw_chain *chain, struct iface_mask *ifm)
>> +{
>> +	struct ip_fw_if_data *ifd;
>> +	struct iface_mask_head *ifh;
>> +	struct iface_mask *iftemp;
>> +	int i;
>> +
>> +	ifd = chain->if_data;
>> +	/* Link @ifm into the name hash and the real or fake index hash. */
>> +	ipfw_ifnet_init(chain, ifm);
>> +	/* NB: the call above reinitialized ifm->instructions. */
>> +	/* Add to named hash */
>> +	i = IPFW_IFHASH_NAME(ifm->name, ifd->masks_hsize);
>> +	ifh = &ifd->masks[i];
>> +	TAILQ_INSERT_TAIL(ifh, ifm, name_next);
>> +	/* A fake entry gets a fresh index; a real one arrives with idx set. */
>> +	if (ifm->flags & IPFW_IFLAG_FAKE) {
>> +		/* Add to fake interfaces hash */
>> +		ifm->idx = ++ifd->fake_idx;
>> +		i = IPFW_IFHASH_IDX(ifm->idx, ifd->fake_hsize);
>> +		ifh = &ifd->fake_ifaces[i];
>> +	} else {
>> +		/* Add to real interfaces hash */
>> +		i = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
>> +		ifh = &ifd->real_ifaces[i];
>> +	
>> +		/* Check index for consistency */
>> +		TAILQ_FOREACH(iftemp, ifh, idx_next) {
>> +			KASSERT(iftemp->idx != ifm->idx,
>> +			    ("Non-fake if %s w idx %d found (%s)!",
>> +			     iftemp->name, ifm->idx, ifm->name));
>> +		}
>> +	}
>> +	
>> +	TAILQ_INSERT_TAIL(ifh, ifm, idx_next);
>> +}
>> +
>> +static void
>> +ifnet_arrival(void *arg, struct ifnet *ifp)
>> +{
>> +	struct ip_fw_chain *chain = (struct ip_fw_chain *)arg;
>> +	struct ip_fw_if_data *ifd;
>> +	struct iface_mask *iftemp, *ifm;
>> +	struct iface_mask_head *ifh;
>> +	struct ipfw_insn_ptr *insn_ptr;
>> +	int i;
>> +	/* ifnet_arrival_event handler: start tracking @ifp (arg is the chain). */
>> +	iftemp = malloc(sizeof(struct iface_mask), M_IPFW, M_WAITOK | M_ZERO);
>> +	/* Filled in before locking; freed at the end unless it gets linked. */
>> +	iftemp->ifp = ifp;
>> +	iftemp->idx = ifp->if_index;
>> +	strlcpy(iftemp->name, ifp->if_xname, IFNAMSIZ);
>> +
>> +	IPFW_UH_WLOCK(chain);
>> +	IPFW_WLOCK(chain);
>> +
>> +	ifd = chain->if_data;
>> +
>> +	if (ifd == NULL || ifd->arrival == NULL) {
>> +		/* We're shutting down */
>> +		IPFW_WUNLOCK(chain);
>> +		IPFW_UH_WUNLOCK(chain);
>> +		free(iftemp, M_IPFW);
>> +		return;
>> +	}
>> +
>> +	ifm = ipfw_search_ifname(chain, iftemp->name);
>> +	/* A rule may already reference this name through a fake entry. */
>> +	if (ifm != NULL) {
>> +		/* Found. Let's update index */
>> +		KASSERT(ifm->flags & IPFW_IFLAG_FAKE,
>> +		    ("Non-fake interface found for %s", ifm->name));
>> +
>> +		ifm->flags &= ~IPFW_IFLAG_FAKE;
>> +		/* Relink to real index */
>> +		i = IPFW_IFHASH_IDX(ifm->idx, ifd->fake_hsize);
>> +		ifh = &ifd->fake_ifaces[i];
>> +		TAILQ_REMOVE(ifh, ifm, idx_next);
>> +
>> +		i = IPFW_IFHASH_IDX(iftemp->idx, ifd->real_hsize);
>> +		ifh = &ifd->real_ifaces[i];
>> +		TAILQ_INSERT_TAIL(ifh, ifm, idx_next);
>> +
>> +		CTR2(KTR_NET, "ifnet upgrade: fake %u -> %u", ifm->idx,
>> +		    iftemp->idx);
>> +		/* Notify consumers */
>> +		TAILQ_FOREACH(insn_ptr, &ifm->instructions, next)
>> +			ipfw_update_rewrite(chain, insn_ptr->insn, ifm,
>> +			    (uintptr_t)iftemp->idx);
>> +		ifm->ifp = ifp;	/* entry was fake: old ifp is NULL or stale */
>> +		ifm->idx = iftemp->idx;
>> +	} else {
>> +		/* Not found. Add to list */
>> +		ifm = iftemp;
>> +		iftemp = NULL;
>> +
>> +		ipfw_ifnet_init(chain, ifm);
>> +
>> +		CTR2(KTR_NET, "ifmp=%p uc=%u", ifm, ifm->refcount);
>> +
>> +		/* Add to named hash */
>> +		i = IPFW_IFHASH_NAME(ifm->name, ifd->masks_hsize);
>> +		ifh = &ifd->masks[i];
>> +		TAILQ_INSERT_TAIL(ifh, ifm, name_next);
>> +
>> +		/* Add to real interfaces hash */
>> +		i = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
>> +		ifh = &ifd->real_ifaces[i];
>> +		/* iftemp doubles as iterator; it is NULL when the loop ends */
>> +		/* Check index for consistency */
>> +		TAILQ_FOREACH(iftemp, ifh, idx_next) {
>> +			KASSERT(iftemp->idx != ifm->idx,
>> +			    ("Non-fake if %s w idx %d found (%s)!",
>> +			     iftemp->name, ifm->idx, ifm->name));
>> +		}
>> +
>> +		TAILQ_INSERT_TAIL(ifh, ifm, idx_next);
>> +
>> +		CTR3(KTR_NET, "new iface %p, idx %u uc=%u", ifm->name,
>> +		    ifm->idx, ifm->refcount);
>> +	}
>> +	IPFW_WUNLOCK(chain);
>> +	IPFW_UH_WUNLOCK(chain);
>> +
>> +	if (iftemp != NULL)
>> +		free(iftemp, M_IPFW);
>> +}
>> +
>> +static void
>> +ifnet_departure(void *arg, struct ifnet *ifp)
>> +{
>> +	struct ip_fw_chain *chain = (struct ip_fw_chain *)arg;
>> +	struct ip_fw_if_data *ifd;
>> +	struct iface_mask *ifm;
>> +	struct iface_mask_head *ifh;
>> +	struct ipfw_insn_ptr *insn_ptr;
>> +	int i;
>> +	/* ifnet_departure_event handler: @ifp is going away (arg is chain). */
>> +	IPFW_UH_WLOCK(chain);
>> +	IPFW_WLOCK(chain);
>> +
>> +	if ((ifd = chain->if_data) == NULL) {
>> +		/* We're shutting down */
>> +		IPFW_WUNLOCK(chain);
>> +		IPFW_UH_WUNLOCK(chain);
>> +		return;
>> +	}
>> +
>> +	ifm = ipfw_search_ifname(chain, ifp->if_xname);
>> +
>> +	if (ifm == NULL) {
>> +		IPFW_WUNLOCK(chain);
>> +		IPFW_UH_WUNLOCK(chain);
>> +		printf("ipfw: unknown iface %s departure\n", ifp->if_xname);
>> +		return;
>> +	}
>> +
>> +	KASSERT((ifm->flags & IPFW_IFLAG_FAKE) == 0,
>> +	    ("Fake interface found for %s", ifm->name));
>> +
>> +	/* Check if we need to save given interface. */
>> +	if (ifm->refcount == 0) {
>> +		CTR1(KTR_NET, "Deleting interface %p", ifm);
>> +		/* Delete from name hash */
>> +		i = IPFW_IFHASH_NAME(ifm->name, ifd->masks_hsize);
>> +		ifh = &ifd->masks[i];
>> +		TAILQ_REMOVE(ifh, ifm, name_next);
>> +
>> +		/* Delete from real iface hash */
>> +		i = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
>> +		ifh = &ifd->real_ifaces[i];
>> +		TAILQ_REMOVE(ifh, ifm, idx_next);
>> +
>> +		IPFW_WUNLOCK(chain);
>> +		IPFW_UH_WUNLOCK(chain);
>> +
>> +		free(ifm, M_IPFW);
>> +		return;
>> +	}
>> +
>> +	CTR1(KTR_NET, "Interface uc=%u", ifm->refcount);
>> +
>> +	/* Interface is used. Move to fake hash */
>> +	ifm->flags |= IPFW_IFLAG_FAKE;
>> +	/* Relink to fake index */
>> +	i = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
>> +	ifh = &ifd->real_ifaces[i];
>> +	TAILQ_REMOVE(ifh, ifm, idx_next);
>> +
>> +	/* Alloc fake index */
>> +	ifd->fake_idx++;
>> +	i = IPFW_IFHASH_IDX(ifd->fake_idx, ifd->fake_hsize);
>> +	ifh = &ifd->fake_ifaces[i];
>> +	TAILQ_INSERT_TAIL(ifh, ifm, idx_next);
>> +
>> +	CTR2(KTR_NET, "Interface %p departure, fake index %u",
>> +	    ifm, ifd->fake_idx);
>> +
>> +	/* Notify consumers */
>> +	TAILQ_FOREACH(insn_ptr, &ifm->instructions, next)
>> +		ipfw_update_rewrite(chain, insn_ptr->insn, ifm,
>> +		    (uintptr_t)(ifd->fake_idx | IPFW_FAKE_IDX));
>> +	ifm->ifp = NULL;	/* the ifnet is departing; keep no pointer to it */
>> +	ifm->idx = ifd->fake_idx;
>> +
>> +	IPFW_WUNLOCK(chain);
>> +	IPFW_UH_WUNLOCK(chain);
>> +}
>> +
>> +static void
>> +ipfw_ifnet_init(struct ip_fw_chain *chain, struct iface_mask *ifm)
>> +{
>> +	/* Start @ifm with an empty instruction list; @chain is unused. */
>> +	TAILQ_INIT(&ifm->instructions);
>> +}
>> +
>> +
>> +static void
>> +ipfw_ifhash_init_int(struct iface_mask_head **phash, size_t hsize)
>> +{
>> +	struct iface_mask_head *buckets;
>> +	int n;
>> +
>> +	/* Allocate @hsize zeroed list heads and initialize each as empty. */
>> +	buckets = malloc(sizeof(struct iface_mask_head) * hsize, M_IPFW,
>> +	    M_WAITOK | M_ZERO);
>> +	for (n = 0; n < hsize; n++)
>> +		TAILQ_INIT(&buckets[n]);
>> +
>> +	*phash = buckets;
>> +}
>> +
>> +void
>> +ipfw_ifhash_init(struct ip_fw_chain *chain)
>> +{
>> +	struct ip_fw_if_data *ifd;
>> +	struct iface_mask_head *ifh;
>> +	struct iface_mask *ifm;
>> +	struct ifnet *ifp;
>> +	int i;
>> +
>> +	ifd = malloc(sizeof(struct ip_fw_if_data), M_IPFW, M_WAITOK | M_ZERO);
>> +	chain->if_data = ifd;
>> +	/* All three hashes start with the same fixed bucket count. */
>> +	ifd->masks_hsize = ifd->real_hsize = ifd->fake_hsize = 32;
>> +	/* Size each table by its own hsize: lookups index with that value. */
>> +	ipfw_ifhash_init_int(&ifd->masks, ifd->masks_hsize);
>> +	ipfw_ifhash_init_int(&ifd->real_ifaces, ifd->real_hsize);
>> +	ipfw_ifhash_init_int(&ifd->fake_ifaces, ifd->fake_hsize);
>> +	/* Seed the tables with every interface that already exists. */
>> +	IFNET_RLOCK();
>> +	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
>> +		ifm = malloc(sizeof(struct iface_mask), M_IPFW, M_WAITOK | M_ZERO);
>> +		strlcpy(ifm->name, ifp->if_xname, IFNAMSIZ);
>> +		ifm->ifp = ifp;
>> +		ifm->idx = ifp->if_index;
>> +
>> +		ipfw_ifnet_init(chain, ifm);
>> +
>> +		i = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
>> +		ifh = &ifd->real_ifaces[i];
>> +		TAILQ_INSERT_TAIL(ifh, ifm, idx_next);
>> +
>> +		i = IPFW_IFHASH_NAME(ifm->name, ifd->masks_hsize);
>> +		ifh = &ifd->masks[i];
>> +		TAILQ_INSERT_TAIL(ifh, ifm, name_next);
>> +
>> +		CTR2(KTR_NET, "init iface %p idx %u", ifm, ifm->idx);
>> +
>> +	}
>> +	IFNET_RUNLOCK();
>> +
>> +	/* XXX: there is a gap between RUNLOCK and interface registration */
>> +
>> +	ifd->arrival = EVENTHANDLER_REGISTER(ifnet_arrival_event,
>> +	    ifnet_arrival, chain, EVENTHANDLER_PRI_ANY);
>> +
>> +	ifd->departure = EVENTHANDLER_REGISTER(ifnet_departure_event,
>> +	    ifnet_departure, chain, EVENTHANDLER_PRI_ANY);
>> +}
>> +
>> +void
>> +ipfw_ifhash_detach(struct ip_fw_chain *chain)
>> +{
>> +	struct ip_fw_if_data *ifd;
>> +	
>> +	ifd = chain->if_data;
>> +	/* Stop receiving interface arrival/departure events. */
>> +	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifd->arrival);
>> +	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifd->departure);
>> +	/* ifnet_arrival() treats a NULL arrival tag as "shutting down". */
>> +	ifd->arrival = NULL;
>> +	ifd->departure = NULL;
>> +}
>> +
>> +
>> +void
>> +ipfw_ifhash_free(struct ip_fw_chain *chain)
>> +{
>> +	struct ip_fw_if_data *ifd;
>> +	struct iface_mask_head *ifh;
>> +	struct iface_mask *ifm, *ifm_next;
>> +	int i;
>> +	
>> +	ifd = chain->if_data;
>> +	chain->if_data = NULL;
>> +	/* The ifnet handlers check chain->if_data for NULL on shutdown. */
>> +	ifh = ifd->masks;
>> +	/* Every entry is on the name hash, so walking it frees them all. */
>> +	for (i = 0; i < ifd->masks_hsize; i++, ifh++) {
>> +		TAILQ_FOREACH_SAFE(ifm, ifh, name_next, ifm_next) {
>> +			/*
>> +			 * Assume every consumer to free its
>> +			 * iface-specific data beforehand.
>> +			 */
>> +			free(ifm, M_IPFW);
>> +		}
>> +	}
>> +	/* Entries are gone; release the bucket arrays and the container. */
>> +	free(ifd->masks, M_IPFW);
>> +	free(ifd->real_ifaces, M_IPFW);
>> +	free(ifd->fake_ifaces, M_IPFW);
>> +
>> +	free(ifd, M_IPFW);
>> +}
>> +
>> --- /dev/null	2013-04-24 17:22:00.000000000 +0400
>> +++ sys/netpfil/ipfw/ip_fw_iface.h	2013-04-22 19:09:56.624996491 +0400
>> @@ -0,0 +1,55 @@
>> +/*-
>> + * Copyright (c) 2013 Yandex LLC.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + * 1. Redistributions of source code must retain the above copyright
>> + *    notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + *    notice, this list of conditions and the following disclaimer in the
>> + *    documentation and/or other materials provided with the distribution.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
>> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
>> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
>> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
>> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
>> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
>> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
>> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
>> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>> + * SUCH DAMAGE.
>> + *
>> + * $FreeBSD$
>> + */
>> +
>> +#ifndef _IP_FW_IFACE_H_
>> +#define _IP_FW_IFACE_H_
>> +
>> +struct ipfw_insn_ptr {
>> +	TAILQ_ENTRY(ipfw_insn_ptr)	next;	/* iface_mask.instructions linkage */
>> +	ipfw_insn	*insn;	/* Kernel instruction using the interface */
>> +};
>> +
>> +struct iface_mask {
>> +	struct ifnet	*ifp;	/* ifnet recorded at init/arrival time */
>> +	uint32_t	idx;	/* Saved interface index */
>> +	uint32_t	flags;	/* IPFW_IFLAG_* flags */
>> +	uint32_t	refcount;	/* Usage count */
>> +	char		name[IFNAMSIZ];	/* Interface/mask */
>> +	TAILQ_ENTRY(iface_mask)	idx_next;	/* real/fake index hash linkage */
>> +	TAILQ_ENTRY(iface_mask)	name_next;	/* name hash linkage */
>> +	TAILQ_HEAD(rule_list, ipfw_insn_ptr)	instructions;	/* instructions using given mask */
>> +};
>> +#define	IPFW_IFLAG_FAKE	0x01	/* iface_mask.flags: no such ifnet now */
>> +/* Bit set in an instruction's if_idx for fake indexes; 1U keeps it unsigned */
>> +#define	IPFW_FAKE_IDX	(1U << 31)
>> +
>> +struct iface_mask *ipfw_search_ifname(struct ip_fw_chain *chain, char *name);
>> +struct iface_mask *ipfw_search_ifindex(struct ip_fw_chain *chain, uint32_t idx);
>> +void ipfw_add_ifname(struct ip_fw_chain *chain, struct iface_mask *ifm);
>> +
>> +#endif
>> +
>>
> _______________________________________________
> freebsd-ipfw@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-ipfw
> To unsubscribe, send any mail to "freebsd-ipfw-unsubscribe@freebsd.org"
>




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?51780C49.7000204>