Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 4 Mar 2014 15:14:48 +0000 (UTC)
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r262743 - in stable/10: sys/conf sys/net sys/netinet sys/netinet6 usr.bin/netstat
Message-ID:  <201403041514.s24FEmr0063015@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: glebius
Date: Tue Mar  4 15:14:47 2014
New Revision: 262743
URL: http://svnweb.freebsd.org/changeset/base/262743

Log:
  Merge r261582, r261601, r261610, r261613, r261627, r261640, r261641, r261823,
        r261825, r261859, r261875, r261883, r261911, r262027, r262028, r262029,
        r262030, r262162 from head.
  
    Large flowtable revamp. See commit messages for merged revisions for
    details.
  
  Sponsored by:	Netflix

Added:
  stable/10/usr.bin/netstat/flowtable.c
     - copied, changed from r261601, head/usr.bin/netstat/flowtable.c
Modified:
  stable/10/sys/conf/options
  stable/10/sys/net/flowtable.c
  stable/10/sys/net/flowtable.h
  stable/10/sys/net/route.c
  stable/10/sys/netinet/ip_input.c
  stable/10/sys/netinet/ip_output.c
  stable/10/sys/netinet6/in6_proto.c
  stable/10/sys/netinet6/ip6_input.c
  stable/10/sys/netinet6/ip6_output.c
  stable/10/usr.bin/netstat/Makefile
  stable/10/usr.bin/netstat/main.c
  stable/10/usr.bin/netstat/netstat.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/conf/options
==============================================================================
--- stable/10/sys/conf/options	Tue Mar  4 15:09:57 2014	(r262742)
+++ stable/10/sys/conf/options	Tue Mar  4 15:14:47 2014	(r262743)
@@ -438,6 +438,7 @@ TCP_SIGNATURE		opt_inet.h
 VLAN_ARRAY		opt_vlan.h
 XBONEHACK
 FLOWTABLE		opt_route.h
+FLOWTABLE_HASH_ALL	opt_route.h
 
 #
 # SCTP

Modified: stable/10/sys/net/flowtable.c
==============================================================================
--- stable/10/sys/net/flowtable.c	Tue Mar  4 15:09:57 2014	(r262742)
+++ stable/10/sys/net/flowtable.c	Tue Mar  4 15:14:47 2014	(r262743)
@@ -1,31 +1,30 @@
-/**************************************************************************
-
-Copyright (c) 2008-2010, BitGravity Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Neither the name of the BitGravity Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
+ * Copyright (c) 2008-2010, BitGravity Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Neither the name of the BitGravity Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 #include "opt_route.h"
 #include "opt_mpath.h"
@@ -36,29 +35,32 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include <sys/param.h>  
+#include <sys/param.h>
 #include <sys/types.h>
 #include <sys/bitstring.h>
 #include <sys/condvar.h>
 #include <sys/callout.h>
 #include <sys/hash.h>
-#include <sys/kernel.h>  
+#include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/pcpu.h>
 #include <sys/proc.h>
+#include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
+#include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_llatbl.h>
 #include <net/if_var.h>
-#include <net/route.h> 
+#include <net/route.h>
 #include <net/flowtable.h>
 #include <net/vnet.h>
 
@@ -70,156 +72,79 @@ __FBSDID("$FreeBSD$");
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
+#ifdef FLOWTABLE_HASH_ALL
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 #include <netinet/sctp.h>
+#endif
 
 #include <ddb/ddb.h>
 
-struct ipv4_tuple {
-	uint16_t 	ip_sport;	/* source port */
-	uint16_t 	ip_dport;	/* destination port */
-	in_addr_t 	ip_saddr;	/* source address */
-	in_addr_t 	ip_daddr;	/* destination address */
-};
-
-union ipv4_flow {
-	struct ipv4_tuple ipf_ipt;
-	uint32_t 	ipf_key[3];
-};
+#ifdef	FLOWTABLE_HASH_ALL
+#define	KEY_PORTS	(sizeof(uint16_t) * 2)
+#define	KEY_ADDRS	2
+#else
+#define	KEY_PORTS	0
+#define	KEY_ADDRS	1
+#endif
 
-struct ipv6_tuple {
-	uint16_t 	ip_sport;	/* source port */
-	uint16_t 	ip_dport;	/* destination port */
-	struct in6_addr	ip_saddr;	/* source address */
-	struct in6_addr	ip_daddr;	/* destination address */
-};
+#ifdef	INET6
+#define	KEY_ADDR_LEN	sizeof(struct in6_addr)
+#else
+#define	KEY_ADDR_LEN	sizeof(struct in_addr)
+#endif
 
-union ipv6_flow {
-	struct ipv6_tuple ipf_ipt;
-	uint32_t 	ipf_key[9];
-};
+#define	KEYLEN	((KEY_ADDR_LEN * KEY_ADDRS + KEY_PORTS) / sizeof(uint32_t))
 
 struct flentry {
-	volatile uint32_t	f_fhash;	/* hash flowing forward */
-	uint16_t		f_flags;	/* flow flags */
-	uint8_t			f_pad;		
-	uint8_t			f_proto;	/* protocol */
-	uint32_t		f_fibnum;	/* fib index */
+	uint32_t		f_hash;		/* hash flowing forward */
+	uint32_t		f_key[KEYLEN];	/* address(es and ports) */
 	uint32_t		f_uptime;	/* uptime at last access */
-	struct flentry		*f_next;	/* pointer to collision entry */
-	volatile struct rtentry *f_rt;		/* rtentry for flow */
-	volatile struct llentry *f_lle;		/* llentry for flow */
-};
-
-struct flentry_v4 {
-	struct flentry	fl_entry;
-	union ipv4_flow	fl_flow;
-};
-
-struct flentry_v6 {
-	struct flentry	fl_entry;
-	union ipv6_flow	fl_flow;
-};
-
-#define	fl_fhash	fl_entry.fl_fhash
-#define	fl_flags	fl_entry.fl_flags
-#define	fl_proto	fl_entry.fl_proto
-#define	fl_uptime	fl_entry.fl_uptime
-#define	fl_rt		fl_entry.fl_rt
-#define	fl_lle		fl_entry.fl_lle
-
-#define	SECS_PER_HOUR		3600
-#define	SECS_PER_DAY		(24*SECS_PER_HOUR)
-
-#define	SYN_IDLE		300
-#define	UDP_IDLE		300
-#define	FIN_WAIT_IDLE		600
-#define	TCP_IDLE		SECS_PER_DAY
-
-
-typedef	void fl_lock_t(struct flowtable *, uint32_t);
-typedef void fl_rtalloc_t(struct route *, uint32_t, u_int);
-
-union flentryp {
-	struct flentry		**global;
-	struct flentry		**pcpu[MAXCPU];
+	uint16_t		f_fibnum;	/* fib index */
+#ifdef FLOWTABLE_HASH_ALL
+	uint8_t			f_proto;	/* protocol */
+	uint8_t			f_flags;	/* stale? */
+#define FL_STALE 		1
+#endif
+	SLIST_ENTRY(flentry)	f_next;		/* pointer to collision entry */
+	struct rtentry		*f_rt;		/* rtentry for flow */
+	struct llentry		*f_lle;		/* llentry for flow */
 };
+#undef KEYLEN
 
-struct flowtable_stats {
-	uint64_t	ft_collisions;
-	uint64_t	ft_allocated;
-	uint64_t	ft_misses;
-	uint64_t	ft_max_depth;
-	uint64_t	ft_free_checks;
-	uint64_t	ft_frees;
-	uint64_t	ft_hits;
-	uint64_t	ft_lookups;
-} __aligned(CACHE_LINE_SIZE);
+SLIST_HEAD(flist, flentry);
+/* Make sure we can use pcpu_zone_ptr for struct flist. */
+CTASSERT(sizeof(struct flist) == sizeof(void *));
 
 struct flowtable {
-	struct	flowtable_stats ft_stats[MAXCPU];
+	counter_u64_t	*ft_stat;
 	int 		ft_size;
-	int 		ft_lock_count;
-	uint32_t	ft_flags;
-	char		*ft_name;
-	fl_lock_t	*ft_lock;
-	fl_lock_t 	*ft_unlock;
-	fl_rtalloc_t	*ft_rtalloc;
 	/*
-	 * XXX need to pad out 
-	 */ 
-	struct mtx	*ft_locks;
-	union flentryp	ft_table;
-	bitstr_t 	*ft_masks[MAXCPU];
+	 * ft_table is a malloc(9)ed array of pointers.  Pointers point to
+	 * memory from UMA_ZONE_PCPU zone.
+	 * ft_masks is per-cpu pointer itself.  Each instance points
+	 * to a malloc(9)ed bitset, that is private to corresponding CPU.
+	 */
+	struct flist	**ft_table;
+	bitstr_t 	**ft_masks;
 	bitstr_t	*ft_tmpmask;
-	struct flowtable *ft_next;
+};
 
-	uint32_t	ft_count __aligned(CACHE_LINE_SIZE);
-	uint32_t	ft_udp_idle __aligned(CACHE_LINE_SIZE);
-	uint32_t	ft_fin_wait_idle;
-	uint32_t	ft_syn_idle;
-	uint32_t	ft_tcp_idle;
-	boolean_t	ft_full;
-} __aligned(CACHE_LINE_SIZE);
+#define	FLOWSTAT_ADD(ft, name, v)	\
+	counter_u64_add((ft)->ft_stat[offsetof(struct flowtable_stat, name) / sizeof(uint64_t)], (v))
+#define	FLOWSTAT_INC(ft, name)	FLOWSTAT_ADD(ft, name, 1)
 
 static struct proc *flowcleanerproc;
-static VNET_DEFINE(struct flowtable *, flow_list_head);
-static VNET_DEFINE(uint32_t, flow_hashjitter);
-static VNET_DEFINE(uma_zone_t, flow_ipv4_zone);
-static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
-
-#define	V_flow_list_head	VNET(flow_list_head)
-#define	V_flow_hashjitter	VNET(flow_hashjitter)
-#define	V_flow_ipv4_zone	VNET(flow_ipv4_zone)
-#define	V_flow_ipv6_zone	VNET(flow_ipv6_zone)
-
+static uint32_t flow_hashjitter;
 
 static struct cv 	flowclean_f_cv;
 static struct cv 	flowclean_c_cv;
 static struct mtx	flowclean_lock;
 static uint32_t		flowclean_cycles;
-static uint32_t		flowclean_freq;
-
-#ifdef FLOWTABLE_DEBUG
-#define FLDPRINTF(ft, flags, fmt, ...) 		\
-do {		  				\
-	if ((ft)->ft_flags & (flags))		\
-		printf((fmt), __VA_ARGS__);	\
-} while (0);					\
-
-#else
-#define FLDPRINTF(ft, flags, fmt, ...)
-
-#endif
-
 
 /*
  * TODO:
- * - Make flowtable stats per-cpu, aggregated at sysctl call time,
- *   to avoid extra cache evictions caused by incrementing a shared
- *   counter
- * - add sysctls to resize && flush flow tables 
+ * - add sysctls to resize && flush flow tables
  * - Add per flowtable sysctls for statistics and configuring timeouts
  * - add saturation counter to rtentry to support per-packet load-balancing
  *   add flag to indicate round-robin flow, add list lookup from head
@@ -230,396 +155,117 @@ do {		  				\
  * - support explicit connection state (currently only ad-hoc for DSR)
  * - idetach() cleanup for options VIMAGE builds.
  */
-VNET_DEFINE(int, flowtable_enable) = 1;
-static VNET_DEFINE(int, flowtable_debug);
-static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
-static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
-static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
-static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
-static VNET_DEFINE(int, flowtable_nmbflows);
-static VNET_DEFINE(int, flowtable_ready) = 0;
+#ifdef INET
+static VNET_DEFINE(struct flowtable, ip4_ft);
+#define	V_ip4_ft	VNET(ip4_ft)
+#endif
+#ifdef INET6
+static VNET_DEFINE(struct flowtable, ip6_ft);
+#define	V_ip6_ft	VNET(ip6_ft)
+#endif
 
+static uma_zone_t flow_zone;
+
+static VNET_DEFINE(int, flowtable_enable) = 1;
 #define	V_flowtable_enable		VNET(flowtable_enable)
-#define	V_flowtable_debug		VNET(flowtable_debug)
-#define	V_flowtable_syn_expire		VNET(flowtable_syn_expire)
-#define	V_flowtable_udp_expire		VNET(flowtable_udp_expire)
-#define	V_flowtable_fin_wait_expire	VNET(flowtable_fin_wait_expire)
-#define	V_flowtable_tcp_expire		VNET(flowtable_tcp_expire)
-#define	V_flowtable_nmbflows		VNET(flowtable_nmbflows)
-#define	V_flowtable_ready		VNET(flowtable_ready)
 
-static SYSCTL_NODE(_net_inet, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
+static SYSCTL_NODE(_net, OID_AUTO, flowtable, CTLFLAG_RD, NULL,
     "flowtable");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
-    &VNET_NAME(flowtable_debug), 0, "print debug info.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
+SYSCTL_VNET_INT(_net_flowtable, OID_AUTO, enable, CTLFLAG_RW,
     &VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
+SYSCTL_UMA_MAX(_net_flowtable, OID_AUTO, maxflows, CTLFLAG_RW,
+    &flow_zone, "Maximum number of flows allowed");
 
-/*
- * XXX This does not end up updating timeouts at runtime
- * and only reflects the value for the last table added :-/
- */
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, syn_expire, CTLFLAG_RW,
-    &VNET_NAME(flowtable_syn_expire), 0,
-    "seconds after which to remove syn allocated flow.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, udp_expire, CTLFLAG_RW,
-    &VNET_NAME(flowtable_udp_expire), 0,
-    "seconds after which to remove flow allocated to UDP.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, fin_wait_expire, CTLFLAG_RW,
-    &VNET_NAME(flowtable_fin_wait_expire), 0,
-    "seconds after which to remove a flow in FIN_WAIT.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, tcp_expire, CTLFLAG_RW,
-    &VNET_NAME(flowtable_tcp_expire), 0,
-    "seconds after which to remove flow allocated to a TCP connection.");
-
-
-/*
- * Maximum number of flows that can be allocated of a given type.
- *
- * The table is allocated at boot time (for the pure caching case
- * there is no reason why this could not be changed at runtime)
- * and thus (currently) needs to be set with a tunable.
- */
-static int
-sysctl_nmbflows(SYSCTL_HANDLER_ARGS)
-{
-	int error, newnmbflows;
-
-	newnmbflows = V_flowtable_nmbflows;
-	error = sysctl_handle_int(oidp, &newnmbflows, 0, req); 
-	if (error == 0 && req->newptr) {
-		if (newnmbflows > V_flowtable_nmbflows) {
-			V_flowtable_nmbflows = newnmbflows;
-			uma_zone_set_max(V_flow_ipv4_zone,
-			    V_flowtable_nmbflows);
-			uma_zone_set_max(V_flow_ipv6_zone,
-			    V_flowtable_nmbflows);
-		} else
-			error = EINVAL;
-	}
-	return (error);
-}
-SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
-    CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
-    "Maximum number of flows allowed");
-
-
-
-#define FS_PRINT(sb, field)	sbuf_printf((sb), "\t%s: %jd\n", #field, fs->ft_##field)
-
-static void
-fs_print(struct sbuf *sb, struct flowtable_stats *fs)
-{
-
-	FS_PRINT(sb, collisions);
-	FS_PRINT(sb, allocated);
-	FS_PRINT(sb, misses);
-	FS_PRINT(sb, max_depth);
-	FS_PRINT(sb, free_checks);
-	FS_PRINT(sb, frees);
-	FS_PRINT(sb, hits);
-	FS_PRINT(sb, lookups);
-}
-
-static void
-flowtable_show_stats(struct sbuf *sb, struct flowtable *ft)
-{
-	int i;
-	struct flowtable_stats fs, *pfs;
-
-	if (ft->ft_flags & FL_PCPU) {
-		bzero(&fs, sizeof(fs));
-		pfs = &fs;
-		CPU_FOREACH(i) {
-			pfs->ft_collisions  += ft->ft_stats[i].ft_collisions;
-			pfs->ft_allocated   += ft->ft_stats[i].ft_allocated;
-			pfs->ft_misses      += ft->ft_stats[i].ft_misses;
-			pfs->ft_free_checks += ft->ft_stats[i].ft_free_checks;
-			pfs->ft_frees       += ft->ft_stats[i].ft_frees;
-			pfs->ft_hits        += ft->ft_stats[i].ft_hits;
-			pfs->ft_lookups     += ft->ft_stats[i].ft_lookups;
-			if (ft->ft_stats[i].ft_max_depth > pfs->ft_max_depth)
-				pfs->ft_max_depth = ft->ft_stats[i].ft_max_depth;
-		}
-	} else {
-		pfs = &ft->ft_stats[0];
-	}
-	fs_print(sb, pfs);
-}
-
-static int
-sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
-{
-	struct flowtable *ft;
-	struct sbuf *sb;
-	int error;
-
-	sb = sbuf_new(NULL, NULL, 64*1024, SBUF_FIXEDLEN);
+static MALLOC_DEFINE(M_FTABLE, "flowtable", "flowtable hashes and bitstrings");
 
-	ft = V_flow_list_head;
-	while (ft != NULL) {
-		sbuf_printf(sb, "\ntable name: %s\n", ft->ft_name);
-		flowtable_show_stats(sb, ft);
-		ft = ft->ft_next;
-	}
-	sbuf_finish(sb);
-	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
-	sbuf_delete(sb);
-
-	return (error);
-}
-SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
-    NULL, 0, sysctl_flowtable_stats, "A", "flowtable statistics");
-
-
-#ifndef RADIX_MPATH
-static void
-rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
-{
-
-	rtalloc_ign_fib(ro, 0, fibnum);
-}
-#endif
-
-static void
-flowtable_global_lock(struct flowtable *table, uint32_t hash)
-{	
-	int lock_index = (hash)&(table->ft_lock_count - 1);
-
-	mtx_lock(&table->ft_locks[lock_index]);
-}
-
-static void
-flowtable_global_unlock(struct flowtable *table, uint32_t hash)
-{	
-	int lock_index = (hash)&(table->ft_lock_count - 1);
-
-	mtx_unlock(&table->ft_locks[lock_index]);
-}
-
-static void
-flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
-{
-
-	critical_enter();
-}
-
-static void
-flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
-{
-
-	critical_exit();
-}
-
-#define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
-#define FL_ENTRY(table, hash) *flowtable_entry((table), (hash))
-#define FL_ENTRY_LOCK(table, hash)  (table)->ft_lock((table), (hash))
-#define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
-
-#define FL_STALE 	(1<<8)
-#define FL_OVERWRITE	(1<<10)
-
-void
-flow_invalidate(struct flentry *fle)
-{
-
-	fle->f_flags |= FL_STALE;
-}
-
-static __inline int
-proto_to_flags(uint8_t proto)
-{
-	int flag;
-
-	switch (proto) {
-	case IPPROTO_TCP:
-		flag = FL_TCP;
-		break;
-	case IPPROTO_SCTP:
-		flag = FL_SCTP;
-		break;		
-	case IPPROTO_UDP:
-		flag = FL_UDP;
-		break;
-	default:
-		flag = 0;
-		break;
-	}
-
-	return (flag);
-}
-
-static __inline int
-flags_to_proto(int flags)
-{
-	int proto, protoflags;
-
-	protoflags = flags & (FL_TCP|FL_SCTP|FL_UDP);
-	switch (protoflags) {
-	case FL_TCP:
-		proto = IPPROTO_TCP;
-		break;
-	case FL_SCTP:
-		proto = IPPROTO_SCTP;
-		break;
-	case FL_UDP:
-		proto = IPPROTO_UDP;
-		break;
-	default:
-		proto = 0;
-		break;
-	}
-	return (proto);
-}
+static struct flentry *
+flowtable_lookup_common(struct flowtable *, uint32_t *, int, uint32_t);
 
 #ifdef INET
-#ifdef FLOWTABLE_DEBUG
-static void
-ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
-    struct sockaddr_in *dsin)
-{
-	char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
-
-	if (flags & FL_HASH_ALL) {
-		inet_ntoa_r(ssin->sin_addr, saddr);
-		inet_ntoa_r(dsin->sin_addr, daddr);
-		printf("proto=%d %s:%d->%s:%d\n",
-		    proto, saddr, ntohs(ssin->sin_port), daddr,
-		    ntohs(dsin->sin_port));
-	} else {
-		inet_ntoa_r(*(struct in_addr *) &dsin->sin_addr, daddr);
-		printf("proto=%d %s\n", proto, daddr);
-	}
-
-}
-#endif
-
-static int
-ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
-    struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
+static struct flentry *
+flowtable_lookup_ipv4(struct mbuf *m, struct route *ro)
 {
+	struct flentry *fle;
+	struct sockaddr_in *sin;
 	struct ip *ip;
-	uint8_t proto;
+	uint32_t fibnum;
+#ifdef FLOWTABLE_HASH_ALL
+	uint32_t key[3];
 	int iphlen;
-	struct tcphdr *th;
-	struct udphdr *uh;
-	struct sctphdr *sh;
 	uint16_t sport, dport;
+	uint8_t proto;
+#endif
 
-	proto = sport = dport = 0;
 	ip = mtod(m, struct ip *);
-	dsin->sin_family = AF_INET;
-	dsin->sin_len = sizeof(*dsin);
-	dsin->sin_addr = ip->ip_dst;
-	ssin->sin_family = AF_INET;
-	ssin->sin_len = sizeof(*ssin);
-	ssin->sin_addr = ip->ip_src;	
 
-	proto = ip->ip_p;
-	if ((*flags & FL_HASH_ALL) == 0) {
-		FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
-		    *flags);
-		goto skipports;
-	}
+	if (ip->ip_src.s_addr == ip->ip_dst.s_addr ||
+	    (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
+	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
+		return (NULL);
 
-	iphlen = ip->ip_hl << 2; /* XXX options? */
+	fibnum = M_GETFIB(m);
+
+#ifdef FLOWTABLE_HASH_ALL
+	iphlen = ip->ip_hl << 2;
+	proto = ip->ip_p;
 
 	switch (proto) {
-	case IPPROTO_TCP:
-		th = (struct tcphdr *)((caddr_t)ip + iphlen);
+	case IPPROTO_TCP: {
+		struct tcphdr *th;
+
+		th = (struct tcphdr *)((char *)ip + iphlen);
 		sport = th->th_sport;
 		dport = th->th_dport;
-		if ((*flags & FL_HASH_ALL) &&
-		    (th->th_flags & (TH_RST|TH_FIN)))
-			*flags |= FL_STALE;
-	break;
-	case IPPROTO_UDP:
-		uh = (struct udphdr *)((caddr_t)ip + iphlen);
+		if (th->th_flags & (TH_RST|TH_FIN))
+			fibnum |= (FL_STALE << 24);
+		break;
+	}
+	case IPPROTO_UDP: {
+		struct udphdr *uh;
+
+		uh = (struct udphdr *)((char *)ip + iphlen);
 		sport = uh->uh_sport;
 		dport = uh->uh_dport;
-	break;
-	case IPPROTO_SCTP:
-		sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+		break;
+	}
+	case IPPROTO_SCTP: {
+		struct sctphdr *sh;
+
+		sh = (struct sctphdr *)((char *)ip + iphlen);
 		sport = sh->src_port;
 		dport = sh->dest_port;
-	break;
+		/* XXXGL: handle stale? */
+		break;
+	}
 	default:
-		FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
-		return (ENOTSUP);
-		/* no port - hence not a protocol we care about */
+		sport = dport = 0;
 		break;
-	
 	}
 
-skipports:
-	*flags |= proto_to_flags(proto);
-	ssin->sin_port = sport;
-	dsin->sin_port = dport;
-	return (0);
-}
-
-static uint32_t
-ipv4_flow_lookup_hash_internal(
-	struct sockaddr_in *ssin, struct sockaddr_in *dsin, 
-	    uint32_t *key, uint16_t flags)
-{
-	uint16_t sport, dport;
-	uint8_t proto;
-	int offset = 0;
+	key[0] = ip->ip_dst.s_addr;
+	key[1] = ip->ip_src.s_addr;
+	key[2] = (dport << 16) | sport;
+	fibnum |= proto << 16;
 
-	if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
-		return (0);
-	proto = flags_to_proto(flags);
-	sport = dport = key[2] = key[1] = key[0] = 0;
-	if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
-		key[1] = ssin->sin_addr.s_addr;
-		sport = ssin->sin_port;
-	}
-	if (dsin != NULL) {
-		key[2] = dsin->sin_addr.s_addr;
-		dport = dsin->sin_port;
-	}
-	if (flags & FL_HASH_ALL) {
-		((uint16_t *)key)[0] = sport;
-		((uint16_t *)key)[1] = dport; 
-	} else
-		offset = V_flow_hashjitter + proto;
+	fle = flowtable_lookup_common(&V_ip4_ft, key, 3 * sizeof(uint32_t),
+	    fibnum);
 
-	return (jenkins_hash32(key, 3, offset));
-}
+#else	/* !FLOWTABLE_HASH_ALL */
 
-static struct flentry *
-flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
-{
-	struct sockaddr_storage ssa, dsa;
-	uint16_t flags;
-	struct sockaddr_in *dsin, *ssin;
-
-	dsin = (struct sockaddr_in *)&dsa;
-	ssin = (struct sockaddr_in *)&ssa;
-	bzero(dsin, sizeof(*dsin));
-	bzero(ssin, sizeof(*ssin));
-	flags = ft->ft_flags;
-	if (ipv4_mbuf_demarshal(ft, m, ssin, dsin, &flags) != 0)
-		return (NULL);
+	fle = flowtable_lookup_common(&V_ip4_ft, (uint32_t *)&ip->ip_dst,
+	    sizeof(struct in_addr), fibnum);
 
-	return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
-}
+#endif	/* FLOWTABLE_HASH_ALL */
 
-void
-flow_to_route(struct flentry *fle, struct route *ro)
-{
-	uint32_t *hashkey = NULL;
-	struct sockaddr_in *sin;
+	if (fle == NULL)
+		return (NULL);
 
 	sin = (struct sockaddr_in *)&ro->ro_dst;
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
-	hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
-	sin->sin_addr.s_addr = hashkey[2];
-	ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
-	ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
-	ro->ro_flags |= RT_NORTREF;
+	sin->sin_addr = ip->ip_dst;
+
+	return (fle);
 }
 #endif /* INET */
 
@@ -633,9 +279,8 @@ flow_to_route(struct flentry *fle, struc
 #define PULLUP_TO(_len, p, T)						\
 do {									\
 	int x = (_len) + sizeof(T);					\
-	if ((m)->m_len < x) {						\
-		goto receive_failed;					\
-	}								\
+	if ((m)->m_len < x)						\
+		return (NULL);						\
 	p = (mtod(m, char *) + (_len));					\
 } while (0)
 
@@ -643,26 +288,35 @@ do {									\
 #define	SCTP(p)		((struct sctphdr *)(p))
 #define	UDP(p)		((struct udphdr *)(p))
 
-static int
-ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
-    struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
+static struct flentry *
+flowtable_lookup_ipv6(struct mbuf *m, struct route *ro)
 {
+	struct flentry *fle;
+	struct sockaddr_in6 *sin6;
 	struct ip6_hdr *ip6;
-	uint8_t proto;
+	uint32_t fibnum;
+#ifdef FLOWTABLE_HASH_ALL
+	uint32_t key[9];
+	void *ulp;
 	int hlen;
-	uint16_t src_port, dst_port;
+	uint16_t sport, dport;
 	u_short offset;
-	void *ulp;
+	uint8_t proto;
+#else
+	uint32_t key[4];
+#endif
 
-	offset = hlen = src_port = dst_port = 0;
-	ulp = NULL;
 	ip6 = mtod(m, struct ip6_hdr *);
-	hlen = sizeof(struct ip6_hdr);
-	proto = ip6->ip6_nxt;
+	if (in6_localaddr(&ip6->ip6_dst))
+		return (NULL);
 
-	if ((*flags & FL_HASH_ALL) == 0)
-		goto skipports;
+	fibnum = M_GETFIB(m);
 
+#ifdef	FLOWTABLE_HASH_ALL
+	hlen = sizeof(struct ip6_hdr);
+	proto = ip6->ip6_nxt;
+	offset = sport = dport = 0;
+	ulp = NULL;
 	while (ulp == NULL) {
 		switch (proto) {
 		case IPPROTO_ICMPV6:
@@ -675,21 +329,21 @@ ipv6_mbuf_demarshal(struct flowtable *ft
 			break;
 		case IPPROTO_TCP:
 			PULLUP_TO(hlen, ulp, struct tcphdr);
-			dst_port = TCP(ulp)->th_dport;
-			src_port = TCP(ulp)->th_sport;
-			if ((*flags & FL_HASH_ALL) &&
-			    (TCP(ulp)->th_flags & (TH_RST|TH_FIN)))
-				*flags |= FL_STALE;
+			dport = TCP(ulp)->th_dport;
+			sport = TCP(ulp)->th_sport;
+			if (TCP(ulp)->th_flags & (TH_RST|TH_FIN))
+				fibnum |= (FL_STALE << 24);
 			break;
 		case IPPROTO_SCTP:
 			PULLUP_TO(hlen, ulp, struct sctphdr);
-			src_port = SCTP(ulp)->src_port;
-			dst_port = SCTP(ulp)->dest_port;
+			dport = SCTP(ulp)->src_port;
+			sport = SCTP(ulp)->dest_port;
+			/* XXXGL: handle stale? */
 			break;
 		case IPPROTO_UDP:
 			PULLUP_TO(hlen, ulp, struct udphdr);
-			dst_port = UDP(ulp)->uh_dport;
-			src_port = UDP(ulp)->uh_sport;
+			dport = UDP(ulp)->uh_dport;
+			sport = UDP(ulp)->uh_sport;
 			break;
 		case IPPROTO_HOPOPTS:	/* RFC 2460 */
 			PULLUP_TO(hlen, ulp, struct ip6_hbh);
@@ -698,7 +352,7 @@ ipv6_mbuf_demarshal(struct flowtable *ft
 			ulp = NULL;
 			break;
 		case IPPROTO_ROUTING:	/* RFC 2460 */
-			PULLUP_TO(hlen, ulp, struct ip6_rthdr);	
+			PULLUP_TO(hlen, ulp, struct ip6_rthdr);
 			hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
 			proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
 			ulp = NULL;
@@ -729,689 +383,395 @@ ipv6_mbuf_demarshal(struct flowtable *ft
 		}
 	}
 
-	if (src_port == 0) {
-	receive_failed:
-		return (ENOTSUP);
-	}
-
-skipports:
-	dsin6->sin6_family = AF_INET6;
-	dsin6->sin6_len = sizeof(*dsin6);
-	dsin6->sin6_port = dst_port;
-	memcpy(&dsin6->sin6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
-
-	ssin6->sin6_family = AF_INET6;
-	ssin6->sin6_len = sizeof(*ssin6);
-	ssin6->sin6_port = src_port;
-	memcpy(&ssin6->sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr));
-	*flags |= proto_to_flags(proto);
-
-	return (0);
-}
-
-#define zero_key(key) 		\
-do {				\
-	key[0] = 0;		\
-	key[1] = 0;		\
-	key[2] = 0;		\
-	key[3] = 0;		\
-	key[4] = 0;		\
-	key[5] = 0;		\
-	key[6] = 0;		\
-	key[7] = 0;		\
-	key[8] = 0;		\
-} while (0)
-	
-static uint32_t
-ipv6_flow_lookup_hash_internal(
-	struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, 
-	    uint32_t *key, uint16_t flags)
-{
-	uint16_t sport, dport;
-	uint8_t proto;
-	int offset = 0;
-
-	if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
-		return (0);
-
-	proto = flags_to_proto(flags);
-	zero_key(key);
-	sport = dport = 0;
-	if (dsin6 != NULL) {
-		memcpy(&key[1], &dsin6->sin6_addr, sizeof(struct in6_addr));
-		dport = dsin6->sin6_port;
-	}
-	if ((ssin6 != NULL) && (flags & FL_HASH_ALL)) {
-		memcpy(&key[5], &ssin6->sin6_addr, sizeof(struct in6_addr));
-		sport = ssin6->sin6_port;
-	}
-	if (flags & FL_HASH_ALL) {
-		((uint16_t *)key)[0] = sport;
-		((uint16_t *)key)[1] = dport; 
-	} else
-		offset = V_flow_hashjitter + proto;
-
-	return (jenkins_hash32(key, 9, offset));
-}
+	bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr));
+	bcopy(&ip6->ip6_src, &key[4], sizeof(struct in6_addr));
+	key[8] = (dport << 16) | sport;
+	fibnum |= proto << 16;
+
+	fle = flowtable_lookup_common(&V_ip6_ft, key, 9 * sizeof(uint32_t),
+	    fibnum);
+#else	/* !FLOWTABLE_HASH_ALL */
+	bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr));
+	fle = flowtable_lookup_common(&V_ip6_ft, key, sizeof(struct in6_addr),
+	    fibnum);
+#endif	/* FLOWTABLE_HASH_ALL */
 
-static struct flentry *
-flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
-{
-	struct sockaddr_storage ssa, dsa;
-	struct sockaddr_in6 *dsin6, *ssin6;	
-	uint16_t flags;
-
-	dsin6 = (struct sockaddr_in6 *)&dsa;
-	ssin6 = (struct sockaddr_in6 *)&ssa;
-	bzero(dsin6, sizeof(*dsin6));
-	bzero(ssin6, sizeof(*ssin6));
-	flags = ft->ft_flags;
-	
-	if (ipv6_mbuf_demarshal(ft, m, ssin6, dsin6, &flags) != 0)
+	if (fle == NULL)
 		return (NULL);
 
-	return (flowtable_lookup(ft, &ssa, &dsa, M_GETFIB(m), flags));
-}
-
-void
-flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
-{
-	uint32_t *hashkey = NULL;
-	struct sockaddr_in6 *sin6;
-
 	sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
-
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_len = sizeof(*sin6);
-	hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
-	memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
-	ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
-	ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
-	ro->ro_flags |= RT_NORTREF;
+	bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(struct in6_addr));
+
+	return (fle);
 }
 #endif /* INET6 */
 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201403041514.s24FEmr0063015>