Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 18 Aug 2009 20:28:58 +0000 (UTC)
From:      Kip Macy <kmacy@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r196368 - in head/sys: net netinet
Message-ID:  <200908182028.n7IKSwJ9027599@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kmacy
Date: Tue Aug 18 20:28:58 2009
New Revision: 196368
URL: http://svn.freebsd.org/changeset/base/196368

Log:
   - change the interface to flowtable_lookup so that we don't rely on
     the mbuf for obtaining the fib index
   - check that a cached flow corresponds to the same fib index as the
     packet for which we are doing the lookup
   - at interface detach time flush any flows referencing stale rtentrys
     associated with the interface that is going away (fixes reported
     panics)
   - reduce the time between cleans, so that if the cleaner is running
     when the eventhandler is called and the wakeup is missed, less
     time will elapse before the eventhandler returns
   - separate per-vnet initialization from global initialization
     (pointed out by jeli@)
  
  Reviewed by:	sam@
  Approved by:	re@

Modified:
  head/sys/net/flowtable.c
  head/sys/net/flowtable.h
  head/sys/netinet/ip_output.c

Modified: head/sys/net/flowtable.c
==============================================================================
--- head/sys/net/flowtable.c	Tue Aug 18 20:25:02 2009	(r196367)
+++ head/sys/net/flowtable.c	Tue Aug 18 20:28:58 2009	(r196368)
@@ -29,6 +29,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include "opt_route.h"
 #include "opt_mpath.h"
+#include "opt_ddb.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
@@ -36,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>  
 #include <sys/types.h>
 #include <sys/bitstring.h>
+#include <sys/condvar.h>
 #include <sys/callout.h>
 #include <sys/kernel.h>  
 #include <sys/kthread.h>
@@ -66,6 +68,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/sctp.h>
 
 #include <libkern/jenkins.h>
+#include <ddb/ddb.h>
 
 struct ipv4_tuple {
 	uint16_t 	ip_sport;	/* source port */
@@ -94,8 +97,9 @@ union ipv6_flow {
 struct flentry {
 	volatile uint32_t	f_fhash;	/* hash flowing forward */
 	uint16_t		f_flags;	/* flow flags */
-	uint8_t			f_pad;		/* alignment */
+	uint8_t			f_pad;		
 	uint8_t			f_proto;	/* protocol */
+	uint32_t		f_fibnum;	/* fib index */
 	uint32_t		f_uptime;	/* uptime at last access */
 	struct flentry		*f_next;	/* pointer to collision entry */
 	volatile struct rtentry *f_rt;		/* rtentry for flow */
@@ -173,6 +177,10 @@ static VNET_DEFINE(uma_zone_t, flow_ipv6
 #define	V_flow_ipv4_zone	VNET(flow_ipv4_zone)
 #define	V_flow_ipv6_zone	VNET(flow_ipv6_zone)
 
+static struct cv 	flowclean_cv;
+static struct mtx	flowclean_lock;
+static uint32_t		flowclean_cycles;
+
 /*
  * TODO:
  * - Make flowtable stats per-cpu, aggregated at sysctl call time,
@@ -288,10 +296,10 @@ SYSCTL_VNET_PROC(_net_inet_flowtable, OI
 
 #ifndef RADIX_MPATH
 static void
-in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fib)
+in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
 {
 
-	rtalloc_ign_fib(ro, 0, fib);
+	rtalloc_ign_fib(ro, 0, fibnum);
 }
 #endif
 
@@ -425,7 +433,7 @@ static bitstr_t *
 flowtable_mask(struct flowtable *ft)
 {
 	bitstr_t *mask;
-	
+
 	if (ft->ft_flags & FL_PCPU)
 		mask = ft->ft_masks[curcpu];
 	else
@@ -501,7 +509,7 @@ flowtable_set_hashkey(struct flentry *fl
 
 static int
 flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
-    uint8_t proto, struct route *ro, uint16_t flags)
+    uint8_t proto, uint32_t fibnum, struct route *ro, uint16_t flags)
 {
 	struct flentry *fle, *fletail, *newfle, **flep;
 	int depth;
@@ -564,6 +572,7 @@ skip:
 	fle->f_rt = ro->ro_rt;
 	fle->f_lle = ro->ro_lle;
 	fle->f_fhash = hash;
+	fle->f_fibnum = fibnum;
 	fle->f_uptime = time_uptime;
 	FL_ENTRY_UNLOCK(ft, hash);
 	return (0);
@@ -591,13 +600,13 @@ flowtable_key_equal(struct flentry *fle,
 }
 
 int
-flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro)
+flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro, uint32_t fibnum)
 {
 	uint32_t key[9], hash;
 	struct flentry *fle;
 	uint16_t flags;
 	uint8_t proto = 0;
-	int error = 0, fib = 0;
+	int error = 0;
 	struct rtentry *rt;
 	struct llentry *lle;
 
@@ -640,6 +649,7 @@ keycheck:	
 	    && fle->f_fhash == hash
 	    && flowtable_key_equal(fle, key)
 	    && (proto == fle->f_proto)
+	    && (fibnum == fle->f_fibnum)
 	    && (rt->rt_flags & RTF_UP)
 	    && (rt->rt_ifp != NULL)) {
 		V_flowtable_hits++;
@@ -668,10 +678,8 @@ uncached:
 	 * of arpresolve with an rt_check variant that expected to
 	 * receive the route locked
 	 */
-	if (m != NULL)
-		fib = M_GETFIB(m);
 
-	ft->ft_rtalloc(ro, hash, fib);
+	ft->ft_rtalloc(ro, hash, fibnum);
 	if (ro->ro_rt == NULL) 
 		error = ENETUNREACH;
 	else {
@@ -692,7 +700,7 @@ uncached:
 			ro->ro_rt = NULL;
 			return (ENOENT);
 		}
-		error = flowtable_insert(ft, hash, key, proto,
+		error = flowtable_insert(ft, hash, key, proto, fibnum,
 		    ro, flags);
 				
 		if (error) {
@@ -791,35 +799,6 @@ flowtable_alloc(int nentry, int flags)
 	return (ft);
 }
 
-static void
-flowtable_init(const void *unused __unused)
-{
-
-	V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
-	    NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
-	V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
-	    NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);	
-	uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
-	uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
-	V_flowtable_ready = 1;
-}
-
-VNET_SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
-    flowtable_init, NULL);
-
-#ifdef VIMAGE
-static void
-flowtable_uninit(const void *unused __unused)
-{
-
-	uma_zdestroy(V_flow_ipv4_zone);
-	uma_zdestroy(V_flow_ipv6_zone);
-}
-
-VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
-    flowtable_uninit, NULL);
-#endif
-
 /*
  * The rest of the code is devoted to garbage collection of expired entries.
  * It is a new additon made necessary by the switch to dynamically allocating
@@ -973,12 +952,30 @@ flowtable_cleaner(void)
 		}
 		VNET_LIST_RUNLOCK();
 
+		flowclean_cycles++;
 		/*
 		 * The 20 second interval between cleaning checks
 		 * is arbitrary
 		 */
-		pause("flowcleanwait", 20*hz);
+		mtx_lock(&flowclean_lock);
+		cv_broadcast(&flowclean_cv);
+		cv_timedwait(&flowclean_cv, &flowclean_lock, 10*hz);
+		mtx_unlock(&flowclean_lock);
+	}
+}
+
+static void
+flowtable_flush(void *unused __unused)
+{
+	uint64_t start;
+	
+	mtx_lock(&flowclean_lock);
+	start = flowclean_cycles;
+	while (start == flowclean_cycles) {
+		cv_broadcast(&flowclean_cv);
+		cv_wait(&flowclean_cv, &flowclean_lock);
 	}
+	mtx_unlock(&flowclean_lock);
 }
 
 static struct kproc_desc flow_kp = {
@@ -988,3 +985,159 @@ static struct kproc_desc flow_kp = {
 };
 SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
 
+static void
+flowtable_init_vnet(const void *unused __unused)
+{
+
+	V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
+	    NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
+	V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
+	    NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);	
+	uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
+	uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
+}
+VNET_SYSINIT(flowtable_init_vnet, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE,
+    flowtable_init_vnet, NULL);
+
+static void
+flowtable_init(const void *unused __unused)
+{
+
+	cv_init(&flowclean_cv, "flowcleanwait");
+	mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
+	EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
+	    EVENTHANDLER_PRI_ANY);
+	V_flowtable_ready = 1;
+}
+SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
+    flowtable_init, NULL);
+
+
+#ifdef VIMAGE
+static void
+flowtable_uninit(const void *unused __unused)
+{
+
+	uma_zdestroy(V_flow_ipv4_zone);
+	uma_zdestroy(V_flow_ipv6_zone);
+}
+
+VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
+    flowtable_uninit, NULL);
+#endif
+
+#ifdef DDB
+static bitstr_t *
+flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
+{
+	bitstr_t *mask;
+
+	if (ft->ft_flags & FL_PCPU)
+		mask = ft->ft_masks[cpuid];
+	else
+		mask = ft->ft_masks[0];
+
+	return (mask);
+}
+
+static struct flentry **
+flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
+{
+	struct flentry **fle;
+	int index = (hash % ft->ft_size);
+
+	if (ft->ft_flags & FL_PCPU) {
+		fle = &ft->ft_table.pcpu[cpuid][index];
+	} else {
+		fle = &ft->ft_table.global[index];
+	}
+	
+	return (fle);
+}
+
+static void
+flow_show(struct flowtable *ft, struct flentry *fle)
+{
+	int idle_time;
+	int rt_valid;
+
+	idle_time = (int)(time_uptime - fle->f_uptime);
+	rt_valid = fle->f_rt != NULL;
+	db_printf("hash=0x%08x idle_time=%03d rt=%p ifp=%p",
+	    fle->f_fhash, idle_time,
+	    fle->f_rt, rt_valid ? fle->f_rt->rt_ifp : NULL);
+	if (rt_valid && (fle->f_rt->rt_flags & RTF_UP))
+		db_printf(" RTF_UP ");
+	if (fle->f_flags & FL_STALE)
+		db_printf(" FL_STALE ");
+	db_printf("\n");
+}
+
+static void
+flowtable_show(struct flowtable *ft, int cpuid)
+{
+	int curbit = 0;
+	struct flentry *fle,  **flehead;
+	bitstr_t *mask, *tmpmask;
+
+	db_printf("cpu: %d\n", cpuid);
+	mask = flowtable_mask_pcpu(ft, cpuid);
+	tmpmask = ft->ft_tmpmask;
+	memcpy(tmpmask, mask, ft->ft_size/8);
+	/*
+	 * XXX Note to self, bit_ffs operates at the byte level
+	 * and thus adds gratuitous overhead
+	 */
+	bit_ffs(tmpmask, ft->ft_size, &curbit);
+	while (curbit != -1) {
+		if (curbit >= ft->ft_size || curbit < -1) {
+			db_printf("warning: bad curbit value %d \n",
+			    curbit);
+			break;
+		}
+
+		flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
+		fle = *flehead;
+
+		while (fle != NULL) {	
+			flow_show(ft, fle);
+			fle = fle->f_next;
+			continue;
+		}
+		bit_clear(tmpmask, curbit);
+		bit_ffs(tmpmask, ft->ft_size, &curbit);
+	}
+}
+
+static void
+flowtable_show_vnet(void)
+{
+	struct flowtable *ft;
+	int i;
+
+	ft = V_flow_list_head;
+	while (ft != NULL) {
+		if (ft->ft_flags & FL_PCPU) {
+			for (i = 0; i <= mp_maxid; i++) {
+				if (CPU_ABSENT(i))
+					continue;
+				flowtable_show(ft, i);
+			}
+		} else {
+			flowtable_show(ft, 0);
+		}
+		ft = ft->ft_next;
+	}
+}
+
+DB_SHOW_COMMAND(flowtables, db_show_flowtables)
+{
+	VNET_ITERATOR_DECL(vnet_iter);
+
+	VNET_FOREACH(vnet_iter) {
+		CURVNET_SET(vnet_iter);
+		flowtable_show_vnet();
+		CURVNET_RESTORE();
+	}
+}
+#endif

Modified: head/sys/net/flowtable.h
==============================================================================
--- head/sys/net/flowtable.h	Tue Aug 18 20:25:02 2009	(r196367)
+++ head/sys/net/flowtable.h	Tue Aug 18 20:28:58 2009	(r196368)
@@ -49,7 +49,7 @@ struct flowtable *flowtable_alloc(int ne
  *
  */
 int flowtable_lookup(struct flowtable *ft, struct mbuf *m,
-    struct route *ro);
+    struct route *ro, uint32_t fibnum);
 
 #endif /* _KERNEL */
 #endif

Modified: head/sys/netinet/ip_output.c
==============================================================================
--- head/sys/netinet/ip_output.c	Tue Aug 18 20:25:02 2009	(r196367)
+++ head/sys/netinet/ip_output.c	Tue Aug 18 20:28:58 2009	(r196368)
@@ -157,7 +157,7 @@ ip_output(struct mbuf *m, struct mbuf *o
 		 * longer than that long for the stability of ro_rt.  The
 		 * flow ID assignment must have happened before this point.
 		 */
-		if (flowtable_lookup(V_ip_ft, m, ro) == 0)
+		if (flowtable_lookup(V_ip_ft, m, ro, M_GETFIB(m)) == 0)
 			nortfree = 1;
 #endif
 	}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200908182028.n7IKSwJ9027599>