Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 4 Mar 2009 02:38:38 +0000 (UTC)
From:      Kip Macy <kmacy@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r189342 - in user/kmacy/HEAD_fast_net_merge: sbin/route sys/net sys/netinet usr.sbin/route6d
Message-ID:  <200903040238.n242ccNQ028860@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kmacy
Date: Wed Mar  4 02:38:38 2009
New Revision: 189342
URL: http://svn.freebsd.org/changeset/base/189342

Log:
  add route weighting and generalize affinity to be per source ip
  instead of per-flow
  
  186625:
  -  import kernel support for route shutdown
  
  186626:
  - import user support for route shutdown
  
  186628:
  - don't look up laddr or lport if they're already set
  
  186630:
  - fix route shutdown merge
  
  186923:
  - Add kernel support for weighting routes
  
  186924:
  - remove RTA_GENMASK
  
  186925:
  - remove genmask
  - add -weight option to route for adding / changing
    route weight
  
  186994:
   - add kernel support for "sticky" routes
    (all connections from a given source ip will
    be routed to the same dst ip)
  
  186995:
  - add support to the route command for making
    routes sticky
  
  187003:
  - add new flags to route output
  - remove hopcount
  
  187004:
  -  update route flags and metricnames in route command
  
  187005:
  - fetch weight when getting metrics
  
  187006:
  - try to improve formatting slightly in route
  
  187007:
  - more output futzing
  - add show as alias for get
  
  187008:
  - update show handling
  
  187009:
  - remove shutdown
  - update route flags
  
  187010:
  - fix flag setting in RTM_CHANGE
  
  187011:
  - add debug cruft to route selection
  
  187012:
  - fix rn_mpath_count and reduce frequency of printing
  
  187013:
  - update loop condition; print hash earlier
  
  187040:
  - reduce default timeouts in the flowtable
  - remove references to shutdown (redundant with
    zero weight route)
  - simplify weight checking
  
  187041:
  - fix radix_mpath comment
  - remove shutdown flag and message
  
  187206:
  - include opt_mpath.h so that RADIX_MPATH will be
    pulled in
  - remove locking overhead to forwarding workloads
    by making forwarding table pcpu

Modified:
  user/kmacy/HEAD_fast_net_merge/sbin/route/keywords
  user/kmacy/HEAD_fast_net_merge/sbin/route/route.c
  user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c
  user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c
  user/kmacy/HEAD_fast_net_merge/sys/net/route.c
  user/kmacy/HEAD_fast_net_merge/sys/net/route.h
  user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c
  user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c
  user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c

Modified: user/kmacy/HEAD_fast_net_merge/sbin/route/keywords
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sbin/route/keywords	Wed Mar  4 02:12:29 2009	(r189341)
+++ user/kmacy/HEAD_fast_net_merge/sbin/route/keywords	Wed Mar  4 02:38:38 2009	(r189342)
@@ -33,6 +33,7 @@ mtu
 net
 netmask
 nostatic
+nostick
 osi
 prefixlen
 proto1
@@ -44,8 +45,11 @@ rtt
 rttvar
 sa
 sendpipe
+show
 ssthresh
 static
+sticky
+weight
 x25
 xns
 xresolve

Modified: user/kmacy/HEAD_fast_net_merge/sbin/route/route.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sbin/route/route.c	Wed Mar  4 02:12:29 2009	(r189341)
+++ user/kmacy/HEAD_fast_net_merge/sbin/route/route.c	Wed Mar  4 02:38:38 2009	(r189342)
@@ -169,6 +169,7 @@ main(argc, argv)
 	if (*argv)
 		switch (keyword(*argv)) {
 		case K_GET:
+		case K_SHOW:
 			uid = 0;
 			/* FALLTHROUGH */
 
@@ -548,6 +549,7 @@ set_metric(value, key)
 	caseof(K_SSTHRESH, RTV_SSTHRESH, rmx_ssthresh);
 	caseof(K_RTT, RTV_RTT, rmx_rtt);
 	caseof(K_RTTVAR, RTV_RTTVAR, rmx_rttvar);
+	caseof(K_WEIGHT, RTV_WEIGHT, rmx_weight);
 	}
 	rtm_inits |= flag;
 	if (lockrest || locking)
@@ -571,8 +573,9 @@ newroute(argc, argv)
 		errx(EX_NOPERM, "must be root to alter routing table");
 	}
 	cmd = argv[0];
-	if (*cmd != 'g')
+	if (*cmd != 'g' && *cmd != 's')
 		shutdown(s, SHUT_RD); /* Don't want to read back our messages */
+
 	while (--argc > 0) {
 		if (**(++argv)== '-') {
 			switch (key = keyword(1 + *argv)) {
@@ -635,6 +638,12 @@ newroute(argc, argv)
 			case K_STATIC:
 				flags |= RTF_STATIC;
 				break;
+			case K_STICKY:
+				flags |= RTF_STICKY;
+				break;
+			case K_NOSTICK:
+				flags &= ~RTF_STICKY;
+				break;
 			case K_IFA:
 				if (!--argc)
 					usage((char *)NULL);
@@ -645,11 +654,6 @@ newroute(argc, argv)
 					usage((char *)NULL);
 				(void) getaddr(RTA_IFP, *++argv, 0);
 				break;
-			case K_GENMASK:
-				if (!--argc)
-					usage((char *)NULL);
-				(void) getaddr(RTA_GENMASK, *++argv, 0);
-				break;
 			case K_GATEWAY:
 				if (!--argc)
 					usage((char *)NULL);
@@ -688,6 +692,7 @@ newroute(argc, argv)
 			case K_SSTHRESH:
 			case K_RTT:
 			case K_RTTVAR:
+			case K_WEIGHT:
 				if (!--argc)
 					usage((char *)NULL);
 				set_metric(*++argv, key);
@@ -741,7 +746,7 @@ newroute(argc, argv)
 		} else
 			break;
 	}
-	if (*cmd == 'g')
+	if (*cmd == 'g' || *cmd == 's')
 		exit(ret != 0);
 	if (!qflag) {
 		oerrno = errno;
@@ -925,9 +930,6 @@ getaddr(which, s, hpp)
 	case RTA_NETMASK:
 		su = &so_mask;
 		break;
-	case RTA_GENMASK:
-		su = &so_genmask;
-		break;
 	case RTA_IFP:
 		su = &so_ifp;
 		afamily = AF_LINK;
@@ -1191,7 +1193,7 @@ rtmsg(cmd, flags)
 		cmd = RTM_ADD;
 	else if (cmd == 'c')
 		cmd = RTM_CHANGE;
-	else if (cmd == 'g') {
+	else if (cmd == 'g' || cmd == 's') {
 		cmd = RTM_GET;
 		if (so_ifp.sa.sa_family == 0) {
 			so_ifp.sa.sa_family = AF_LINK;
@@ -1208,13 +1210,11 @@ rtmsg(cmd, flags)
 	rtm.rtm_addrs = rtm_addrs;
 	rtm.rtm_rmx = rt_metrics;
 	rtm.rtm_inits = rtm_inits;
-
 	if (rtm_addrs & RTA_NETMASK)
 		mask_addr();
 	NEXTADDR(RTA_DST, so_dst);
 	NEXTADDR(RTA_GATEWAY, so_gate);
 	NEXTADDR(RTA_NETMASK, so_mask);
-	NEXTADDR(RTA_GENMASK, so_genmask);
 	NEXTADDR(RTA_IFP, so_ifp);
 	NEXTADDR(RTA_IFA, so_ifa);
 	rtm.rtm_msglen = l = cp - (char *)&m_rtmsg;
@@ -1295,13 +1295,13 @@ char *msgtypes[] = {
 };
 
 char metricnames[] =
-"\011pksent\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire\2hopcount"
+"\011weight\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire"
 "\1mtu";
 char routeflags[] =
-"\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE\010MASK_PRESENT"
-"\011CLONING\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE\016b016"
-"\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3\024CHAINDELETE"
-"\025PINNED\026LOCAL\027BROADCAST\030MULTICAST";
+"\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE"
+"\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE"
+"\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3"
+"\025PINNED\026LOCAL\027BROADCAST\030MULTICAST\035STICKY";
 char ifnetflags[] =
 "\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5PTP\6b6\7RUNNING\010NOARP"
 "\011PPROMISC\012ALLMULTI\013OACTIVE\014SIMPLEX\015LINK0\016LINK1"
@@ -1464,14 +1464,13 @@ print_getmsg(rtm, msglen)
 #define msec(u)	(((u) + 500) / 1000)		/* usec to msec */
 
 	(void) printf("\n%s\n", "\
- recvpipe  sendpipe  ssthresh  rtt,msec    rttvar  hopcount      mtu     expire");
+ recvpipe  sendpipe  ssthresh  rtt,msec    mtu        weight    expire");
 	printf("%8ld%c ", rtm->rtm_rmx.rmx_recvpipe, lock(RPIPE));
 	printf("%8ld%c ", rtm->rtm_rmx.rmx_sendpipe, lock(SPIPE));
 	printf("%8ld%c ", rtm->rtm_rmx.rmx_ssthresh, lock(SSTHRESH));
 	printf("%8ld%c ", msec(rtm->rtm_rmx.rmx_rtt), lock(RTT));
-	printf("%8ld%c ", msec(rtm->rtm_rmx.rmx_rttvar), lock(RTTVAR));
-	printf("%8ld%c ", rtm->rtm_rmx.rmx_hopcount, lock(HOPCOUNT));
 	printf("%8ld%c ", rtm->rtm_rmx.rmx_mtu, lock(MTU));
+	printf("%8ld%c ", rtm->rtm_rmx.rmx_weight, lock(WEIGHT));
 	if (rtm->rtm_rmx.rmx_expire)
 		rtm->rtm_rmx.rmx_expire -= time(0);
 	printf("%8ld%c\n", rtm->rtm_rmx.rmx_expire, lock(EXPIRE));

Modified: user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c	Wed Mar  4 02:12:29 2009	(r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c	Wed Mar  4 02:38:38 2009	(r189342)
@@ -232,13 +232,10 @@ struct flentry_v6 {
 #define	fl_rt		fl_entry.fl_rt
 #define	fl_lle		fl_entry.fl_lle
 
-#define	SECS_PER_HOUR		3600
-#define	SECS_PER_DAY		(24*SECS_PER_HOUR)
-
-#define	SYN_IDLE		300
-#define	UDP_IDLE		300
-#define	FIN_WAIT_IDLE		600
-#define	TCP_IDLE		SECS_PER_DAY
+#define	SYN_IDLE		120
+#define	UDP_IDLE		60
+#define	FIN_WAIT_IDLE		300
+#define	TCP_IDLE		1200
 
 
 typedef	void fl_lock_t(struct flowtable *, uint32_t);
@@ -331,13 +328,14 @@ flowtable_pcpu_unlock(struct flowtable *
 
 static uint32_t
 ipv4_flow_lookup_hash_internal(struct mbuf *m, struct route *ro,
-    uint32_t *key, uint16_t *flags, uint8_t *protop)
+    uint32_t *key, uint16_t *flags, uint8_t *protop, uint32_t *hash,
+    uint32_t *hash_noports)
 {
 	uint16_t sport = 0, dport = 0;
 	struct ip *ip;
 	uint8_t proto = 0;
 	int iphlen;
-	uint32_t hash;
+	uint32_t rh;
 	struct sockaddr_in *sin;
 	struct tcphdr *th;
 	struct udphdr *uh;
@@ -353,14 +351,16 @@ ipv4_flow_lookup_hash_internal(struct mb
 	key[1] = 0;
 	key[2] = sin->sin_addr.s_addr;
 
-	if (m == NULL || (*flags & FL_HASH_PORTS) == 0)
+	if (m == NULL) 
 		goto skipports;
-
 	ip = mtod(m, struct ip *);
 	proto = ip->ip_p;
 	iphlen = ip->ip_hl << 2; /* XXX options? */
 	key[1] = ip->ip_src.s_addr;
-	
+
+	if ((*flags & FL_HASH_PORTS) == 0)
+		goto skipports;
+
 	switch (proto) {
 	case IPPROTO_TCP:
 		th = (struct tcphdr *)((caddr_t)ip + iphlen);
@@ -387,30 +387,27 @@ ipv4_flow_lookup_hash_internal(struct mb
 		break;;
 	
 	}
-	*protop = proto;
-
-	/*
-	 * If this is a transmit route cache then 
-	 * hash all flows to a given destination to
-	 * the same bucket
-	 */
-	if ((*flags & FL_HASH_PORTS) == 0)
-		proto = sport = dport = 0;
-
-	((uint16_t *)key)[0] = sport;
-	((uint16_t *)key)[1] = dport; 
 
 skipports:
-	hash = hashword(key, 3, hashjitter + proto);
+	rh = hashword(key, 3, hashjitter + proto);
+	*hash_noports = rh;
+	*hash = 0;
+	if ((*flags & FL_HASH_PORTS) && sport) {
+		((uint16_t *)key)[0] = sport;
+		((uint16_t *)key)[1] = dport; 
+		rh = hashword(key, 3, hashjitter + proto);
+		*hash = rh;
+	}
 	if (m != NULL && (m->m_flags & M_FLOWID) == 0)
-		m->m_pkthdr.flowid = hash;
-	
-	CTR5(KTR_SPARE3, "proto=%d hash=%x key[0]=%x sport=%d dport=%d\n", proto, hash, key[0], sport, dport);
-	
-	return (hash);
+		m->m_pkthdr.flowid = rh;
+
+	CTR5(KTR_SPARE3, "proto=%d hash=%x key[0]=%x sport=%d dport=%d\n",
+	    proto, *hash, key[0], sport, dport);
+
+	return (0);
 noop:
 	*protop = proto;
-	return (0);
+	return (ENOENT);
 }
 
 static bitstr_t *
@@ -567,7 +564,7 @@ flowtable_key_equal(struct flentry *fle,
 int
 flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro)
 {
-	uint32_t key[9], hash;
+	uint32_t key[9], hash, hash_noports;
 	struct flentry *fle;
 	uint16_t flags;
 	uint8_t proto = 0;
@@ -578,13 +575,14 @@ flowtable_lookup(struct flowtable *ft, s
 	flags = ft ? ft->ft_flags : 0;
 	ro->ro_rt = NULL;
 	ro->ro_lle = NULL;
-
+	hash = hash_noports = 0;
+	
 	/*
 	 * The internal hash lookup is the only IPv4 specific bit
 	 * remaining
 	 */
-	hash = ipv4_flow_lookup_hash_internal(m, ro, key,
-	    &flags, &proto);
+	error = ipv4_flow_lookup_hash_internal(m, ro, key,
+	    &flags, &proto, &hash, &hash_noports);
 
 	/*
 	 * Ports are zero and this isn't a transmit cache
@@ -592,10 +590,13 @@ flowtable_lookup(struct flowtable *ft, s
 	 * statex
 	 * FL_HASH_PORTS => key[0] != 0 for TCP || UDP || SCTP
 	 */
-	if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_PORTS))) {
+	if (error == ENOENT || (key[0] == 0 && (ft->ft_flags & FL_HASH_PORTS))) {
 		cache = 0;
 		goto uncached;
 	}
+	if ((ft->ft_flags & FL_HASH_PORTS) == 0)
+		goto skipports;
+
 	FL_ENTRY_LOCK(ft, hash);
 	fle = FL_ENTRY(ft, hash);
 	rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
@@ -615,6 +616,27 @@ flowtable_lookup(struct flowtable *ft, s
 	} 
 	FL_ENTRY_UNLOCK(ft, hash);
 
+skipports:
+	key[0] = 0;
+	FL_ENTRY_LOCK(ft, hash_noports);
+	fle = FL_ENTRY(ft, hash_noports);
+	rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
+	lle = __DEVOLATILE(struct llentry *, fle->f_lle);
+	if ((rt != NULL)
+	    && fle->f_fhash == hash_noports
+	    && flowtable_key_equal(fle, key, flags)
+	    && (proto == fle->f_proto)
+	    && (rt->rt_flags & RTF_UP)
+	    && (rt->rt_ifp != NULL)) {
+		fle->f_uptime = time_uptime;
+		fle->f_flags |= flags;
+		ro->ro_rt = rt;
+		ro->ro_lle = lle;
+		FL_ENTRY_UNLOCK(ft, hash_noports);
+		return (0);
+	} 
+	FL_ENTRY_UNLOCK(ft, hash_noports);
+	
 uncached:
 	/*
 	 * This bit of code ends up locking the
@@ -640,6 +662,18 @@ uncached:
 		struct rtentry *rt = ro->ro_rt;
 		struct ifnet *ifp = rt->rt_ifp;
 
+		if (rt->rt_flags & RTF_STICKY) {
+			RTFREE(rt);
+			hash = hash_noports;
+			ft->ft_rtalloc(ro, hash, fib);
+			if (ro->ro_rt == NULL) {
+				error = ENETUNREACH;
+				goto done;
+			}
+			rt = ro->ro_rt;
+			ifp = rt->rt_ifp;
+		}
+
 		if (rt->rt_flags & RTF_GATEWAY)
 			l3addr = rt->rt_gateway;
 		else
@@ -671,7 +705,7 @@ uncached:
 		}
 		error = 0;
 	} 
-
+done:
 	return (error);
 }
 
@@ -720,7 +754,7 @@ flowtable_alloc(int nentry, int flags)
 			ft->ft_masks[i] = bit_alloc(nentry);
 		}
 	} else {
-		ft->ft_lock_count = 2*(powerof2(mp_ncpus) ? mp_ncpus :
+		ft->ft_lock_count = 8*(powerof2(mp_ncpus) ? mp_ncpus :
 		    (fls(mp_ncpus) << 1));
 		
 		ft->ft_lock = flowtable_global_lock;

Modified: user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c	Wed Mar  4 02:12:29 2009	(r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c	Wed Mar  4 02:38:38 2009	(r189342)
@@ -77,15 +77,18 @@ rn_mpath_next(struct radix_node *rn)
 		return NULL;
 }
 
-u_int32_t
+uint32_t
 rn_mpath_count(struct radix_node *rn)
 {
-	u_int32_t i;
-
-	i = 1;
-	while ((rn = rn_mpath_next(rn)) != NULL)
-		i++;
-	return i;
+	uint32_t i = 0;
+	struct rtentry *rt;
+	
+	while (rn != NULL) {
+		rt = (struct rtentry *)rn;
+		i += rt->rt_rmx.rmx_weight;
+		rn = rn_mpath_next(rn);
+	}
+	return (i);
 }
 
 struct rtentry *
@@ -256,10 +259,12 @@ different:
 }
 
 void
-rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum)
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
 {
 	struct radix_node *rn0, *rn;
 	u_int32_t n;
+	struct rtentry *rt;
+	int64_t weight;
 
 	/*
 	 * XXX we don't attempt to lookup cached route again; what should
@@ -284,25 +289,31 @@ rtalloc_mpath_fib(struct route *ro, u_in
 	/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
 	hash += hashjitter;
 	hash %= n;
-	while (hash-- > 0 && rn) {
+	for (weight = abs((int32_t)hash), rt = ro->ro_rt;
+	     weight >= rt->rt_rmx.rmx_weight && rn; 
+	     weight -= rt->rt_rmx.rmx_weight) {
+		
 		/* stay within the multipath routes */
 		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
 			break;
 		rn = rn->rn_dupedkey;
+		rt = (struct rtentry *)rn;
 	}
-
 	/* XXX try filling rt_gwroute and avoid unreachable gw  */
 
-	/* if gw selection fails, use the first match (default) */
+	/* gw selection has failed - there must be only zero weight routes */
 	if (!rn) {
 		RT_UNLOCK(ro->ro_rt);
+		ro->ro_rt = NULL;
 		return;
 	}
-	
-	RTFREE_LOCKED(ro->ro_rt);
-	ro->ro_rt = (struct rtentry *)rn;
-	RT_LOCK(ro->ro_rt);
-	RT_ADDREF(ro->ro_rt);
+	if (ro->ro_rt != rt) {
+		RTFREE_LOCKED(ro->ro_rt);
+		ro->ro_rt = (struct rtentry *)rn;
+		RT_LOCK(ro->ro_rt);
+		RT_ADDREF(ro->ro_rt);
+
+	} 
 	RT_UNLOCK(ro->ro_rt);
 }
 

Modified: user/kmacy/HEAD_fast_net_merge/sys/net/route.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/route.c	Wed Mar  4 02:12:29 2009	(r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/route.c	Wed Mar  4 02:38:38 2009	(r189342)
@@ -803,6 +803,103 @@ bad:
 	return (error);
 }
 
+#ifdef RADIX_MPATH
+static int
+rn_mpath_update(int req, struct rt_addrinfo *info,
+    struct radix_node_head *rnh, struct rtentry **ret_nrt)
+{
+	/*
+	 * if we got multipath routes, we require users to specify
+	 * a matching RTAX_GATEWAY.
+	 */
+	struct rtentry *rt, *rto = NULL;
+	register struct radix_node *rn;
+	int error = 0;
+
+	rn = rnh->rnh_matchaddr(dst, rnh);
+	if (rn == NULL)
+		return (ESRCH);
+	rto = rt = RNTORT(rn);
+	rt = rt_mpath_matchgate(rt, gateway);
+	if (rt == NULL)
+		return (ESRCH);
+	/*
+	 * this is the first entry in the chain
+	 */
+	if (rto == rt) {
+		rn = rn_mpath_next((struct radix_node *)rt);
+		/*
+		 * there is another entry, now it's active
+		 */
+		if (rn) {
+			rto = RNTORT(rn);
+			RT_LOCK(rto);
+			rto->rt_flags |= RTF_UP;
+			RT_UNLOCK(rto);
+		} else if (rt->rt_flags & RTF_GATEWAY) {
+			/*
+			 * For gateway routes, we need to 
+			 * make sure that we we are deleting
+			 * the correct gateway. 
+			 * rt_mpath_matchgate() does not 
+			 * check the case when there is only
+			 * one route in the chain.  
+			 */
+			if (gateway &&
+			    (rt->rt_gateway->sa_len != gateway->sa_len ||
+				memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
+				error = ESRCH;
+			goto done;
+		}
+		/*
+		 * use the normal delete code to remove
+		 * the first entry
+		 */
+		if (req != RTM_DELETE) 
+			goto nondelete;
+
+		error = ENOENT;
+		goto done;
+	}
+		
+	/*
+	 * if the entry is 2nd and on up
+	 */
+	if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
+		panic ("rtrequest1: rt_mpath_deldup");
+	RT_LOCK(rt);
+	RT_ADDREF(rt);
+	if (req == RTM_DELETE) {
+		rt->rt_flags &= ~RTF_UP;
+		/*
+		 * One more rtentry floating around that is not
+		 * linked to the routing table. rttrash will be decremented
+		 * when RTFREE(rt) is eventually called.
+		 */
+		V_rttrash++;
+		
+	}
+	
+nondelete:
+	if (req != RTM_DELETE)
+		panic("unrecognized request %d", req);
+	
+
+	/*
+	 * If the caller wants it, then it can have it,
+	 * but it's up to it to free the rtentry as we won't be
+	 * doing it.
+	 */
+	if (ret_nrt) {
+		*ret_nrt = rt;
+		RT_UNLOCK(rt);
+	} else
+		RTFREE_LOCKED(rt);
+done:
+	return (error);
+}
+#endif
+
 int
 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 				u_int fibnum)
@@ -841,65 +938,15 @@ rtrequest1_fib(int req, struct rt_addrin
 	switch (req) {
 	case RTM_DELETE:
 #ifdef RADIX_MPATH
-		/*
-		 * if we got multipath routes, we require users to specify
-		 * a matching RTAX_GATEWAY.
-		 */
 		if (rn_mpath_capable(rnh)) {
-			struct rtentry *rto = NULL;
-
-			rn = rnh->rnh_matchaddr(dst, rnh);
-			if (rn == NULL)
-				senderr(ESRCH);
- 			rto = rt = RNTORT(rn);
-			rt = rt_mpath_matchgate(rt, gateway);
-			if (!rt)
-				senderr(ESRCH);
-			/*
-			 * this is the first entry in the chain
-			 */
-			if (rto == rt) {
-				rn = rn_mpath_next((struct radix_node *)rt);
-				/*
-				 * there is another entry, now it's active
-				 */
-				if (rn) {
-					rto = RNTORT(rn);
-					RT_LOCK(rto);
-					rto->rt_flags |= RTF_UP;
-					RT_UNLOCK(rto);
-				} else if (rt->rt_flags & RTF_GATEWAY) {
-					/*
-					 * For gateway routes, we need to 
-					 * make sure that we we are deleting
-					 * the correct gateway. 
-					 * rt_mpath_matchgate() does not 
-					 * check the case when there is only
-					 * one route in the chain.  
-					 */
-					if (gateway &&
-					    (rt->rt_gateway->sa_len != gateway->sa_len ||
-					    memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
-						senderr(ESRCH);
-				}
-				/*
-				 * use the normal delete code to remove
-				 * the first entry
-				 */
-				goto normal_rtdel;
-			}
+			error = rn_mpath_update(req, info, rnh, ret_nrt);
 			/*
-			 * if the entry is 2nd and on up
+			 * "bad" holds true for the success case
+			 * as well
 			 */
-			if (!rt_mpath_deldup(rto, rt))
-				panic ("rtrequest1: rt_mpath_deldup");
-			RT_LOCK(rt);
-			RT_ADDREF(rt);
-			rt->rt_flags &= ~RTF_UP;
-			goto deldone;  /* done with the RTM_DELETE command */
+			if (error != ENOENT)
+				goto bad;
 		}
-
-normal_rtdel:
 #endif
 		/*
 		 * Remove the item from the tree and return it.
@@ -921,9 +968,6 @@ normal_rtdel:
 		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
 			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
 
-#ifdef RADIX_MPATH
-deldone:
-#endif
 		/*
 		 * One more rtentry floating around that is not
 		 * linked to the routing table. rttrash will be decremented
@@ -951,11 +995,13 @@ deldone:
 	case RTM_ADD:
 		if ((flags & RTF_GATEWAY) && !gateway)
 			senderr(EINVAL);
-		if (dst && gateway && (dst->sa_family != gateway->sa_family) && 
-		    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
+		if (dst && gateway && (dst->sa_family != gateway->sa_family) 
+		    && (gateway->sa_family != AF_UNSPEC)
+		    && (gateway->sa_family != AF_LINK))
 			senderr(EINVAL);
 
-		if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum)))
+		if (info->rti_ifa == NULL &&
+		    (error = rt_getifa_fib(info, fibnum)))
 			senderr(error);
 		ifa = info->rti_ifa;
 		rt = uma_zalloc(rtzone, M_NOWAIT | M_ZERO);
@@ -996,6 +1042,7 @@ deldone:
 		IFAREF(ifa);
 		rt->rt_ifa = ifa;
 		rt->rt_ifp = ifa->ifa_ifp;
+		rt->rt_rmx.rmx_weight = 1;
 
 #ifdef RADIX_MPATH
 		/* do not permit exactly the same dst/mask/gw pair */

Modified: user/kmacy/HEAD_fast_net_merge/sys/net/route.h
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/route.h	Wed Mar  4 02:12:29 2009	(r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/route.h	Wed Mar  4 02:38:38 2009	(r189342)
@@ -59,6 +59,7 @@ struct rt_metrics_lite {
 	u_long	rmx_mtu;	/* MTU for this path */
 	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
 	u_long	rmx_pksent;	/* packets sent using this route */
+	u_long	rmx_weight;	/* absolute weight */ 
 };
 
 struct rt_metrics {
@@ -72,7 +73,8 @@ struct rt_metrics {
 	u_long	rmx_rtt;	/* estimated round trip time */
 	u_long	rmx_rttvar;	/* estimated rtt variance */
 	u_long	rmx_pksent;	/* packets sent using this route */
-	u_long	rmx_filler[4];	/* will be used for T/TCP later */
+	u_long	rmx_weight;	/* route weight */
+	u_long	rmx_filler[3];	/* will be used for T/TCP later */
 };
 
 /*
@@ -194,13 +196,15 @@ struct ortentry {
 #define	RTF_LOCAL	0x200000 	/* route represents a local address */
 #define	RTF_BROADCAST	0x400000	/* route represents a bcast address */
 #define	RTF_MULTICAST	0x800000	/* route represents a mcast address */
-					/* 0x1000000 and up unassigned */
-#define	RTF_RNH_LOCKED	 0x40000000	/* radix node head locked by caller */
+					/* 0x8000000 and up unassigned */
+#define	RTF_STICKY	 0x10000000	/* always route dst->src */
+
+#define	RTF_RNH_LOCKED	 0x40000000	/* radix node head is locked */
 
 /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
 #define RTF_FMASK	\
 	(RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
-	 RTF_REJECT | RTF_STATIC)
+	 RTF_REJECT | RTF_STATIC | RTF_STICKY)
 
 /*
  * Routing statistics.
@@ -226,12 +230,11 @@ struct rt_msghdr {
 	int	rtm_seq;	/* for sender to identify action */
 	int	rtm_errno;	/* why failed */
 	int	rtm_fmask;	/* bitmask used in RTM_CHANGE message */
-#define	rtm_use	rtm_fmask	/* deprecated, use rtm_rmx->rmx_pksent */
 	u_long	rtm_inits;	/* which metrics we are initializing */
 	struct	rt_metrics rtm_rmx; /* metrics themselves */
 };
 
-#define RTM_VERSION	5	/* Up the ante and ignore older versions */
+#define RTM_VERSION	6	/* Up the ante and ignore older versions */
 
 /*
  * Message types.
@@ -266,6 +269,7 @@ struct rt_msghdr {
 #define RTV_SSTHRESH	0x20	/* init or lock _ssthresh */
 #define RTV_RTT		0x40	/* init or lock _rtt */
 #define RTV_RTTVAR	0x80	/* init or lock _rttvar */
+#define RTV_WEIGHT	0x100	/* init or lock _weight */
 
 /*
  * Bitmask values for rtm_addrs.
@@ -273,7 +277,7 @@ struct rt_msghdr {
 #define RTA_DST		0x1	/* destination sockaddr present */
 #define RTA_GATEWAY	0x2	/* gateway sockaddr present */
 #define RTA_NETMASK	0x4	/* netmask sockaddr present */
-#define RTA_GENMASK	0x8	/* cloning mask sockaddr present */
+#define RTA_SPARE	0x8	/* unused */
 #define RTA_IFP		0x10	/* interface name sockaddr present */
 #define RTA_IFA		0x20	/* interface addr sockaddr present */
 #define RTA_AUTHOR	0x40	/* sockaddr for author of redirect */
@@ -285,7 +289,7 @@ struct rt_msghdr {
 #define RTAX_DST	0	/* destination sockaddr present */
 #define RTAX_GATEWAY	1	/* gateway sockaddr present */
 #define RTAX_NETMASK	2	/* netmask sockaddr present */
-#define RTAX_GENMASK	3	/* cloning mask sockaddr present */
+#define RTAX_SPARE	3	/* spare field */
 #define RTAX_IFP	4	/* interface name sockaddr present */
 #define RTAX_IFA	5	/* interface addr sockaddr present */
 #define RTAX_AUTHOR	6	/* sockaddr for author of redirect */
@@ -293,11 +297,11 @@ struct rt_msghdr {
 #define RTAX_MAX	8	/* size of array to allocate */
 
 struct rt_addrinfo {
-	int	rti_addrs;
-	struct	sockaddr *rti_info[RTAX_MAX];
-	int	rti_flags;
-	struct	ifaddr *rti_ifa;
-	struct	ifnet *rti_ifp;
+	int		rti_addrs;
+	struct sockaddr	*rti_info[RTAX_MAX];
+	int		rti_flags;
+	struct ifaddr 	*rti_ifa;
+	struct ifnet 	*rti_ifp;
 };
 
 /*

Modified: user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c	Wed Mar  4 02:12:29 2009	(r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c	Wed Mar  4 02:38:38 2009	(r189342)
@@ -601,7 +601,6 @@ route_output(struct mbuf *m, struct sock
 			info.rti_info[RTAX_DST] = rt_key(rt);
 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-			info.rti_info[RTAX_GENMASK] = 0;
 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
 				ifp = rt->rt_ifp;
 				if (ifp) {
@@ -637,7 +636,6 @@ route_output(struct mbuf *m, struct sock
 			}
 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
 			rtm->rtm_flags = rt->rt_flags;
-			rtm->rtm_use = 0;
 			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
 			rtm->rtm_addrs = info.rti_addrs;
 			break;
@@ -691,10 +689,8 @@ route_output(struct mbuf *m, struct sock
 				rt->rt_ifp = info.rti_ifp;
 			}
 			/* Allow some flags to be toggled on change. */
-			if (rtm->rtm_fmask & RTF_FMASK)
-				rt->rt_flags = (rt->rt_flags &
-				    ~rtm->rtm_fmask) |
-				    (rtm->rtm_flags & rtm->rtm_fmask);
+			rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
+				    (rtm->rtm_flags & RTF_FMASK);
 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
 					&rt->rt_rmx);
 			rtm->rtm_index = rt->rt_ifp->if_index;
@@ -767,12 +763,14 @@ static void
 rt_setmetrics(u_long which, const struct rt_metrics *in,
 	struct rt_metrics_lite *out)
 {
-#define metric(f, e) if (which & (f)) out->e = in->e;
+#define metric(f, e) if (which & (f)) { printf("setting 0x%x", f); out->e = in->e; }								
+	
 	/*
 	 * Only these are stored in the routing entry since introduction
 	 * of tcp hostcache. The rest is ignored.
 	 */
 	metric(RTV_MTU, rmx_mtu);
+	metric(RTV_WEIGHT, rmx_weight);
 	/* Userland -> kernel timebase conversion. */
 	if (which & RTV_EXPIRE)
 		out->rmx_expire = in->rmx_expire ?
@@ -786,6 +784,7 @@ rt_getmetrics(const struct rt_metrics_li
 #define metric(e) out->e = in->e;
 	bzero(out, sizeof(*out));
 	metric(rmx_mtu);
+	metric(rmx_weight);
 	/* Kernel -> userland timebase conversion. */
 	out->rmx_expire = in->rmx_expire ?
 	    in->rmx_expire - time_uptime + time_second : 0;
@@ -1245,7 +1244,6 @@ sysctl_dumpentry(struct radix_node *rn, 
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-	info.rti_info[RTAX_GENMASK] = 0;
 	if (rt->rt_ifp) {
 		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
@@ -1257,7 +1255,10 @@ sysctl_dumpentry(struct radix_node *rn, 
 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 
 		rtm->rtm_flags = rt->rt_flags;
-		rtm->rtm_use = rt->rt_rmx.rmx_pksent;
+		/*
+		 * let's be honest about this being a retarded hack
+		 */
+		rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
 		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
 		rtm->rtm_index = rt->rt_ifp->if_index;
 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;

Modified: user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c	Wed Mar  4 02:12:29 2009	(r189341)
+++ user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c	Wed Mar  4 02:38:38 2009	(r189342)
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_route.h"
 #include "opt_mac.h"
 #include "opt_carp.h"
+#include "opt_mpath.h"	
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -340,7 +341,7 @@ ip_init(void)
 	netisr_register(NETISR_IP, ip_input, &ipintrq, 0);
 
 	ipv4_ft = flowtable_alloc(ip_pcpu_flowtable_size, FL_PCPU);
-	ipv4_forward_ft = flowtable_alloc(ip_global_flowtable_size, FL_HASH_PORTS);	
+	ipv4_forward_ft = flowtable_alloc(ip_global_flowtable_size, FL_HASH_PORTS|FL_PCPU);
 }
 
 void

Modified: user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c
==============================================================================
--- user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c	Wed Mar  4 02:12:29 2009	(r189341)
+++ user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c	Wed Mar  4 02:38:38 2009	(r189342)
@@ -2661,10 +2661,6 @@ rt_entry(rtm, again)
 		sin6_mask = (struct sockaddr_in6 *)rtmp;
 		rtmp += ROUNDUP(sin6_mask->sin6_len);
 	}
-	if (rtm->rtm_addrs & RTA_GENMASK) {
-		sin6_genmask = (struct sockaddr_in6 *)rtmp;
-		rtmp += ROUNDUP(sin6_genmask->sin6_len);
-	}
 	if (rtm->rtm_addrs & RTA_IFP) {
 		sin6_ifp = (struct sockaddr_in6 *)rtmp;
 		rtmp += ROUNDUP(sin6_ifp->sin6_len);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200903040238.n242ccNQ028860>