Date: Wed, 4 Mar 2009 02:38:38 +0000 (UTC) From: Kip Macy <kmacy@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r189342 - in user/kmacy/HEAD_fast_net_merge: sbin/route sys/net sys/netinet usr.sbin/route6d Message-ID: <200903040238.n242ccNQ028860@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kmacy Date: Wed Mar 4 02:38:38 2009 New Revision: 189342 URL: http://svn.freebsd.org/changeset/base/189342 Log: add route weighting and generalizing of affinity to source ip instead of per-flow 186625: - import kernel support for route shutdown 186626: - import user support for route shutdown 186628: - don't lookup laddr or lport if they're already set 186630: - fix route shutdown merge 186923: - Add kernel support for weighting routes 186924: - remove RTA_GENMASK 186925: - remove genmask - add -weight option to route for adding / changing route weight 186994: - add kernel support for "sticky" routes (all connections from a given source ip will be routed to the same dst ip) 186995: - add support to the route command for making routes sticky 187003: - add new flags to route output - remove hopcount 187004: - update route flags and metricnames in route command 187005: - fetch weight when getting metrics 187006: - try to improve formatting slightly in route 187007: - more output futzing - add show as alias for get 187008: - update show handling 187009: - remove shutdown - update route flags 187010: - fix flag setting in RTM_CHANGE 187011: - add debug cruft to route selection 187012: - fix rn_mpath_count and reduce frequency of printing 187013: - update loop condition print hash earlier 187040: - reduce default timeouts in the flowtable - remove references to shutdown (redundant with zero weight route) - simplify weight checking 187041: - fix radix_mpath comment - remove shutdown flag and message 187206: - include opt_mpath.h so that RADIX_MPATH will be pulled in - remove locking overhead to forwarding workloads by making forwarding table pcpu Modified: user/kmacy/HEAD_fast_net_merge/sbin/route/keywords user/kmacy/HEAD_fast_net_merge/sbin/route/route.c user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c user/kmacy/HEAD_fast_net_merge/sys/net/route.c user/kmacy/HEAD_fast_net_merge/sys/net/route.h 
user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c Modified: user/kmacy/HEAD_fast_net_merge/sbin/route/keywords ============================================================================== --- user/kmacy/HEAD_fast_net_merge/sbin/route/keywords Wed Mar 4 02:12:29 2009 (r189341) +++ user/kmacy/HEAD_fast_net_merge/sbin/route/keywords Wed Mar 4 02:38:38 2009 (r189342) @@ -33,6 +33,7 @@ mtu net netmask nostatic +nostick osi prefixlen proto1 @@ -44,8 +45,11 @@ rtt rttvar sa sendpipe +show ssthresh static +sticky +weight x25 xns xresolve Modified: user/kmacy/HEAD_fast_net_merge/sbin/route/route.c ============================================================================== --- user/kmacy/HEAD_fast_net_merge/sbin/route/route.c Wed Mar 4 02:12:29 2009 (r189341) +++ user/kmacy/HEAD_fast_net_merge/sbin/route/route.c Wed Mar 4 02:38:38 2009 (r189342) @@ -169,6 +169,7 @@ main(argc, argv) if (*argv) switch (keyword(*argv)) { case K_GET: + case K_SHOW: uid = 0; /* FALLTHROUGH */ @@ -548,6 +549,7 @@ set_metric(value, key) caseof(K_SSTHRESH, RTV_SSTHRESH, rmx_ssthresh); caseof(K_RTT, RTV_RTT, rmx_rtt); caseof(K_RTTVAR, RTV_RTTVAR, rmx_rttvar); + caseof(K_WEIGHT, RTV_WEIGHT, rmx_weight); } rtm_inits |= flag; if (lockrest || locking) @@ -571,8 +573,9 @@ newroute(argc, argv) errx(EX_NOPERM, "must be root to alter routing table"); } cmd = argv[0]; - if (*cmd != 'g') + if (*cmd != 'g' && *cmd != 's') shutdown(s, SHUT_RD); /* Don't want to read back our messages */ + while (--argc > 0) { if (**(++argv)== '-') { switch (key = keyword(1 + *argv)) { @@ -635,6 +638,12 @@ newroute(argc, argv) case K_STATIC: flags |= RTF_STATIC; break; + case K_STICKY: + flags |= RTF_STICKY; + break; + case K_NOSTICK: + flags &= ~RTF_STICKY; + break; case K_IFA: if (!--argc) usage((char *)NULL); @@ -645,11 +654,6 @@ newroute(argc, argv) usage((char *)NULL); (void) getaddr(RTA_IFP, *++argv, 0); break; 
- case K_GENMASK: - if (!--argc) - usage((char *)NULL); - (void) getaddr(RTA_GENMASK, *++argv, 0); - break; case K_GATEWAY: if (!--argc) usage((char *)NULL); @@ -688,6 +692,7 @@ newroute(argc, argv) case K_SSTHRESH: case K_RTT: case K_RTTVAR: + case K_WEIGHT: if (!--argc) usage((char *)NULL); set_metric(*++argv, key); @@ -741,7 +746,7 @@ newroute(argc, argv) } else break; } - if (*cmd == 'g') + if (*cmd == 'g' || *cmd == 's') exit(ret != 0); if (!qflag) { oerrno = errno; @@ -925,9 +930,6 @@ getaddr(which, s, hpp) case RTA_NETMASK: su = &so_mask; break; - case RTA_GENMASK: - su = &so_genmask; - break; case RTA_IFP: su = &so_ifp; afamily = AF_LINK; @@ -1191,7 +1193,7 @@ rtmsg(cmd, flags) cmd = RTM_ADD; else if (cmd == 'c') cmd = RTM_CHANGE; - else if (cmd == 'g') { + else if (cmd == 'g' || cmd == 's') { cmd = RTM_GET; if (so_ifp.sa.sa_family == 0) { so_ifp.sa.sa_family = AF_LINK; @@ -1208,13 +1210,11 @@ rtmsg(cmd, flags) rtm.rtm_addrs = rtm_addrs; rtm.rtm_rmx = rt_metrics; rtm.rtm_inits = rtm_inits; - if (rtm_addrs & RTA_NETMASK) mask_addr(); NEXTADDR(RTA_DST, so_dst); NEXTADDR(RTA_GATEWAY, so_gate); NEXTADDR(RTA_NETMASK, so_mask); - NEXTADDR(RTA_GENMASK, so_genmask); NEXTADDR(RTA_IFP, so_ifp); NEXTADDR(RTA_IFA, so_ifa); rtm.rtm_msglen = l = cp - (char *)&m_rtmsg; @@ -1295,13 +1295,13 @@ char *msgtypes[] = { }; char metricnames[] = -"\011pksent\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire\2hopcount" +"\011weight\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire" "\1mtu"; char routeflags[] = -"\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE\010MASK_PRESENT" -"\011CLONING\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE\016b016" -"\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3\024CHAINDELETE" -"\025PINNED\026LOCAL\027BROADCAST\030MULTICAST"; +"\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE" +"\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE" +"\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3" 
+"\025PINNED\026LOCAL\027BROADCAST\030MULTICAST\035STICKY"; char ifnetflags[] = "\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5PTP\6b6\7RUNNING\010NOARP" "\011PPROMISC\012ALLMULTI\013OACTIVE\014SIMPLEX\015LINK0\016LINK1" @@ -1464,14 +1464,13 @@ print_getmsg(rtm, msglen) #define msec(u) (((u) + 500) / 1000) /* usec to msec */ (void) printf("\n%s\n", "\ - recvpipe sendpipe ssthresh rtt,msec rttvar hopcount mtu expire"); + recvpipe sendpipe ssthresh rtt,msec mtu weight expire"); printf("%8ld%c ", rtm->rtm_rmx.rmx_recvpipe, lock(RPIPE)); printf("%8ld%c ", rtm->rtm_rmx.rmx_sendpipe, lock(SPIPE)); printf("%8ld%c ", rtm->rtm_rmx.rmx_ssthresh, lock(SSTHRESH)); printf("%8ld%c ", msec(rtm->rtm_rmx.rmx_rtt), lock(RTT)); - printf("%8ld%c ", msec(rtm->rtm_rmx.rmx_rttvar), lock(RTTVAR)); - printf("%8ld%c ", rtm->rtm_rmx.rmx_hopcount, lock(HOPCOUNT)); printf("%8ld%c ", rtm->rtm_rmx.rmx_mtu, lock(MTU)); + printf("%8ld%c ", rtm->rtm_rmx.rmx_weight, lock(WEIGHT)); if (rtm->rtm_rmx.rmx_expire) rtm->rtm_rmx.rmx_expire -= time(0); printf("%8ld%c\n", rtm->rtm_rmx.rmx_expire, lock(EXPIRE)); Modified: user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c ============================================================================== --- user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c Wed Mar 4 02:12:29 2009 (r189341) +++ user/kmacy/HEAD_fast_net_merge/sys/net/flowtable.c Wed Mar 4 02:38:38 2009 (r189342) @@ -232,13 +232,10 @@ struct flentry_v6 { #define fl_rt fl_entry.fl_rt #define fl_lle fl_entry.fl_lle -#define SECS_PER_HOUR 3600 -#define SECS_PER_DAY (24*SECS_PER_HOUR) - -#define SYN_IDLE 300 -#define UDP_IDLE 300 -#define FIN_WAIT_IDLE 600 -#define TCP_IDLE SECS_PER_DAY +#define SYN_IDLE 120 +#define UDP_IDLE 60 +#define FIN_WAIT_IDLE 300 +#define TCP_IDLE 1200 typedef void fl_lock_t(struct flowtable *, uint32_t); @@ -331,13 +328,14 @@ flowtable_pcpu_unlock(struct flowtable * static uint32_t ipv4_flow_lookup_hash_internal(struct mbuf *m, struct route *ro, - uint32_t *key, uint16_t *flags, uint8_t 
*protop) + uint32_t *key, uint16_t *flags, uint8_t *protop, uint32_t *hash, + uint32_t *hash_noports) { uint16_t sport = 0, dport = 0; struct ip *ip; uint8_t proto = 0; int iphlen; - uint32_t hash; + uint32_t rh; struct sockaddr_in *sin; struct tcphdr *th; struct udphdr *uh; @@ -353,14 +351,16 @@ ipv4_flow_lookup_hash_internal(struct mb key[1] = 0; key[2] = sin->sin_addr.s_addr; - if (m == NULL || (*flags & FL_HASH_PORTS) == 0) + if (m == NULL) goto skipports; - ip = mtod(m, struct ip *); proto = ip->ip_p; iphlen = ip->ip_hl << 2; /* XXX options? */ key[1] = ip->ip_src.s_addr; - + + if ((*flags & FL_HASH_PORTS) == 0) + goto skipports; + switch (proto) { case IPPROTO_TCP: th = (struct tcphdr *)((caddr_t)ip + iphlen); @@ -387,30 +387,27 @@ ipv4_flow_lookup_hash_internal(struct mb break;; } - *protop = proto; - - /* - * If this is a transmit route cache then - * hash all flows to a given destination to - * the same bucket - */ - if ((*flags & FL_HASH_PORTS) == 0) - proto = sport = dport = 0; - - ((uint16_t *)key)[0] = sport; - ((uint16_t *)key)[1] = dport; skipports: - hash = hashword(key, 3, hashjitter + proto); + rh = hashword(key, 3, hashjitter + proto); + *hash_noports = rh; + *hash = 0; + if ((*flags & FL_HASH_PORTS) && sport) { + ((uint16_t *)key)[0] = sport; + ((uint16_t *)key)[1] = dport; + rh = hashword(key, 3, hashjitter + proto); + *hash = rh; + } if (m != NULL && (m->m_flags & M_FLOWID) == 0) - m->m_pkthdr.flowid = hash; - - CTR5(KTR_SPARE3, "proto=%d hash=%x key[0]=%x sport=%d dport=%d\n", proto, hash, key[0], sport, dport); - - return (hash); + m->m_pkthdr.flowid = rh; + + CTR5(KTR_SPARE3, "proto=%d hash=%x key[0]=%x sport=%d dport=%d\n", + proto, *hash, key[0], sport, dport); + + return (0); noop: *protop = proto; - return (0); + return (ENOENT); } static bitstr_t * @@ -567,7 +564,7 @@ flowtable_key_equal(struct flentry *fle, int flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro) { - uint32_t key[9], hash; + uint32_t key[9], hash, 
hash_noports; struct flentry *fle; uint16_t flags; uint8_t proto = 0; @@ -578,13 +575,14 @@ flowtable_lookup(struct flowtable *ft, s flags = ft ? ft->ft_flags : 0; ro->ro_rt = NULL; ro->ro_lle = NULL; - + hash = hash_noports = 0; + /* * The internal hash lookup is the only IPv4 specific bit * remaining */ - hash = ipv4_flow_lookup_hash_internal(m, ro, key, - &flags, &proto); + error = ipv4_flow_lookup_hash_internal(m, ro, key, + &flags, &proto, &hash, &hash_noports); /* * Ports are zero and this isn't a transmit cache @@ -592,10 +590,13 @@ flowtable_lookup(struct flowtable *ft, s * statex * FL_HASH_PORTS => key[0] != 0 for TCP || UDP || SCTP */ - if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_PORTS))) { + if (error == ENOENT || (key[0] == 0 && (ft->ft_flags & FL_HASH_PORTS))) { cache = 0; goto uncached; } + if ((ft->ft_flags & FL_HASH_PORTS) == 0) + goto skipports; + FL_ENTRY_LOCK(ft, hash); fle = FL_ENTRY(ft, hash); rt = __DEVOLATILE(struct rtentry *, fle->f_rt); @@ -615,6 +616,27 @@ flowtable_lookup(struct flowtable *ft, s } FL_ENTRY_UNLOCK(ft, hash); +skipports: + key[0] = 0; + FL_ENTRY_LOCK(ft, hash_noports); + fle = FL_ENTRY(ft, hash_noports); + rt = __DEVOLATILE(struct rtentry *, fle->f_rt); + lle = __DEVOLATILE(struct llentry *, fle->f_lle); + if ((rt != NULL) + && fle->f_fhash == hash_noports + && flowtable_key_equal(fle, key, flags) + && (proto == fle->f_proto) + && (rt->rt_flags & RTF_UP) + && (rt->rt_ifp != NULL)) { + fle->f_uptime = time_uptime; + fle->f_flags |= flags; + ro->ro_rt = rt; + ro->ro_lle = lle; + FL_ENTRY_UNLOCK(ft, hash_noports); + return (0); + } + FL_ENTRY_UNLOCK(ft, hash_noports); + uncached: /* * This bit of code ends up locking the @@ -640,6 +662,18 @@ uncached: struct rtentry *rt = ro->ro_rt; struct ifnet *ifp = rt->rt_ifp; + if (rt->rt_flags & RTF_STICKY) { + RTFREE(rt); + hash = hash_noports; + ft->ft_rtalloc(ro, hash, fib); + if (ro->ro_rt == NULL) { + error = ENETUNREACH; + goto done; + } + rt = ro->ro_rt; + ifp = 
rt->rt_ifp; + } + if (rt->rt_flags & RTF_GATEWAY) l3addr = rt->rt_gateway; else @@ -671,7 +705,7 @@ uncached: } error = 0; } - +done: return (error); } @@ -720,7 +754,7 @@ flowtable_alloc(int nentry, int flags) ft->ft_masks[i] = bit_alloc(nentry); } } else { - ft->ft_lock_count = 2*(powerof2(mp_ncpus) ? mp_ncpus : + ft->ft_lock_count = 8*(powerof2(mp_ncpus) ? mp_ncpus : (fls(mp_ncpus) << 1)); ft->ft_lock = flowtable_global_lock; Modified: user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c ============================================================================== --- user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c Wed Mar 4 02:12:29 2009 (r189341) +++ user/kmacy/HEAD_fast_net_merge/sys/net/radix_mpath.c Wed Mar 4 02:38:38 2009 (r189342) @@ -77,15 +77,18 @@ rn_mpath_next(struct radix_node *rn) return NULL; } -u_int32_t +uint32_t rn_mpath_count(struct radix_node *rn) { - u_int32_t i; - - i = 1; - while ((rn = rn_mpath_next(rn)) != NULL) - i++; - return i; + uint32_t i = 0; + struct rtentry *rt; + + while (rn != NULL) { + rt = (struct rtentry *)rn; + i += rt->rt_rmx.rmx_weight; + rn = rn_mpath_next(rn); + } + return (i); } struct rtentry * @@ -256,10 +259,12 @@ different: } void -rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum) +rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) { struct radix_node *rn0, *rn; u_int32_t n; + struct rtentry *rt; + int64_t weight; /* * XXX we don't attempt to lookup cached route again; what should @@ -284,25 +289,31 @@ rtalloc_mpath_fib(struct route *ro, u_in /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? 
*/ hash += hashjitter; hash %= n; - while (hash-- > 0 && rn) { + for (weight = abs((int32_t)hash), rt = ro->ro_rt; + weight >= rt->rt_rmx.rmx_weight && rn; + weight -= rt->rt_rmx.rmx_weight) { + /* stay within the multipath routes */ if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) break; rn = rn->rn_dupedkey; + rt = (struct rtentry *)rn; } - /* XXX try filling rt_gwroute and avoid unreachable gw */ - /* if gw selection fails, use the first match (default) */ + /* gw selection has failed - there must be only zero weight routes */ if (!rn) { RT_UNLOCK(ro->ro_rt); + ro->ro_rt = NULL; return; } - - RTFREE_LOCKED(ro->ro_rt); - ro->ro_rt = (struct rtentry *)rn; - RT_LOCK(ro->ro_rt); - RT_ADDREF(ro->ro_rt); + if (ro->ro_rt != rt) { + RTFREE_LOCKED(ro->ro_rt); + ro->ro_rt = (struct rtentry *)rn; + RT_LOCK(ro->ro_rt); + RT_ADDREF(ro->ro_rt); + + } RT_UNLOCK(ro->ro_rt); } Modified: user/kmacy/HEAD_fast_net_merge/sys/net/route.c ============================================================================== --- user/kmacy/HEAD_fast_net_merge/sys/net/route.c Wed Mar 4 02:12:29 2009 (r189341) +++ user/kmacy/HEAD_fast_net_merge/sys/net/route.c Wed Mar 4 02:38:38 2009 (r189342) @@ -803,6 +803,103 @@ bad: return (error); } +#ifdef RADIX_MPATH +static int +rn_mpath_update(int req, struct rt_addrinfo *info, + struct radix_node_head *rnh, struct rtentry **ret_nrt) +{ + /* + * if we got multipath routes, we require users to specify + * a matching RTAX_GATEWAY. 
+ */ + struct rtentry *rt, *rto = NULL; + register struct radix_node *rn; + int error = 0; + + rn = rnh->rnh_matchaddr(dst, rnh); + if (rn == NULL) + return (ESRCH); + rto = rt = RNTORT(rn); + rt = rt_mpath_matchgate(rt, gateway); + if (rt == NULL) + return (ESRCH); + /* + * this is the first entry in the chain + */ + if (rto == rt) { + rn = rn_mpath_next((struct radix_node *)rt); + /* + * there is another entry, now it's active + */ + if (rn) { + rto = RNTORT(rn); + RT_LOCK(rto); + rto->rt_flags |= RTF_UP; + RT_UNLOCK(rto); + } else if (rt->rt_flags & RTF_GATEWAY) { + /* + * For gateway routes, we need to + * make sure that we we are deleting + * the correct gateway. + * rt_mpath_matchgate() does not + * check the case when there is only + * one route in the chain. + */ + if (gateway && + (rt->rt_gateway->sa_len != gateway->sa_len || + memcmp(rt->rt_gateway, gateway, gateway->sa_len))) + error = ESRCH; + goto done; + } + /* + * use the normal delete code to remove + * the first entry + */ + if (req != RTM_DELETE) + goto nondelete; + + error = ENOENT; + goto done; + } + + /* + * if the entry is 2nd and on up + */ + if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt)) + panic ("rtrequest1: rt_mpath_deldup"); + RT_LOCK(rt); + RT_ADDREF(rt); + if (req == RTM_DELETE) { + rt->rt_flags &= ~RTF_UP; + /* + * One more rtentry floating around that is not + * linked to the routing table. rttrash will be decremented + * when RTFREE(rt) is eventually called. + */ + V_rttrash++; + + } + +nondelete: + if (req != RTM_DELETE) + panic("unrecognized request %d", req); + + + /* + * If the caller wants it, then it can have it, + * but it's up to it to free the rtentry as we won't be + * doing it. 
+ */ + if (ret_nrt) { + *ret_nrt = rt; + RT_UNLOCK(rt); + } else + RTFREE_LOCKED(rt); +done: + return (error); +} +#endif + int rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) @@ -841,65 +938,15 @@ rtrequest1_fib(int req, struct rt_addrin switch (req) { case RTM_DELETE: #ifdef RADIX_MPATH - /* - * if we got multipath routes, we require users to specify - * a matching RTAX_GATEWAY. - */ if (rn_mpath_capable(rnh)) { - struct rtentry *rto = NULL; - - rn = rnh->rnh_matchaddr(dst, rnh); - if (rn == NULL) - senderr(ESRCH); - rto = rt = RNTORT(rn); - rt = rt_mpath_matchgate(rt, gateway); - if (!rt) - senderr(ESRCH); - /* - * this is the first entry in the chain - */ - if (rto == rt) { - rn = rn_mpath_next((struct radix_node *)rt); - /* - * there is another entry, now it's active - */ - if (rn) { - rto = RNTORT(rn); - RT_LOCK(rto); - rto->rt_flags |= RTF_UP; - RT_UNLOCK(rto); - } else if (rt->rt_flags & RTF_GATEWAY) { - /* - * For gateway routes, we need to - * make sure that we we are deleting - * the correct gateway. - * rt_mpath_matchgate() does not - * check the case when there is only - * one route in the chain. - */ - if (gateway && - (rt->rt_gateway->sa_len != gateway->sa_len || - memcmp(rt->rt_gateway, gateway, gateway->sa_len))) - senderr(ESRCH); - } - /* - * use the normal delete code to remove - * the first entry - */ - goto normal_rtdel; - } + error = rn_mpath_update(req, info, rnh, ret_nrt); /* - * if the entry is 2nd and on up + * "bad" holds true for the success case + * as well */ - if (!rt_mpath_deldup(rto, rt)) - panic ("rtrequest1: rt_mpath_deldup"); - RT_LOCK(rt); - RT_ADDREF(rt); - rt->rt_flags &= ~RTF_UP; - goto deldone; /* done with the RTM_DELETE command */ + if (error != ENOENT) + goto bad; } - -normal_rtdel: #endif /* * Remove the item from the tree and return it. 
@@ -921,9 +968,6 @@ normal_rtdel: if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) ifa->ifa_rtrequest(RTM_DELETE, rt, info); -#ifdef RADIX_MPATH -deldone: -#endif /* * One more rtentry floating around that is not * linked to the routing table. rttrash will be decremented @@ -951,11 +995,13 @@ deldone: case RTM_ADD: if ((flags & RTF_GATEWAY) && !gateway) senderr(EINVAL); - if (dst && gateway && (dst->sa_family != gateway->sa_family) && - (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) + if (dst && gateway && (dst->sa_family != gateway->sa_family) + && (gateway->sa_family != AF_UNSPEC) + && (gateway->sa_family != AF_LINK)) senderr(EINVAL); - if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum))) + if (info->rti_ifa == NULL && + (error = rt_getifa_fib(info, fibnum))) senderr(error); ifa = info->rti_ifa; rt = uma_zalloc(rtzone, M_NOWAIT | M_ZERO); @@ -996,6 +1042,7 @@ deldone: IFAREF(ifa); rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; + rt->rt_rmx.rmx_weight = 1; #ifdef RADIX_MPATH /* do not permit exactly the same dst/mask/gw pair */ Modified: user/kmacy/HEAD_fast_net_merge/sys/net/route.h ============================================================================== --- user/kmacy/HEAD_fast_net_merge/sys/net/route.h Wed Mar 4 02:12:29 2009 (r189341) +++ user/kmacy/HEAD_fast_net_merge/sys/net/route.h Wed Mar 4 02:38:38 2009 (r189342) @@ -59,6 +59,7 @@ struct rt_metrics_lite { u_long rmx_mtu; /* MTU for this path */ u_long rmx_expire; /* lifetime for route, e.g. 
redirect */ u_long rmx_pksent; /* packets sent using this route */ + u_long rmx_weight; /* absolute weight */ }; struct rt_metrics { @@ -72,7 +73,8 @@ struct rt_metrics { u_long rmx_rtt; /* estimated round trip time */ u_long rmx_rttvar; /* estimated rtt variance */ u_long rmx_pksent; /* packets sent using this route */ - u_long rmx_filler[4]; /* will be used for T/TCP later */ + u_long rmx_weight; /* route weight */ + u_long rmx_filler[3]; /* will be used for T/TCP later */ }; /* @@ -194,13 +196,15 @@ struct ortentry { #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ - /* 0x1000000 and up unassigned */ -#define RTF_RNH_LOCKED 0x40000000 /* radix node head locked by caller */ + /* 0x8000000 and up unassigned */ +#define RTF_STICKY 0x10000000 /* always route dst->src */ + +#define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */ /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */ #define RTF_FMASK \ (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \ - RTF_REJECT | RTF_STATIC) + RTF_REJECT | RTF_STATIC | RTF_STICKY) /* * Routing statistics. @@ -226,12 +230,11 @@ struct rt_msghdr { int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ int rtm_fmask; /* bitmask used in RTM_CHANGE message */ -#define rtm_use rtm_fmask /* deprecated, use rtm_rmx->rmx_pksent */ u_long rtm_inits; /* which metrics we are initializing */ struct rt_metrics rtm_rmx; /* metrics themselves */ }; -#define RTM_VERSION 5 /* Up the ante and ignore older versions */ +#define RTM_VERSION 6 /* Up the ante and ignore older versions */ /* * Message types. 
@@ -266,6 +269,7 @@ struct rt_msghdr { #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ +#define RTV_WEIGHT 0x100 /* init or lock _weight */ /* * Bitmask values for rtm_addrs. @@ -273,7 +277,7 @@ struct rt_msghdr { #define RTA_DST 0x1 /* destination sockaddr present */ #define RTA_GATEWAY 0x2 /* gateway sockaddr present */ #define RTA_NETMASK 0x4 /* netmask sockaddr present */ -#define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ +#define RTA_SPARE 0x8 /* unused */ #define RTA_IFP 0x10 /* interface name sockaddr present */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ @@ -285,7 +289,7 @@ struct rt_msghdr { #define RTAX_DST 0 /* destination sockaddr present */ #define RTAX_GATEWAY 1 /* gateway sockaddr present */ #define RTAX_NETMASK 2 /* netmask sockaddr present */ -#define RTAX_GENMASK 3 /* cloning mask sockaddr present */ +#define RTAX_SPARE 3 /* spare field */ #define RTAX_IFP 4 /* interface name sockaddr present */ #define RTAX_IFA 5 /* interface addr sockaddr present */ #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ @@ -293,11 +297,11 @@ struct rt_msghdr { #define RTAX_MAX 8 /* size of array to allocate */ struct rt_addrinfo { - int rti_addrs; - struct sockaddr *rti_info[RTAX_MAX]; - int rti_flags; - struct ifaddr *rti_ifa; - struct ifnet *rti_ifp; + int rti_addrs; + struct sockaddr *rti_info[RTAX_MAX]; + int rti_flags; + struct ifaddr *rti_ifa; + struct ifnet *rti_ifp; }; /* Modified: user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c ============================================================================== --- user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c Wed Mar 4 02:12:29 2009 (r189341) +++ user/kmacy/HEAD_fast_net_merge/sys/net/rtsock.c Wed Mar 4 02:38:38 2009 (r189342) @@ -601,7 +601,6 @@ route_output(struct mbuf *m, struct sock info.rti_info[RTAX_DST] = 
rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); - info.rti_info[RTAX_GENMASK] = 0; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { ifp = rt->rt_ifp; if (ifp) { @@ -637,7 +636,6 @@ route_output(struct mbuf *m, struct sock } (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL); rtm->rtm_flags = rt->rt_flags; - rtm->rtm_use = 0; rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); rtm->rtm_addrs = info.rti_addrs; break; @@ -691,10 +689,8 @@ route_output(struct mbuf *m, struct sock rt->rt_ifp = info.rti_ifp; } /* Allow some flags to be toggled on change. */ - if (rtm->rtm_fmask & RTF_FMASK) - rt->rt_flags = (rt->rt_flags & - ~rtm->rtm_fmask) | - (rtm->rtm_flags & rtm->rtm_fmask); + rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) | + (rtm->rtm_flags & RTF_FMASK); rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); rtm->rtm_index = rt->rt_ifp->if_index; @@ -767,12 +763,14 @@ static void rt_setmetrics(u_long which, const struct rt_metrics *in, struct rt_metrics_lite *out) { -#define metric(f, e) if (which & (f)) out->e = in->e; +#define metric(f, e) if (which & (f)) { printf("setting 0x%x", f); out->e = in->e; } + /* * Only these are stored in the routing entry since introduction * of tcp hostcache. The rest is ignored. */ metric(RTV_MTU, rmx_mtu); + metric(RTV_WEIGHT, rmx_weight); /* Userland -> kernel timebase conversion. */ if (which & RTV_EXPIRE) out->rmx_expire = in->rmx_expire ? @@ -786,6 +784,7 @@ rt_getmetrics(const struct rt_metrics_li #define metric(e) out->e = in->e; bzero(out, sizeof(*out)); metric(rmx_mtu); + metric(rmx_weight); /* Kernel -> userland timebase conversion. */ out->rmx_expire = in->rmx_expire ? 
in->rmx_expire - time_uptime + time_second : 0; @@ -1245,7 +1244,6 @@ sysctl_dumpentry(struct radix_node *rn, info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); - info.rti_info[RTAX_GENMASK] = 0; if (rt->rt_ifp) { info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; @@ -1257,7 +1255,10 @@ sysctl_dumpentry(struct radix_node *rn, struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; rtm->rtm_flags = rt->rt_flags; - rtm->rtm_use = rt->rt_rmx.rmx_pksent; + /* + * let's be honest about this being a retarded hack + */ + rtm->rtm_fmask = rt->rt_rmx.rmx_pksent; rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; Modified: user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c ============================================================================== --- user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c Wed Mar 4 02:12:29 2009 (r189341) +++ user/kmacy/HEAD_fast_net_merge/sys/netinet/ip_input.c Wed Mar 4 02:38:38 2009 (r189342) @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include "opt_route.h" #include "opt_mac.h" #include "opt_carp.h" +#include "opt_mpath.h" #include <sys/param.h> #include <sys/systm.h> @@ -340,7 +341,7 @@ ip_init(void) netisr_register(NETISR_IP, ip_input, &ipintrq, 0); ipv4_ft = flowtable_alloc(ip_pcpu_flowtable_size, FL_PCPU); - ipv4_forward_ft = flowtable_alloc(ip_global_flowtable_size, FL_HASH_PORTS); + ipv4_forward_ft = flowtable_alloc(ip_global_flowtable_size, FL_HASH_PORTS|FL_PCPU); } void Modified: user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c ============================================================================== --- user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c Wed Mar 4 02:12:29 2009 (r189341) +++ user/kmacy/HEAD_fast_net_merge/usr.sbin/route6d/route6d.c Wed Mar 4 02:38:38 2009 (r189342) @@ -2661,10 +2661,6 
@@ rt_entry(rtm, again) sin6_mask = (struct sockaddr_in6 *)rtmp; rtmp += ROUNDUP(sin6_mask->sin6_len); } - if (rtm->rtm_addrs & RTA_GENMASK) { - sin6_genmask = (struct sockaddr_in6 *)rtmp; - rtmp += ROUNDUP(sin6_genmask->sin6_len); - } if (rtm->rtm_addrs & RTA_IFP) { sin6_ifp = (struct sockaddr_in6 *)rtmp; rtmp += ROUNDUP(sin6_ifp->sin6_len);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200903040238.n242ccNQ028860>