Date: Tue, 9 Feb 2010 17:40:02 GMT From: Balaji G <balajig81@gmail.com> To: freebsd-gnats-submit@FreeBSD.org Subject: misc/143703: Patch: ECMP Phase 1 fixes for FreeBSD 7.2 Message-ID: <201002091740.o19He2S5013505@www.freebsd.org> Resent-Message-ID: <201002091750.o19Ho1vc012759@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 143703 >Category: misc >Synopsis: Patch: ECMP Phase 1 fixes for FreeBSD 7.2 >Confidential: no >Severity: non-critical >Priority: medium >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: update >Submitter-Id: current-users >Arrival-Date: Tue Feb 09 17:50:01 UTC 2010 >Closed-Date: >Last-Modified: >Originator: Balaji G >Release: FreeBSD 7.2 >Organization: Home >Environment: FreeBSD 7.2-RELEASE FreeBSD 7.2-RELEASE #7: Sun Feb 7 13:19:58 IST 2010 root@:/usr/obj/usr/home/balaji/7.2.0/sys/MYKERNEL i386 >Description: This patch contains the ECMP Phase 1 fixes backported to the 7.2 release. I am working on bringing in the remaining changes too. This patch installs ECMP routes in the RIB. >How-To-Repeat: Create two static routes with the same destination and different gateways. >Fix: Patch attached with submission follows: diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.c net/radix.c --- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.c 2010-02-04 22:40:29.000000000 +0530 +++ net/radix.c 2010-02-09 21:10:19.731903885 +0530 @@ -48,6 +48,8 @@ #include <net/radix.h> #endif +#include <net/radix_mpath.h> + static int rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f, void *w); static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); @@ -630,6 +632,21 @@ saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); if (keyduplicated) { for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { + + /* permit multipath, if enabled for the family */ + if (rn_mpath_capable(head) && netmask == tt->rn_mask) { + /* + * go down to the end of multipaths, so that + * new entry goes into the end of rn_dupedkey + * chain. 
+ */ + do { + t = tt; + tt = tt->rn_dupedkey; + } while (tt && t->rn_mask == tt->rn_mask); + break; + } + if (tt->rn_mask == netmask) return (0); if (netmask == 0 || diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.h net/radix.h --- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.h 2010-02-05 08:38:18.000000000 +0530 +++ net/radix.h 2010-02-09 21:17:10.373883032 +0530 @@ -130,9 +130,9 @@ void (*rnh_close) /* do something when the last ref drops */ (struct radix_node *rn, struct radix_node_head *head); struct radix_node rnh_nodes[3]; /* empty tree for common case */ - /* ECMP Changes Begin */ + int rnh_multipath; /* multipath capable ? */ - /* ECMP Changes End */ + #ifdef _KERNEL struct mtx rnh_mtx; /* locks entire radix tree */ #endif diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix_mpath.c net/radix_mpath.c --- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix_mpath.c 2010-02-05 08:30:52.000000000 +0530 +++ net/radix_mpath.c 2010-02-09 21:12:14.681638068 +0530 @@ -54,7 +54,7 @@ /* * give some jitter to hash, to avoid synchronization between routers */ -static uint32_t hashjitter; +static uint32_t hashjitter; int rn_mpath_capable(struct radix_node_head *rnh) @@ -258,7 +258,6 @@ return 0; } -#if 0 void rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) { @@ -317,10 +316,9 @@ } RT_UNLOCK(ro->ro_rt); } -#endif extern int in6_inithead(void **head, int off); -extern int in_inithead(void **head, int off); +extern int in_inthead(void **head, int off); #ifdef INET int @@ -352,5 +350,5 @@ } else return 0; } - #endif + Only in net: radix_mpath.h diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.c net/route.c --- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.c 2010-02-04 22:40:29.000000000 +0530 +++ net/route.c 2010-02-09 21:16:21.754622762 +0530 @@ -886,6 +886,111 @@ return (rtrequest1_fib(req, info, 
ret_nrt, 0)); } +static int +rn_mpath_update(int req, struct rt_addrinfo *info, + struct radix_node_head *rnh, struct rtentry **ret_nrt) +{ + /* + * if we got multipath routes, we require users to specify + * a matching RTAX_GATEWAY. + */ + struct rtentry *rt, *rto = NULL; + register struct radix_node *rn; + int error = 0; + + rn = rnh->rnh_matchaddr(dst, rnh); + if (rn == NULL) + return (ESRCH); + rto = rt = RNTORT(rn); + rt = rt_mpath_matchgate(rt, gateway); + if (rt == NULL) + return (ESRCH); + /* + * this is the first entry in the chain + */ + if (rto == rt) { + rn = rn_mpath_next((struct radix_node *)rt); + /* + * there is another entry, now it's active + */ + if (rn) { + rto = RNTORT(rn); + RT_LOCK(rto); + rto->rt_flags |= RTF_UP; + RT_UNLOCK(rto); + } else if (rt->rt_flags & RTF_GATEWAY) { + /* + * For gateway routes, we need to + * make sure that we we are deleting + * the correct gateway. + * rt_mpath_matchgate() does not + * check the case when there is only + * one route in the chain. + */ + if (gateway && + (rt->rt_gateway->sa_len != gateway->sa_len || + memcmp(rt->rt_gateway, gateway, gateway->sa_len))) + error = ESRCH; + else { + /* + * remove from tree before returning it + * to the caller + */ + rn = rnh->rnh_deladdr(dst, netmask, rnh); + KASSERT(rt == RNTORT(rn), ("radix node disappeared")); + goto gwdelete; + } + + } + /* + * use the normal delete code to remove + * the first entry + */ + if (req != RTM_DELETE) + goto nondelete; + + error = ENOENT; + goto done; + } + + /* + * if the entry is 2nd and on up + */ + if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt)) + panic ("rtrequest1: rt_mpath_deldup"); +gwdelete: + RT_LOCK(rt); + RT_ADDREF(rt); + if (req == RTM_DELETE) { + rt->rt_flags &= ~RTF_UP; + /* + * One more rtentry floating around that is not + * linked to the routing table. rttrash will be decremented + * when RTFREE(rt) is eventually called. 
+ */ + rttrash++; + } + +nondelete: + if (req != RTM_DELETE) + panic("unrecognized request %d", req); + + + /* + * If the caller wants it, then it can have it, + * but it's up to it to free the rtentry as we won't be + * doing it. + */ + if (ret_nrt) { + *ret_nrt = rt; + RT_UNLOCK(rt); + } else + RTFREE_LOCKED(rt); +done: + return (error); +} + + int rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) @@ -923,6 +1028,17 @@ } switch (req) { case RTM_DELETE: + + if (rn_mpath_capable(rnh)) { + error = rn_mpath_update(req, info, rnh, ret_nrt); + /* + * "bad" holds true for the success case + * as well + */ + if (error != ENOENT) + goto bad; + } + /* * Remove the item from the tree and return it. * Complain if it is not there and do no more processing. @@ -1046,6 +1162,18 @@ rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; + /* do not permit exactly the same dst/mask/gw pair */ + if (rn_mpath_capable(rnh) && + rt_mpath_conflict(rnh, rt, netmask)) { + if (rt->rt_ifa) { + IFAFREE(rt->rt_ifa); + } + Free(rt_key(rt)); + RT_LOCK_DESTROY(rt); + uma_zfree(rtzone, rt); + senderr(EEXIST); + } + /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); if (rn == NULL) { @@ -1456,6 +1584,27 @@ /* this table doesn't exist but others might */ continue; RADIX_NODE_HEAD_LOCK(rnh); + if (rn_mpath_capable(rnh)) { + + rn = rnh->rnh_matchaddr(dst, rnh); + if (rn == NULL) + error = ESRCH; + else { + rt = RNTORT(rn); + /* + * for interface route the + * rt->rt_gateway is sockaddr_intf + * for cloning ARP entries, so + * rt_mpath_matchgate must use the + * interface address + */ + rt = rt_mpath_matchgate(rt, + ifa->ifa_addr); + if (!rt) + error = ESRCH; + } + } + else rn = rnh->rnh_lookup(dst, netmask, rnh); error = (rn == NULL || (rn->rn_flags & RNF_ROOT) || @@ -1482,6 +1631,20 @@ * notify any listening routing agents of the change */ RT_LOCK(rt); + /* + * in case address alias finds the first 
address + * e.g. ifconfig bge0 192.103.54.246/24 + * e.g. ifconfig bge0 192.103.54.247/24 + * the address set in the route is 192.103.54.246 + * so we need to replace it with 192.103.54.247 + */ + if (memcmp(rt->rt_ifa->ifa_addr, + ifa->ifa_addr, ifa->ifa_addr->sa_len)) { + IFAFREE(rt->rt_ifa); + IFAREF(ifa); + rt->rt_ifp = ifa->ifa_ifp; + rt->rt_ifa = ifa; + } rt_newaddrmsg(cmd, ifa, error, rt); if (cmd == RTM_DELETE) { /* diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.h net/route.h --- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.h 2010-02-04 22:40:29.000000000 +0530 +++ net/route.h 2010-02-09 21:18:10.257871360 +0530 @@ -58,6 +58,7 @@ u_long rmx_mtu; /* MTU for this path */ u_long rmx_expire; /* lifetime for route, e.g. redirect */ u_long rmx_pksent; /* packets sent using this route */ + u_long rmx_weight; }; struct rt_metrics { @@ -101,6 +102,9 @@ #ifndef RNF_NORMAL #include <net/radix.h> #endif + +#include <net/radix_mpath.h> + struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/rtsock.c net/rtsock.c --- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/rtsock.c 2010-02-04 22:40:29.000000000 +0530 +++ net/rtsock.c 2010-02-09 21:18:37.274871425 +0530 @@ -536,6 +536,24 @@ RADIX_NODE_HEAD_UNLOCK(rnh); senderr(ESRCH); } + + /* + * for RTM_CHANGE/LOCK, if we got multipath routes, + * we require users to specify a matching RTAX_GATEWAY. + * + * for RTM_GET, gate is optional even with multipath. + * if gate == NULL the first match is returned. 
+ * (no need to call rt_mpath_matchgate if gate == NULL) + */ + if (rn_mpath_capable(rnh) && + (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) { + rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]); + if (!rt) { + RADIX_NODE_HEAD_UNLOCK(rnh); + senderr(ESRCH); + } + } + RT_LOCK(rt); RT_ADDREF(rt); RADIX_NODE_HEAD_UNLOCK(rnh); >Release-Note: >Audit-Trail: >Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201002091740.o19He2S5013505>