From owner-svn-src-all@freebsd.org Thu Jun 14 14:53:25 2018 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 11A18100AED2; Thu, 14 Jun 2018 14:53:25 +0000 (UTC) (envelope-from ae@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id B8DC26E659; Thu, 14 Jun 2018 14:53:24 +0000 (UTC) (envelope-from ae@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 80D3A1E7C; Thu, 14 Jun 2018 14:53:24 +0000 (UTC) (envelope-from ae@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w5EErObh020698; Thu, 14 Jun 2018 14:53:24 GMT (envelope-from ae@FreeBSD.org) Received: (from ae@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w5EErO7V020697; Thu, 14 Jun 2018 14:53:24 GMT (envelope-from ae@FreeBSD.org) Message-Id: <201806141453.w5EErO7V020697@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: ae set sender to ae@FreeBSD.org using -f From: "Andrey V. Elsukov" Date: Thu, 14 Jun 2018 14:53:24 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r335141 - head/sys/net X-SVN-Group: head X-SVN-Commit-Author: ae X-SVN-Commit-Paths: head/sys/net X-SVN-Commit-Revision: 335141 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.26 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 14 Jun 2018 14:53:25 -0000 Author: ae Date: Thu Jun 14 14:53:24 2018 New Revision: 335141 URL: https://svnweb.freebsd.org/changeset/base/335141 Log: Convert if_me(4) driver to use encap_lookup_t method and be lockless on data path. Modified: head/sys/net/if_me.c Modified: head/sys/net/if_me.c ============================================================================== --- head/sys/net/if_me.c Thu Jun 14 14:53:01 2018 (r335140) +++ head/sys/net/if_me.c Thu Jun 14 14:53:24 2018 (r335141) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014 Andrey V. Elsukov + * Copyright (c) 2014, 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,22 +28,20 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include -#include #include #include #include #include #include -#include #include #include #include #include #include -#include #include #include @@ -68,8 +66,6 @@ __FBSDID("$FreeBSD$"); #define MEMTU (1500 - sizeof(struct mobhdr)) static const char mename[] = "me"; static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP"); -static VNET_DEFINE(struct mtx, me_mtx); -#define V_me_mtx VNET(me_mtx) /* Minimal forwarding header RFC 2004 */ struct mobhdr { uint8_t mob_proto; /* protocol */ @@ -82,32 +78,27 @@ struct mobhdr { struct me_softc { struct ifnet *me_ifp; - LIST_ENTRY(me_softc) me_list; - struct rmlock me_lock; u_int me_fibnum; - const struct encaptab *me_ecookie; struct in_addr me_src; struct in_addr me_dst; + + CK_LIST_ENTRY(me_softc) chain; }; +CK_LIST_HEAD(me_list, me_softc); #define ME2IFP(sc) ((sc)->me_ifp) #define ME_READY(sc) ((sc)->me_src.s_addr != 0) -#define ME_LOCK_INIT(sc) rm_init(&(sc)->me_lock, "me softc") -#define ME_LOCK_DESTROY(sc) rm_destroy(&(sc)->me_lock) -#define ME_RLOCK_TRACKER struct rm_priotracker me_tracker -#define ME_RLOCK(sc) rm_rlock(&(sc)->me_lock, &me_tracker) -#define ME_RUNLOCK(sc) rm_runlock(&(sc)->me_lock, &me_tracker) -#define ME_RLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_RLOCKED) -#define ME_WLOCK(sc) rm_wlock(&(sc)->me_lock) -#define ME_WUNLOCK(sc) rm_wunlock(&(sc)->me_lock) -#define ME_WLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_WLOCKED) +#define ME_RLOCK() epoch_enter_preempt(net_epoch_preempt) +#define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) +#define ME_WAIT() epoch_wait_preempt(net_epoch_preempt) -#define ME_LIST_LOCK_INIT(x) mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF) -#define ME_LIST_LOCK_DESTROY(x) mtx_destroy(&V_me_mtx) -#define ME_LIST_LOCK(x) mtx_lock(&V_me_mtx) -#define ME_LIST_UNLOCK(x) mtx_unlock(&V_me_mtx) +#ifndef ME_HASH_SIZE +#define ME_HASH_SIZE (1 << 4) +#endif +static VNET_DEFINE(struct me_list *, me_hashtbl) = NULL; +#define V_me_hashtbl VNET(me_hashtbl) +#define ME_HASH(src, dst) (V_me_hashtbl[\ + me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)]) -static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list); -#define V_me_softc_list VNET(me_softc_list) static struct sx me_ioctl_sx; SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl"); @@ -123,21 +114,9 @@ static int me_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int me_input(struct mbuf *, int, int, void *); -static int me_set_tunnel(struct ifnet *, struct sockaddr_in *, - struct sockaddr_in *); -static void me_delete_tunnel(struct ifnet *); -static int me_encapcheck(const struct mbuf *, int, int, void *); +static int me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t); +static void me_delete_tunnel(struct me_softc *); -#define ME_MINLEN (sizeof(struct ip) + sizeof(struct mobhdr) -\ - sizeof(in_addr_t)) -static const struct encap_config ipv4_encap_cfg = { - .proto = IPPROTO_MOBILE, - .min_length = ME_MINLEN, - .exact_match = (sizeof(in_addr_t) << 4) + 8, - .check = me_encapcheck, - .input = me_input -}; - SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0, "Minimal Encapsulation for IP (RFC 2004)"); @@ -150,11 +129,32 @@ static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST; SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_me_nesting), 0, "Max nested tunnels"); +static uint32_t +me_hashval(in_addr_t src, in_addr_t dst) +{ + uint32_t ret; + + ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); + return (fnv_32_buf(&dst, sizeof(dst), ret)); +} + +static struct me_list * +me_hashinit(void) +{ + struct me_list *hash; + int i; + + hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE, + M_IFME, M_WAITOK); + for (i = 0; i < ME_HASH_SIZE; i++) + CK_LIST_INIT(&hash[i]); + + return (hash); +} + static void vnet_me_init(const void *unused __unused) { - LIST_INIT(&V_me_softc_list); - ME_LIST_LOCK_INIT(); V_me_cloner = if_clone_simple(mename, me_clone_create, me_clone_destroy, 0); } @@ -165,8 +165,9 @@ static void vnet_me_uninit(const void *unused __unused) { + if (V_me_hashtbl != NULL) + free(V_me_hashtbl, M_IFME); if_clone_detach(V_me_cloner); - ME_LIST_LOCK_DESTROY(); } VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_me_uninit, NULL); @@ -179,7 +180,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_ sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO); sc->me_fibnum = curthread->td_proc->p_fibnum; ME2IFP(sc) = if_alloc(IFT_TUNNEL); - ME_LOCK_INIT(sc); ME2IFP(sc)->if_softc = sc; if_initname(ME2IFP(sc), mename, unit); @@ -193,9 +193,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_ ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(ME2IFP(sc)); bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t)); - ME_LIST_LOCK(); - LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list); - ME_LIST_UNLOCK(); return (0); } @@ -206,24 +203,20 @@ me_clone_destroy(struct ifnet *ifp) sx_xlock(&me_ioctl_sx); sc = ifp->if_softc; - me_delete_tunnel(ifp); - ME_LIST_LOCK(); - LIST_REMOVE(sc, me_list); - ME_LIST_UNLOCK(); + me_delete_tunnel(sc); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&me_ioctl_sx); + ME_WAIT(); if_free(ifp); - ME_LOCK_DESTROY(sc); free(sc, M_IFME); } static int me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - ME_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq *)data; struct sockaddr_in *src, *dst; struct me_softc *sc; @@ -251,10 +244,8 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = 0; switch (cmd) { case SIOCSIFPHYADDR: - src = (struct sockaddr_in *) - &(((struct in_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr_in *) - &(((struct in_aliasreq *)data)->ifra_dstaddr); + src = &((struct in_aliasreq *)data)->ifra_addr; + dst = &((struct in_aliasreq *)data)->ifra_dstaddr; if (src->sin_family != dst->sin_family || src->sin_family != AF_INET || src->sin_len != dst->sin_len || @@ -267,17 +258,16 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EADDRNOTAVAIL; break; } - error = me_set_tunnel(ifp, src, dst); + error = me_set_tunnel(sc, src->sin_addr.s_addr, + dst->sin_addr.s_addr); break; case SIOCDIFPHYADDR: - me_delete_tunnel(ifp); + me_delete_tunnel(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: - ME_RLOCK(sc); if (!ME_READY(sc)) { error = EADDRNOTAVAIL; - ME_RUNLOCK(sc); break; } src = (struct sockaddr_in *)&ifr->ifr_addr; @@ -292,7 +282,6 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) src->sin_addr = sc->me_dst; break; } - ME_RUNLOCK(sc); error = prison_if(curthread->td_ucred, sintosa(src)); if (error != 0) memset(src, 0, sizeof(*src)); @@ -318,81 +307,71 @@ end: } static int -me_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +me_lookup(const struct mbuf *m, int off, int proto, void **arg) { - ME_RLOCK_TRACKER; + const struct ip *ip; struct me_softc *sc; - struct ip *ip; - int ret; - sc = (struct me_softc *)arg; - if ((ME2IFP(sc)->if_flags & IFF_UP) == 0) - return (0); - - M_ASSERTPKTHDR(m); - - ret = 0; - ME_RLOCK(sc); - if (ME_READY(sc)) { - ip = mtod(m, struct ip *); + MPASS(in_epoch()); + ip = mtod(m, const struct ip *); + CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr, + ip->ip_src.s_addr), chain) { if (sc->me_src.s_addr == ip->ip_dst.s_addr && - sc->me_dst.s_addr == ip->ip_src.s_addr) - ret = 32 * 2 + 8; + sc->me_dst.s_addr == ip->ip_src.s_addr) { + if ((ME2IFP(sc)->if_flags & IFF_UP) == 0) + return (0); + *arg = sc; + return (ENCAP_DRV_LOOKUP); + } } - ME_RUNLOCK(sc); - return (ret); + return (0); } static int -me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src, - struct sockaddr_in *dst) +me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst) { - struct me_softc *sc, *tsc; + struct me_softc *tmp; sx_assert(&me_ioctl_sx, SA_XLOCKED); - ME_LIST_LOCK(); - sc = ifp->if_softc; - LIST_FOREACH(tsc, &V_me_softc_list, me_list) { - if (tsc == sc || !ME_READY(tsc)) + + if (V_me_hashtbl == NULL) + V_me_hashtbl = me_hashinit(); + + if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst) + return (0); + + CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) { + if (tmp == sc) continue; - if (tsc->me_src.s_addr == src->sin_addr.s_addr && - tsc->me_dst.s_addr == dst->sin_addr.s_addr) { - ME_LIST_UNLOCK(); + if (tmp->me_src.s_addr == src && + tmp->me_dst.s_addr == dst) return (EADDRNOTAVAIL); - } } - ME_LIST_UNLOCK(); - ME_WLOCK(sc); - sc->me_dst = dst->sin_addr; - sc->me_src = src->sin_addr; - ME_WUNLOCK(sc); + me_delete_tunnel(sc); + sc->me_dst.s_addr = dst; + sc->me_src.s_addr = src; + CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain); - if (sc->me_ecookie == NULL) - sc->me_ecookie = ip_encap_attach(&ipv4_encap_cfg, - sc, M_WAITOK); - if (sc->me_ecookie != NULL) { - ifp->if_drv_flags |= IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_UP); - } + ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; + if_link_state_change(ME2IFP(sc), LINK_STATE_UP); return (0); } static void -me_delete_tunnel(struct ifnet *ifp) +me_delete_tunnel(struct me_softc *sc) { - struct me_softc *sc = ifp->if_softc; sx_assert(&me_ioctl_sx, SA_XLOCKED); - if (sc->me_ecookie != NULL) - ip_encap_detach(sc->me_ecookie); - sc->me_ecookie = NULL; - ME_WLOCK(sc); - sc->me_src.s_addr = 0; - sc->me_dst.s_addr = 0; - ME_WUNLOCK(sc); - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_DOWN); + if (ME_READY(sc)) { + CK_LIST_REMOVE(sc, chain); + ME_WAIT(); + + sc->me_src.s_addr = 0; + sc->me_dst.s_addr = 0; + ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; + if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN); + } } static uint16_t @@ -505,58 +484,48 @@ me_check_nesting(struct ifnet *ifp, struct mbuf *m) static int me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, - struct route *ro) + struct route *ro __unused) { uint32_t af; - int error; -#ifdef MAC - error = mac_ifnet_check_transmit(ifp, m); - if (error != 0) - goto drop; -#endif - if ((ifp->if_flags & IFF_MONITOR) != 0 || - (ifp->if_flags & IFF_UP) == 0) { - error = ENETDOWN; - goto drop; - } - - error = me_check_nesting(ifp, m); - if (error != 0) - goto drop; - - m->m_flags &= ~(M_BCAST|M_MCAST); if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; - if (af != AF_INET) { - error = EAFNOSUPPORT; - goto drop; - } - BPF_MTAP2(ifp, &af, sizeof(af), m); + m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); -drop: - m_freem(m); - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - return (error); } static int me_transmit(struct ifnet *ifp, struct mbuf *m) { - ME_RLOCK_TRACKER; struct mobhdr mh; struct me_softc *sc; struct ip *ip; + uint32_t af; int error, hlen, plen; +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error != 0) + goto drop; +#endif + error = ENETDOWN; + ME_RLOCK(); sc = ifp->if_softc; - if (sc == NULL) { - error = ENETDOWN; + if (sc == NULL || !ME_READY(sc) || + (ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0 || + (error = me_check_nesting(ifp, m) != 0)) { m_freem(m); goto drop; } + af = m->m_pkthdr.csum_data; + if (af != AF_INET) { + error = EAFNOSUPPORT; + m_freem(m); + goto drop; + } if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { @@ -573,13 +542,6 @@ me_transmit(struct ifnet *ifp, struct mbuf *m) mh.mob_proto = ip->ip_p; mh.mob_src = ip->ip_src; mh.mob_dst = ip->ip_dst; - ME_RLOCK(sc); - if (!ME_READY(sc)) { - ME_RUNLOCK(sc); - error = ENETDOWN; - m_freem(m); - goto drop; - } if (in_hosteq(sc->me_src, ip->ip_src)) { hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); mh.mob_flags = 0; @@ -590,8 +552,8 @@ me_transmit(struct ifnet *ifp, struct mbuf *m) plen = m->m_pkthdr.len; ip->ip_src = sc->me_src; ip->ip_dst = sc->me_dst; + m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->me_fibnum); - ME_RUNLOCK(sc); M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) { error = ENOBUFS; @@ -619,6 +581,7 @@ drop: if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); } + ME_RUNLOCK(); return (error); } @@ -628,13 +591,26 @@ me_qflush(struct ifnet *ifp __unused) } +static const struct encaptab *ecookie = NULL; +static const struct encap_config me_encap_cfg = { + .proto = IPPROTO_MOBILE, + .min_length = sizeof(struct ip) + sizeof(struct mobhdr) - + sizeof(in_addr_t), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = me_lookup, + .input = me_input +}; + static int memodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: + ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK); + break; case MOD_UNLOAD: + ip_encap_detach(ecookie); break; default: return (EOPNOTSUPP);