From owner-svn-src-all@FreeBSD.ORG Wed Apr 29 11:26:46 2009
Message-Id: <200904291126.n3TBQjVg079333@svn.freebsd.org>
From: Bruce M Simpson <bms@FreeBSD.org>
Date: Wed, 29 Apr 2009 11:26:45 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r191665 - head/sys/netinet6

Author: bms
Date: Wed Apr 29 11:26:45 2009
New Revision: 191665
URL: http://svn.freebsd.org/changeset/base/191665

Log:
  Import IPv6 SSM module but do not connect it to the build.

Added:
  head/sys/netinet6/in6_mcast.c   (contents, props changed)

Added: head/sys/netinet6/in6_mcast.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/netinet6/in6_mcast.c	Wed Apr 29 11:26:45 2009	(r191665)
@@ -0,0 +1,2625 @@
+/*
+ * Copyright (c) 2009 Bruce Simpson.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */ + +/* + * IPv6 multicast socket, group, and socket option processing module. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet6.h" +#include "opt_route.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef KTR_MLD +#define KTR_MLD KTR_INET6 +#endif + +#ifndef __SOCKUNION_DECLARED +union sockunion { + struct sockaddr_storage ss; + struct sockaddr sa; + struct sockaddr_dl sdl; + struct sockaddr_in6 sin6; +}; +typedef union sockunion sockunion_t; +#define __SOCKUNION_DECLARED +#endif /* __SOCKUNION_DECLARED */ + +static MALLOC_DEFINE(M_IN6MFILTER, "in6_mfilter", + "IPv6 multicast PCB-layer source filter"); +static MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "IPv6 multicast group"); +static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "IPv6 multicast options"); +static MALLOC_DEFINE(M_IP6MSOURCE, "ip6_msource", + "IPv6 multicast MLD-layer source filter"); + +RB_GENERATE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp); + +/* + * Locking: + * - Lock order is: Giant, INP_WLOCK, IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK. + * - The IF_ADDR_LOCK is implicitly taken by in6m_lookup() earlier, however + * it can be taken by code in net/if.c also. + * - ip6_moptions and in6_mfilter are covered by the INP_WLOCK. + * + * struct in6_multi is covered by IN6_MULTI_LOCK. There isn't strictly + * any need for in6_multi itself to be virtualized -- it is bound to an ifp + * anyway no matter what happens. + */ +struct mtx in6_multi_mtx; +MTX_SYSINIT(in6_multi_mtx, &in6_multi_mtx, "in6_multi_mtx", MTX_DEF); + +static void im6f_commit(struct in6_mfilter *); +static int im6f_get_source(struct in6_mfilter *imf, + const struct sockaddr_in6 *psin, + struct in6_msource **); +static struct in6_msource * + im6f_graft(struct in6_mfilter *, const uint8_t, + const struct sockaddr_in6 *); +static void im6f_leave(struct in6_mfilter *); +static int im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *); +static void im6f_purge(struct in6_mfilter *); +static void im6f_rollback(struct in6_mfilter *); +static void im6f_reap(struct in6_mfilter *); +static int im6o_grow(struct ip6_moptions *); +static size_t im6o_match_group(const struct ip6_moptions *, + const struct ifnet *, const struct sockaddr *); +static struct in6_msource * + im6o_match_source(const struct ip6_moptions *, const size_t, + const struct sockaddr *); +static void im6s_merge(struct ip6_msource *ims, + const struct in6_msource *lims, const int rollback); +static int in6_mc_get(struct ifnet *, const struct in6_addr *, + struct in6_multi **); +static int in6m_get_source(struct in6_multi *inm, + const struct in6_addr *addr, const int noalloc, + struct ip6_msource **pims); +static int in6m_is_ifp_detached(const struct in6_multi *); +static int in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *); +static void in6m_purge(struct in6_multi *); +static void in6m_reap(struct in6_multi *); +static struct ip6_moptions * + in6p_findmoptions(struct inpcb *); +static int in6p_get_source_filters(struct inpcb *, struct sockopt *); +static int in6p_join_group(struct inpcb *, struct sockopt *); +static int in6p_leave_group(struct inpcb *, struct sockopt *); +static int in6p_block_unblock_source(struct inpcb *, struct sockopt *); +static int in6p_set_multicast_if(struct inpcb *, struct 
sockopt *); +static int in6p_set_source_filters(struct inpcb *, struct sockopt *); +static int sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS); + +SYSCTL_DECL(_net_inet6_ip6); /* XXX Not in any common header. */ + +SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv6 multicast"); + +static u_long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER; +SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc, + CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxgrpsrc, 0, + "Max source filters per group"); +TUNABLE_ULONG("net.inet6.ip6.mcast.maxgrpsrc", &in6_mcast_maxgrpsrc); + +static u_long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER; +SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc, + CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxsocksrc, 0, + "Max source filters per socket"); +TUNABLE_ULONG("net.inet6.ip6.mcast.maxsocksrc", &in6_mcast_maxsocksrc); + +/* TODO Virtualize this switch. */ +int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP; +SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN, + &in6_mcast_loop, 0, "Loopback multicast datagrams by default"); +TUNABLE_INT("net.inet6.ip6.mcast.loop", &in6_mcast_loop); + +SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters, + CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters, + "Per-interface stack-wide source filters"); + +/* + * Inline function which wraps assertions for a valid ifp. + * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp + * is detached. + */ +static int __inline +in6m_is_ifp_detached(const struct in6_multi *inm) +{ + struct ifnet *ifp; + + KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__)); + ifp = inm->in6m_ifma->ifma_ifp; + if (ifp != NULL) { + /* + * Sanity check that network-layer notion of ifp is the + * same as that of link-layer. + */ + KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__)); + } + + return (ifp == NULL); +} + +/* + * Initialize an in6_mfilter structure to a known state at t0, t1 + * with an empty source filter list. + */ +static __inline void +im6f_init(struct in6_mfilter *imf, const int st0, const int st1) +{ + memset(imf, 0, sizeof(struct in6_mfilter)); + RB_INIT(&imf->im6f_sources); + imf->im6f_st[0] = st0; + imf->im6f_st[1] = st1; +} + +/* + * Resize the ip6_moptions vector to the next power-of-two minus 1. + * May be called with locks held; do not sleep. + */ +static int +im6o_grow(struct ip6_moptions *imo) +{ + struct in6_multi **nmships; + struct in6_multi **omships; + struct in6_mfilter *nmfilters; + struct in6_mfilter *omfilters; + size_t idx; + size_t newmax; + size_t oldmax; + + nmships = NULL; + nmfilters = NULL; + omships = imo->im6o_membership; + omfilters = imo->im6o_mfilters; + oldmax = imo->im6o_max_memberships; + newmax = ((oldmax + 1) * 2) - 1; + + if (newmax <= IPV6_MAX_MEMBERSHIPS) { + nmships = (struct in6_multi **)realloc(omships, + sizeof(struct in6_multi *) * newmax, M_IP6MOPTS, M_NOWAIT); + nmfilters = (struct in6_mfilter *)realloc(omfilters, + sizeof(struct in6_mfilter) * newmax, M_IN6MFILTER, + M_NOWAIT); + if (nmships != NULL && nmfilters != NULL) { + /* Initialize newly allocated source filter heads. 
*/ + for (idx = oldmax; idx < newmax; idx++) { + im6f_init(&nmfilters[idx], MCAST_UNDEFINED, + MCAST_EXCLUDE); + } + imo->im6o_max_memberships = newmax; + imo->im6o_membership = nmships; + imo->im6o_mfilters = nmfilters; + } + } + + if (nmships == NULL || nmfilters == NULL) { + if (nmships != NULL) + free(nmships, M_IP6MOPTS); + if (nmfilters != NULL) + free(nmfilters, M_IN6MFILTER); + return (ETOOMANYREFS); + } + + return (0); +} + +/* + * Find an IPv6 multicast group entry for this ip6_moptions instance + * which matches the specified group, and optionally an interface. + * Return its index into the array, or -1 if not found. + */ +static size_t +im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp, + const struct sockaddr *group) +{ + const struct sockaddr_in6 *gsin6; + struct in6_multi **pinm; + int idx; + int nmships; + + gsin6 = (const struct sockaddr_in6 *)group; + + /* The im6o_membership array may be lazy allocated. */ + if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0) + return (-1); + + nmships = imo->im6o_num_memberships; + pinm = &imo->im6o_membership[0]; + for (idx = 0; idx < nmships; idx++, pinm++) { + if (*pinm == NULL) + continue; + if ((ifp == NULL || ((*pinm)->in6m_ifp == ifp)) && + IN6_ARE_ADDR_EQUAL(&(*pinm)->in6m_addr, + &gsin6->sin6_addr)) { + break; + } + } + if (idx >= nmships) + idx = -1; + + return (idx); +} + +/* + * Find an IPv6 multicast source entry for this imo which matches + * the given group index for this socket, and source address. + * + * NOTE: This does not check if the entry is in-mode, merely if + * it exists, which may not be the desired behaviour. + */ +static struct in6_msource * +im6o_match_source(const struct ip6_moptions *imo, const size_t gidx, + const struct sockaddr *src) +{ + struct ip6_msource find; + struct in6_mfilter *imf; + struct ip6_msource *ims; + const sockunion_t *psa; + + KASSERT(src->sa_family == AF_INET6, ("%s: !AF_INET6", __func__)); + KASSERT(gidx != -1 && gidx < imo->im6o_num_memberships, + ("%s: invalid index %d\n", __func__, (int)gidx)); + + /* The im6o_mfilters array may be lazy allocated. */ + if (imo->im6o_mfilters == NULL) + return (NULL); + imf = &imo->im6o_mfilters[gidx]; + + psa = (const sockunion_t *)src; + find.im6s_addr = psa->sin6.sin6_addr; + ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); + + return ((struct in6_msource *)ims); +} + +/* + * Perform filtering for multicast datagrams on a socket by group and source. + * + * Returns 0 if a datagram should be allowed through, or various error codes + * if the socket was not a member of the group, or the source was muted, etc. + */ +int +im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp, + const struct sockaddr *group, const struct sockaddr *src) +{ + size_t gidx; + struct in6_msource *ims; + int mode; + + KASSERT(ifp != NULL, ("%s: null ifp", __func__)); + + gidx = im6o_match_group(imo, ifp, group); + if (gidx == -1) + return (MCAST_NOTGMEMBER); + + /* + * Check if the source was included in an (S,G) join. + * Allow reception on exclusive memberships by default, + * reject reception on inclusive memberships by default. + * Exclude source only if an in-mode exclude filter exists. + * Include source only if an in-mode include filter exists. + * NOTE: We are comparing group state here at MLD t1 (now) + * with socket-layer t0 (since last downcall). 
+ */ + mode = imo->im6o_mfilters[gidx].im6f_st[1]; + ims = im6o_match_source(imo, gidx, src); + + if ((ims == NULL && mode == MCAST_INCLUDE) || + (ims != NULL && ims->im6sl_st[0] != mode)) + return (MCAST_NOTSMEMBER); + + return (MCAST_PASS); +} + +/* + * Find and return a reference to an in6_multi record for (ifp, group), + * and bump its reference count. + * If one does not exist, try to allocate it, and update link-layer multicast + * filters on ifp to listen for group. + * Assumes the IN6_MULTI lock is held across the call. + * Return 0 if successful, otherwise return an appropriate error code. + */ +static int +in6_mc_get(struct ifnet *ifp, const struct in6_addr *group, + struct in6_multi **pinm) +{ + struct sockaddr_in6 gsin6; + struct ifmultiaddr *ifma; + struct in6_multi *inm; + int error; + + error = 0; + + /* + * XXX: Accesses to ifma_protospec must be covered by IF_ADDR_LOCK; + * if_addmulti() takes this mutex itself, so we must drop and + * re-acquire around the call. + */ + IN6_MULTI_LOCK_ASSERT(); + IF_ADDR_LOCK(ifp); + + inm = in6m_lookup_locked(ifp, group); + if (inm != NULL) { + /* + * If we already joined this group, just bump the + * refcount and return it. + */ + KASSERT(inm->in6m_refcount >= 1, + ("%s: bad refcount %d", __func__, inm->in6m_refcount)); + ++inm->in6m_refcount; + *pinm = inm; + goto out_locked; + } + + memset(&gsin6, 0, sizeof(gsin6)); + gsin6.sin6_family = AF_INET6; + gsin6.sin6_len = sizeof(struct sockaddr_in6); + gsin6.sin6_addr = *group; + + /* + * Check if a link-layer group is already associated + * with this network-layer group on the given ifnet. + */ + IF_ADDR_UNLOCK(ifp); + error = if_addmulti(ifp, (struct sockaddr *)&gsin6, &ifma); + if (error != 0) + return (error); + IF_ADDR_LOCK(ifp); + + /* + * If something other than netinet6 is occupying the link-layer + * group, print a meaningful error message and back out of + * the allocation. + * Otherwise, bump the refcount on the existing network-layer + * group association and return it. + */ + if (ifma->ifma_protospec != NULL) { + inm = (struct in6_multi *)ifma->ifma_protospec; +#ifdef INVARIANTS + KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", + __func__)); + KASSERT(ifma->ifma_addr->sa_family == AF_INET6, + ("%s: ifma not AF_INET6", __func__)); + KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); + if (inm->in6m_ifma != ifma || inm->in6m_ifp != ifp || + !IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, group)) + panic("%s: ifma %p is inconsistent with %p (%p)", + __func__, ifma, inm, group); +#endif + ++inm->in6m_refcount; + *pinm = inm; + goto out_locked; + } + + IF_ADDR_LOCK_ASSERT(ifp); + + /* + * A new in6_multi record is needed; allocate and initialize it. + * We DO NOT perform an MLD join as the in6_ layer may need to + * push an initial source list down to MLD to support SSM. + * + * The initial source filter state is INCLUDE, {} as per the RFC. + * Pending state-changes per group are subject to a bounds check. 
+ */ + inm = malloc(sizeof(*inm), M_IP6MADDR, M_NOWAIT | M_ZERO); + if (inm == NULL) { + if_delmulti_ifma(ifma); + error = ENOMEM; + goto out_locked; + } + inm->in6m_addr = *group; + inm->in6m_ifp = ifp; + inm->in6m_mli = MLD_IFINFO(ifp); + inm->in6m_ifma = ifma; + inm->in6m_refcount = 1; + inm->in6m_state = MLD_NOT_MEMBER; + IFQ_SET_MAXLEN(&inm->in6m_scq, MLD_MAX_STATE_CHANGES); + + inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED; + inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; + RB_INIT(&inm->in6m_srcs); + + ifma->ifma_protospec = inm; + *pinm = inm; + +out_locked: + IF_ADDR_UNLOCK(ifp); + return (error); +} + +/* + * Drop a reference to an in6_multi record. + * + * If the refcount drops to 0, free the in6_multi record and + * delete the underlying link-layer membership. + */ +void +in6m_release_locked(struct in6_multi *inm) +{ + struct ifmultiaddr *ifma; + + IN6_MULTI_LOCK_ASSERT(); + + CTR2(KTR_MLD, "%s: refcount is %d", __func__, inm->in6m_refcount); + + if (--inm->in6m_refcount > 0) { + CTR2(KTR_MLD, "%s: refcount is now %d", __func__, + inm->in6m_refcount); + return; + } + + CTR2(KTR_MLD, "%s: freeing inm %p", __func__, inm); + + ifma = inm->in6m_ifma; + + /* XXX this access is not covered by IF_ADDR_LOCK */ + CTR2(KTR_MLD, "%s: purging ifma %p", __func__, ifma); + KASSERT(ifma->ifma_protospec == inm, + ("%s: ifma_protospec != inm", __func__)); + ifma->ifma_protospec = NULL; + + in6m_purge(inm); + + free(inm, M_IP6MADDR); + + if_delmulti_ifma(ifma); +} + +/* + * Clear recorded source entries for a group. + * Used by the MLD code. Caller must hold the IN6_MULTI lock. + * FIXME: Should reap. + */ +void +in6m_clear_recorded(struct in6_multi *inm) +{ + struct ip6_msource *ims; + + IN6_MULTI_LOCK_ASSERT(); + + RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) { + if (ims->im6s_stp) { + ims->im6s_stp = 0; + --inm->in6m_st[1].iss_rec; + } + } + KASSERT(inm->in6m_st[1].iss_rec == 0, + ("%s: iss_rec %d not 0", __func__, inm->in6m_st[1].iss_rec)); +} + +/* + * Record a source as pending for a Source-Group MLDv2 query. + * This lives here as it modifies the shared tree. + * + * inm is the group descriptor. + * naddr is the address of the source to record in network-byte order. + * + * If the net.inet6.mld.sgalloc sysctl is non-zero, we will + * lazy-allocate a source node in response to an SG query. + * Otherwise, no allocation is performed. This saves some memory + * with the trade-off that the source will not be reported to the + * router if joined in the window between the query response and + * the group actually being joined on the local host. + * + * VIMAGE: XXX: Currently the mld_sgalloc feature has been removed. + * This turns off the allocation of a recorded source entry if + * the group has not been joined. + * + * Return 0 if the source didn't exist or was already marked as recorded. + * Return 1 if the source was marked as recorded by this function. + * Return <0 if any error occured (negated errno code). 
+ */ +int +in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr) +{ + struct ip6_msource find; + struct ip6_msource *ims, *nims; + + IN6_MULTI_LOCK_ASSERT(); + + find.im6s_addr = *addr; + ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find); + if (ims && ims->im6s_stp) + return (0); + if (ims == NULL) { + if (inm->in6m_nsrc == in6_mcast_maxgrpsrc) + return (-ENOSPC); + nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE, + M_NOWAIT | M_ZERO); + if (nims == NULL) + return (-ENOMEM); + nims->im6s_addr = find.im6s_addr; + RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims); + ++inm->in6m_nsrc; + ims = nims; + } + + /* + * Mark the source as recorded and update the recorded + * source count. + */ + ++ims->im6s_stp; + ++inm->in6m_st[1].iss_rec; + + return (1); +} + +/* + * Return a pointer to an in6_msource owned by an in6_mfilter, + * given its source address. + * Lazy-allocate if needed. If this is a new entry its filter state is + * undefined at t0. + * + * imf is the filter set being modified. + * addr is the source address. + * + * SMPng: May be called with locks held; malloc must not block. + */ +static int +im6f_get_source(struct in6_mfilter *imf, const struct sockaddr_in6 *psin, + struct in6_msource **plims) +{ + struct ip6_msource find; + struct ip6_msource *ims, *nims; + struct in6_msource *lims; + int error; + + error = 0; + ims = NULL; + lims = NULL; + + find.im6s_addr = psin->sin6_addr; + ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); + lims = (struct in6_msource *)ims; + if (lims == NULL) { + if (imf->im6f_nsrc == in6_mcast_maxsocksrc) + return (ENOSPC); + nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER, + M_NOWAIT | M_ZERO); + if (nims == NULL) + return (ENOMEM); + lims = (struct in6_msource *)nims; + lims->im6s_addr = find.im6s_addr; + lims->im6sl_st[0] = MCAST_UNDEFINED; + RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims); + ++imf->im6f_nsrc; + } + + *plims = lims; + + return (error); +} + +/* + * Graft a source entry into an existing socket-layer filter set, + * maintaining any required invariants and checking allocations. + * + * The source is marked as being in the new filter mode at t1. + * + * Return the pointer to the new node, otherwise return NULL. + */ +static struct in6_msource * +im6f_graft(struct in6_mfilter *imf, const uint8_t st1, + const struct sockaddr_in6 *psin) +{ + struct ip6_msource *nims; + struct in6_msource *lims; + + nims = malloc(sizeof(struct in6_msource), M_IN6MFILTER, + M_NOWAIT | M_ZERO); + if (nims == NULL) + return (NULL); + lims = (struct in6_msource *)nims; + lims->im6s_addr = psin->sin6_addr; + lims->im6sl_st[0] = MCAST_UNDEFINED; + lims->im6sl_st[1] = st1; + RB_INSERT(ip6_msource_tree, &imf->im6f_sources, nims); + ++imf->im6f_nsrc; + + return (lims); +} + +/* + * Prune a source entry from an existing socket-layer filter set, + * maintaining any required invariants and checking allocations. + * + * The source is marked as being left at t1, it is not freed. + * + * Return 0 if no error occurred, otherwise return an errno value. + */ +static int +im6f_prune(struct in6_mfilter *imf, const struct sockaddr_in6 *psin) +{ + struct ip6_msource find; + struct ip6_msource *ims; + struct in6_msource *lims; + + find.im6s_addr = psin->sin6_addr; + ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); + if (ims == NULL) + return (ENOENT); + lims = (struct in6_msource *)ims; + lims->im6sl_st[1] = MCAST_UNDEFINED; + return (0); +} + +/* + * Revert socket-layer filter set deltas at t1 to t0 state. 
+ */ +static void +im6f_rollback(struct in6_mfilter *imf) +{ + struct ip6_msource *ims, *tims; + struct in6_msource *lims; + + RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) { + lims = (struct in6_msource *)ims; + if (lims->im6sl_st[0] == lims->im6sl_st[1]) { + /* no change at t1 */ + continue; + } else if (lims->im6sl_st[0] != MCAST_UNDEFINED) { + /* revert change to existing source at t1 */ + lims->im6sl_st[1] = lims->im6sl_st[0]; + } else { + /* revert source added t1 */ + CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); + RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); + free(ims, M_IN6MFILTER); + imf->im6f_nsrc--; + } + } + imf->im6f_st[1] = imf->im6f_st[0]; +} + +/* + * Mark socket-layer filter set as INCLUDE {} at t1. + */ +static void +im6f_leave(struct in6_mfilter *imf) +{ + struct ip6_msource *ims; + struct in6_msource *lims; + + RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { + lims = (struct in6_msource *)ims; + lims->im6sl_st[1] = MCAST_UNDEFINED; + } + imf->im6f_st[1] = MCAST_INCLUDE; +} + +/* + * Mark socket-layer filter set deltas as committed. + */ +static void +im6f_commit(struct in6_mfilter *imf) +{ + struct ip6_msource *ims; + struct in6_msource *lims; + + RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { + lims = (struct in6_msource *)ims; + lims->im6sl_st[0] = lims->im6sl_st[1]; + } + imf->im6f_st[0] = imf->im6f_st[1]; +} + +/* + * Reap unreferenced sources from socket-layer filter set. + */ +static void +im6f_reap(struct in6_mfilter *imf) +{ + struct ip6_msource *ims, *tims; + struct in6_msource *lims; + + RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) { + lims = (struct in6_msource *)ims; + if ((lims->im6sl_st[0] == MCAST_UNDEFINED) && + (lims->im6sl_st[1] == MCAST_UNDEFINED)) { + CTR2(KTR_MLD, "%s: free lims %p", __func__, ims); + RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); + free(ims, M_IN6MFILTER); + imf->im6f_nsrc--; + } + } +} + +/* + * Purge socket-layer filter set. + */ +static void +im6f_purge(struct in6_mfilter *imf) +{ + struct ip6_msource *ims, *tims; + + RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) { + CTR2(KTR_MLD, "%s: free ims %p", __func__, ims); + RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); + free(ims, M_IN6MFILTER); + imf->im6f_nsrc--; + } + imf->im6f_st[0] = imf->im6f_st[1] = MCAST_UNDEFINED; + KASSERT(RB_EMPTY(&imf->im6f_sources), + ("%s: im6f_sources not empty", __func__)); +} + +/* + * Look up a source filter entry for a multicast group. + * + * inm is the group descriptor to work with. + * addr is the IPv6 address to look up. + * noalloc may be non-zero to suppress allocation of sources. + * *pims will be set to the address of the retrieved or allocated source. + * + * SMPng: NOTE: may be called with locks held. + * Return 0 if successful, otherwise return a non-zero error code. 
+ */ +static int +in6m_get_source(struct in6_multi *inm, const struct in6_addr *addr, + const int noalloc, struct ip6_msource **pims) +{ + struct ip6_msource find; + struct ip6_msource *ims, *nims; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif + + find.im6s_addr = *addr; + ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find); + if (ims == NULL && !noalloc) { + if (inm->in6m_nsrc == in6_mcast_maxgrpsrc) + return (ENOSPC); + nims = malloc(sizeof(struct ip6_msource), M_IP6MSOURCE, + M_NOWAIT | M_ZERO); + if (nims == NULL) + return (ENOMEM); + nims->im6s_addr = *addr; + RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims); + ++inm->in6m_nsrc; + ims = nims; + CTR3(KTR_MLD, "%s: allocated %s as %p", __func__, + ip6_sprintf(ip6tbuf, addr), ims); + } + + *pims = ims; + return (0); +} + +/* + * Merge socket-layer source into MLD-layer source. + * If rollback is non-zero, perform the inverse of the merge. + */ +static void +im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims, + const int rollback) +{ + int n = rollback ? -1 : 1; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; + + ip6_sprintf(ip6tbuf, &lims->im6s_addr); +#endif + + if (lims->im6sl_st[0] == MCAST_EXCLUDE) { + CTR3(KTR_MLD, "%s: t1 ex -= %d on %s", __func__, n, ip6tbuf); + ims->im6s_st[1].ex -= n; + } else if (lims->im6sl_st[0] == MCAST_INCLUDE) { + CTR3(KTR_MLD, "%s: t1 in -= %d on %s", __func__, n, ip6tbuf); + ims->im6s_st[1].in -= n; + } + + if (lims->im6sl_st[1] == MCAST_EXCLUDE) { + CTR3(KTR_MLD, "%s: t1 ex += %d on %s", __func__, n, ip6tbuf); + ims->im6s_st[1].ex += n; + } else if (lims->im6sl_st[1] == MCAST_INCLUDE) { + CTR3(KTR_MLD, "%s: t1 in += %d on %s", __func__, n, ip6tbuf); + ims->im6s_st[1].in += n; + } +} + +/* + * Atomically update the global in6_multi state, when a membership's + * filter list is being updated in any way. + * + * imf is the per-inpcb-membership group filter pointer. + * A fake imf may be passed for in-kernel consumers. + * + * XXX This is a candidate for a set-symmetric-difference style loop + * which would eliminate the repeated lookup from root of ims nodes, + * as they share the same key space. + * + * If any error occurred this function will back out of refcounts + * and return a non-zero value. + */ +static int +in6m_merge(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) +{ + struct ip6_msource *ims, *nims; + struct in6_msource *lims; + int schanged, error; + int nsrc0, nsrc1; + + schanged = 0; + error = 0; + nsrc1 = nsrc0 = 0; + + /* + * Update the source filters first, as this may fail. + * Maintain count of in-mode filters at t0, t1. These are + * used to work out if we transition into ASM mode or not. + * Maintain a count of source filters whose state was + * actually modified by this operation. 
+ */ + RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) { + lims = (struct in6_msource *)ims; + if (lims->im6sl_st[0] == imf->im6f_st[0]) nsrc0++; + if (lims->im6sl_st[1] == imf->im6f_st[1]) nsrc1++; + if (lims->im6sl_st[0] == lims->im6sl_st[1]) continue; + error = in6m_get_source(inm, &lims->im6s_addr, 0, &nims); + ++schanged; + if (error) + break; + im6s_merge(nims, lims, 0); + } + if (error) { + struct ip6_msource *bims; + + RB_FOREACH_REVERSE_FROM(ims, ip6_msource_tree, nims) { + lims = (struct in6_msource *)ims; + if (lims->im6sl_st[0] == lims->im6sl_st[1]) + continue; + (void)in6m_get_source(inm, &lims->im6s_addr, 1, &bims); + if (bims == NULL) + continue; + im6s_merge(bims, lims, 1); + } + goto out_reap; + } + + CTR3(KTR_MLD, "%s: imf filters in-mode: %d at t0, %d at t1", + __func__, nsrc0, nsrc1); + + /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ + if (imf->im6f_st[0] == imf->im6f_st[1] && + imf->im6f_st[1] == MCAST_INCLUDE) { + if (nsrc1 == 0) { + CTR1(KTR_MLD, "%s: --in on inm at t1", __func__); + --inm->in6m_st[1].iss_in; + } + } + + /* Handle filter mode transition on socket. */ + if (imf->im6f_st[0] != imf->im6f_st[1]) { + CTR3(KTR_MLD, "%s: imf transition %d to %d", + __func__, imf->im6f_st[0], imf->im6f_st[1]); + + if (imf->im6f_st[0] == MCAST_EXCLUDE) { + CTR1(KTR_MLD, "%s: --ex on inm at t1", __func__); + --inm->in6m_st[1].iss_ex; + } else if (imf->im6f_st[0] == MCAST_INCLUDE) { + CTR1(KTR_MLD, "%s: --in on inm at t1", __func__); + --inm->in6m_st[1].iss_in; + } + + if (imf->im6f_st[1] == MCAST_EXCLUDE) { + CTR1(KTR_MLD, "%s: ex++ on inm at t1", __func__); + inm->in6m_st[1].iss_ex++; + } else if (imf->im6f_st[1] == MCAST_INCLUDE && nsrc1 > 0) { + CTR1(KTR_MLD, "%s: in++ on inm at t1", __func__); + inm->in6m_st[1].iss_in++; + } + } + + /* + * Track inm filter state in terms of listener counts. + * If there are any exclusive listeners, stack-wide + * membership is exclusive. + * Otherwise, if only inclusive listeners, stack-wide is inclusive. + * If no listeners remain, state is undefined at t1, + * and the MLD lifecycle for this group should finish. + */ + if (inm->in6m_st[1].iss_ex > 0) { + CTR1(KTR_MLD, "%s: transition to EX", __func__); + inm->in6m_st[1].iss_fmode = MCAST_EXCLUDE; + } else if (inm->in6m_st[1].iss_in > 0) { + CTR1(KTR_MLD, "%s: transition to IN", __func__); + inm->in6m_st[1].iss_fmode = MCAST_INCLUDE; + } else { + CTR1(KTR_MLD, "%s: transition to UNDEF", __func__); + inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; + } + + /* Decrement ASM listener count on transition out of ASM mode. */ + if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { + if ((imf->im6f_st[1] != MCAST_EXCLUDE) || + (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) + CTR1(KTR_MLD, "%s: --asm on inm at t1", __func__); + --inm->in6m_st[1].iss_asm; + } + + /* Increment ASM listener count on transition to ASM mode. */ *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
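
For context, and not part of r191665 itself: a minimal userland sketch of the kind of
source-specific join that this module is intended to service once it is connected to the
build. It uses only the protocol-independent RFC 3678 socket API (struct group_source_req
and MCAST_JOIN_SOURCE_GROUP); the interface name and the group and source addresses below
are placeholders, not values taken from the commit.

/*
 * Illustrative sketch only -- not part of the committed file.
 * Join an IPv6 SSM (source, group) pair on a UDP socket; the kernel
 * side of this setsockopt() is handled by the in6p_join_group() path
 * in in6_mcast.c once the module is built and connected.
 */
#include <sys/socket.h>

#include <net/if.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct group_source_req gsr;
	struct sockaddr_in6 *grp, *src;
	int s;

	s = socket(AF_INET6, SOCK_DGRAM, 0);
	if (s == -1)
		err(1, "socket");

	memset(&gsr, 0, sizeof(gsr));
	gsr.gsr_interface = if_nametoindex("em0");	/* placeholder ifname */

	grp = (struct sockaddr_in6 *)&gsr.gsr_group;
	grp->sin6_family = AF_INET6;
	grp->sin6_len = sizeof(*grp);
	inet_pton(AF_INET6, "ff3e::8000:1", &grp->sin6_addr);	/* placeholder group */

	src = (struct sockaddr_in6 *)&gsr.gsr_source;
	src->sin6_family = AF_INET6;
	src->sin6_len = sizeof(*src);
	inet_pton(AF_INET6, "2001:db8::1", &src->sin6_addr);	/* placeholder source */

	if (setsockopt(s, IPPROTO_IPV6, MCAST_JOIN_SOURCE_GROUP,
	    &gsr, sizeof(gsr)) == -1)
		err(1, "MCAST_JOIN_SOURCE_GROUP");

	/* ... bind and receive traffic from (source, group) here ... */

	close(s);
	return (0);
}

In rough terms, that setsockopt() is where the socket-layer filter (im6f_*) and group
(in6_multi, via in6_mc_get()) state shown in the diff above gets created, merged with
in6m_merge(), and then handed down to MLD.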