Date: Wed, 30 Dec 2020 18:58:34 -0800 From: Ryan Libby <rlibby@freebsd.org> To: Hans Petter Selasky <hselasky@freebsd.org> Cc: src-committers <src-committers@freebsd.org>, dev-commits-src-all@freebsd.org, dev-commits-src-main@freebsd.org Subject: Re: git: ec52ff6d1411 - main - Streamline the infiniband code according to the ethernet code. Message-ID: <CAHgpiFx8ROyMA1y8WzrA-myp_Tpdapt=QJaAqGib6yRQzkhb_Q@mail.gmail.com> In-Reply-To: <202012291703.0BTH345t067627@gitrepo.freebsd.org> References: <202012291703.0BTH345t067627@gitrepo.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
On Tue, Dec 29, 2020 at 9:03 AM Hans Petter Selasky <hselasky@freebsd.org> wrote: > > The branch main has been updated by hselasky: > > URL: https://cgit.FreeBSD.org/src/commit/?id=ec52ff6d14117573afef970604d5bf6b9691bc88 > > commit ec52ff6d14117573afef970604d5bf6b9691bc88 > Author: Hans Petter Selasky <hselasky@FreeBSD.org> > AuthorDate: 2020-12-29 17:01:57 +0000 > Commit: Hans Petter Selasky <hselasky@FreeBSD.org> > CommitDate: 2020-12-29 17:01:57 +0000 > > Streamline the infiniband code according to the ethernet code. > > Specifically implement the if_requestencap callback function for infiniband. > Most of the changes are simply a cut and paste of the equivalent ethernet part. > > Reviewed by: melifaro @ > Differential Revision: https://reviews.freebsd.org/D27631 > MFC after: 1 week > Sponsored by: Mellanox Technologies // NVIDIA Networking > --- > sys/net/if_infiniband.c | 303 ++++++++++++++++++++++++++++++++---------------- > 1 file changed, 201 insertions(+), 102 deletions(-) > > diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c > index 1d6d561c4d83..bf33457b0a4f 100644 > --- a/sys/net/if_infiniband.c > +++ b/sys/net/if_infiniband.c > @@ -143,141 +143,240 @@ infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb) > mb->m_pkthdr.len += sizeof(*ibh); > } > > +static void > +update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst) > +{ > + int csum_flags = 0; > + > + if (src->m_pkthdr.csum_flags & CSUM_IP) > + csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); > + if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) > + csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); > + if (src->m_pkthdr.csum_flags & CSUM_SCTP) > + csum_flags |= CSUM_SCTP_VALID; > + dst->m_pkthdr.csum_flags |= csum_flags; > + if (csum_flags & CSUM_DATA_VALID) > + dst->m_pkthdr.csum_data = 0xffff; > +} > + > /* > - * Infiniband output routine. > + * Handle link-layer encapsulation requests. 
> */ > static int > -infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, > - struct route *ro) > +infiniband_requestencap(struct ifnet *ifp, struct if_encap_req *req) > { > - uint8_t edst[INFINIBAND_ADDR_LEN]; > -#if defined(INET) || defined(INET6) > - struct llentry *lle = NULL; > -#endif > - struct infiniband_header *ibh; > - int error = 0; > - uint16_t type; > - bool is_gw; > + struct infiniband_header *ih; > + struct arphdr *ah; > + uint16_t etype; > + const uint8_t *lladdr; > > - NET_EPOCH_ASSERT(); > - > - is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0); > + if (req->rtype != IFENCAP_LL) > + return (EOPNOTSUPP); > > -#ifdef MAC > - error = mac_ifnet_check_transmit(ifp, m); > - if (error) > - goto bad; > -#endif > + if (req->bufsize < INFINIBAND_HDR_LEN) > + return (ENOMEM); > > - M_PROFILE(m); > - if (ifp->if_flags & IFF_MONITOR) { > - error = ENETDOWN; > - goto bad; > - } > - if (!((ifp->if_flags & IFF_UP) && > - (ifp->if_drv_flags & IFF_DRV_RUNNING))) { > - error = ENETDOWN; > - goto bad; > - } > + ih = (struct infiniband_header *)req->buf; > + lladdr = req->lladdr; > + req->lladdr_off = 0; > > - switch (dst->sa_family) { > - case AF_LINK: > - goto output; > -#ifdef INET > + switch (req->family) { > case AF_INET: > - if (lle != NULL && (lle->la_flags & LLE_VALID)) { > - memcpy(edst, lle->ll_addr, sizeof(edst)); > - } else if (m->m_flags & M_MCAST) { > - infiniband_ipv4_multicast_map( > - ((const struct sockaddr_in *)dst)->sin_addr.s_addr, > - ifp->if_broadcastaddr, edst); > - } else { > - error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); > - if (error) { > - if (error == EWOULDBLOCK) > - error = 0; > - m = NULL; /* mbuf is consumed by resolver */ > - goto bad; > - } > - } > - type = htons(ETHERTYPE_IP); > + etype = htons(ETHERTYPE_IP); > break; > - case AF_ARP: { > - struct arphdr *ah; > - > - if (m->m_len < sizeof(*ah)) { > - error = EINVAL; > - goto bad; > - } > - > - ah = mtod(m, struct arphdr *); > - > - if 
(m->m_len < arphdr_len(ah)) { > - error = EINVAL; > - goto bad; > - } > + case AF_INET6: > + etype = htons(ETHERTYPE_IPV6); > + break; > + case AF_ARP: > + ah = (struct arphdr *)req->hdata; > ah->ar_hrd = htons(ARPHRD_INFINIBAND); > > switch (ntohs(ah->ar_op)) { > case ARPOP_REVREQUEST: > case ARPOP_REVREPLY: > - type = htons(ETHERTYPE_REVARP); > + etype = htons(ETHERTYPE_REVARP); > break; > case ARPOP_REQUEST: > case ARPOP_REPLY: > default: > - type = htons(ETHERTYPE_ARP); > + etype = htons(ETHERTYPE_ARP); > break; > } > > - if (m->m_flags & M_BCAST) { > - memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN); > + if (req->flags & IFENCAP_FLAG_BROADCAST) > + lladdr = ifp->if_broadcastaddr; > + break; > + default: > + return (EAFNOSUPPORT); > + } > + > + ih->ib_protocol = etype; > + ih->ib_reserved = 0; > + memcpy(ih->ib_hwaddr, lladdr, INFINIBAND_ADDR_LEN); > + req->bufsize = sizeof(struct infiniband_header); > + > + return (0); > +} > + > +static int > +infiniband_resolve_addr(struct ifnet *ifp, struct mbuf *m, > + const struct sockaddr *dst, struct route *ro, uint8_t *phdr, > + uint32_t *pflags, struct llentry **plle) > +{ > + struct infiniband_header *ih; > + uint32_t lleflags = 0; > + int error = 0; > + > + if (plle) > + *plle = NULL; > + ih = (struct infiniband_header *)phdr; > + > + switch (dst->sa_family) { > +#ifdef INET > + case AF_INET: > + if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) { > + error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle); > } else { > - if (ah->ar_hln != INFINIBAND_ADDR_LEN) { > - error = EINVAL; > - goto bad; > + if (m->m_flags & M_BCAST) { > + memcpy(ih->ib_hwaddr, ifp->if_broadcastaddr, > + INFINIBAND_ADDR_LEN); > + } else { > + infiniband_ipv4_multicast_map( > + ((const struct sockaddr_in *)dst)->sin_addr.s_addr, > + ifp->if_broadcastaddr, ih->ib_hwaddr); > } > - memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN); > + ih->ib_protocol = htons(ETHERTYPE_IP); > + ih->ib_reserved = 0; > } > break; > - } > #endif > #ifdef INET6 > 
- case AF_INET6: { > - const struct ip6_hdr *ip6; > - > - ip6 = mtod(m, const struct ip6_hdr *); > - if (m->m_len < sizeof(*ip6)) { > - error = EINVAL; > - goto bad; > - } else if (lle != NULL && (lle->la_flags & LLE_VALID)) { > - memcpy(edst, lle->ll_addr, sizeof(edst)); > - } else if (m->m_flags & M_MCAST) { > + case AF_INET6: > + if ((m->m_flags & M_MCAST) == 0) { > + error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, plle); > + } else { > infiniband_ipv6_multicast_map( > &((const struct sockaddr_in6 *)dst)->sin6_addr, > - ifp->if_broadcastaddr, edst); > - } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) { > - memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN); > - } else { > - error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); > - if (error) { > - if (error == EWOULDBLOCK) > - error = 0; > - m = NULL; /* mbuf is consumed by resolver */ > - goto bad; > - } > + ifp->if_broadcastaddr, ih->ib_hwaddr); > + ih->ib_protocol = htons(ETHERTYPE_IPV6); > + ih->ib_reserved = 0; > } > - type = htons(ETHERTYPE_IPV6); > break; > - } > #endif > default: > - error = EAFNOSUPPORT; > + if_printf(ifp, "can't handle af%d\n", dst->sa_family); > + if (m != NULL) > + m_freem(m); > + return (EAFNOSUPPORT); > + } > + > + if (error == EHOSTDOWN) { > + if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0) > + error = EHOSTUNREACH; > + } > + > + if (error != 0) > + return (error); > + > + *pflags = RT_MAY_LOOP; > + if (lleflags & LLE_IFADDR) > + *pflags |= RT_L2_ME; > + > + return (0); > +} > + > +/* > + * Infiniband output routine. > + */ > +static int > +infiniband_output(struct ifnet *ifp, struct mbuf *m, > + const struct sockaddr *dst, struct route *ro) > +{ > + uint8_t linkhdr[INFINIBAND_HDR_LEN]; > + uint8_t *phdr; > +#if defined(INET) || defined(INET6) > + struct llentry *lle = NULL; > +#endif This broke tinderbox via the LINT-NOIP kernels: lle is declared only under this ifdef guard, but infiniband_output() uses it unconditionally, so kernels built without INET and INET6 fail to compile. The code compiles fine with the ifdef guard removed. Can we just delete the guard?
> + struct infiniband_header *ih; > + int error = 0; > + int hlen; /* link layer header length */ > + uint32_t pflags; > + bool addref; > + > + NET_EPOCH_ASSERT(); > + > + addref = false; > + phdr = NULL; > + pflags = 0; > + if (ro != NULL) { > + /* XXX BPF uses ro_prepend */ > + if (ro->ro_prepend != NULL) { > + phdr = ro->ro_prepend; > + hlen = ro->ro_plen; > + } else if (!(m->m_flags & (M_BCAST | M_MCAST))) { > + if ((ro->ro_flags & RT_LLE_CACHE) != 0) { > + lle = ro->ro_lle; > + if (lle != NULL && > + (lle->la_flags & LLE_VALID) == 0) { > + LLE_FREE(lle); > + lle = NULL; /* redundant */ > + ro->ro_lle = NULL; > + } > + if (lle == NULL) { > + /* if we lookup, keep cache */ > + addref = 1; > + } else > + /* > + * Notify LLE code that > + * the entry was used > + * by datapath. > + */ > + llentry_mark_used(lle); > + } > + if (lle != NULL) { > + phdr = lle->r_linkdata; > + hlen = lle->r_hdrlen; > + pflags = lle->r_flags; > + } > + } > + } > + > +#ifdef MAC > + error = mac_ifnet_check_transmit(ifp, m); > + if (error) > + goto bad; > +#endif > + > + M_PROFILE(m); > + if (ifp->if_flags & IFF_MONITOR) { > + error = ENETDOWN; > + goto bad; > + } > + if (!((ifp->if_flags & IFF_UP) && > + (ifp->if_drv_flags & IFF_DRV_RUNNING))) { > + error = ENETDOWN; > goto bad; > } > > + if (phdr == NULL) { > + /* No prepend data supplied. Try to calculate ourselves. */ > + phdr = linkhdr; > + hlen = INFINIBAND_HDR_LEN; > + error = infiniband_resolve_addr(ifp, m, dst, ro, phdr, &pflags, > + addref ? &lle : NULL); > + if (addref && lle != NULL) > + ro->ro_lle = lle; > + if (error != 0) > + return (error == EWOULDBLOCK ? 0 : error); > + } > + > + if ((pflags & RT_L2_ME) != 0) { > + update_mbuf_csumflags(m, m); > + return (if_simloop(ifp, m, dst->sa_family, 0)); > + } > + > /* > - * Add local net header. If no space in first mbuf, > + * Add local infiniband header. If no space in first mbuf, > * allocate another. 
> */ > M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT); > @@ -285,16 +384,15 @@ infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, > error = ENOBUFS; > goto bad; > } > - ibh = mtod(m, struct infiniband_header *); > - > - ibh->ib_protocol = type; > - memcpy(ibh->ib_hwaddr, edst, sizeof(edst)); > + if ((pflags & RT_HAS_HEADER) == 0) { > + ih = mtod(m, struct infiniband_header *); > + memcpy(ih, phdr, hlen); > + } > > /* > * Queue message on interface, update output statistics if > * successful, and start output if interface not yet active. > */ > -output: > return (ifp->if_transmit(ifp, m)); > bad: > if (m != NULL) > @@ -484,6 +582,7 @@ infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb) > ifp->if_output = infiniband_output; > ifp->if_input = infiniband_input; > ifp->if_resolvemulti = infiniband_resolvemulti; > + ifp->if_requestencap = infiniband_requestencap; > > if (ifp->if_baudrate == 0) > ifp->if_baudrate = IF_Gbps(10); /* default value */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAHgpiFx8ROyMA1y8WzrA-myp_Tpdapt=QJaAqGib6yRQzkhb_Q>