Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 30 Dec 2020 18:58:34 -0800
From:      Ryan Libby <rlibby@freebsd.org>
To:        Hans Petter Selasky <hselasky@freebsd.org>
Cc:        src-committers <src-committers@freebsd.org>, dev-commits-src-all@freebsd.org, dev-commits-src-main@freebsd.org
Subject:   Re: git: ec52ff6d1411 - main - Streamline the infiniband code according to the ethernet code.
Message-ID:  <CAHgpiFx8ROyMA1y8WzrA-myp_Tpdapt=QJaAqGib6yRQzkhb_Q@mail.gmail.com>
In-Reply-To: <202012291703.0BTH345t067627@gitrepo.freebsd.org>
References:  <202012291703.0BTH345t067627@gitrepo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On Tue, Dec 29, 2020 at 9:03 AM Hans Petter Selasky
<hselasky@freebsd.org> wrote:
>
> The branch main has been updated by hselasky:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=ec52ff6d14117573afef970604d5bf6b9691bc88
>
> commit ec52ff6d14117573afef970604d5bf6b9691bc88
> Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
> AuthorDate: 2020-12-29 17:01:57 +0000
> Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
> CommitDate: 2020-12-29 17:01:57 +0000
>
>     Streamline the infiniband code according to the ethernet code.
>
>     Specifically implement the if_requestencap callback function for infiniband.
>     Most of the changes are simply a cut and paste of the equivalent ethernet part.
>
>     Reviewed by:    melifaro @
>     Differential Revision:  https://reviews.freebsd.org/D27631
>     MFC after:      1 week
>     Sponsored by:   Mellanox Technologies // NVIDIA Networking
> ---
>  sys/net/if_infiniband.c | 303 ++++++++++++++++++++++++++++++++----------------
>  1 file changed, 201 insertions(+), 102 deletions(-)
>
> diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c
> index 1d6d561c4d83..bf33457b0a4f 100644
> --- a/sys/net/if_infiniband.c
> +++ b/sys/net/if_infiniband.c
> @@ -143,141 +143,240 @@ infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
>         mb->m_pkthdr.len += sizeof(*ibh);
>  }
>
> +static void
> +update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
> +{
> +       int csum_flags = 0;
> +
> +       if (src->m_pkthdr.csum_flags & CSUM_IP)
> +               csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
> +       if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
> +               csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
> +       if (src->m_pkthdr.csum_flags & CSUM_SCTP)
> +               csum_flags |= CSUM_SCTP_VALID;
> +       dst->m_pkthdr.csum_flags |= csum_flags;
> +       if (csum_flags & CSUM_DATA_VALID)
> +               dst->m_pkthdr.csum_data = 0xffff;
> +}
> +
>  /*
> - * Infiniband output routine.
> + * Handle link-layer encapsulation requests.
>   */
>  static int
> -infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
> -    struct route *ro)
> +infiniband_requestencap(struct ifnet *ifp, struct if_encap_req *req)
>  {
> -       uint8_t edst[INFINIBAND_ADDR_LEN];
> -#if defined(INET) || defined(INET6)
> -       struct llentry *lle = NULL;
> -#endif
> -       struct infiniband_header *ibh;
> -       int error = 0;
> -       uint16_t type;
> -       bool is_gw;
> +       struct infiniband_header *ih;
> +       struct arphdr *ah;
> +       uint16_t etype;
> +       const uint8_t *lladdr;
>
> -       NET_EPOCH_ASSERT();
> -
> -       is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
> +       if (req->rtype != IFENCAP_LL)
> +               return (EOPNOTSUPP);
>
> -#ifdef MAC
> -       error = mac_ifnet_check_transmit(ifp, m);
> -       if (error)
> -               goto bad;
> -#endif
> +       if (req->bufsize < INFINIBAND_HDR_LEN)
> +               return (ENOMEM);
>
> -       M_PROFILE(m);
> -       if (ifp->if_flags & IFF_MONITOR) {
> -               error = ENETDOWN;
> -               goto bad;
> -       }
> -       if (!((ifp->if_flags & IFF_UP) &&
> -           (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
> -               error = ENETDOWN;
> -               goto bad;
> -       }
> +       ih = (struct infiniband_header *)req->buf;
> +       lladdr = req->lladdr;
> +       req->lladdr_off = 0;
>
> -       switch (dst->sa_family) {
> -       case AF_LINK:
> -               goto output;
> -#ifdef INET
> +       switch (req->family) {
>         case AF_INET:
> -               if (lle != NULL && (lle->la_flags & LLE_VALID)) {
> -                       memcpy(edst, lle->ll_addr, sizeof(edst));
> -               } else if (m->m_flags & M_MCAST) {
> -                       infiniband_ipv4_multicast_map(
> -                           ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
> -                           ifp->if_broadcastaddr, edst);
> -               } else {
> -                       error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
> -                       if (error) {
> -                               if (error == EWOULDBLOCK)
> -                                       error = 0;
> -                               m = NULL; /* mbuf is consumed by resolver */
> -                               goto bad;
> -                       }
> -               }
> -               type = htons(ETHERTYPE_IP);
> +               etype = htons(ETHERTYPE_IP);
>                 break;
> -       case AF_ARP: {
> -               struct arphdr *ah;
> -
> -               if (m->m_len < sizeof(*ah)) {
> -                       error = EINVAL;
> -                       goto bad;
> -               }
> -
> -               ah = mtod(m, struct arphdr *);
> -
> -               if (m->m_len < arphdr_len(ah)) {
> -                       error = EINVAL;
> -                       goto bad;
> -               }
> +       case AF_INET6:
> +               etype = htons(ETHERTYPE_IPV6);
> +               break;
> +       case AF_ARP:
> +               ah = (struct arphdr *)req->hdata;
>                 ah->ar_hrd = htons(ARPHRD_INFINIBAND);
>
>                 switch (ntohs(ah->ar_op)) {
>                 case ARPOP_REVREQUEST:
>                 case ARPOP_REVREPLY:
> -                       type = htons(ETHERTYPE_REVARP);
> +                       etype = htons(ETHERTYPE_REVARP);
>                         break;
>                 case ARPOP_REQUEST:
>                 case ARPOP_REPLY:
>                 default:
> -                       type = htons(ETHERTYPE_ARP);
> +                       etype = htons(ETHERTYPE_ARP);
>                         break;
>                 }
>
> -               if (m->m_flags & M_BCAST) {
> -                       memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
> +               if (req->flags & IFENCAP_FLAG_BROADCAST)
> +                       lladdr = ifp->if_broadcastaddr;
> +               break;
> +       default:
> +               return (EAFNOSUPPORT);
> +       }
> +
> +       ih->ib_protocol = etype;
> +       ih->ib_reserved = 0;
> +       memcpy(ih->ib_hwaddr, lladdr, INFINIBAND_ADDR_LEN);
> +       req->bufsize = sizeof(struct infiniband_header);
> +
> +       return (0);
> +}
> +
> +static int
> +infiniband_resolve_addr(struct ifnet *ifp, struct mbuf *m,
> +    const struct sockaddr *dst, struct route *ro, uint8_t *phdr,
> +    uint32_t *pflags, struct llentry **plle)
> +{
> +       struct infiniband_header *ih;
> +       uint32_t lleflags = 0;
> +       int error = 0;
> +
> +       if (plle)
> +               *plle = NULL;
> +       ih = (struct infiniband_header *)phdr;
> +
> +       switch (dst->sa_family) {
> +#ifdef INET
> +       case AF_INET:
> +               if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
> +                       error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle);
>                 } else {
> -                       if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
> -                               error = EINVAL;
> -                               goto bad;
> +                       if (m->m_flags & M_BCAST) {
> +                               memcpy(ih->ib_hwaddr, ifp->if_broadcastaddr,
> +                                   INFINIBAND_ADDR_LEN);
> +                       } else {
> +                               infiniband_ipv4_multicast_map(
> +                                   ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
> +                                   ifp->if_broadcastaddr, ih->ib_hwaddr);
>                         }
> -                       memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
> +                       ih->ib_protocol = htons(ETHERTYPE_IP);
> +                       ih->ib_reserved = 0;
>                 }
>                 break;
> -       }
>  #endif
>  #ifdef INET6
> -       case AF_INET6: {
> -               const struct ip6_hdr *ip6;
> -
> -               ip6 = mtod(m, const struct ip6_hdr *);
> -               if (m->m_len < sizeof(*ip6)) {
> -                       error = EINVAL;
> -                       goto bad;
> -               } else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
> -                       memcpy(edst, lle->ll_addr, sizeof(edst));
> -               } else if (m->m_flags & M_MCAST) {
> +       case AF_INET6:
> +               if ((m->m_flags & M_MCAST) == 0) {
> +                       error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags, plle);
> +               } else {
>                         infiniband_ipv6_multicast_map(
>                             &((const struct sockaddr_in6 *)dst)->sin6_addr,
> -                           ifp->if_broadcastaddr, edst);
> -               } else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
> -                       memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
> -               } else {
> -                       error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
> -                       if (error) {
> -                               if (error == EWOULDBLOCK)
> -                                       error = 0;
> -                               m = NULL; /* mbuf is consumed by resolver */
> -                               goto bad;
> -                       }
> +                           ifp->if_broadcastaddr, ih->ib_hwaddr);
> +                       ih->ib_protocol = htons(ETHERTYPE_IPV6);
> +                       ih->ib_reserved = 0;
>                 }
> -               type = htons(ETHERTYPE_IPV6);
>                 break;
> -       }
>  #endif
>         default:
> -               error = EAFNOSUPPORT;
> +               if_printf(ifp, "can't handle af%d\n", dst->sa_family);
> +               if (m != NULL)
> +                       m_freem(m);
> +               return (EAFNOSUPPORT);
> +       }
> +
> +       if (error == EHOSTDOWN) {
> +               if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
> +                       error = EHOSTUNREACH;
> +       }
> +
> +       if (error != 0)
> +               return (error);
> +
> +       *pflags = RT_MAY_LOOP;
> +       if (lleflags & LLE_IFADDR)
> +               *pflags |= RT_L2_ME;
> +
> +       return (0);
> +}
> +
> +/*
> + * Infiniband output routine.
> + */
> +static int
> +infiniband_output(struct ifnet *ifp, struct mbuf *m,
> +    const struct sockaddr *dst, struct route *ro)
> +{
> +       uint8_t linkhdr[INFINIBAND_HDR_LEN];
> +       uint8_t *phdr;
> +#if defined(INET) || defined(INET6)
> +       struct llentry *lle = NULL;
> +#endif

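This broke tinderbox via the LINT-NOIP kernels: `lle` is declared only
inside this #if guard, but it is used unconditionally further down in
infiniband_output().  The code compiles fine with the ifdef guard
removed.  Can we just delete the guard?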

> +       struct infiniband_header *ih;
> +       int error = 0;
> +       int hlen;       /* link layer header length */
> +       uint32_t pflags;
> +       bool addref;
> +
> +       NET_EPOCH_ASSERT();
> +
> +       addref = false;
> +       phdr = NULL;
> +       pflags = 0;
> +       if (ro != NULL) {
> +               /* XXX BPF uses ro_prepend */
> +               if (ro->ro_prepend != NULL) {
> +                       phdr = ro->ro_prepend;
> +                       hlen = ro->ro_plen;
> +               } else if (!(m->m_flags & (M_BCAST | M_MCAST))) {
> +                       if ((ro->ro_flags & RT_LLE_CACHE) != 0) {
> +                               lle = ro->ro_lle;
> +                               if (lle != NULL &&
> +                                   (lle->la_flags & LLE_VALID) == 0) {
> +                                       LLE_FREE(lle);
> +                                       lle = NULL;     /* redundant */
> +                                       ro->ro_lle = NULL;
> +                               }
> +                               if (lle == NULL) {
> +                                       /* if we lookup, keep cache */
> +                                       addref = 1;
> +                               } else
> +                                       /*
> +                                        * Notify LLE code that
> +                                        * the entry was used
> +                                        * by datapath.
> +                                        */
> +                                       llentry_mark_used(lle);
> +                       }
> +                       if (lle != NULL) {
> +                               phdr = lle->r_linkdata;
> +                               hlen = lle->r_hdrlen;
> +                               pflags = lle->r_flags;
> +                       }
> +               }
> +       }
> +
> +#ifdef MAC
> +       error = mac_ifnet_check_transmit(ifp, m);
> +       if (error)
> +               goto bad;
> +#endif
> +
> +       M_PROFILE(m);
> +       if (ifp->if_flags & IFF_MONITOR) {
> +               error = ENETDOWN;
> +               goto bad;
> +       }
> +       if (!((ifp->if_flags & IFF_UP) &&
> +           (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
> +               error = ENETDOWN;
>                 goto bad;
>         }
>
> +       if (phdr == NULL) {
> +               /* No prepend data supplied. Try to calculate ourselves. */
> +               phdr = linkhdr;
> +               hlen = INFINIBAND_HDR_LEN;
> +               error = infiniband_resolve_addr(ifp, m, dst, ro, phdr, &pflags,
> +                   addref ? &lle : NULL);
> +               if (addref && lle != NULL)
> +                       ro->ro_lle = lle;
> +               if (error != 0)
> +                       return (error == EWOULDBLOCK ? 0 : error);
> +       }
> +
> +       if ((pflags & RT_L2_ME) != 0) {
> +               update_mbuf_csumflags(m, m);
> +               return (if_simloop(ifp, m, dst->sa_family, 0));
> +       }
> +
>         /*
> -        * Add local net header.  If no space in first mbuf,
> +        * Add local infiniband header. If no space in first mbuf,
>          * allocate another.
>          */
>         M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
> @@ -285,16 +384,15 @@ infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
>                 error = ENOBUFS;
>                 goto bad;
>         }
> -       ibh = mtod(m, struct infiniband_header *);
> -
> -       ibh->ib_protocol = type;
> -       memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
> +       if ((pflags & RT_HAS_HEADER) == 0) {
> +               ih = mtod(m, struct infiniband_header *);
> +               memcpy(ih, phdr, hlen);
> +       }
>
>         /*
>          * Queue message on interface, update output statistics if
>          * successful, and start output if interface not yet active.
>          */
> -output:
>         return (ifp->if_transmit(ifp, m));
>  bad:
>         if (m != NULL)
> @@ -484,6 +582,7 @@ infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
>         ifp->if_output = infiniband_output;
>         ifp->if_input = infiniband_input;
>         ifp->if_resolvemulti = infiniband_resolvemulti;
> +       ifp->if_requestencap = infiniband_requestencap;
>
>         if (ifp->if_baudrate == 0)
>                 ifp->if_baudrate = IF_Gbps(10); /* default value */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAHgpiFx8ROyMA1y8WzrA-myp_Tpdapt=QJaAqGib6yRQzkhb_Q>