Date: Fri, 11 Feb 2011 11:22:14 +0000 (UTC)
From: Jeff Roberson <jeff@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r218552 - projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib
Message-ID: <201102111122.p1BBMEIh057808@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: jeff
Date: Fri Feb 11 11:22:14 2011
New Revision: 218552
URL: http://svn.freebsd.org/changeset/base/218552

Log:
 - Eliminate zero length mbufs when loading the tx descriptor. These cause the driver to hang. The stack creates them when making IP fragments for unknown reasons.
 - Make it safe to poll tx completions without the device lock held. This significantly improves TCP performance.
 - I had erroneously set the MTU based on the receive size which includes the GRH. Correct this.

Modified:
 projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
 projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
 projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
 projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
 projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h ============================================================================== --- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h Fri Feb 11 10:50:33 2011 (r218551) +++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h Fri Feb 11 11:22:14 2011 (r218552) @@ -91,7 +91,7 @@ /* constants */ #define INFINIBAND_ALEN 20 /* Octets in IPoIB HW addr */ -#define MAX_MB_FRAGS (8192 / MCLBYTES) +#define MAX_MB_FRAGS ((8192 / MCLBYTES) + 2) #ifdef IPOIB_CM #define CONFIG_INFINIBAND_IPOIB_CM @@ -99,6 +99,7 @@ #ifdef IPOIB_DEBUG #define CONFIG_INFINIBAND_IPOIB_DEBUG +#define CONFIG_INFINIBAND_IPOIB_DEBUG_DATA #endif enum ipoib_flush_level { @@ -110,7 +111,6 @@ enum ipoib_flush_level { enum { IPOIB_ENCAP_LEN = 4, IPOIB_HEADER_LEN = IPOIB_ENCAP_LEN + INFINIBAND_ALEN, - IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN, IPOIB_UD_RX_SG = 1, /* max buffer needed for 4K mtu */ IPOIB_CM_MAX_MTU = MJUM16BYTES, @@ -286,7 +286,6 @@ struct ipoib_cm_dev_priv { struct ifqueue mb_queue; struct list_head start_list; struct list_head reap_list; - struct ib_wc 
ibwc[IPOIB_NUM_WC]; struct ib_sge rx_sge[IPOIB_CM_RX_SG]; struct ib_recv_wr rx_wr; int nonsrq_conn_qp; @@ -414,7 +413,7 @@ struct ipoib_path { }; /* UD Only transmits encap len but we want the two sizes to be symmetrical. */ -#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IB_GRH_BYTES) +#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) #define IPOIB_CM_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) #define IPOIB_IS_MULTICAST(addr) ((addr)[4] == 0xff) @@ -519,6 +518,8 @@ void ipoib_drain_cq(struct ipoib_dev_pri int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req); void ipoib_dma_unmap_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req); +int ipoib_poll_tx(struct ipoib_dev_priv *priv); + void ipoib_set_ethtool_ops(struct ifnet *dev); int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca); Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c ============================================================================== --- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c Fri Feb 11 10:50:33 2011 (r218551) +++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c Fri Feb 11 11:22:14 2011 (r218552) @@ -94,6 +94,7 @@ static int ipoib_cm_post_receive_srq(str priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; priv->cm.rx_sge[0].addr = priv->cm.srq_ring[id].mapping[0]; + priv->cm.rx_sge[0].length = priv->cm.max_cm_mtu; ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); if (unlikely(ret)) { @@ -117,6 +118,7 @@ static int ipoib_cm_post_receive_nonsrq( wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; sge[0].addr = rx->rx_ring[id].mapping[0]; + priv->cm.rx_sge[0].length = priv->cm.max_cm_mtu; ret = ib_post_recv(rx->qp, wr, &bad_wr); if (unlikely(ret)) { @@ -505,11 +507,13 @@ void ipoib_cm_handle_rx_wc(struct ipoib_ if (unlikely(wr_id >= ipoib_recvq_size)) { if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) { + spin_lock(&priv->lock); 
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); ipoib_cm_start_rx_drain(priv); if (priv->cm.id != NULL) queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); + spin_unlock(&priv->lock); } else ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", wr_id, ipoib_recvq_size); @@ -532,8 +536,10 @@ void ipoib_cm_handle_rx_wc(struct ipoib_ goto repost; else { if (!--p->recv_count) { + spin_lock(&priv->lock); list_move(&p->list, &priv->cm.rx_reap_list); queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); + spin_unlock(&priv->lock); } return; } @@ -574,13 +580,9 @@ void ipoib_cm_handle_rx_wc(struct ipoib_ mb->m_pkthdr.rcvif = dev; proto = *mtod(mb, uint16_t *); m_adj(mb, IPOIB_ENCAP_LEN); - if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) - mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID; IPOIB_MTAP_PROTO(dev, mb, proto); - spin_unlock(&priv->lock); ipoib_demux(dev, mb, ntohs(proto)); - spin_lock(&priv->lock); repost: if (has_srq) { @@ -626,8 +628,11 @@ void ipoib_cm_send(struct ipoib_dev_priv struct ipoib_tx_buf *tx_req; struct ifnet *dev = priv->dev; + if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) + while (ipoib_poll_tx(priv)); /* nothing */ + m_adj(mb, sizeof(struct ipoib_pseudoheader)); - if (unlikely(mb->m_pkthdr.len > IPOIB_CM_MTU(tx->mtu))) { + if (unlikely(mb->m_pkthdr.len > tx->mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", mb->m_pkthdr.len, tx->mtu); ++dev->if_oerrors; @@ -655,11 +660,6 @@ void ipoib_cm_send(struct ipoib_dev_priv return; } - if (mb->m_pkthdr.csum_flags & (CSUM_IP|CSUM_TCP|CSUM_UDP)) - priv->tx_wr.send_flags |= IB_SEND_IP_CSUM; - else - priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; - if (unlikely(post_send(priv, tx, tx_req, tx->tx_head & (ipoib_sendq_size - 1)))) { ipoib_warn(priv, "post_send failed\n"); ++dev->if_oerrors; @@ -676,6 +676,7 @@ void ipoib_cm_send(struct ipoib_dev_priv dev->if_drv_flags |= IFF_DRV_OACTIVE; } } + } void 
ipoib_cm_handle_tx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) @@ -936,7 +937,7 @@ static struct ib_qp *ipoib_cm_create_tx_ struct ipoib_cm_tx *tx) { struct ib_qp_init_attr attr = { - .send_cq = priv->recv_cq, + .send_cq = priv->send_cq, .recv_cq = priv->recv_cq, .srq = priv->cm.srq, .cap.max_send_wr = ipoib_sendq_size, Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c ============================================================================== --- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c Fri Feb 11 10:50:33 2011 (r218551) +++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c Fri Feb 11 11:22:14 2011 (r218552) @@ -90,8 +90,8 @@ void ipoib_free_ah(struct kref *kref) static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, u64 mapping[IPOIB_UD_RX_SG]) { - ib_dma_unmap_single(priv->ca, mapping[0], priv->max_ib_mtu, - DMA_FROM_DEVICE); + ib_dma_unmap_single(priv->ca, mapping[0], + priv->max_ib_mtu + IB_GRH_BYTES, DMA_FROM_DEVICE); } static void ipoib_ud_mb_put_frags(struct ipoib_dev_priv *priv, @@ -110,6 +110,8 @@ static int ipoib_ib_post_receive(struct priv->rx_wr.wr_id = id | IPOIB_OP_RECV; priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0]; + priv->rx_sge[0].length = priv->max_ib_mtu + IB_GRH_BYTES; + ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); if (unlikely(ret)) { @@ -131,7 +133,7 @@ static struct mbuf *ipoib_alloc_rx_mb(st /* * XXX Should be calculated once and cached. 
*/ - buf_size = priv->max_ib_mtu; + buf_size = priv->max_ib_mtu + IB_GRH_BYTES; if (buf_size <= MCLBYTES) buf_size = MCLBYTES; else if (buf_size <= MJUMPAGESIZE) @@ -198,13 +200,18 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p mb = priv->rx_ring[wr_id].mb; if (unlikely(wc->status != IB_WC_SUCCESS)) { - if (wc->status != IB_WC_WR_FLUSH_ERR) + if (wc->status != IB_WC_WR_FLUSH_ERR) { ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); - m_freem(mb); - priv->rx_ring[wr_id].mb = NULL; + goto repost; + } + if (mb) { + ipoib_ud_dma_unmap_rx(priv, + priv->rx_ring[wr_id].mapping); + m_freem(mb); + priv->rx_ring[wr_id].mb = NULL; + } return; } @@ -243,9 +250,7 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID; - spin_unlock(&priv->lock); dev->if_input(dev, mb); - spin_lock(&priv->lock); repost: if (unlikely(ipoib_ib_post_receive(priv, wr_id))) @@ -257,11 +262,19 @@ int ipoib_dma_map_tx(struct ib_device *c { struct mbuf *mb = tx_req->mb; u64 *mapping = tx_req->mapping; - struct mbuf *m; + struct mbuf *m, *p; int error; int i; - for (m = mb, i = 0; m != NULL; m = m->m_next, i++); + for (m = mb, p = NULL, i = 0; m != NULL; p = m, m = m->m_next, i++) { + if (m->m_len != 0) + continue; + if (p == NULL) + panic("ipoib_dma_map_tx: First mbuf empty\n"); + p->m_next = m_free(m); + m = p; + i--; + } i--; if (i >= MAX_MB_FRAGS) { tx_req->mb = mb = m_defrag(mb, M_DONTWAIT); @@ -339,13 +352,19 @@ static void ipoib_ib_handle_tx_wc(struct wc->status, wr_id, wc->vendor_err); } -static int poll_tx(struct ipoib_dev_priv *priv) +int +ipoib_poll_tx(struct ipoib_dev_priv *priv) { int n, i; n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc); - for (i = 0; i < n; ++i) - ipoib_ib_handle_tx_wc(priv, priv->send_wc + i); + for (i = 0; i < n; ++i) { + struct 
ib_wc *wc = priv->send_wc + i; + if (wc->wr_id & IPOIB_OP_CM) + ipoib_cm_handle_tx_wc(priv, wc); + else + ipoib_ib_handle_tx_wc(priv, wc); + } return n == MAX_SEND_CQE; } @@ -362,13 +381,13 @@ poll_more: for (i = 0; i < n; i++) { struct ib_wc *wc = priv->ibwc + i; - if (wc->wr_id & IPOIB_OP_RECV) { - if (wc->wr_id & IPOIB_OP_CM) - ipoib_cm_handle_rx_wc(priv, wc); - else - ipoib_ib_handle_rx_wc(priv, wc); - } else - ipoib_cm_handle_tx_wc(priv, wc); + if ((wc->wr_id & IPOIB_OP_RECV) == 0) + panic("ipoib_poll: Bad wr_id 0x%jX\n", + (intmax_t)wc->wr_id); + if (wc->wr_id & IPOIB_OP_CM) + ipoib_cm_handle_rx_wc(priv, wc); + else + ipoib_ib_handle_rx_wc(priv, wc); } if (n != IPOIB_NUM_WC) @@ -384,9 +403,7 @@ void ipoib_ib_completion(struct ib_cq *c { struct ipoib_dev_priv *priv = dev_ptr; - spin_lock(&priv->lock); ipoib_poll(priv); - spin_unlock(&priv->lock); } static void drain_tx_cq(struct ipoib_dev_priv *priv) @@ -394,7 +411,7 @@ static void drain_tx_cq(struct ipoib_dev struct ifnet *dev = priv->dev; spin_lock(&priv->lock); - while (poll_tx(priv)) + while (ipoib_poll_tx(priv)) ; /* nothing */ if (dev->if_drv_flags & IFF_DRV_OACTIVE) @@ -430,6 +447,7 @@ post_send(struct ipoib_dev_priv *priv, u priv->tx_wr.wr.ud.remote_qpn = qpn; priv->tx_wr.wr.ud.ah = address; + if (head) { priv->tx_wr.wr.ud.mss = 0; /* XXX mb_shinfo(mb)->gso_size; */ priv->tx_wr.wr.ud.header = head; @@ -450,6 +468,10 @@ ipoib_send(struct ipoib_dev_priv *priv, int hlen; void *phead; + if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) + while (ipoib_poll_tx(priv)) + ; /* nothing */ + m_adj(mb, sizeof (struct ipoib_pseudoheader)); if (0 /* XXX segment offload mb_is_gso(mb) */) { /* XXX hlen = mb_transport_offset(mb) + tcp_hdrlen(mb); */ @@ -462,7 +484,7 @@ ipoib_send(struct ipoib_dev_priv *priv, } m_adj(mb, hlen); } else { - if (unlikely(mb->m_pkthdr.len > priv->mcast_mtu)) { + if (unlikely(mb->m_pkthdr.len - IPOIB_ENCAP_LEN > priv->mcast_mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, 
dropping\n", mb->m_pkthdr.len, priv->mcast_mtu); ++dev->if_oerrors; @@ -518,10 +540,6 @@ ipoib_send(struct ipoib_dev_priv *priv, address->last_send = priv->tx_head; ++priv->tx_head; } - - if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) - while (poll_tx(priv)) - ; /* nothing */ } static void __ipoib_reap_ah(struct ipoib_dev_priv *priv) @@ -681,7 +699,6 @@ void ipoib_drain_cq(struct ipoib_dev_pri { int i, n; - spin_lock(&priv->lock); do { n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc); for (i = 0; i < n; ++i) { @@ -693,17 +710,18 @@ void ipoib_drain_cq(struct ipoib_dev_pri if (priv->ibwc[i].status == IB_WC_SUCCESS) priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR; - if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) { - if (priv->ibwc[i].wr_id & IPOIB_OP_CM) - ipoib_cm_handle_rx_wc(priv, priv->ibwc + i); - else - ipoib_ib_handle_rx_wc(priv, priv->ibwc + i); - } else - ipoib_cm_handle_tx_wc(priv, priv->ibwc + i); + if ((priv->ibwc[i].wr_id & IPOIB_OP_RECV) == 0) + panic("ipoib_drain_cq: Bad wrid 0x%jX\n", + (intmax_t)priv->ibwc[i].wr_id); + if (priv->ibwc[i].wr_id & IPOIB_OP_CM) + ipoib_cm_handle_rx_wc(priv, priv->ibwc + i); + else + ipoib_ib_handle_rx_wc(priv, priv->ibwc + i); } } while (n == IPOIB_NUM_WC); - while (poll_tx(priv)) + spin_lock(&priv->lock); + while (ipoib_poll_tx(priv)) ; /* nothing */ spin_unlock(&priv->lock); Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c ============================================================================== --- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c Fri Feb 11 10:50:33 2011 (r218551) +++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c Fri Feb 11 11:22:14 2011 (r218552) @@ -596,7 +596,7 @@ path_rec_start(struct ipoib_dev_priv *pr p_rec = path->pathrec; p_rec.mtu_selector = IB_SA_GT; - switch (roundup_pow_of_two(dev->if_mtu + IB_GRH_BYTES)) { + switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) { case 512: p_rec.mtu = IB_MTU_256; break; 
@@ -923,9 +923,11 @@ ipoib_set_dev_features(struct ipoib_dev_ priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; } +#if 0 if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) priv->dev->if_capabilities |= IFCAP_TSO4 | CSUM_TSO; #endif +#endif priv->dev->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE; priv->dev->if_capenable = priv->dev->if_capabilities; Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c ============================================================================== --- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c Fri Feb 11 10:50:33 2011 (r218551) +++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c Fri Feb 11 11:22:14 2011 (r218552) @@ -222,7 +222,6 @@ int ipoib_transport_dev_init(struct ipoi priv->tx_wr.send_flags = IB_SEND_SIGNALED; priv->rx_sge[0].lkey = priv->mr->lkey; - priv->rx_sge[0].length = priv->max_ib_mtu; priv->rx_wr.num_sge = 1; priv->rx_wr.next = NULL; priv->rx_wr.sg_list = priv->rx_sge;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201102111122.p1BBMEIh057808>