From: Jeff Roberson <jeff@FreeBSD.org>
Date: Sat, 26 Feb 2011 04:02:55 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r219047 - projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib
Message-Id: <201102260402.p1Q42tON038342@svn.freebsd.org>

Author: jeff
Date: Sat Feb 26 04:02:54 2011
New Revision: 219047
URL: http://svn.freebsd.org/changeset/base/219047

Log:
  - Permit non-contiguous receive buffers.
  - If IPOIB_CM is enabled, set the MTU to the largest supported value to
    match the behavior of Linux.

Modified:
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
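The core of the change: CM and UD receive buffers become mbuf chains built
with m_getm2() instead of a single contiguous jumbo cluster from m_getjcl(),
so each post-receive walks the chain and fills one scatter/gather entry per
fragment before setting num_sge on the work request.  A minimal userland
sketch of that walk follows; struct seg and struct sge are hypothetical
stand-ins for the kernel's struct mbuf and the OFED struct ib_sge, and plain
pointers stand in for the DMA bus addresses kept in rx_req->mapping[].

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct seg {                    /* stand-in for struct mbuf */
        struct seg *next;       /* m_next: next segment in the chain */
        size_t      len;        /* m_len: valid bytes in this segment */
        char       *data;       /* m_data: segment payload */
};

struct sge {                    /* stand-in for struct ib_sge */
        uint64_t addr;
        uint32_t length;
};

/*
 * Record one (addr, length) pair per chain segment, the same walk
 * ipoib_cm_post_receive_srq() below performs over priv->cm.rx_sge[].
 * Returns the count to store in the work request's num_sge, or -1 if
 * the chain has more fragments than the queue pair can accept.
 */
static int
chain_to_sges(struct seg *chain, struct sge *sges, int max)
{
        struct seg *s;
        int i;

        for (s = chain, i = 0; s != NULL; s = s->next, i++) {
                if (i >= max)
                        return (-1);
                sges[i].addr = (uint64_t)(uintptr_t)s->data;
                sges[i].length = (uint32_t)s->len;
        }
        return (i);
}

int
main(void)
{
        char a[4096], b[4096];          /* two page-size fragments */
        struct seg s1 = { NULL, sizeof(b), b };
        struct seg s0 = { &s1, sizeof(a), a };
        struct sge sges[4];
        int i, n;

        n = chain_to_sges(&s0, sges, 4);
        for (i = 0; i < n; i++)
                printf("sge[%d]: addr=%#jx length=%u\n", i,
                    (uintmax_t)sges[i].addr, sges[i].length);
        return (0);
}

The fragment-count guard mirrors what the patch does on the transmit side,
where ipoib_dma_map_tx() now takes a "max" argument and falls back to
m_defrag() when a chain exceeds it.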
Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h	Sat Feb 26 04:02:54 2011	(r219047)
@@ -91,7 +91,6 @@ /* constants */
 
 #define INFINIBAND_ALEN	20	/* Octets in IPoIB HW addr */
-#define MAX_MB_FRAGS	((8192 / MCLBYTES) + 2)
 
 #ifdef IPOIB_CM
 #define CONFIG_INFINIBAND_IPOIB_CM
@@ -111,12 +110,16 @@ enum ipoib_flush_level {
 enum {
 	IPOIB_ENCAP_LEN		= 4,
 	IPOIB_HEADER_LEN	= IPOIB_ENCAP_LEN + INFINIBAND_ALEN,
-	IPOIB_UD_RX_SG		= 1,	/* max buffer needed for 4K mtu */
-
-	IPOIB_CM_MAX_MTU	= MJUM16BYTES,
-	IPOIB_CM_RX_SG		= 1,	/* We only allocate a single mbuf. */
+	IPOIB_UD_MAX_MTU	= 4 * 1024,
+	IPOIB_UD_RX_SG		= (IPOIB_UD_MAX_MTU / MJUMPAGESIZE),
+	IPOIB_UD_TX_SG		= (IPOIB_UD_MAX_MTU / MCLBYTES) + 2,
+	IPOIB_CM_MAX_MTU	= (64 * 1024),
+	IPOIB_CM_TX_SG		= (IPOIB_CM_MAX_MTU / MCLBYTES) + 2,
+	IPOIB_CM_RX_SG		= (IPOIB_CM_MAX_MTU / MJUMPAGESIZE),
 	IPOIB_RX_RING_SIZE	= 256,
 	IPOIB_TX_RING_SIZE	= 128,
+	IPOIB_MAX_RX_SG		= MAX(IPOIB_CM_RX_SG, IPOIB_UD_RX_SG),
+	IPOIB_MAX_TX_SG		= MAX(IPOIB_CM_TX_SG, IPOIB_UD_TX_SG),
 	IPOIB_MAX_QUEUE_SIZE	= 8192,
 	IPOIB_MIN_QUEUE_SIZE	= 2,
 	IPOIB_CM_MAX_CONN_QP	= 4096,
@@ -190,6 +193,16 @@ struct ipoib_mcast {
 	struct ipoib_dev_priv *priv;
 };
 
+struct ipoib_cm_rx_buf {
+	struct mbuf *mb;
+	u64	mapping[IPOIB_CM_RX_SG];
+};
+
+struct ipoib_cm_tx_buf {
+	struct mbuf *mb;
+	u64	mapping[IPOIB_CM_TX_SG];
+};
+
 struct ipoib_rx_buf {
 	struct mbuf *mb;
 	u64	mapping[IPOIB_UD_RX_SG];
@@ -197,7 +210,7 @@ struct ipoib_rx_buf {
 
 struct ipoib_tx_buf {
 	struct mbuf *mb;
-	u64	mapping[MAX_MB_FRAGS];
+	u64	mapping[IPOIB_UD_TX_SG];
 };
 
 struct ib_cm_id;
@@ -257,18 +270,13 @@ struct ipoib_cm_tx {
 	struct list_head     list;
 	struct ipoib_dev_priv *priv;
 	struct ipoib_path    *path;
-	struct ipoib_tx_buf *tx_ring;
+	struct ipoib_cm_tx_buf *tx_ring;
 	unsigned	     tx_head;
 	unsigned	     tx_tail;
 	unsigned long	     flags;
 	u32		     mtu;	/* remote specified mtu, with grh. */
 };
 
-struct ipoib_cm_rx_buf {
-	struct mbuf *mb;
-	u64	mapping[IPOIB_CM_RX_SG];
-};
-
 struct ipoib_cm_dev_priv {
 	struct ib_srq	       *srq;
 	struct ipoib_cm_rx_buf *srq_ring;
@@ -287,7 +295,7 @@ struct ipoib_cm_dev_priv {
 	struct list_head	start_list;
 	struct list_head	reap_list;
 	struct ib_sge		rx_sge[IPOIB_CM_RX_SG];
-	struct ib_recv_wr       rx_wr;
+	struct ib_recv_wr	rx_wr;
 	int			nonsrq_conn_qp;
 	int			max_cm_mtu;	/* Actual buf size. */
 	int			num_frags;
@@ -353,13 +361,13 @@ struct ipoib_dev_priv {
 	struct ipoib_tx_buf *tx_ring;
 	unsigned	     tx_head;
 	unsigned	     tx_tail;
-	struct ib_sge	     tx_sge[MAX_MB_FRAGS];
+	struct ib_sge	     tx_sge[IPOIB_MAX_TX_SG];
 	struct ib_send_wr    tx_wr;
 	unsigned	     tx_outstanding;
 	struct ib_wc	     send_wc[MAX_SEND_CQE];
 
 	struct ib_recv_wr    rx_wr;
-	struct ib_sge	     rx_sge[IPOIB_UD_RX_SG];
+	struct ib_sge	     rx_sge[IPOIB_MAX_RX_SG];
 
 	struct ib_wc ibwc[IPOIB_NUM_WC];
@@ -414,7 +422,7 @@ struct ipoib_path {
 
 /* UD Only transmits encap len but we want the two sizes to be symmetrical. */
 #define	IPOIB_UD_MTU(ib_mtu)		(ib_mtu - IPOIB_ENCAP_LEN)
-#define	IPOIB_CM_MTU(ib_mtu)		(ib_mtu - IPOIB_ENCAP_LEN)
+#define	IPOIB_CM_MTU(ib_mtu)		(ib_mtu - 0x10)
 
 #define	IPOIB_IS_MULTICAST(addr) ((addr)[4] == 0xff)
@@ -516,10 +524,14 @@ void ipoib_pkey_poll(struct work_struct
 int ipoib_pkey_dev_delay_open(struct ipoib_dev_priv *priv);
 void ipoib_drain_cq(struct ipoib_dev_priv *priv);
 
-int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
+int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req, int max);
 void ipoib_dma_unmap_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
 int ipoib_poll_tx(struct ipoib_dev_priv *priv);
 
+void ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req);
+void ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length);
+struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int size);
+
 void ipoib_set_ethtool_ops(struct ifnet *dev);
 int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c	Sat Feb 26 04:02:54 2011	(r219047)
@@ -78,28 +78,34 @@ static struct ib_send_wr ipoib_cm_rx_dra
 
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
 
-static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv,
-				  u64 mapping[IPOIB_CM_RX_SG])
+static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req)
 {
-	ib_dma_unmap_single(priv->ca, mapping[0], priv->cm.max_cm_mtu, DMA_FROM_DEVICE);
+
+	ipoib_dma_unmap_rx(priv, (struct ipoib_rx_buf *)rx_req);
 }
 
 static int ipoib_cm_post_receive_srq(struct ipoib_dev_priv *priv, int id)
 {
 	struct ib_recv_wr *bad_wr;
+	struct ipoib_rx_buf *rx_req;
+	struct mbuf *m;
 	int ret;
+	int i;
 
-	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+	rx_req = (struct ipoib_rx_buf *)&priv->cm.srq_ring[id];
+	for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) {
+		priv->cm.rx_sge[i].addr = rx_req->mapping[i];
+		priv->cm.rx_sge[i].length = m->m_len;
+	}
 
-	priv->cm.rx_sge[0].addr = priv->cm.srq_ring[id].mapping[0];
-	priv->cm.rx_sge[0].length = priv->cm.max_cm_mtu;
+	priv->cm.rx_wr.num_sge = i;
+	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
 	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping);
+		ipoib_dma_unmap_rx(priv, rx_req);
 		m_freem(priv->cm.srq_ring[id].mb);
 		priv->cm.srq_ring[id].mb = NULL;
 	}
@@ -112,18 +118,25 @@ static int ipoib_cm_post_receive_nonsrq(
 					struct ib_recv_wr *wr,
 					struct ib_sge *sge, int id)
 {
+	struct ipoib_rx_buf *rx_req;
 	struct ib_recv_wr *bad_wr;
+	struct mbuf *m;
 	int ret;
+	int i;
 
-	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+	rx_req = (struct ipoib_rx_buf *)&rx->rx_ring[id];
+	for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) {
+		sge[i].addr = rx_req->mapping[i];
+		sge[i].length = m->m_len;
+	}
 
-	sge[0].addr = rx->rx_ring[id].mapping[0];
-	priv->cm.rx_sge[0].length = priv->cm.max_cm_mtu;
+	wr->num_sge = i;
+	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
 	ret = ib_post_recv(rx->qp, wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, rx->rx_ring[id].mapping);
+		ipoib_dma_unmap_rx(priv, rx_req);
 		m_freem(rx->rx_ring[id].mb);
 		rx->rx_ring[id].mb = NULL;
 	}
@@ -131,37 +144,11 @@ static int ipoib_cm_post_receive_nonsrq(
 	return ret;
 }
 
-static struct mbuf *ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv,
-					 struct ipoib_cm_rx_buf *rx_ring,
-					 int id,
-					 u64 mapping[IPOIB_CM_RX_SG])
+static struct mbuf *
+ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req)
 {
-	struct mbuf *mb;
-	int buf_size;
-
-	buf_size = priv->cm.max_cm_mtu;
-	if (buf_size <= MCLBYTES)
-		buf_size = MCLBYTES;
-	else if (buf_size <= MJUMPAGESIZE)
-		buf_size = MJUMPAGESIZE;
-	else if (buf_size <= MJUM9BYTES)
-		buf_size = MJUM9BYTES;
-	else if (buf_size < MJUM16BYTES)
-		buf_size = MJUM16BYTES;
-
-	mb = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, buf_size);
-	if (unlikely(!mb))
-		return NULL;
-
-	mapping[0] = ib_dma_map_single(priv->ca, mtod(mb, void *),
-				       buf_size, DMA_FROM_DEVICE);
-	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
-		m_freem(mb);
-		return NULL;
-	}
-
-	rx_ring[id].mb = mb;
-	return mb;
+
+	return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req,
+	    priv->cm.max_cm_mtu);
 }
 
 static void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv,
@@ -171,7 +158,7 @@ static void ipoib_cm_free_rx_ring(struct
 
 	for (i = 0; i < ipoib_recvq_size; ++i)
 		if (rx_ring[i].mb) {
-			ipoib_cm_dma_unmap_rx(priv, rx_ring[i].mapping);
+			ipoib_cm_dma_unmap_rx(priv, &rx_ring[i]);
 			m_freem(rx_ring[i].mb);
 		}
 
@@ -225,7 +212,7 @@ static struct ib_qp *ipoib_cm_create_rx_
 		.recv_cq = priv->recv_cq,
 		.srq = priv->cm.srq,
 		.cap.max_send_wr = 1,	/* For drain WR */
-		.cap.max_send_sge = 1,	/* FIXME: 0 Seems not to work */
+		.cap.max_send_sge = 1,
 		.sq_sig_type = IB_SIGNAL_ALL_WR,
 		.qp_type = IB_QPT_RC,
 		.qp_context = p,
@@ -233,7 +220,7 @@ static struct ib_qp *ipoib_cm_create_rx_
 
 	if (!ipoib_cm_has_srq(priv)) {
 		attr.cap.max_recv_wr = ipoib_recvq_size;
-		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
+		attr.cap.max_recv_sge = priv->cm.num_frags;
 	}
 
 	return ib_create_qp(priv->pd, &attr);
@@ -297,9 +284,10 @@ static void ipoib_cm_init_rx_wr(struct i
 				struct ib_recv_wr *wr,
 				struct ib_sge *sge)
 {
+	int i;
 
-	sge[0].length = priv->cm.max_cm_mtu;
-	sge[0].lkey = priv->mr->lkey;
+	for (i = 0; i < IPOIB_CM_RX_SG; i++)
+		sge[i].lkey = priv->mr->lkey;
 
 	wr->next    = NULL;
 	wr->sg_list = sge;
@@ -346,8 +334,7 @@ static int ipoib_cm_nonsrq_init_rx(struc
 	spin_unlock_irq(&priv->lock);
 
 	for (i = 0; i < ipoib_recvq_size; ++i) {
-		if (!ipoib_cm_alloc_rx_mb(priv, rx->rx_ring, i,
-					  rx->rx_ring[i].mapping)) {
+		if (!ipoib_cm_alloc_rx_mb(priv, &rx->rx_ring[i])) {
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 			ret = -ENOMEM;
 			goto err_count;
@@ -482,23 +469,15 @@ static int ipoib_cm_rx_handler(struct ib
 		return 0;
 	}
 }
-/* Adjust length of mb with fragments to match received data */
-static void mb_put_frags(struct mbuf *mb,
-			 unsigned int length, struct mbuf *tomb)
-{
-
-	mb->m_pkthdr.len = length;
-	mb->m_len = length;
-}
 
 void ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc)
 {
+	struct ipoib_cm_rx_buf saverx;
 	struct ipoib_cm_rx_buf *rx_ring;
 	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
 	struct ifnet *dev = priv->dev;
 	struct mbuf *mb, *newmb;
 	struct ipoib_cm_rx *p;
-	u64 mapping[IPOIB_CM_RX_SG];
 	int has_srq;
 	u_short proto;
@@ -555,7 +534,8 @@ void ipoib_cm_handle_rx_wc(struct ipoib_
 		}
 	}
 
-	newmb = ipoib_cm_alloc_rx_mb(priv, rx_ring, wr_id, mapping);
+	memcpy(&saverx, &rx_ring[wr_id], sizeof(saverx));
+	newmb = ipoib_cm_alloc_rx_mb(priv, &rx_ring[wr_id]);
 	if (unlikely(!newmb)) {
 		/*
 		 * If we can't allocate a new RX buffer, dump
@@ -563,16 +543,16 @@ void ipoib_cm_handle_rx_wc(struct ipoib_
 		 */
 		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
 		++dev->if_ierrors;
+		memcpy(&rx_ring[wr_id], &saverx, sizeof(saverx));
 		goto repost;
 	}
 
-	ipoib_cm_dma_unmap_rx(priv, rx_ring[wr_id].mapping);
-	memcpy(rx_ring[wr_id].mapping, mapping, sizeof *mapping);
+	ipoib_cm_dma_unmap_rx(priv, &saverx);
 
 	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
 		       wc->byte_len, wc->slid);
 
-	mb_put_frags(mb, wc->byte_len, newmb);
+	ipoib_dma_mb(priv, mb, wc->byte_len);
 
 	++dev->if_opackets;
 	dev->if_obytes += mb->m_pkthdr.len;
@@ -603,7 +583,7 @@ repost:
 
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    struct ipoib_cm_tx *tx,
-			    struct ipoib_tx_buf *tx_req,
+			    struct ipoib_cm_tx_buf *tx_req,
 			    unsigned int wr_id)
 {
 	struct ib_send_wr *bad_wr;
@@ -625,7 +605,7 @@ static inline int post_send(struct ipoib
 
 void ipoib_cm_send(struct ipoib_dev_priv *priv, struct mbuf *mb, struct ipoib_cm_tx *tx)
 {
-	struct ipoib_tx_buf *tx_req;
+	struct ipoib_cm_tx_buf *tx_req;
 	struct ifnet *dev = priv->dev;
 
 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
@@ -653,7 +633,8 @@ void ipoib_cm_send(struct ipoib_dev_priv
 	 */
 	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->mb = mb;
-	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
+	if (unlikely(ipoib_dma_map_tx(priv->ca, (struct ipoib_tx_buf *)tx_req,
+	    priv->cm.num_frags))) {
 		++dev->if_oerrors;
 		if (tx_req->mb)
 			m_freem(tx_req->mb);
@@ -663,7 +644,7 @@ void ipoib_cm_send(struct ipoib_dev_priv
 	if (unlikely(post_send(priv, tx, tx_req, tx->tx_head & (ipoib_sendq_size - 1)))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++dev->if_oerrors;
-		ipoib_dma_unmap_tx(priv->ca, tx_req);
+		ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req);
 		m_freem(mb);
 	} else {
 		++tx->tx_head;
@@ -684,7 +665,7 @@ void ipoib_cm_handle_tx_wc(struct ipoib_
 	struct ipoib_cm_tx *tx = wc->qp->qp_context;
 	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
 	struct ifnet *dev = priv->dev;
-	struct ipoib_tx_buf *tx_req;
+	struct ipoib_cm_tx_buf *tx_req;
 
 	ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
 		       wr_id, wc->status);
@@ -697,7 +678,7 @@ void ipoib_cm_handle_tx_wc(struct ipoib_
 
 	tx_req = &tx->tx_ring[wr_id];
 
-	ipoib_dma_unmap_tx(priv->ca, tx_req);
+	ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req);
 
 	/* FIXME: is this right? Shouldn't we only increment on success? */
 	++dev->if_opackets;
@@ -934,7 +915,7 @@ static struct ib_qp *ipoib_cm_create_tx_
 		.recv_cq = priv->recv_cq,
 		.srq = priv->cm.srq,
 		.cap.max_send_wr = ipoib_sendq_size,
-		.cap.max_send_sge = MAX_MB_FRAGS,
+		.cap.max_send_sge = priv->cm.num_frags,
 		.sq_sig_type = IB_SIGNAL_ALL_WR,
 		.qp_type = IB_QPT_RC,
 		.qp_context = tx
@@ -1067,7 +1048,7 @@ static void ipoib_cm_tx_destroy(struct i
 {
 	struct ipoib_dev_priv *priv = p->priv;
 	struct ifnet *dev = priv->dev;
-	struct ipoib_tx_buf *tx_req;
+	struct ipoib_cm_tx_buf *tx_req;
 	unsigned long begin;
 
 	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
@@ -1097,7 +1078,7 @@ timeout:
 
 	while ((int) p->tx_tail - (int) p->tx_head < 0) {
 		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-		ipoib_dma_unmap_tx(priv->ca, tx_req);
+		ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req);
 		m_freem(tx_req->mb);
 		++p->tx_tail;
 		if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
@@ -1404,7 +1385,7 @@ int ipoib_cm_dev_init(struct ipoib_dev_p
 	attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
 	ipoib_cm_create_srq(priv, attr.max_srq_sge);
 	if (ipoib_cm_has_srq(priv)) {
-		priv->cm.max_cm_mtu = attr.max_srq_sge * MJUM16BYTES;
+		priv->cm.max_cm_mtu = attr.max_srq_sge * MJUMPAGESIZE;
 		priv->cm.num_frags = attr.max_srq_sge;
 		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
 			  priv->cm.max_cm_mtu, priv->cm.num_frags);
@@ -1417,8 +1398,7 @@ int ipoib_cm_dev_init(struct ipoib_dev_p
 
 	if (ipoib_cm_has_srq(priv)) {
 		for (i = 0; i < ipoib_recvq_size; ++i) {
-			if (!ipoib_cm_alloc_rx_mb(priv, priv->cm.srq_ring, i,
-						  priv->cm.srq_ring[i].mapping)) {
+			if (!ipoib_cm_alloc_rx_mb(priv, &priv->cm.srq_ring[i])) {
 				ipoib_warn(priv, "failed to allocate "
 					   "receive buffer %d\n", i);
 				ipoib_cm_dev_cleanup(priv);

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c	Sat Feb 26 04:02:54 2011	(r219047)
@@ -87,36 +87,77 @@ void ipoib_free_ah(struct kref *kref)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
-				  u64 mapping[IPOIB_UD_RX_SG])
+void
+ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req)
 {
-	ib_dma_unmap_single(priv->ca, mapping[0],
-			    priv->max_ib_mtu + IB_GRH_BYTES, DMA_FROM_DEVICE);
+	struct mbuf *m;
+	int i;
+
+	for (i = 0, m = rx_req->mb; m != NULL; m = m->m_next, i++)
+		ib_dma_unmap_single(priv->ca, rx_req->mapping[i], m->m_len,
+		    DMA_FROM_DEVICE);
 }
 
-static void ipoib_ud_mb_put_frags(struct ipoib_dev_priv *priv,
-				  struct mbuf *mb,
-				  unsigned int length)
+void
+ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length)
 {
-	mb->m_pkthdr.len = length;
-	mb->m_len = length;
+
+	m_adj(mb, -(mb->m_pkthdr.len - length));
+}
+
+struct mbuf *
+ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req,
+    int size)
+{
+	struct mbuf *mb, *m;
+	int i, j;
+
+	rx_req->mb = NULL;
+	mb = m_getm2(NULL, size, M_NOWAIT, MT_DATA, M_PKTHDR);
+	if (mb == NULL)
+		return (NULL);
+	for (i = 0, m = mb; m != NULL; m = m->m_next, i++) {
+		m->m_len = (m->m_flags & M_EXT) ? m->m_ext.ext_size :
+		    ((m->m_flags & M_PKTHDR) ? MHLEN : MLEN);
+		mb->m_pkthdr.len += m->m_len;
+		rx_req->mapping[i] = ib_dma_map_single(priv->ca,
+		    mtod(m, void *), m->m_len, DMA_FROM_DEVICE);
+		if (unlikely(ib_dma_mapping_error(priv->ca,
+		    rx_req->mapping[i])))
+			goto error;
+
+	}
+	rx_req->mb = mb;
+	return (mb);
+error:
+	for (j = 0, m = mb; j < i; m = m->m_next, j++)
+		ib_dma_unmap_single(priv->ca, rx_req->mapping[j], m->m_len,
+		    DMA_FROM_DEVICE);
+	m_freem(mb);
+	return (NULL);
+
 }
 
 static int ipoib_ib_post_receive(struct ipoib_dev_priv *priv, int id)
 {
+	struct ipoib_rx_buf *rx_req;
 	struct ib_recv_wr *bad_wr;
+	struct mbuf *m;
 	int ret;
+	int i;
 
-	priv->rx_wr.wr_id = id | IPOIB_OP_RECV;
-	priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
-	priv->rx_sge[0].length = priv->max_ib_mtu + IB_GRH_BYTES;
-
+	rx_req = &priv->rx_ring[id];
+	for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) {
+		priv->rx_sge[i].addr = rx_req->mapping[i];
+		priv->rx_sge[i].length = m->m_len;
+	}
+	priv->rx_wr.num_sge = i;
+	priv->rx_wr.wr_id = id | IPOIB_OP_RECV;
 	ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
-		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
+		ipoib_dma_unmap_rx(priv, &priv->rx_ring[id]);
 		m_freem(priv->rx_ring[id].mb);
 		priv->rx_ring[id].mb = NULL;
 	}
@@ -124,41 +165,12 @@ static int ipoib_ib_post_receive(struct
 	return ret;
 }
 
-static struct mbuf *ipoib_alloc_rx_mb(struct ipoib_dev_priv *priv, int id)
+static struct mbuf *
+ipoib_alloc_rx_mb(struct ipoib_dev_priv *priv, int id)
 {
-	struct mbuf *mb;
-	int buf_size;
-	u64 *mapping;
 
-	/*
-	 * XXX Should be calculated once and cached.
-	 */
-	buf_size = priv->max_ib_mtu + IB_GRH_BYTES;
-	if (buf_size <= MCLBYTES)
-		buf_size = MCLBYTES;
-	else if (buf_size <= MJUMPAGESIZE)
-		buf_size = MJUMPAGESIZE;
-	else if (buf_size <= MJUM9BYTES)
-		buf_size = MJUM9BYTES;
-	else if (buf_size < MJUM16BYTES)
-		buf_size = MJUM16BYTES;
-
-	mb = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, buf_size);
-	if (unlikely(!mb))
-		return NULL;
-
-	mapping = priv->rx_ring[id].mapping;
-	mapping[0] = ib_dma_map_single(priv->ca, mtod(mb, void *), buf_size,
-	    DMA_FROM_DEVICE);
-	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
-		goto error;
-
-	priv->rx_ring[id].mb = mb;
-	return mb;
-
-error:
-	m_freem(mb);
-	return NULL;
+	return ipoib_alloc_map_mb(priv, &priv->rx_ring[id],
+	    priv->max_ib_mtu + IB_GRH_BYTES);
 }
 
 static int ipoib_ib_post_receives(struct ipoib_dev_priv *priv)
@@ -182,11 +194,11 @@ static int ipoib_ib_post_receives(struct
 static void
 ipoib_ib_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc)
 {
+	struct ipoib_rx_buf saverx;
 	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
 	struct ifnet *dev = priv->dev;
 	struct ipoib_header *eh;
 	struct mbuf *mb;
-	u64 mapping[IPOIB_UD_RX_SG];
 
 	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
 		       wr_id, wc->status);
@@ -207,8 +219,7 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p
 			goto repost;
 		}
 		if (mb) {
-			ipoib_ud_dma_unmap_rx(priv,
-			    priv->rx_ring[wr_id].mapping);
+			ipoib_dma_unmap_rx(priv, &priv->rx_ring[wr_id]);
 			m_freem(mb);
 			priv->rx_ring[wr_id].mb = NULL;
 		}
@@ -222,14 +233,13 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p
 	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
 		goto repost;
 
-	memcpy(mapping, priv->rx_ring[wr_id].mapping,
-	       IPOIB_UD_RX_SG * sizeof *mapping);
-
+	memcpy(&saverx, &priv->rx_ring[wr_id], sizeof(saverx));
 	/*
 	 * If we can't allocate a new RX buffer, dump
 	 * this packet and reuse the old buffer.
 	 */
 	if (unlikely(!ipoib_alloc_rx_mb(priv, wr_id))) {
+		memcpy(&priv->rx_ring[wr_id], &saverx, sizeof(saverx));
 		dev->if_iqdrops++;
 		goto repost;
 	}
@@ -237,8 +247,8 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p
 	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
 		       wc->byte_len, wc->slid);
 
-	ipoib_ud_dma_unmap_rx(priv, mapping);
-	ipoib_ud_mb_put_frags(priv, mb, wc->byte_len);
+	ipoib_dma_unmap_rx(priv, &saverx);
+	ipoib_dma_mb(priv, mb, wc->byte_len);
 
 	++dev->if_ipackets;
 	dev->if_ibytes += mb->m_pkthdr.len;
@@ -258,7 +268,7 @@ repost:
 			   "for buf %d\n", wr_id);
 }
 
-int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req)
+int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req, int max)
 {
 	struct mbuf *mb = tx_req->mb;
 	u64 *mapping = tx_req->mapping;
@@ -276,12 +286,12 @@ int ipoib_dma_map_tx(struct ib_device *c
 		i--;
 	}
 	i--;
-	if (i >= MAX_MB_FRAGS) {
+	if (i >= max) {
 		tx_req->mb = mb = m_defrag(mb, M_DONTWAIT);
 		if (mb == NULL)
 			return -EIO;
 		for (m = mb, i = 0; m != NULL; m = m->m_next, i++);
-		if (i >= MAX_MB_FRAGS)
+		if (i >= max)
 			return -EIO;
 	}
 	error = 0;
@@ -507,7 +517,7 @@ ipoib_send(struct ipoib_dev_priv *priv,
 	 */
 	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->mb = mb;
-	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
+	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req, IPOIB_UD_TX_SG))) {
 		++dev->if_oerrors;
 		if (tx_req->mb)
 			m_freem(tx_req->mb);
@@ -771,8 +781,7 @@ int ipoib_ib_dev_stop(struct ipoib_dev_p
 				rx_req = &priv->rx_ring[i];
 				if (!rx_req->mb)
 					continue;
-				ipoib_ud_dma_unmap_rx(priv,
-				    priv->rx_ring[i].mapping);
+				ipoib_dma_unmap_rx(priv, &priv->rx_ring[i]);
 				m_freem(rx_req->mb);
 				rx_req->mb = NULL;
 			}

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c	Sat Feb 26 04:02:54 2011	(r219047)
@@ -992,6 +992,8 @@ ipoib_add_port(const char *format, struc
 			   hca->name, port, result);
 		goto device_init_failed;
 	}
+	if (ipoib_cm_admin_enabled(priv))
+		priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv));
 
 	INIT_IB_EVENT_HANDLER(&priv->event_handler,
 			      priv->ca, ipoib_event);

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	Sat Feb 26 04:02:54 2011	(r219047)
@@ -202,7 +202,7 @@ int ipoib_transport_dev_init(struct ipoi
 	if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
 		init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
 
-	init_attr.cap.max_send_sge = MAX_MB_FRAGS;
+	init_attr.cap.max_send_sge = IPOIB_UD_TX_SG;
 
 	priv->qp = ib_create_qp(priv->pd, &init_attr);
 	if (IS_ERR(priv->qp)) {
@@ -214,15 +214,15 @@ int ipoib_transport_dev_init(struct ipoi
 	IF_LLADDR(priv->dev)[2] = (priv->qp->qp_num >>  8) & 0xff;
 	IF_LLADDR(priv->dev)[3] = (priv->qp->qp_num      ) & 0xff;
 
-	for (i = 0; i < MAX_MB_FRAGS + 1; ++i)
+	for (i = 0; i < IPOIB_MAX_TX_SG; ++i)
 		priv->tx_sge[i].lkey = priv->mr->lkey;
 
 	priv->tx_wr.opcode	= IB_WR_SEND;
 	priv->tx_wr.sg_list	= priv->tx_sge;
 	priv->tx_wr.send_flags	= IB_SEND_SIGNALED;
 
-	priv->rx_sge[0].lkey = priv->mr->lkey;
-	priv->rx_wr.num_sge = 1;
+	for (i = 0; i < IPOIB_UD_RX_SG; ++i)
+		priv->rx_sge[i].lkey = priv->mr->lkey;
 
 	priv->rx_wr.next = NULL;
 	priv->rx_wr.sg_list = priv->rx_sge;