Date:      Sat, 26 Feb 2011 04:02:55 +0000 (UTC)
From:      Jeff Roberson <jeff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r219047 - projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib
Message-ID:  <201102260402.p1Q42tON038342@svn.freebsd.org>

Author: jeff
Date: Sat Feb 26 04:02:54 2011
New Revision: 219047
URL: http://svn.freebsd.org/changeset/base/219047

Log:
   - Permit non-contiguous receive buffers (see the sketch below).
   - If IPOIB_CM is enabled, set the MTU to the largest supported size to
     match the behavior of Linux.
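
For context, the first item replaces the old single-jumbo-cluster receive
buffers with mbuf chains that are DMA-mapped one scatter/gather entry per
segment. The condensed sketch below mirrors the posting pattern from the
ipoib_ib.c hunk further down; it is an illustration only, and the helper
name post_chain_as_sges is not part of the commit.

/*
 * Describe each mbuf of one receive slot with its own SGE; num_sge
 * becomes the chain length instead of a hard-coded 1.
 */
static int
post_chain_as_sges(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int id)
{
	struct ib_recv_wr *bad_wr;
	struct mbuf *m;
	int i;

	for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) {
		priv->rx_sge[i].addr = rx_req->mapping[i];	/* saved at map time */
		priv->rx_sge[i].length = m->m_len;
	}
	priv->rx_wr.num_sge = i;
	priv->rx_wr.wr_id = id | IPOIB_OP_RECV;

	return (ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr));
}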

Modified:
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
  projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h	Sat Feb 26 04:02:54 2011	(r219047)
@@ -91,7 +91,6 @@
 /* constants */
 
 #define	INFINIBAND_ALEN		20	/* Octets in IPoIB HW addr */
-#define	MAX_MB_FRAGS		((8192 / MCLBYTES) + 2)
 
 #ifdef IPOIB_CM
 #define	CONFIG_INFINIBAND_IPOIB_CM
@@ -111,12 +110,16 @@ enum ipoib_flush_level {
 enum {
 	IPOIB_ENCAP_LEN		  = 4,
 	IPOIB_HEADER_LEN	  = IPOIB_ENCAP_LEN + INFINIBAND_ALEN,
-	IPOIB_UD_RX_SG		  = 1, /* max buffer needed for 4K mtu */
-
-	IPOIB_CM_MAX_MTU	  = MJUM16BYTES,
-	IPOIB_CM_RX_SG		  = 1,	/* We only allocate a single mbuf. */
+	IPOIB_UD_MAX_MTU	  = 4 * 1024,
+	IPOIB_UD_RX_SG		  = (IPOIB_UD_MAX_MTU / MJUMPAGESIZE),
+	IPOIB_UD_TX_SG		  = (IPOIB_UD_MAX_MTU / MCLBYTES) + 2,
+	IPOIB_CM_MAX_MTU	  = (64 * 1024),
+	IPOIB_CM_TX_SG		  = (IPOIB_CM_MAX_MTU / MCLBYTES) + 2,
+	IPOIB_CM_RX_SG		  = (IPOIB_CM_MAX_MTU / MJUMPAGESIZE),
 	IPOIB_RX_RING_SIZE	  = 256,
 	IPOIB_TX_RING_SIZE	  = 128,
+	IPOIB_MAX_RX_SG		  = MAX(IPOIB_CM_RX_SG, IPOIB_UD_RX_SG),
+	IPOIB_MAX_TX_SG		  = MAX(IPOIB_CM_TX_SG, IPOIB_UD_TX_SG),
 	IPOIB_MAX_QUEUE_SIZE	  = 8192,
 	IPOIB_MIN_QUEUE_SIZE	  = 2,
 	IPOIB_CM_MAX_CONN_QP	  = 4096,
@@ -190,6 +193,16 @@ struct ipoib_mcast {
 	struct ipoib_dev_priv *priv;
 };
 
+struct ipoib_cm_rx_buf {
+	struct mbuf *mb;
+	u64		mapping[IPOIB_CM_RX_SG];
+};
+
+struct ipoib_cm_tx_buf {
+	struct mbuf *mb;
+	u64		mapping[IPOIB_CM_TX_SG];
+};
+
 struct ipoib_rx_buf {
 	struct mbuf *mb;
 	u64		mapping[IPOIB_UD_RX_SG];
@@ -197,7 +210,7 @@ struct ipoib_rx_buf {
 
 struct ipoib_tx_buf {
 	struct mbuf *mb;
-	u64		mapping[MAX_MB_FRAGS];
+	u64		mapping[IPOIB_UD_TX_SG];
 };
 
 struct ib_cm_id;
@@ -257,18 +270,13 @@ struct ipoib_cm_tx {
 	struct list_head     list;
 	struct ipoib_dev_priv *priv;
 	struct ipoib_path   *path;
-	struct ipoib_tx_buf *tx_ring;
+	struct ipoib_cm_tx_buf *tx_ring;
 	unsigned	     tx_head;
 	unsigned	     tx_tail;
 	unsigned long	     flags;
 	u32		     mtu;	/* remote specified mtu, with grh. */
 };
 
-struct ipoib_cm_rx_buf {
-	struct mbuf *mb;
-	u64 mapping[IPOIB_CM_RX_SG];
-};
-
 struct ipoib_cm_dev_priv {
 	struct ib_srq	       *srq;
 	struct ipoib_cm_rx_buf *srq_ring;
@@ -287,7 +295,7 @@ struct ipoib_cm_dev_priv {
 	struct list_head	start_list;
 	struct list_head	reap_list;
 	struct ib_sge		rx_sge[IPOIB_CM_RX_SG];
-	struct ib_recv_wr       rx_wr;
+	struct ib_recv_wr	rx_wr;
 	int			nonsrq_conn_qp;
 	int			max_cm_mtu;	/* Actual buf size. */
 	int			num_frags;
@@ -353,13 +361,13 @@ struct ipoib_dev_priv {
 	struct ipoib_tx_buf *tx_ring;
 	unsigned	     tx_head;
 	unsigned	     tx_tail;
-	struct ib_sge	     tx_sge[MAX_MB_FRAGS];
+	struct ib_sge	     tx_sge[IPOIB_MAX_TX_SG];
 	struct ib_send_wr    tx_wr;
 	unsigned	     tx_outstanding;
 	struct ib_wc	     send_wc[MAX_SEND_CQE];
 
 	struct ib_recv_wr    rx_wr;
-	struct ib_sge	     rx_sge[IPOIB_UD_RX_SG];
+	struct ib_sge	     rx_sge[IPOIB_MAX_RX_SG];
 
 	struct ib_wc ibwc[IPOIB_NUM_WC];
 
@@ -414,7 +422,7 @@ struct ipoib_path {
 
 /* UD Only transmits encap len but we want the two sizes to be symmetrical. */
 #define IPOIB_UD_MTU(ib_mtu)		(ib_mtu - IPOIB_ENCAP_LEN)
-#define	IPOIB_CM_MTU(ib_mtu)		(ib_mtu - IPOIB_ENCAP_LEN)
+#define	IPOIB_CM_MTU(ib_mtu)		(ib_mtu - 0x10)
 
 #define	IPOIB_IS_MULTICAST(addr)	((addr)[4] == 0xff)
 
@@ -516,10 +524,14 @@ void ipoib_pkey_poll(struct work_struct 
 int ipoib_pkey_dev_delay_open(struct ipoib_dev_priv *priv);
 void ipoib_drain_cq(struct ipoib_dev_priv *priv);
 
-int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
+int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req, int max);
 void ipoib_dma_unmap_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
 int ipoib_poll_tx(struct ipoib_dev_priv *priv);
 
+void ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req);
+void ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length);
+struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int size);
+
 
 void ipoib_set_ethtool_ops(struct ifnet *dev);
 int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
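
For concreteness, on a machine with 4 KB pages (MJUMPAGESIZE == PAGE_SIZE ==
4096) and the stock MCLBYTES of 2048, the new enum values above work out to
the following (illustrative arithmetic, not part of the commit):

	IPOIB_UD_RX_SG  =  4096 / 4096      =  1
	IPOIB_UD_TX_SG  =  4096 / 2048 + 2  =  4
	IPOIB_CM_RX_SG  = 65536 / 4096      = 16
	IPOIB_CM_TX_SG  = 65536 / 2048 + 2  = 34
	IPOIB_MAX_RX_SG = MAX(16, 1)        = 16
	IPOIB_MAX_TX_SG = MAX(34, 4)        = 34

so the per-buffer mapping[] arrays and the rx_sge/tx_sge arrays are sized
for the connected-mode worst case.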

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c	Sat Feb 26 04:02:54 2011	(r219047)
@@ -78,28 +78,34 @@ static struct ib_send_wr ipoib_cm_rx_dra
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 			       struct ib_cm_event *event);
 
-static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv,
-				  u64 mapping[IPOIB_CM_RX_SG])
+static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req)
 {
 
-	ib_dma_unmap_single(priv->ca, mapping[0], priv->cm.max_cm_mtu, DMA_FROM_DEVICE);
+	ipoib_dma_unmap_rx(priv, (struct ipoib_rx_buf *)rx_req);
 
 }
 
 static int ipoib_cm_post_receive_srq(struct ipoib_dev_priv *priv, int id)
 {
 	struct ib_recv_wr *bad_wr;
+	struct ipoib_rx_buf *rx_req;
+	struct mbuf *m;
 	int ret;
+	int i;
 
-	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+	rx_req = (struct ipoib_rx_buf *)&priv->cm.srq_ring[id];
+	for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) {
+		priv->cm.rx_sge[i].addr = rx_req->mapping[i];
+		priv->cm.rx_sge[i].length = m->m_len;
+	}
 
-	priv->cm.rx_sge[0].addr = priv->cm.srq_ring[id].mapping[0];
-	priv->cm.rx_sge[0].length = priv->cm.max_cm_mtu;
+	priv->cm.rx_wr.num_sge = i;
+	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
 	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping);
+		ipoib_dma_unmap_rx(priv, rx_req);
 		m_freem(priv->cm.srq_ring[id].mb);
 		priv->cm.srq_ring[id].mb = NULL;
 	}
@@ -112,18 +118,25 @@ static int ipoib_cm_post_receive_nonsrq(
 					struct ib_recv_wr *wr,
 					struct ib_sge *sge, int id)
 {
+	struct ipoib_rx_buf *rx_req;
 	struct ib_recv_wr *bad_wr;
+	struct mbuf *m;
 	int ret;
+	int i;
 
-	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+	rx_req = (struct ipoib_rx_buf *)&rx->rx_ring[id];
+	for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) {
+		sge[i].addr = rx_req->mapping[i];
+		sge[i].length = m->m_len;
+	}
 
-	sge[0].addr = rx->rx_ring[id].mapping[0];
-	priv->cm.rx_sge[0].length = priv->cm.max_cm_mtu;
+	wr->num_sge = i;
+	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
 	ret = ib_post_recv(rx->qp, wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, rx->rx_ring[id].mapping);
+		ipoib_dma_unmap_rx(priv, rx_req);
 		m_freem(rx->rx_ring[id].mb);
 		rx->rx_ring[id].mb = NULL;
 	}
@@ -131,37 +144,11 @@ static int ipoib_cm_post_receive_nonsrq(
 	return ret;
 }
 
-static struct mbuf *ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv,
-					     struct ipoib_cm_rx_buf *rx_ring,
-					     int id,
-					     u64 mapping[IPOIB_CM_RX_SG])
+static struct mbuf *
+ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req)
 {
-	struct mbuf *mb;
-	int buf_size;
-
-	buf_size = priv->cm.max_cm_mtu;
-	if (buf_size <= MCLBYTES)
-		buf_size = MCLBYTES;
-	else if (buf_size <= MJUMPAGESIZE)
-		buf_size = MJUMPAGESIZE;
-	else if (buf_size <= MJUM9BYTES)
-		buf_size = MJUM9BYTES;
-	else if (buf_size < MJUM16BYTES)
-		buf_size = MJUM16BYTES;
-
-	mb = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, buf_size);
-	if (unlikely(!mb))
-		return NULL;
-
-	mapping[0] = ib_dma_map_single(priv->ca, mtod(mb, void *),
-				       buf_size, DMA_FROM_DEVICE);
-	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
-		m_freem(mb);
-		return NULL;
-	}
-
-	rx_ring[id].mb = mb;
-	return mb;
+	return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req,
+	    priv->cm.max_cm_mtu);
 }
 
 static void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv,
@@ -171,7 +158,7 @@ static void ipoib_cm_free_rx_ring(struct
 
 	for (i = 0; i < ipoib_recvq_size; ++i)
 		if (rx_ring[i].mb) {
-			ipoib_cm_dma_unmap_rx(priv, rx_ring[i].mapping);
+			ipoib_cm_dma_unmap_rx(priv, &rx_ring[i]);
 			m_freem(rx_ring[i].mb);
 		}
 
@@ -225,7 +212,7 @@ static struct ib_qp *ipoib_cm_create_rx_
 		.recv_cq = priv->recv_cq,
 		.srq = priv->cm.srq,
 		.cap.max_send_wr = 1, /* For drain WR */
-		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
+		.cap.max_send_sge = 1,
 		.sq_sig_type = IB_SIGNAL_ALL_WR,
 		.qp_type = IB_QPT_RC,
 		.qp_context = p,
@@ -233,7 +220,7 @@ static struct ib_qp *ipoib_cm_create_rx_
 
 	if (!ipoib_cm_has_srq(priv)) {
 		attr.cap.max_recv_wr  = ipoib_recvq_size;
-		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
+		attr.cap.max_recv_sge = priv->cm.num_frags;
 	}
 
 	return ib_create_qp(priv->pd, &attr);
@@ -297,9 +284,10 @@ static void ipoib_cm_init_rx_wr(struct i
 				struct ib_recv_wr *wr,
 				struct ib_sge *sge)
 {
+	int i;
 
-	sge[0].length = priv->cm.max_cm_mtu;
-	sge[0].lkey = priv->mr->lkey;
+	for (i = 0; i < IPOIB_CM_RX_SG; i++)
+		sge[i].lkey = priv->mr->lkey;
 
 	wr->next    = NULL;
 	wr->sg_list = sge;
@@ -346,8 +334,7 @@ static int ipoib_cm_nonsrq_init_rx(struc
 	spin_unlock_irq(&priv->lock);
 
 	for (i = 0; i < ipoib_recvq_size; ++i) {
-		if (!ipoib_cm_alloc_rx_mb(priv, rx->rx_ring, i,
-					   rx->rx_ring[i].mapping)) {
+		if (!ipoib_cm_alloc_rx_mb(priv, &rx->rx_ring[i])) {
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
 				ret = -ENOMEM;
 				goto err_count;
@@ -482,23 +469,15 @@ static int ipoib_cm_rx_handler(struct ib
 		return 0;
 	}
 }
-/* Adjust length of mb with fragments to match received data */
-static void mb_put_frags(struct mbuf *mb,
-			  unsigned int length, struct mbuf *tomb)
-{
-
-	mb->m_pkthdr.len = length;
-	mb->m_len = length;
-}
 
 void ipoib_cm_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc)
 {
+	struct ipoib_cm_rx_buf saverx;
 	struct ipoib_cm_rx_buf *rx_ring;
 	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
 	struct ifnet *dev = priv->dev;
 	struct mbuf *mb, *newmb;
 	struct ipoib_cm_rx *p;
-	u64 mapping[IPOIB_CM_RX_SG];
 	int has_srq;
 	u_short proto;
 
@@ -555,7 +534,8 @@ void ipoib_cm_handle_rx_wc(struct ipoib_
 		}
 	}
 
-	newmb = ipoib_cm_alloc_rx_mb(priv, rx_ring, wr_id, mapping);
+	memcpy(&saverx, &rx_ring[wr_id], sizeof(saverx));
+	newmb = ipoib_cm_alloc_rx_mb(priv, &rx_ring[wr_id]);
 	if (unlikely(!newmb)) {
 		/*
 		 * If we can't allocate a new RX buffer, dump
@@ -563,16 +543,16 @@ void ipoib_cm_handle_rx_wc(struct ipoib_
 		 */
 		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
 		++dev->if_ierrors;
+		memcpy(&rx_ring[wr_id], &saverx, sizeof(saverx));
 		goto repost;
 	}
 
-	ipoib_cm_dma_unmap_rx(priv, rx_ring[wr_id].mapping);
-	memcpy(rx_ring[wr_id].mapping, mapping, sizeof *mapping);
+	ipoib_cm_dma_unmap_rx(priv, &saverx);
 
 	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
 		       wc->byte_len, wc->slid);
 
-	mb_put_frags(mb, wc->byte_len, newmb);
+	ipoib_dma_mb(priv, mb, wc->byte_len);
 
 	++dev->if_opackets;
 	dev->if_obytes += mb->m_pkthdr.len;
@@ -603,7 +583,7 @@ repost:
 
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    struct ipoib_cm_tx *tx,
-			    struct ipoib_tx_buf *tx_req,
+			    struct ipoib_cm_tx_buf *tx_req,
 			    unsigned int wr_id)
 {
 	struct ib_send_wr *bad_wr;
@@ -625,7 +605,7 @@ static inline int post_send(struct ipoib
 
 void ipoib_cm_send(struct ipoib_dev_priv *priv, struct mbuf *mb, struct ipoib_cm_tx *tx)
 {
-	struct ipoib_tx_buf *tx_req;
+	struct ipoib_cm_tx_buf *tx_req;
 	struct ifnet *dev = priv->dev;
 
 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
@@ -653,7 +633,8 @@ void ipoib_cm_send(struct ipoib_dev_priv
 	 */
 	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->mb = mb;
-	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
+	if (unlikely(ipoib_dma_map_tx(priv->ca, (struct ipoib_tx_buf *)tx_req,
+	    priv->cm.num_frags))) {
 		++dev->if_oerrors;
 		if (tx_req->mb)
 			m_freem(tx_req->mb);
@@ -663,7 +644,7 @@ void ipoib_cm_send(struct ipoib_dev_priv
 	if (unlikely(post_send(priv, tx, tx_req, tx->tx_head & (ipoib_sendq_size - 1)))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++dev->if_oerrors;
-		ipoib_dma_unmap_tx(priv->ca, tx_req);
+		ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req);
 		m_freem(mb);
 	} else {
 		++tx->tx_head;
@@ -684,7 +665,7 @@ void ipoib_cm_handle_tx_wc(struct ipoib_
 	struct ipoib_cm_tx *tx = wc->qp->qp_context;
 	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
 	struct ifnet *dev = priv->dev;
-	struct ipoib_tx_buf *tx_req;
+	struct ipoib_cm_tx_buf *tx_req;
 
 	ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
 		       wr_id, wc->status);
@@ -697,7 +678,7 @@ void ipoib_cm_handle_tx_wc(struct ipoib_
 
 	tx_req = &tx->tx_ring[wr_id];
 
-	ipoib_dma_unmap_tx(priv->ca, tx_req);
+	ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req);
 
 	/* FIXME: is this right? Shouldn't we only increment on success? */
 	++dev->if_opackets;
@@ -934,7 +915,7 @@ static struct ib_qp *ipoib_cm_create_tx_
 		.recv_cq		= priv->recv_cq,
 		.srq			= priv->cm.srq,
 		.cap.max_send_wr	= ipoib_sendq_size,
-		.cap.max_send_sge	= MAX_MB_FRAGS,
+		.cap.max_send_sge	= priv->cm.num_frags,
 		.sq_sig_type		= IB_SIGNAL_ALL_WR,
 		.qp_type		= IB_QPT_RC,
 		.qp_context		= tx
@@ -1067,7 +1048,7 @@ static void ipoib_cm_tx_destroy(struct i
 {
 	struct ipoib_dev_priv *priv = p->priv;
 	struct ifnet *dev = priv->dev;
-	struct ipoib_tx_buf *tx_req;
+	struct ipoib_cm_tx_buf *tx_req;
 	unsigned long begin;
 
 	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
@@ -1097,7 +1078,7 @@ timeout:
 
 	while ((int) p->tx_tail - (int) p->tx_head < 0) {
 		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-		ipoib_dma_unmap_tx(priv->ca, tx_req);
+		ipoib_dma_unmap_tx(priv->ca, (struct ipoib_tx_buf *)tx_req);
 		m_freem(tx_req->mb);
 		++p->tx_tail;
 		if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
@@ -1404,7 +1385,7 @@ int ipoib_cm_dev_init(struct ipoib_dev_p
 	attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
 	ipoib_cm_create_srq(priv, attr.max_srq_sge);
 	if (ipoib_cm_has_srq(priv)) {
-		priv->cm.max_cm_mtu = attr.max_srq_sge * MJUM16BYTES;
+		priv->cm.max_cm_mtu = attr.max_srq_sge * MJUMPAGESIZE;
 		priv->cm.num_frags  = attr.max_srq_sge;
 		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
 			  priv->cm.max_cm_mtu, priv->cm.num_frags);
@@ -1417,8 +1398,7 @@ int ipoib_cm_dev_init(struct ipoib_dev_p
 
 	if (ipoib_cm_has_srq(priv)) {
 		for (i = 0; i < ipoib_recvq_size; ++i) {
-			if (!ipoib_cm_alloc_rx_mb(priv, priv->cm.srq_ring, i,
-						   priv->cm.srq_ring[i].mapping)) {
+			if (!ipoib_cm_alloc_rx_mb(priv, &priv->cm.srq_ring[i])) {
 				ipoib_warn(priv, "failed to allocate "
 					   "receive buffer %d\n", i);
 				ipoib_cm_dev_cleanup(priv);

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c	Sat Feb 26 04:02:54 2011	(r219047)
@@ -87,36 +87,77 @@ void ipoib_free_ah(struct kref *kref)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
-				  u64 mapping[IPOIB_UD_RX_SG])
+void
+ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req)
 {
-	ib_dma_unmap_single(priv->ca, mapping[0],
-	    priv->max_ib_mtu + IB_GRH_BYTES, DMA_FROM_DEVICE);
+	struct mbuf *m;
+	int i;
+
+	for (i = 0, m = rx_req->mb; m != NULL; m = m->m_next, i++)
+		ib_dma_unmap_single(priv->ca, rx_req->mapping[i], m->m_len,
+		    DMA_FROM_DEVICE);
 }
 
-static void ipoib_ud_mb_put_frags(struct ipoib_dev_priv *priv,
-				   struct mbuf *mb,
-				   unsigned int length)
+void
+ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length)
 {
 
-	mb->m_pkthdr.len = length;
-	mb->m_len = length;
+	m_adj(mb, -(mb->m_pkthdr.len - length));
+}
+
+struct mbuf *
+ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req,
+    int size)
+{
+	struct mbuf *mb, *m;
+	int i, j;
+
+	rx_req->mb = NULL;
+	mb = m_getm2(NULL, size, M_NOWAIT, MT_DATA, M_PKTHDR);
+	if (mb == NULL)
+		return (NULL);
+	for (i = 0, m = mb; m != NULL; m = m->m_next, i++) {
+		m->m_len = (m->m_flags & M_EXT) ? m->m_ext.ext_size :
+		    ((m->m_flags & M_PKTHDR) ? MHLEN : MLEN);
+		mb->m_pkthdr.len += m->m_len;
+		rx_req->mapping[i] = ib_dma_map_single(priv->ca,
+		    mtod(m, void *), m->m_len, DMA_FROM_DEVICE);
+		if (unlikely(ib_dma_mapping_error(priv->ca,
+		    rx_req->mapping[i])))
+			goto error;
+
+	}
+	rx_req->mb = mb;
+	return (mb);
+error:
+	for (j = 0, m = mb; j < i; m = m->m_next, j++)
+		ib_dma_unmap_single(priv->ca, rx_req->mapping[j], m->m_len,
+		    DMA_FROM_DEVICE);
+	m_freem(mb);
+	return (NULL);
+
 }
 
 static int ipoib_ib_post_receive(struct ipoib_dev_priv *priv, int id)
 {
+	struct ipoib_rx_buf *rx_req;
 	struct ib_recv_wr *bad_wr;
+	struct mbuf *m;
 	int ret;
+	int i;
 
-	priv->rx_wr.wr_id   = id | IPOIB_OP_RECV;
-	priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
-	priv->rx_sge[0].length = priv->max_ib_mtu + IB_GRH_BYTES;
-
+	rx_req = &priv->rx_ring[id];
+	for (m = rx_req->mb, i = 0; m != NULL; m = m->m_next, i++) {
+		priv->rx_sge[i].addr = rx_req->mapping[i];
+		priv->rx_sge[i].length = m->m_len;
+	}
+	priv->rx_wr.num_sge = i;
+	priv->rx_wr.wr_id = id | IPOIB_OP_RECV;
 
 	ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
-		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
+		ipoib_dma_unmap_rx(priv, &priv->rx_ring[id]);
 		m_freem(priv->rx_ring[id].mb);
 		priv->rx_ring[id].mb = NULL;
 	}
@@ -124,41 +165,12 @@ static int ipoib_ib_post_receive(struct 
 	return ret;
 }
 
-static struct mbuf *ipoib_alloc_rx_mb(struct ipoib_dev_priv *priv, int id)
+static struct mbuf *
+ipoib_alloc_rx_mb(struct ipoib_dev_priv *priv, int id)
 {
-	struct mbuf *mb;
-	int buf_size;
-	u64 *mapping;
 
-	/*
-	 * XXX Should be calculated once and cached.
-	 */
-	buf_size = priv->max_ib_mtu + IB_GRH_BYTES;
-	if (buf_size <= MCLBYTES)
-		buf_size = MCLBYTES;
-	else if (buf_size <= MJUMPAGESIZE)
-		buf_size = MJUMPAGESIZE;
-	else if (buf_size <= MJUM9BYTES)
-		buf_size = MJUM9BYTES;
-	else if (buf_size < MJUM16BYTES)
-		buf_size = MJUM16BYTES;
-
-	mb = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, buf_size);
-	if (unlikely(!mb))
-		return NULL;
-
-	mapping = priv->rx_ring[id].mapping;
-	mapping[0] = ib_dma_map_single(priv->ca, mtod(mb, void *), buf_size,
-				       DMA_FROM_DEVICE);
-	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
-		goto error;
-
-	priv->rx_ring[id].mb = mb;
-	return mb;
-
-error:
-	m_freem(mb);
-	return NULL;
+	return ipoib_alloc_map_mb(priv, &priv->rx_ring[id],
+	    priv->max_ib_mtu + IB_GRH_BYTES);
 }
 
 static int ipoib_ib_post_receives(struct ipoib_dev_priv *priv)
@@ -182,11 +194,11 @@ static int ipoib_ib_post_receives(struct
 static void
 ipoib_ib_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc)
 {
+	struct ipoib_rx_buf saverx;
 	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
 	struct ifnet *dev = priv->dev;
 	struct ipoib_header *eh;
 	struct mbuf *mb;
-	u64 mapping[IPOIB_UD_RX_SG];
 
 	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
 		       wr_id, wc->status);
@@ -207,8 +219,7 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p
 			goto repost;
 		}
 		if (mb) {
-			ipoib_ud_dma_unmap_rx(priv,
-			     priv->rx_ring[wr_id].mapping);
+			ipoib_dma_unmap_rx(priv, &priv->rx_ring[wr_id]);
 			m_freem(mb);
 			priv->rx_ring[wr_id].mb = NULL;
 		}
@@ -222,14 +233,13 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p
 	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
 		goto repost;
 
-	memcpy(mapping, priv->rx_ring[wr_id].mapping,
-	       IPOIB_UD_RX_SG * sizeof *mapping);
-
+	memcpy(&saverx, &priv->rx_ring[wr_id], sizeof(saverx));
 	/*
 	 * If we can't allocate a new RX buffer, dump
 	 * this packet and reuse the old buffer.
 	 */
 	if (unlikely(!ipoib_alloc_rx_mb(priv, wr_id))) {
+		memcpy(&priv->rx_ring[wr_id], &saverx, sizeof(saverx));
 		dev->if_iqdrops++;
 		goto repost;
 	}
@@ -237,8 +247,8 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_p
 	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
 		       wc->byte_len, wc->slid);
 
-	ipoib_ud_dma_unmap_rx(priv, mapping);
-	ipoib_ud_mb_put_frags(priv, mb, wc->byte_len);
+	ipoib_dma_unmap_rx(priv, &saverx);
+	ipoib_dma_mb(priv, mb, wc->byte_len);
 
 	++dev->if_ipackets;
 	dev->if_ibytes += mb->m_pkthdr.len;
@@ -258,7 +268,7 @@ repost:
 			   "for buf %d\n", wr_id);
 }
 
-int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req)
+int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req, int max)
 {
 	struct mbuf *mb = tx_req->mb;
 	u64 *mapping = tx_req->mapping;
@@ -276,12 +286,12 @@ int ipoib_dma_map_tx(struct ib_device *c
 		i--;
 	}
 	i--;
-	if (i >= MAX_MB_FRAGS) {
+	if (i >= max) {
 		tx_req->mb = mb = m_defrag(mb, M_DONTWAIT);
 		if (mb == NULL)
 			return -EIO;
 		for (m = mb, i = 0; m != NULL; m = m->m_next, i++);
-		if (i >= MAX_MB_FRAGS)
+		if (i >= max)
 			return -EIO;
 	}
 	error = 0;
@@ -507,7 +517,7 @@ ipoib_send(struct ipoib_dev_priv *priv, 
 	 */
 	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->mb = mb;
-	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
+	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req, IPOIB_UD_TX_SG))) {
 		++dev->if_oerrors;
 		if (tx_req->mb)
 			m_freem(tx_req->mb);
@@ -771,8 +781,7 @@ int ipoib_ib_dev_stop(struct ipoib_dev_p
 				rx_req = &priv->rx_ring[i];
 				if (!rx_req->mb)
 					continue;
-				ipoib_ud_dma_unmap_rx(priv,
-						      priv->rx_ring[i].mapping);
+				ipoib_dma_unmap_rx(priv, &priv->rx_ring[i]);
 				m_freem(rx_req->mb);
 				rx_req->mb = NULL;
 			}

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c	Sat Feb 26 04:02:54 2011	(r219047)
@@ -992,6 +992,8 @@ ipoib_add_port(const char *format, struc
 		       hca->name, port, result);
 		goto device_init_failed;
 	}
+	if (ipoib_cm_admin_enabled(priv))
+		priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv));
 
 	INIT_IB_EVENT_HANDLER(&priv->event_handler,
 			      priv->ca, ipoib_event);
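
Assuming the HCA supports enough SRQ SGEs for max_cm_mtu to reach
IPOIB_CM_MAX_MTU, the MTU set here works out to (illustrative arithmetic,
not part of the commit):

	IPOIB_CM_MAX_MTU               = 64 * 1024    = 65536
	IPOIB_CM_MTU(IPOIB_CM_MAX_MTU) = 65536 - 0x10 = 65520

65520 bytes matches the connected-mode MTU used by Linux IPoIB, which is
the behavior the log message refers to.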

Modified: projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
==============================================================================
--- projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	Fri Feb 25 23:14:24 2011	(r219046)
+++ projects/ofed/head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	Sat Feb 26 04:02:54 2011	(r219047)
@@ -202,7 +202,7 @@ int ipoib_transport_dev_init(struct ipoi
 	if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
 		init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
 
-	init_attr.cap.max_send_sge = MAX_MB_FRAGS;
+	init_attr.cap.max_send_sge = IPOIB_UD_TX_SG;
 
 	priv->qp = ib_create_qp(priv->pd, &init_attr);
 	if (IS_ERR(priv->qp)) {
@@ -214,15 +214,15 @@ int ipoib_transport_dev_init(struct ipoi
 	IF_LLADDR(priv->dev)[2] = (priv->qp->qp_num >>  8) & 0xff;
 	IF_LLADDR(priv->dev)[3] = (priv->qp->qp_num      ) & 0xff;
 
-	for (i = 0; i < MAX_MB_FRAGS + 1; ++i)
+	for (i = 0; i < IPOIB_MAX_TX_SG; ++i)
 		priv->tx_sge[i].lkey = priv->mr->lkey;
 
 	priv->tx_wr.opcode	= IB_WR_SEND;
 	priv->tx_wr.sg_list	= priv->tx_sge;
 	priv->tx_wr.send_flags	= IB_SEND_SIGNALED;
 
-	priv->rx_sge[0].lkey = priv->mr->lkey;
-	priv->rx_wr.num_sge = 1;
+	for (i = 0; i < IPOIB_UD_RX_SG; ++i)
+		priv->rx_sge[i].lkey = priv->mr->lkey;
 	priv->rx_wr.next = NULL;
 	priv->rx_wr.sg_list = priv->rx_sge;
 


