Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 24 Jan 2008 02:05:56 GMT
From:      Kip Macy <kmacy@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 133966 for review
Message-ID:  <200801240205.m0O25uYf051995@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=133966

Change 133966 by kmacy@kmacy:storage:toehead on 2008/01/24 02:04:55

	first pass integration of ddp support code

Affected files ...

.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#4 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#3 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 edit

Differences ...

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#4 (text+ko) ====

@@ -867,14 +867,6 @@
 	return (idx);
 }
 
-void
-t3_release_ddp_resources(struct toepcb *toep)
-{
-	/*
-	 * This is a no-op until we have DDP support
-	 */
-}
-
 static inline void
 free_atid(struct t3cdev *cdev, unsigned int tid)
 {

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#3 (text+ko) ====

@@ -679,7 +679,7 @@
 	p->dgl_length = len;
 	p->dgl_offset = pg_off;
 	p->dgl_nelem = npages;
-#if 0	
+#ifdef notyet
 	p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off,
 				       PAGE_SIZE - pg_off,
 				       PCI_DMA_FROMDEVICE) - pg_off;
@@ -708,6 +708,47 @@
 	return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
 }
 
+
/*
 * Unmap a DDP gather list's pages from DMA.  Currently a no-op: the body
 * is compiled out under "notyet".
 * NOTE(review): the compiled-out code still references "pdev" (removed
 * from struct ddp_state by this change) and the Linux pci_unmap_page()
 * API -- it must be rewritten against busdma before it can be enabled.
 */
static void
unmap_ddp_gl(const struct ddp_gather_list *gl)
{
#ifdef notyet	
	int i;

	/* Nothing mapped, nothing to do. */
	if (!gl->nelem)
		return;

	/* The first page may be partially used, hence the offset math. */
	pci_unmap_page(pdev, gl->phys_addr[0] + gl->offset,
		       PAGE_SIZE - gl->offset, PCI_DMA_FROMDEVICE);
	for (i = 1; i < gl->nelem; ++i)
		pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE,
			       PCI_DMA_FROMDEVICE);

#endif
}
+
/*
 * Release the pages backing a DDP gather list, optionally marking them
 * dirty first.  Currently a no-op: the body is compiled out under
 * "notyet" and still uses the Linux page API (set_page_dirty_lock,
 * put_page) -- it must be ported to vm_page_t before being enabled.
 * Note this frees only the pages; the gather list structure itself is
 * freed by the caller.
 */
static void
ddp_gl_free_pages(struct ddp_gather_list *gl, int dirty)
{
#ifdef notyet
	int i;

	for (i = 0; i < gl->nelem; ++i) {
		if (dirty)
			set_page_dirty_lock(gl->pages[i]);
		put_page(gl->pages[i]);
	}
#endif	
}
+
+void
+t3_free_ddp_gl(struct ddp_gather_list *gl)
+{
+	unmap_ddp_gl(gl);
+	ddp_gl_free_pages(gl, 0);
+	free(gl, M_DEVBUF);
+}
+
 /* Max # of page pods for a buffer, enough for 1MB buffer at 4KB page size */
 #define MAX_PPODS 64U
 
@@ -746,6 +787,46 @@
 	return 0;
 }
 
+
+
+/*
+ * Reposts the kernel DDP buffer after it has previously become full and
+ * invalidated.  We just need to reset the offset and adjust the DDP flags.
+ * Conveniently, we can set the flags and the offset with a single message.
+ * Note that this function does not set the buffer length.  Again conveniently
+ * our kernel buffer is of fixed size.  If the length needs to be changed it
+ * needs to be done separately.
+ */
static void
t3_repost_kbuf(struct socket *so, unsigned int bufidx, int modulate, 
		    int activate)
{
	struct toepcb *toep = sototcpcb(so)->t_toe;
	struct ddp_state *p = &toep->tp_ddp_state;

	/* Reset the software view of the buffer: back to the gather list's
	 * starting offset, flags rearmed, and make it the current buffer. */
	p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
	p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
	p->buf_state[bufidx].gl = p->kbuf[bufidx];
	p->cur_buf = bufidx;
	p->kbuf_idx = bufidx;
	/* Buffers 0 and 1 use distinct VALID bits in the TCB, hence the two
	 * otherwise-parallel calls.  The flags+offset update goes out as a
	 * single message.
	 * NOTE(review): the t3_setup_ddpbufs() calls are not indented under
	 * their if/else and carry no braces -- consider reformatting. */
	if (!bufidx)
	t3_setup_ddpbufs(so, 0, 0, 0, 0,
			 V_TF_DDP_PSH_NO_INVALIDATE(p->kbuf_noinval) |
			 V_TF_DDP_BUF0_VALID(1),
			 V_TF_DDP_PSH_NO_INVALIDATE(1) | V_TF_DDP_OFF(1) |
			 V_TF_DDP_BUF0_VALID(1) |
			 V_TF_DDP_ACTIVE_BUF(activate), modulate);
	else
	t3_setup_ddpbufs(so, 0, 0, 0, 0,
			 V_TF_DDP_PSH_NO_INVALIDATE(p->kbuf_noinval) |
			 V_TF_DDP_BUF1_VALID(1) | 
			 V_TF_DDP_ACTIVE_BUF(activate),
			 V_TF_DDP_PSH_NO_INVALIDATE(1) | V_TF_DDP_OFF(1) |
			 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 
			 modulate);
	
}
+
 /*
  * Starting offset for the user DDP buffer.  A non-0 value ensures a DDP flush
  * won't block indefinitely if there's nothing to place (which should be rare).
@@ -820,7 +901,7 @@
 		return err;
 	if (gl) {
 		if (p->ubuf)
-			t3_free_ddp_gl(p->pdev, p->ubuf);
+			t3_free_ddp_gl(p->ubuf);
 		p->ubuf = gl;
 		t3_setup_ppods(so, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len,
 			       gl->dgl_offset, 0);
@@ -847,7 +928,7 @@
 	struct toepcb *toep = sototcpcb(so)->t_toe;
 	struct ddp_state *p = &toep->tp_ddp_state;
 
-	if (!p || !p->pdev)
+	if (!p)
 		return -1;
 
 	len = setup_iovec_ppods(so, iov, 0);
@@ -888,10 +969,251 @@
 				 OVERLAY_MASK | flags, 1);
 	}
 #ifdef T3_TRACE
-	T3_TRACE5(TIDTB(sk),
+	T3_TRACE5(TIDTB(so),
 		  "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x ubuf_idx %d "
 		  " kbuf_idx %d",
 		   p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx);
 #endif
 	return 0;
 }
+
+
+
+/*
+ * Returns whether a connection should enable DDP.  This happens when all of
+ * the following conditions are met:
+ * - the connection's ULP mode is DDP
+ * - DDP is not already enabled
+ * - the last receive was above the DDP threshold
+ * - receive buffers are in user space
+ * - receive side isn't shutdown (handled by caller)
+ * - the connection's receive window is big enough so that sizable buffers
+ *   can be posted without closing the window in the middle of DDP (checked
+ *   when the connection is offloaded)
+ */
#ifdef notyet
static int
so_should_ddp(const struct toepcb *toep, int last_recv_len)
{
	/*
	 * Enable DDP only when the connection is in DDP ULP mode, no DDP
	 * buffer is already current, the last receive exceeded the DDP
	 * threshold, and the receive window can hold a sizable buffer plus
	 * the reserved window (so posting never closes the window mid-DDP).
	 */
	return toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
	       !toep->tp_ddp_state.cur_buf &&	/* was "tp_dpp_state": typo */
	       last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
	       toep->tp_tp->rcv_wnd > 
	           (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + 
		    DDP_RSVD_WIN);
}
#endif
+/*
+ * Clean up DDP state that needs to survive until socket close time, such as the
+ * DDP buffers.  The buffers are already unmapped at this point as unmapping
+ * needs the PCI device and a socket may close long after the device is removed.
+ */
+void
+t3_cleanup_ddp(struct socket *so)
+{
+	struct toepcb *toep = sototcpcb(so)->t_toe;
+	struct ddp_state *p = &toep->tp_ddp_state;
+	int idx;
+
+	if (!p)
+		return;
+	
+	for (idx = 0; idx < NUM_DDP_KBUF; idx++)
+		if (p->kbuf[idx]) {
+			ddp_gl_free_pages(p->kbuf[idx], 0);
+			free(p->kbuf[idx], M_DEVBUF);
+		}
+
+	if (p->ubuf) {
+		ddp_gl_free_pages(p->ubuf, 0);
+		free(p->ubuf, M_DEVBUF);
+	}
+	toep->tp_ulp_mode = 0;
+}
+
+/*
+ * This is a companion to t3_cleanup_ddp() and releases the HW resources
+ * associated with a connection's DDP state, such as the page pods.
+ * It's called when HW is done with a connection.   The rest of the state
+ * remains available until both HW and the app are done with the connection.
+ */
+void
+t3_release_ddp_resources(struct toepcb *toep)
+{
+	struct ddp_state *p = &toep->tp_ddp_state;
+	struct tom_data *d = TOM_DATA(toep->tp_toedev);
+	int idx;
+	
+	for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
+		t3_free_ppods(d, p->kbuf_tag[idx], 
+		    p->kbuf_nppods[idx]);
+		unmap_ddp_gl(p->kbuf[idx]);
+	}
+
+	if (p->ubuf_nppods) {
+		t3_free_ppods(d, p->ubuf_tag, p->ubuf_nppods);
+		p->ubuf_nppods = 0;
+	}
+	if (p->ubuf)
+		unmap_ddp_gl(p->ubuf);
+	
+}
+
/*
 * Post the current kernel DDP buffer to the adapter: program its page-pod
 * tag and buffer length into the TCB, then activate it via
 * t3_repost_kbuf().
 */
void
t3_post_kbuf(struct socket *so, int modulate)
{
	struct toepcb *toep = sototcpcb(so)->t_toe;
	struct ddp_state *p = &toep->tp_ddp_state;

	/* NOTE(review): tag << 6 presumably makes room for the low tag/color
	 * bits of the ppod tag format -- confirm against the TCB layout. */
	t3_set_ddp_tag(so, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
	t3_set_ddp_buf(so, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
	t3_repost_kbuf(so, p->cur_buf, modulate, 1);

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(so),
		  "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
#endif
}
+
+/*
+ * Prepare a socket for DDP.  Must be called when the socket is known to be
+ * open.
+ */
/*
 * Prepare a socket for DDP.  Must be called when the socket is known to
 * be open.  Allocates NUM_DDP_KBUF kernel gather lists of kbuf_size bytes
 * each, reserves page pods for them, programs the page pods into the
 * adapter, and posts kernel buffer 0.  "waitall" selects the
 * no-invalidate-on-PSH behavior (DDP_BF_NOINVAL).
 *
 * Returns 0 on success or a positive errno (EINVAL, ENOMEM) on failure.
 */
int
t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall)
{
	int err = ENOMEM;
	unsigned int nppods, kbuf_pages, idx = 0;
	struct toepcb *toep = sototcpcb(so)->t_toe;
	struct ddp_state *p = &toep->tp_ddp_state;
	struct tom_data *d = TOM_DATA(toep->tp_toedev);

	/* The TCB's buffer-length field bounds a single DDP buffer. */
	if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
		return (EINVAL);

	kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	nppods = pages2ppods(kbuf_pages);

	p->kbuf_noinval = !!waitall;
	
	/* Seed the last tag so the error path can tell whether it was ever
	 * allocated.
	 * NOTE(review): idx is unsigned, and the "kbuf_tag[idx] < 0" test
	 * below only works if kbuf_tag is a signed type -- confirm in
	 * cxgb_t3_ddp.h. */
	p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
	for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
		/* Gather list header plus one page-pointer slot per page. */
		p->kbuf[idx] = 
		    malloc(sizeof (struct ddp_gather_list) + kbuf_pages *
			sizeof(vm_page_t *), M_DEVBUF, M_NOWAIT|M_ZERO);
		if (!p->kbuf[idx])
			goto err;

		p->kbuf_tag[idx] = t3_alloc_ppods(d, nppods);
		if (p->kbuf_tag[idx] < 0)
			goto err;

		p->kbuf_nppods[idx] = nppods;
		p->kbuf[idx]->dgl_length = kbuf_size;
		p->kbuf[idx]->dgl_offset = 0;
		p->kbuf[idx]->dgl_nelem = kbuf_pages;
#ifdef notyet		
		/* Linux-derived page allocation and DMA mapping; not yet
		 * ported to vm_page_t/busdma, so no pages are actually
		 * allocated or mapped here yet. */
		p->kbuf[idx]->pages = 
		    (struct page **)&p->kbuf[idx]->phys_addr[kbuf_pages];

		
		for (i = 0; i < kbuf_pages; ++i) {

			p->kbuf[idx]->pages[i] = alloc_page(sk->sk_allocation);
			if (!p->kbuf[idx]->pages[i]) {
				p->kbuf[idx]->nelem = i;
				goto err;
			}

		}

		for (i = 0; i < kbuf_pages; ++i)
			p->kbuf[idx]->phys_addr[i] = 
			    pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
					 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
#endif		
		t3_setup_ppods(so, p->kbuf[idx], nppods, p->kbuf_tag[idx], 
			       p->kbuf[idx]->dgl_length, 0, 0);
	}
	/* Point the TCB at kernel buffer 0 and activate it. */
	t3_set_ddp_tag(so, 0, p->kbuf_tag[0] << 6);
	t3_set_ddp_buf(so, 0, 0, p->kbuf[0]->dgl_length);
	t3_repost_kbuf(so, 0, 0, 1);
	t3_set_rcv_coalesce_enable(so, 
	    TOM_TUNABLE(TOE_DEV(so), ddp_rcvcoalesce));

#ifdef T3_TRACE
	T3_TRACE4(TIDTB(so),
		  "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
		   kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
#endif

	return 0;

err:
	/* NOTE(review): this runs with partially initialized kbuf state;
	 * verify the release/cleanup helpers tolerate NULL gather lists
	 * and never-allocated tags. */
	t3_release_ddp_resources(toep);
	t3_cleanup_ddp(so);
	return err;
}
+
/*
 * Copy "len" bytes of DDP payload belonging to mbuf "m", starting at
 * "offset", into the user iovec "to".  Currently a no-op returning 0:
 * the body is compiled out under "notyet" and is still written against
 * Linux interfaces (skb, memcpy_toiovec, BUG_ON) -- it must be ported to
 * mbuf/uio before it can be enabled.
 */
int
t3_ddp_copy(const struct mbuf *m, int offset, struct iovec *to, int len)
{
#ifdef notyet	
	int err, page_no, page_off;
	struct ddp_gather_list *gl = (struct ddp_gather_list *)skb->mac.raw;

	if (!gl->pages) {
		dump_stack();
		BUG_ON(1);
	}

	offset += gl->offset + TCP_SKB_CB(skb)->when;
	page_no = offset >> PAGE_SHIFT;
	page_off = offset & ~PAGE_MASK;

	/* Walk page by page; only the first page has a nonzero offset. */
	while (len) {
		int copy = min_t(int, len, PAGE_SIZE - page_off);

		err = memcpy_toiovec(to, page_address(gl->pages[page_no]) +
				     page_off, copy);
		if (err)
			return -EFAULT;
		page_no++;
		page_off = 0;
		len -= copy;
	}
#endif	
	return 0;
}
+
+/*
+ * Allocate n page pods.  Returns -1 on failure or the page pod tag.
+ */
+int t3_alloc_ppods(struct tom_data *td, unsigned int n)
+{
+	unsigned int i, j;
+
+	if (__predict_false(!td->ppod_map))
+		return -1;
+
+	mtx_lock(&td->ppod_map_lock);
+	for (i = 0; i < td->nppods; ) {
+		for (j = 0; j < n; ++j)           /* scan ppod_map[i..i+n-1] */
+			if (td->ppod_map[i + j]) {
+				i = i + j + 1;
+				goto next;
+			}
+
+		memset(&td->ppod_map[i], 1, n);   /* allocate range */
+		mtx_unlock(&td->ppod_map_lock);
+		return i;
+next:		;
+	}	
+	mtx_unlock(&td->ppod_map_lock);
+	return (0);
+}
+
/*
 * Return page pods [tag, tag + n) to the free pool by clearing their
 * in-use bytes in ppod_map.
 * NOTE(review): runs without ppod_map_lock; this is safe only if clearing
 * bytes cannot race the allocation scan in t3_alloc_ppods -- confirm.
 */
void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n)
{
	/* No need to take ppod_lock here */
	memset(&td->ppod_map[tag], 0, n);
}

==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 (text+ko) ====

@@ -85,7 +85,6 @@
 #define M_PPOD_PGSZ    0x3
 #define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
 
-struct pci_dev;
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <machine/bus.h>
@@ -105,7 +104,6 @@
 };
 
 struct ddp_state {
-	struct pci_dev *pdev;
 	struct ddp_buf_state buf_state[2];   /* per buffer state */
 	int cur_buf;
 	unsigned short kbuf_noinval;
@@ -132,33 +130,30 @@
 				      PSH flag set */
 };
 
-#ifdef notyet
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+
 /*
  * Returns 1 if a UBUF DMA buffer might be active.
  */
-static inline int t3_ddp_ubuf_pending(struct sock *so)
+static inline int t3_ddp_ubuf_pending(struct socket *so)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct ddp_state *p = DDP_STATE(tp);
+	struct toepcb *toep = sototcpcb(so)->t_toe;
+	struct ddp_state *p = &toep->tp_ddp_state;
 
 	/* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,
 	 * but DDP_STATE() is only valid if the connection actually enabled
 	 * DDP.
 	 */
-	if (!p)
-		return 0;
-
 	return (p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)) || 
 	       (p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
 }
-#endif
 
 int t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl,
 		   unsigned int nppods, unsigned int tag, unsigned int maxoff,
 		   unsigned int pg_off, unsigned int color);
 int t3_alloc_ppods(struct tom_data *td, unsigned int n);
 void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
-void t3_free_ddp_gl(struct pci_dev *pdev, struct ddp_gather_list *gl);
+void t3_free_ddp_gl(struct ddp_gather_list *gl);
 int t3_ddp_copy(const struct mbuf *skb, int offset, struct iovec *to,
 		int len);
 //void t3_repost_kbuf(struct socket *so, int modulate, int activate);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200801240205.m0O25uYf051995>