Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 7 Feb 2008 00:52:33 GMT
From:      Kip Macy <kmacy@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 134944 for review
Message-ID:  <200802070052.m170qX7B059331@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=134944

Change 134944 by kmacy@kmacy:entropy:iwarp on 2008/02/07 00:52:21

	IFtoehead 134879
	fix various issues tied to ddp setup
	fix dependencies for cxgb module unload

Affected files ...

.. //depot/projects/iwarp/sys/dev/cxgb/cxgb_offload.c#14 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/cxgb_sge.c#11 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/sys/mvec.h#6 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#8 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#7 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#2 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_defs.h#7 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#5 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_tom.c#5 integrate
.. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_vm.c#2 integrate
.. //depot/projects/iwarp/usr.sbin/cxgbtool/cxgbtool.c#5 integrate

Differences ...

==== //depot/projects/iwarp/sys/dev/cxgb/cxgb_offload.c#14 (text+ko) ====

@@ -448,7 +448,7 @@
 	case GET_RX_PAGE_INFO:
 		rx_page_info = data;
 		rx_page_info->page_size = tp->rx_pg_size;
-		rx_page_info->num = tp->rx_num_pgs;
+		rx_page_info->num = tp->rx_num_pgs;	
 		break;
 	case ULP_ISCSI_GET_PARAMS:
 	case ULP_ISCSI_SET_PARAMS:

==== //depot/projects/iwarp/sys/dev/cxgb/cxgb_sge.c#11 (text+ko) ====

@@ -73,11 +73,16 @@
 #endif
 
 int      txq_fills = 0;
-static int recycle_enable = 1;
+/*
+ * XXX don't re-enable this until TOE stops assuming
+ * we have an m_ext
+ */
+static int recycle_enable = 0;
 extern int cxgb_txq_buf_ring_size;
 int cxgb_cached_allocations;
 int cxgb_cached;
-int cxgb_ext_freed;
+int cxgb_ext_freed = 0;
+int cxgb_ext_inited = 0;
 extern int cxgb_use_16k_clusters;
 extern int cxgb_pcpu_cache_enable;
 
@@ -775,14 +780,6 @@
 void
 t3_sge_deinit_sw(adapter_t *sc)
 {
-	int i;
-	
-	callout_drain(&sc->sge_timer_ch);
-	if (sc->tq) 
-		taskqueue_drain(sc->tq, &sc->slow_intr_task);
-	for (i = 0; i < sc->params.nports; i++) 
-		if (sc->port[i].tq != NULL)
-			taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
 
 	mi_deinit();
 }
@@ -2441,7 +2438,8 @@
 	
 	bzero(cl, header_size);
 	m = (struct mbuf *)cl;
-
+	
+	cxgb_ext_inited++;
 	SLIST_INIT(&m->m_pkthdr.tags);
 	m->m_type = MT_DATA;
 	m->m_flags = flags | M_NOFREE | M_EXT;
@@ -3003,12 +3001,8 @@
 	return (err);
 }	
 
-
-/* 
- * broken by recent mbuf changes 
- */ 
 static int
-t3_dump_txq(SYSCTL_HANDLER_ARGS)
+t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
 {
 	struct sge_txq *txq;
 	struct sge_qset *qs;
@@ -3037,7 +3031,7 @@
 		txq->txq_dump_start = 0;
 		return (EINVAL);
 	}
-	err = t3_sge_read_ecntxt(qs->port->adapter, txq->cntxt_id, data);
+	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
 	if (err)
 		return (err);
 	
@@ -3081,7 +3075,68 @@
 	return (err);
 }
 
+static int
+t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
+{
+	struct sge_txq *txq;
+	struct sge_qset *qs;
+	int i, j, err, dump_end;
+	static int multiplier = 1;
+	struct sbuf *sb;
+	struct tx_desc *txd;
+	uint32_t *WR, wr_hi, wr_lo, gen;
+	
+	txq = arg1;
+	qs = txq_to_qset(txq, TXQ_CTRL);
+	if (txq->txq_dump_count == 0) {
+		return (0);
+	}
+	if (txq->txq_dump_count > 256) {
+		log(LOG_WARNING,
+		    "dump count is too large %d\n", txq->txq_dump_count);
+		txq->txq_dump_count = 1;
+		return (EINVAL);
+	}
+	if (txq->txq_dump_start > 255) {
+		log(LOG_WARNING,
+		    "dump start of %d is greater than queue size\n",
+		    txq->txq_dump_start);
+		txq->txq_dump_start = 0;
+		return (EINVAL);
+	}
 
+retry_sbufops:
+	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
+	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
+	    txq->txq_dump_start,
+	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
+
+	dump_end = txq->txq_dump_start + txq->txq_dump_count;
+	for (i = txq->txq_dump_start; i < dump_end; i++) {
+		txd = &txq->desc[i & (255)];
+		WR = (uint32_t *)txd->flit;
+		wr_hi = ntohl(WR[0]);
+		wr_lo = ntohl(WR[1]);		
+		gen = G_WR_GEN(wr_lo);
+		
+		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
+		    wr_hi, wr_lo, gen);
+		for (j = 2; j < 30; j += 4) 
+			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
+			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
+
+	}
+	if (sbuf_overflowed(sb)) {
+		sbuf_delete(sb);
+		multiplier++;
+		goto retry_sbufops;
+	}
+	sbuf_finish(sb);
+	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
+	sbuf_delete(sb);
+	return (err);
+}
+
 static int
 t3_lro_enable(SYSCTL_HANDLER_ARGS)
 {
@@ -3206,6 +3261,10 @@
 	    CTLFLAG_RD, &cxgb_ext_freed,
 	    0, "#times a cluster was freed through ext_free");
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
+	    "ext_inited",
+	    CTLFLAG_RD, &cxgb_ext_inited,
+	    0, "#times a cluster was initialized for ext_free");
+	SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
 	    "mbufs_outstanding",
 	    CTLFLAG_RD, &cxgb_mbufs_outstanding,
 	    0, "#mbufs in flight in the driver");
@@ -3255,8 +3314,8 @@
 		
 		for (j = 0; j < pi->nqsets; j++) {
 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
-			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid;
-			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist;
+			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid;
+			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist;
 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
 			
 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
@@ -3273,8 +3332,10 @@
 			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
 
-			
-			
+			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 
+			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
+			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
+
 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
 			    CTLFLAG_RD, &qs->rspq.size,
 			    0, "#entries in response queue");
@@ -3297,8 +3358,7 @@
 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
 			    0, t3_dump_rspq, "A", "dump of the response queue");
 
-			
-			
+
 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
 			    0, "#tunneled packets dropped");
@@ -3355,7 +3415,22 @@
 			    0, "txq #entries to dump");			
 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
-			    0, t3_dump_txq, "A", "dump of the transmit queue");
+			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");
+
+			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
+			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
+			    0, "ctrlq start idx for dump");
+			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
+			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
+			    0, "ctrl #entries to dump");			
+			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
+			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
+			    0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
+
+
+			
+
+			
 		}
 	}
 }

==== //depot/projects/iwarp/sys/dev/cxgb/sys/mvec.h#6 (text+ko) ====

@@ -66,7 +66,7 @@
 #define m_seq		m_pkthdr.csum_data	/* stored sequence */
 #define m_ddp_gl	m_ext.ext_buf		/* ddp list	*/
 #define m_ddp_flags	m_pkthdr.csum_flags	/* ddp flags	*/
-#define m_ulp_mode	m_ext.ext_type		/* upper level protocol	*/
+#define m_ulp_mode	m_pkthdr.tso_segsz	/* upper level protocol	*/
 
 extern uma_zone_t zone_miovec;
 

==== //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#8 (text+ko) ====

@@ -579,7 +579,7 @@
  * to the HW for the amount of data processed.
  */
 void
-t3_cleanup_rbuf(struct tcpcb *tp)
+t3_cleanup_rbuf(struct tcpcb *tp, int copied)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct socket *so;
@@ -587,17 +587,28 @@
 	int dack_mode, must_send, read;
 	u32 thres, credits, dack = 0;
 
+	so = tp->t_inpcb->inp_socket;
 	if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
-		(tp->t_state == TCPS_FIN_WAIT_2)))
+		(tp->t_state == TCPS_FIN_WAIT_2))) {
+		if (copied) {
+			SOCKBUF_LOCK(&so->so_rcv);
+			toep->tp_copied_seq += copied;
+			SOCKBUF_UNLOCK(&so->so_rcv);
+		}
+		
 		return;
-	INP_LOCK_ASSERT(tp->t_inpcb);
+	}
 	
-	so = tp->t_inpcb->inp_socket;
+	INP_LOCK_ASSERT(tp->t_inpcb);	
 	SOCKBUF_LOCK(&so->so_rcv);
-	read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc;
-	toep->tp_copied_seq += read;
-	toep->tp_enqueued_bytes -= read;
+	if (copied)
+		toep->tp_copied_seq += copied;
+	else {
+		read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc;
+		toep->tp_copied_seq += read;
+	}
 	credits = toep->tp_copied_seq - toep->tp_rcv_wup;
+	toep->tp_enqueued_bytes = so->so_rcv.sb_cc;
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	if (credits > so->so_rcv.sb_mbmax)
@@ -687,7 +698,7 @@
 cxgb_toe_rcvd(struct tcpcb *tp)
 {
 	INP_LOCK_ASSERT(tp->t_inpcb);
-	t3_cleanup_rbuf(tp);
+	t3_cleanup_rbuf(tp, 0);
 	
 	return (0);
 }
@@ -1054,10 +1065,9 @@
 /*
  * Determine the receive window size for a socket.
  */
-static unsigned int
-select_rcv_wnd(struct socket *so)
+static unsigned long
+select_rcv_wnd(struct toedev *dev, struct socket *so)
 {
-	struct toedev *dev = TOE_DEV(so);
 	struct tom_data *d = TOM_DATA(dev);
 	unsigned int wnd;
 	unsigned int max_rcv_wnd;
@@ -1065,7 +1075,9 @@
 	if (tcp_do_autorcvbuf)
 		wnd = tcp_autorcvbuf_max;
 	else
-		wnd = sbspace(&so->so_rcv);
+		wnd = so->so_rcv.sb_hiwat;
+
+	
 	
 	/* XXX
 	 * For receive coalescing to work effectively we need a receive window
@@ -1079,7 +1091,7 @@
 				    (uint32_t)d->rx_page_size * 23 :
 				    MAX_RCV_WND);
 	
-	return (min(wnd, max_rcv_wnd));
+	return min(wnd, max_rcv_wnd);
 }
 
 /*
@@ -1118,8 +1130,8 @@
 	 * XXX broken
 	 * 
 	 */
-	tp->rcv_wnd = select_rcv_wnd(so);
-	
+	tp->rcv_wnd = select_rcv_wnd(dev, so);
+
         toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) &&
 		       tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
 	toep->tp_qset_idx = 0;
@@ -1608,7 +1620,6 @@
 	uint64_t t;
 	__be64 *tcb;
 
-	TRACE_ENTER;
 	/* Note that we only accout for CPL_GET_TCB issued by the DDP code. We
 	 * really need a cookie in order to dispatch the RPLs.
 	 */
@@ -1625,7 +1636,6 @@
 		m_freem(m);
 		if (__predict_true((so->so_state & SS_NOFDREF) == 0))
 			sorwakeup(so);
-		TRACE_EXIT;	
 		return;
 	}
 
@@ -1684,7 +1694,6 @@
 	
 	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
 		handle_excess_rx(toep, m);
-		TRACE_EXIT;
 		return;
 	}
 
@@ -1704,7 +1713,7 @@
 		}
 #endif
 		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
-		bsp->flags &= ~DDP_BF_NOCOPY;
+		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
 		q->cur_buf ^= 1;
 	} else if (bsp->flags & DDP_BF_NOFLIP) {
 
@@ -1720,11 +1729,11 @@
 		 * and we need to decrement the posted count.
 		 */
 		if (m->m_pkthdr.len == 0) {
-			if (ddp_offset == 0)
+			if (ddp_offset == 0) {
 				q->kbuf_posted--;
-			panic("length not set");
+				bsp->flags |= DDP_BF_NODATA;
+			}
 			m_free(m);
-			TRACE_EXIT;
 			return;
 		}
 	} else {
@@ -1732,12 +1741,12 @@
 		 * but it got here way late and nobody cares anymore.
 		 */
 		m_free(m);
-		TRACE_EXIT;
 		return;
 	}
 
 	tp = toep->tp_tp;
 	m->m_ddp_gl = (unsigned char *)bsp->gl;
+	m->m_flags |= M_DDP;
 	m->m_seq = tp->rcv_nxt;
 	tp->rcv_nxt += m->m_pkthdr.len;
 	tp->t_rcvtime = ticks;
@@ -1750,10 +1759,12 @@
 		  "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
 		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
 #endif
-	sbappend(&so->so_rcv, m);
+	SOCKBUF_LOCK(&so->so_rcv);
+	sbappendstream_locked(&so->so_rcv, m);
 	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
-		sorwakeup(so);
-	TRACE_EXIT;
+		sorwakeup_locked(so);
+	else
+		SOCKBUF_UNLOCK(&so->so_rcv);
 }
 
 /*
@@ -1766,9 +1777,10 @@
 	struct toepcb *toep = (struct toepcb *)ctx;
 
 	/* OK if socket doesn't exist */
-	if (toep == NULL)
+	if (toep == NULL) {
+		printf("null toep in do_get_tcb_rpl\n");
 		return (CPL_RET_BUF_DONE);
-
+	}
 	tcb_rpl_as_ddp_complete(toep, m);
 
 	return (0);
@@ -1778,7 +1790,6 @@
 handle_ddp_data(struct toepcb *toep, struct mbuf *m)
 {
 	struct tcpcb *tp = toep->tp_tp;
-	struct socket *so;
 	struct ddp_state *q;
 	struct ddp_buf_state *bsp;
 	struct cpl_rx_data *hdr = cplhdr(m);
@@ -1790,7 +1801,10 @@
 	TRACE_ENTER;
 	q = &toep->tp_ddp_state;
 	bsp = &q->buf_state[q->cur_buf];
-	m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+
+	printf("rcv_nxt=0x%x tp->rcv_next=0x%x len=%d\n",
+	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
 
 #ifdef T3_TRACE
 	if ((int)m->m_pkthdr.len < 0) {
@@ -1799,20 +1813,19 @@
 #endif
 
 	m->m_ddp_gl = (unsigned char *)bsp->gl;
+	m->m_flags |= M_DDP;
 	m->m_cur_offset = bsp->cur_offset;
 	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
 	if (bsp->flags & DDP_BF_NOCOPY)
 		bsp->flags &= ~DDP_BF_NOCOPY;
 
+	printf("ddp flags=0x%x\n", m->m_ddp_flags);
+
 	m->m_seq = tp->rcv_nxt;
 	tp->rcv_nxt = rcv_nxt;
 	bsp->cur_offset += m->m_pkthdr.len;
 	if (!(bsp->flags & DDP_BF_NOFLIP))
 		q->cur_buf ^= 1;
-	tp->t_rcvtime = ticks;
-
-	so = toeptoso(toep);
-	sbappend(&so->so_rcv, m);
 	/*
 	 * For now, don't re-enable DDP after a connection fell out of  DDP
 	 * mode.
@@ -1837,14 +1850,14 @@
 	if (__predict_false(so_no_receive(so))) {
 		handle_excess_rx(toep, m);
 		INP_UNLOCK(tp->t_inpcb);
+		TRACE_EXIT;
 		return;
 	}
 
 	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
 		handle_ddp_data(toep, m);
-
+	
 	m->m_seq = ntohl(hdr->seq);
-	m->m_ddp_flags = 0;
 	m->m_ulp_mode = 0;                    /* for iSCSI */
 
 #if VALIDATE_SEQ
@@ -1889,11 +1902,12 @@
 	    "new_rx_data: seq 0x%x len %u",
 	    m->m_seq, m->m_pkthdr.len);
 #endif
+	INP_UNLOCK(tp->t_inpcb);
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sb_notify(&so->so_rcv))
 		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len);
 
-	sbappend_locked(&so->so_rcv, m);
+	sbappendstream_locked(&so->so_rcv, m);
 
 #ifdef notyet
 	/*
@@ -1906,7 +1920,7 @@
 		so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax));
 #endif
 	
-	INP_UNLOCK(tp->t_inpcb);
+
 	DPRINTF("sb_cc=%d sb_mbcnt=%d\n",
 	    so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt);
 	    
@@ -1940,13 +1954,17 @@
 	struct cpl_rx_data_ddp *hdr;
 	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
 	struct socket *so = toeptoso(toep);
+	int nomoredata = 0;
+	
+	if (__predict_false(so_no_receive(so))) {
+		struct inpcb *inp = sotoinpcb(so);
 
-	if (__predict_false(so_no_receive(so))) {
+		INP_LOCK(inp);
 		handle_excess_rx(toep, m);
+		INP_UNLOCK(inp);
 		return;
 	}
 	
-	TRACE_ENTER;
 	tp = sototcpcb(so);
 	q = &toep->tp_ddp_state;
 	hdr = cplhdr(m);
@@ -1971,7 +1989,7 @@
 	/*
 	 * Overload to store old RCV_NXT
 	 */
-	m->m_pkthdr.csum_data = tp->rcv_nxt;
+	m->m_seq = tp->rcv_nxt;
 	tp->rcv_nxt = rcv_nxt;
 
 	/*
@@ -1979,7 +1997,14 @@
 	 * m->m_len here, we need to be very careful that nothing from now on
 	 * interprets ->len of this packet the usual way.
 	 */
-	m->m_len = tp->rcv_nxt - m->m_pkthdr.csum_data;
+	m->m_len = m->m_pkthdr.len = tp->rcv_nxt - m->m_seq;
+	/*
+	 * Length is only meaningful for kbuf
+	 */
+	if (!(bsp->flags & DDP_BF_NOCOPY))
+		KASSERT(m->m_len <= bsp->gl->dgl_length,
+		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
+			m->m_len, bsp->gl->dgl_length));
 
 	/*
 	 * Figure out where the new data was placed in the buffer and store it
@@ -1989,8 +2014,9 @@
 	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
 	m->m_cur_offset = end_offset - m->m_pkthdr.len;
 	m->m_ddp_gl = (unsigned char *)bsp->gl;
+	m->m_flags |= M_DDP;
 	bsp->cur_offset = end_offset;
-
+	toep->tp_enqueued_bytes += m->m_pkthdr.len;
 	/*
 	 * Bit 0 of flags stores whether the DDP buffer is completed.
 	 * Note that other parts of the code depend on this being in bit 0.
@@ -1998,26 +2024,30 @@
 	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
 		panic("spurious ddp completion");
 	} else {
-		m->m_pkthdr.csum_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
-		if (m->m_pkthdr.csum_flags && !(bsp->flags & DDP_BF_NOFLIP))
+		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
+		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP)) 
 			q->cur_buf ^= 1;                     /* flip buffers */
 	}
 
 	if (bsp->flags & DDP_BF_NOCOPY) {
-		m->m_pkthdr.csum_flags |= (bsp->flags & DDP_BF_NOCOPY);
+		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
 		bsp->flags &= ~DDP_BF_NOCOPY;
 	}
 
 	if (ddp_report & F_DDP_PSH)
-		m->m_pkthdr.csum_flags |= DDP_BF_PSH;
-	
+		m->m_ddp_flags |= DDP_BF_PSH;
+	if (nomoredata)
+		m->m_ddp_flags |= DDP_BF_NODATA;
+
 	tp->t_rcvtime = ticks;
+
+	SOCKBUF_LOCK(&so->so_rcv);
 	sbappendstream_locked(&so->so_rcv, m);
 	
 	if ((so->so_state & SS_NOFDREF) == 0)
 		sorwakeup_locked(so);
-
-	TRACE_EXIT;
+	else
+		SOCKBUF_UNLOCK(&so->so_rcv);
 }
 
 #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
@@ -2057,9 +2087,14 @@
 	struct ddp_buf_state *bsp;
 	struct cpl_rx_ddp_complete *hdr;
 	unsigned int ddp_report, buf_idx, when;
+	int nomoredata = 0;
 
 	if (__predict_false(so_no_receive(so))) {
+		struct inpcb *inp = sotoinpcb(so);
+
+		INP_LOCK(inp);
 		handle_excess_rx(toep, m);
+		INP_UNLOCK(inp);
 		return;
 	}
 	TRACE_ENTER;
@@ -2070,7 +2105,7 @@
 	bsp = &q->buf_state[buf_idx];
 
 	when = bsp->cur_offset;
-	m->m_len = G_DDP_OFFSET(ddp_report) - when;
+	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
 
 #ifdef T3_TRACE
 	T3_TRACE5(TIDTB(sk),
@@ -2082,9 +2117,12 @@
 
 	bsp->cur_offset += m->m_len;
 
-	if (!(bsp->flags & DDP_BF_NOFLIP))
+	if (!(bsp->flags & DDP_BF_NOFLIP)) {
 		q->cur_buf ^= 1;                     /* flip buffers */
-
+		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
+			nomoredata=1;
+	}
+		
 #ifdef T3_TRACE
 	T3_TRACE4(TIDTB(sk),
 		  "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
@@ -2093,18 +2131,23 @@
 		   G_DDP_OFFSET(ddp_report));
 #endif
 	m->m_ddp_gl = (unsigned char *)bsp->gl;
-	m->m_pkthdr.csum_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
+	m->m_flags |= M_DDP;
+	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
 	if (bsp->flags & DDP_BF_NOCOPY)
 		bsp->flags &= ~DDP_BF_NOCOPY;
+	if (nomoredata)
+		m->m_ddp_flags |= DDP_BF_NODATA;
+
 	m->m_pkthdr.csum_data = tp->rcv_nxt;
 	tp->rcv_nxt += m->m_len;
 
 	tp->t_rcvtime = ticks;
+	SOCKBUF_LOCK(&so->so_rcv);
 	sbappendstream_locked(&so->so_rcv, m);
 	
 	if ((so->so_state & SS_NOFDREF) == 0)
 		sorwakeup_locked(so);
-	
+	SOCKBUF_UNLOCK(&so->so_rcv);
 	TRACE_EXIT;
 }
 
@@ -2184,8 +2227,9 @@
 
 	q = &toep->tp_ddp_state;
 	bsp = &q->buf_state[q->cur_buf];
-	m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
 	m->m_ddp_gl = (unsigned char *)bsp->gl;
+	m->m_flags |= M_DDP;
 	m->m_cur_offset = bsp->cur_offset;
 	m->m_ddp_flags = 
 	    DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
@@ -2195,7 +2239,7 @@
 	if (!(bsp->flags & DDP_BF_NOFLIP))
 		q->cur_buf ^= 1;
 	tp->t_rcvtime = ticks;
-	sbappend(&so->so_rcv, m);
+	sbappendstream(&so->so_rcv, m);
 	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
 		sorwakeup(so);
 	return (1);
@@ -2918,7 +2962,8 @@
 	th.th_seq = req->rcv_isn;
 	th.th_flags = TH_SYN;
 
-	toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn;
+	toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
+
 	
 	inc.inc_isipv6 = 0;
 	inc.inc_len = 0;
@@ -3047,14 +3092,15 @@
 	newtoep->tp_flags = TP_SYN_RCVD;
 	newtoep->tp_tid = tid;
 	newtoep->tp_toedev = tdev;
+	tp->rcv_wnd = select_rcv_wnd(tdev, so);
 	
-	printf("inserting tid=%d\n", tid);
+	printf("inserting tid=%d rcv_wnd=%ld\n", tid, tp->rcv_wnd);
 	cxgb_insert_tid(cdev, d->client, newtoep, tid);
 	SOCK_LOCK(so);
 	LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
 	SOCK_UNLOCK(so);
 	
-	
+#ifdef notyet	
 	if (lctx->ulp_mode) {
 		ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
 		
@@ -3063,7 +3109,22 @@
 		else
 			newtoep->tp_ulp_mode = lctx->ulp_mode;
 	}
+#else
+	newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && /* !sock_flag(sk, NO_DDP) && */
+		       tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
+
+	if (newtoep->tp_ulp_mode) {
+		ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
+		
+		if (ddp_mbuf == NULL)
+			newtoep->tp_ulp_mode = 0;
+	}
+	
+	printf("ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d\n",
+	    TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
 
+#endif
+
 	set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
 
 	DPRINTF("adding request to syn cache\n");
@@ -3346,7 +3407,8 @@
 	toep->tp_flags = 0;
 	tp->t_toe = toep;
 	reset_wr_list(toep);
-	tp->rcv_wnd = select_rcv_wnd(so);
+	tp->rcv_wnd = select_rcv_wnd(tdev, so);
+	tp->rcv_nxt = toep->tp_copied_seq;
 	install_offload_ops(so);
 	
 	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
@@ -3728,7 +3790,6 @@
 	const struct tom_data *td = TOM_DATA(TOE_DEV(so));
 	unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
 	
-	TRACE_ENTER;
 	for (i = 0; i < nppods; ++i) {
 		m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
 		m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
@@ -3756,8 +3817,6 @@
 		send_or_defer(toep, m, 0);
 		ppod_addr += PPOD_SIZE;
 	}
-
-	TRACE_EXIT;
 	return (0);
 }
 
@@ -3905,7 +3964,6 @@
 	struct cpl_set_tcb_field *req;
 	struct ddp_state *p = &toep->tp_ddp_state;
 
-	TRACE_ENTER;
 	wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
 	m = m_gethdr_nofail(wrlen);
 	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
@@ -3960,7 +4018,6 @@
 		  bufidx, tag0, tag1, len);
 #endif
 	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
-	TRACE_EXIT;
 }
 
 /*
@@ -3978,7 +4035,6 @@
 	struct work_request_hdr *wr;
 	struct cpl_set_tcb_field *req;
 
-	TRACE_ENTER;
 	wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
 		(len1 ? sizeof(*req) : 0) +
 		(modulate ? sizeof(struct cpl_rx_data_ack) : 0);
@@ -4024,7 +4080,6 @@
 #endif
 
 	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
-	TRACE_EXIT;
 }
 
 void

==== //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#7 (text+ko) ====

@@ -41,6 +41,7 @@
 #include <sys/condvar.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <sys/socketvar.h>
@@ -48,6 +49,7 @@
 #include <sys/file.h>
 
 #include <machine/bus.h>
+#include <machine/cpu.h>
 
 #include <net/if.h>
 #include <net/route.h>
@@ -257,6 +259,11 @@
 static int
 so_should_ddp(const struct toepcb *toep, int last_recv_len)
 {
+
+	DPRINTF("ulp_mode=%d last_recv_len=%d ddp_thresh=%d rcv_wnd=%ld ddp_copy_limit=%d\n",
+	    toep->tp_ulp_mode, last_recv_len,  TOM_TUNABLE(toep->tp_toedev, ddp_thres),
+	    toep->tp_tp->rcv_wnd, (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN));
+
 	return toep->tp_ulp_mode == ULP_MODE_TCPDDP && (toep->tp_ddp_state.kbuf[0] == NULL) &&
 	       last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
 	       toep->tp_tp->rcv_wnd > 
@@ -278,28 +285,37 @@
 static int
 m_uiomove(const struct mbuf *m, int offset, int len, struct uio *uio)
 {
-	int curlen, err = 0;
+	int curlen, startlen, resid_init, err = 0;
 	caddr_t buf;
-	
+
+	DPRINTF("m_uiomove(m=%p, offset=%d, len=%d, ...)\n",
+	    m, offset, len);
+
+	startlen = len;
+	resid_init = uio->uio_resid;
 	while (m && len) {
 		buf = mtod(m, caddr_t);
 		curlen = m->m_len;
-		if (offset < curlen) {
+		if (offset && (offset < curlen)) {
 			curlen -= offset;
 			buf += offset;
 			offset = 0;
-		} else {
+		} else if (offset) {
 			offset -= curlen;
 			m = m->m_next;
 			continue;
 		}
+		err = uiomove(buf, min(len, curlen), uio);
+		if (err) {
+			printf("uiomove returned %d\n", err);
+			return (err);
+		}
 		
-		err = uiomove_frombuf(buf, min(len, curlen), uio);
-		if (err)
-			return (err);
-		len -= min(len, m->m_len);
+		len -= min(len, curlen);
 		m = m->m_next;
 	}
+	DPRINTF("copied %d bytes - resid_init=%d uio_resid=%d\n",
+	    startlen - len, resid_init, uio->uio_resid);
 	return (err);
 }
 
@@ -312,16 +328,20 @@
 copy_data(const struct mbuf *m, int offset, int len, struct uio *uio)
 {
 	struct iovec *to = uio->uio_iov;
+	int err;
+
 	
-	if (__predict_true(!is_ddp(m)))                             /* RX_DATA */
+	if (__predict_true(!is_ddp(m))) {                             /* RX_DATA */
 		return m_uiomove(m, offset, len, uio);
-	if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */
+	} if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */
 		to->iov_len -= len;
 		to->iov_base = ((caddr_t)to->iov_base) + len;
 		uio->uio_iov = to;
+		uio->uio_resid -= len;
 		return (0);
 	}
-	return t3_ddp_copy(m, offset, uio, len);             /* kernel DDP */
+	err = t3_ddp_copy(m, offset, uio, len);             /* kernel DDP */
+	return (err);
 }
 
 static void
@@ -509,7 +529,45 @@
 	return pru_sosend(so, addr, uio, top, control, flags, td);
 }
 
+/*
+ * Following replacement or removal of the first mbuf on the first mbuf chain
+ * of a socket buffer, push necessary state changes back into the socket
+ * buffer so that other consumers see the values consistently.  'nextrecord'
+ * is the callers locally stored value of the original value of
+ * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
+ * NOTE: 'nextrecord' may be NULL.
+ */
+#if 1
+static __inline void
+sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
+{
+
+	SOCKBUF_LOCK_ASSERT(sb);
+	/*
+	 * First, update for the new value of nextrecord.  If necessary, make
+	 * it the first record.
+	 */
+	if (sb->sb_mb != NULL)
+		sb->sb_mb->m_nextpkt = nextrecord;
+	else
+		sb->sb_mb = nextrecord;
 
+        /*
+         * Now update any dependent socket buffer fields to reflect the new
+         * state.  This is an expanded inline of SB_EMPTY_FIXUP(), with the
+	 * addition of a second clause that takes care of the case where
+	 * sb_mb has been updated, but remains the last record.
+         */
+        if (sb->sb_mb == NULL) {
+                sb->sb_mbtail = NULL;
+                sb->sb_lastrecord = NULL;
+        } else if (sb->sb_mb->m_nextpkt == NULL)
+                sb->sb_lastrecord = sb->sb_mb;
+}
+#endif
+
+#define IS_NONBLOCKING(so)	((so)->so_state & SS_NBIO)
+
 static int
 t3_soreceive(struct socket *so, int *flagsp, struct uio *uio)
 {
@@ -517,23 +575,25 @@
 	struct toepcb *toep = tp->t_toe;
 	struct mbuf *m;
 	uint32_t offset;
-	int err, flags, avail, len, buffers_freed = 0, copied = 0;
+	int err, flags, avail, len, copied, copied_unacked;
 	int target;		/* Read at least this many bytes */
 	int user_ddp_ok, user_ddp_pending = 0;
 	struct ddp_state *p;
 	struct inpcb *inp = sotoinpcb(so);
-	
+
+	avail = offset = copied = copied_unacked = 0;
 	flags = flagsp ? (*flagsp &~ MSG_EOR) : 0;
 
 	err = sblock(&so->so_rcv, SBLOCKWAIT(flags));
+
 	if (err)
 		return (err);
-	TRACE_ENTER;
+
 	SOCKBUF_LOCK(&so->so_rcv);
 restart:
 	len = uio->uio_resid;
 	m = so->so_rcv.sb_mb;
-	target = (flags & MSG_WAITALL) ? min(len, so->so_rcv.sb_hiwat) : so->so_rcv.sb_lowat;
+	target = (flags & MSG_WAITALL) ? len : so->so_rcv.sb_lowat;
 	p = &toep->tp_ddp_state;
 	user_ddp_ok = p->ubuf_ddp_ready;
 	p->cancel_ubuf = 0;
@@ -561,6 +621,8 @@
 			so->so_error = 0;
 			goto done;
 		}
+		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) 
+			goto done;

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200802070052.m170qX7B059331>