From owner-p4-projects@FreeBSD.ORG Thu Feb 7 00:52:34 2008 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id C7B3B16A419; Thu, 7 Feb 2008 00:52:33 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 8C45116A417 for ; Thu, 7 Feb 2008 00:52:33 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 74EC413C45E for ; Thu, 7 Feb 2008 00:52:33 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id m170qXXa059334 for ; Thu, 7 Feb 2008 00:52:33 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.1/8.14.1/Submit) id m170qX7B059331 for perforce@freebsd.org; Thu, 7 Feb 2008 00:52:33 GMT (envelope-from kmacy@freebsd.org) Date: Thu, 7 Feb 2008 00:52:33 GMT Message-Id: <200802070052.m170qX7B059331@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 134944 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 07 Feb 2008 00:52:34 -0000 http://perforce.freebsd.org/chv.cgi?CH=134944 Change 134944 by kmacy@kmacy:entropy:iwarp on 2008/02/07 00:52:21 IFtoehead 134879 fix various issues tied to ddp setup fix dependencies for cxgb module unload Affected files ... .. //depot/projects/iwarp/sys/dev/cxgb/cxgb_offload.c#14 integrate .. //depot/projects/iwarp/sys/dev/cxgb/cxgb_sge.c#11 integrate .. //depot/projects/iwarp/sys/dev/cxgb/sys/mvec.h#6 integrate .. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#8 integrate .. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#7 integrate .. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#2 integrate .. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_defs.h#7 integrate .. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#5 integrate .. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_tom.c#5 integrate .. //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_vm.c#2 integrate .. //depot/projects/iwarp/usr.sbin/cxgbtool/cxgbtool.c#5 integrate Differences ... ==== //depot/projects/iwarp/sys/dev/cxgb/cxgb_offload.c#14 (text+ko) ==== @@ -448,7 +448,7 @@ case GET_RX_PAGE_INFO: rx_page_info = data; rx_page_info->page_size = tp->rx_pg_size; - rx_page_info->num = tp->rx_num_pgs; + rx_page_info->num = tp->rx_num_pgs; break; case ULP_ISCSI_GET_PARAMS: case ULP_ISCSI_SET_PARAMS: ==== //depot/projects/iwarp/sys/dev/cxgb/cxgb_sge.c#11 (text+ko) ==== @@ -73,11 +73,16 @@ #endif int txq_fills = 0; -static int recycle_enable = 1; +/* + * XXX don't re-enable this until TOE stops assuming + * we have an m_ext + */ +static int recycle_enable = 0; extern int cxgb_txq_buf_ring_size; int cxgb_cached_allocations; int cxgb_cached; -int cxgb_ext_freed; +int cxgb_ext_freed = 0; +int cxgb_ext_inited = 0; extern int cxgb_use_16k_clusters; extern int cxgb_pcpu_cache_enable; @@ -775,14 +780,6 @@ void t3_sge_deinit_sw(adapter_t *sc) { - int i; - - callout_drain(&sc->sge_timer_ch); - if (sc->tq) - taskqueue_drain(sc->tq, &sc->slow_intr_task); - for (i = 0; i < sc->params.nports; i++) - if (sc->port[i].tq != NULL) - taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task); mi_deinit(); } @@ -2441,7 +2438,8 @@ bzero(cl, header_size); m = (struct mbuf *)cl; - + + cxgb_ext_inited++; SLIST_INIT(&m->m_pkthdr.tags); m->m_type = MT_DATA; m->m_flags = flags | M_NOFREE | M_EXT; @@ -3003,12 +3001,8 @@ return (err); } - -/* - * broken by recent mbuf changes - */ static int -t3_dump_txq(SYSCTL_HANDLER_ARGS) +t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) { struct sge_txq *txq; struct sge_qset *qs; @@ -3037,7 +3031,7 @@ txq->txq_dump_start = 0; return (EINVAL); } - err = t3_sge_read_ecntxt(qs->port->adapter, txq->cntxt_id, data); + err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); if (err) return (err); @@ -3081,7 +3075,68 @@ return (err); } +static int +t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) +{ + struct sge_txq *txq; + struct sge_qset *qs; + int i, j, err, dump_end; + static int multiplier = 1; + struct sbuf *sb; + struct tx_desc *txd; + uint32_t *WR, wr_hi, wr_lo, gen; + + txq = arg1; + qs = txq_to_qset(txq, TXQ_CTRL); + if (txq->txq_dump_count == 0) { + return (0); + } + if (txq->txq_dump_count > 256) { + log(LOG_WARNING, + "dump count is too large %d\n", txq->txq_dump_count); + txq->txq_dump_count = 1; + return (EINVAL); + } + if (txq->txq_dump_start > 255) { + log(LOG_WARNING, + "dump start of %d is greater than queue size\n", + txq->txq_dump_start); + txq->txq_dump_start = 0; + return (EINVAL); + } +retry_sbufops: + sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); + sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, + txq->txq_dump_start, + (txq->txq_dump_start + txq->txq_dump_count) & 255); + + dump_end = txq->txq_dump_start + txq->txq_dump_count; + for (i = txq->txq_dump_start; i < dump_end; i++) { + txd = &txq->desc[i & (255)]; + WR = (uint32_t *)txd->flit; + wr_hi = ntohl(WR[0]); + wr_lo = ntohl(WR[1]); + gen = G_WR_GEN(wr_lo); + + sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", + wr_hi, wr_lo, gen); + for (j = 2; j < 30; j += 4) + sbuf_printf(sb, "\t%08x %08x %08x %08x \n", + WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); + + } + if (sbuf_overflowed(sb)) { + sbuf_delete(sb); + multiplier++; + goto retry_sbufops; + } + sbuf_finish(sb); + err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); + sbuf_delete(sb); + return (err); +} + static int t3_lro_enable(SYSCTL_HANDLER_ARGS) { @@ -3206,6 +3261,10 @@ CTLFLAG_RD, &cxgb_ext_freed, 0, "#times a cluster was freed through ext_free"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "ext_inited", + CTLFLAG_RD, &cxgb_ext_inited, + 0, "#times a cluster was initialized for ext_free"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mbufs_outstanding", CTLFLAG_RD, &cxgb_mbufs_outstanding, 0, "#mbufs in flight in the driver"); @@ -3255,8 +3314,8 @@ for (j = 0; j < pi->nqsets; j++) { struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; - struct sysctl_oid *qspoid, *rspqpoid, *txqpoid; - struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist; + struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid; + struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist; struct sge_txq *txq = &qs->txq[TXQ_ETH]; snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); @@ -3273,8 +3332,10 @@ txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); txqpoidlist = SYSCTL_CHILDREN(txqpoid); - - + ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, + txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); + ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); + SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", CTLFLAG_RD, &qs->rspq.size, 0, "#entries in response queue"); @@ -3297,8 +3358,7 @@ CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 0, t3_dump_rspq, "A", "dump of the response queue"); - - + SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped", CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops, 0, "#tunneled packets dropped"); @@ -3355,7 +3415,22 @@ 0, "txq #entries to dump"); SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], - 0, t3_dump_txq, "A", "dump of the transmit queue"); + 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); + + SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", + CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, + 0, "ctrlq start idx for dump"); + SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", + CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, + 0, "ctrl #entries to dump"); + SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", + CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], + 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); + + + + + } } } ==== //depot/projects/iwarp/sys/dev/cxgb/sys/mvec.h#6 (text+ko) ==== @@ -66,7 +66,7 @@ #define m_seq m_pkthdr.csum_data /* stored sequence */ #define m_ddp_gl m_ext.ext_buf /* ddp list */ #define m_ddp_flags m_pkthdr.csum_flags /* ddp flags */ -#define m_ulp_mode m_ext.ext_type /* upper level protocol */ +#define m_ulp_mode m_pkthdr.tso_segsz /* upper level protocol */ extern uma_zone_t zone_miovec; ==== //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#8 (text+ko) ==== @@ -579,7 +579,7 @@ * to the HW for the amount of data processed. */ void -t3_cleanup_rbuf(struct tcpcb *tp) +t3_cleanup_rbuf(struct tcpcb *tp, int copied) { struct toepcb *toep = tp->t_toe; struct socket *so; @@ -587,17 +587,28 @@ int dack_mode, must_send, read; u32 thres, credits, dack = 0; + so = tp->t_inpcb->inp_socket; if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || - (tp->t_state == TCPS_FIN_WAIT_2))) + (tp->t_state == TCPS_FIN_WAIT_2))) { + if (copied) { + SOCKBUF_LOCK(&so->so_rcv); + toep->tp_copied_seq += copied; + SOCKBUF_UNLOCK(&so->so_rcv); + } + return; - INP_LOCK_ASSERT(tp->t_inpcb); + } - so = tp->t_inpcb->inp_socket; + INP_LOCK_ASSERT(tp->t_inpcb); SOCKBUF_LOCK(&so->so_rcv); - read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; - toep->tp_copied_seq += read; - toep->tp_enqueued_bytes -= read; + if (copied) + toep->tp_copied_seq += copied; + else { + read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; + toep->tp_copied_seq += read; + } credits = toep->tp_copied_seq - toep->tp_rcv_wup; + toep->tp_enqueued_bytes = so->so_rcv.sb_cc; SOCKBUF_UNLOCK(&so->so_rcv); if (credits > so->so_rcv.sb_mbmax) @@ -687,7 +698,7 @@ cxgb_toe_rcvd(struct tcpcb *tp) { INP_LOCK_ASSERT(tp->t_inpcb); - t3_cleanup_rbuf(tp); + t3_cleanup_rbuf(tp, 0); return (0); } @@ -1054,10 +1065,9 @@ /* * Determine the receive window size for a socket. */ -static unsigned int -select_rcv_wnd(struct socket *so) +static unsigned long +select_rcv_wnd(struct toedev *dev, struct socket *so) { - struct toedev *dev = TOE_DEV(so); struct tom_data *d = TOM_DATA(dev); unsigned int wnd; unsigned int max_rcv_wnd; @@ -1065,7 +1075,9 @@ if (tcp_do_autorcvbuf) wnd = tcp_autorcvbuf_max; else - wnd = sbspace(&so->so_rcv); + wnd = so->so_rcv.sb_hiwat; + + /* XXX * For receive coalescing to work effectively we need a receive window @@ -1079,7 +1091,7 @@ (uint32_t)d->rx_page_size * 23 : MAX_RCV_WND); - return (min(wnd, max_rcv_wnd)); + return min(wnd, max_rcv_wnd); } /* @@ -1118,8 +1130,8 @@ * XXX broken * */ - tp->rcv_wnd = select_rcv_wnd(so); - + tp->rcv_wnd = select_rcv_wnd(dev, so); + toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) && tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0; toep->tp_qset_idx = 0; @@ -1608,7 +1620,6 @@ uint64_t t; __be64 *tcb; - TRACE_ENTER; /* Note that we only accout for CPL_GET_TCB issued by the DDP code. We * really need a cookie in order to dispatch the RPLs. */ @@ -1625,7 +1636,6 @@ m_freem(m); if (__predict_true((so->so_state & SS_NOFDREF) == 0)) sorwakeup(so); - TRACE_EXIT; return; } @@ -1684,7 +1694,6 @@ if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) { handle_excess_rx(toep, m); - TRACE_EXIT; return; } @@ -1704,7 +1713,7 @@ } #endif m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1; - bsp->flags &= ~DDP_BF_NOCOPY; + bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA); q->cur_buf ^= 1; } else if (bsp->flags & DDP_BF_NOFLIP) { @@ -1720,11 +1729,11 @@ * and we need to decrement the posted count. */ if (m->m_pkthdr.len == 0) { - if (ddp_offset == 0) + if (ddp_offset == 0) { q->kbuf_posted--; - panic("length not set"); + bsp->flags |= DDP_BF_NODATA; + } m_free(m); - TRACE_EXIT; return; } } else { @@ -1732,12 +1741,12 @@ * but it got here way late and nobody cares anymore. */ m_free(m); - TRACE_EXIT; return; } tp = toep->tp_tp; m->m_ddp_gl = (unsigned char *)bsp->gl; + m->m_flags |= M_DDP; m->m_seq = tp->rcv_nxt; tp->rcv_nxt += m->m_pkthdr.len; tp->t_rcvtime = ticks; @@ -1750,10 +1759,12 @@ "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u", m->m_seq, q->cur_buf, m->m_pkthdr.len); #endif - sbappend(&so->so_rcv, m); + SOCKBUF_LOCK(&so->so_rcv); + sbappendstream_locked(&so->so_rcv, m); if (__predict_true((so->so_state & SS_NOFDREF) == 0)) - sorwakeup(so); - TRACE_EXIT; + sorwakeup_locked(so); + else + SOCKBUF_UNLOCK(&so->so_rcv); } /* @@ -1766,9 +1777,10 @@ struct toepcb *toep = (struct toepcb *)ctx; /* OK if socket doesn't exist */ - if (toep == NULL) + if (toep == NULL) { + printf("null toep in do_get_tcb_rpl\n"); return (CPL_RET_BUF_DONE); - + } tcb_rpl_as_ddp_complete(toep, m); return (0); @@ -1778,7 +1790,6 @@ handle_ddp_data(struct toepcb *toep, struct mbuf *m) { struct tcpcb *tp = toep->tp_tp; - struct socket *so; struct ddp_state *q; struct ddp_buf_state *bsp; struct cpl_rx_data *hdr = cplhdr(m); @@ -1790,7 +1801,10 @@ TRACE_ENTER; q = &toep->tp_ddp_state; bsp = &q->buf_state[q->cur_buf]; - m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; + m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; + + printf("rcv_nxt=0x%x tp->rcv_next=0x%x len=%d\n", + rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len); #ifdef T3_TRACE if ((int)m->m_pkthdr.len < 0) { @@ -1799,20 +1813,19 @@ #endif m->m_ddp_gl = (unsigned char *)bsp->gl; + m->m_flags |= M_DDP; m->m_cur_offset = bsp->cur_offset; m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; if (bsp->flags & DDP_BF_NOCOPY) bsp->flags &= ~DDP_BF_NOCOPY; + printf("ddp flags=0x%x\n", m->m_ddp_flags); + m->m_seq = tp->rcv_nxt; tp->rcv_nxt = rcv_nxt; bsp->cur_offset += m->m_pkthdr.len; if (!(bsp->flags & DDP_BF_NOFLIP)) q->cur_buf ^= 1; - tp->t_rcvtime = ticks; - - so = toeptoso(toep); - sbappend(&so->so_rcv, m); /* * For now, don't re-enable DDP after a connection fell out of DDP * mode. @@ -1837,14 +1850,14 @@ if (__predict_false(so_no_receive(so))) { handle_excess_rx(toep, m); INP_UNLOCK(tp->t_inpcb); + TRACE_EXIT; return; } if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) handle_ddp_data(toep, m); - + m->m_seq = ntohl(hdr->seq); - m->m_ddp_flags = 0; m->m_ulp_mode = 0; /* for iSCSI */ #if VALIDATE_SEQ @@ -1889,11 +1902,12 @@ "new_rx_data: seq 0x%x len %u", m->m_seq, m->m_pkthdr.len); #endif + INP_UNLOCK(tp->t_inpcb); SOCKBUF_LOCK(&so->so_rcv); if (sb_notify(&so->so_rcv)) DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len); - sbappend_locked(&so->so_rcv, m); + sbappendstream_locked(&so->so_rcv, m); #ifdef notyet /* @@ -1906,7 +1920,7 @@ so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax)); #endif - INP_UNLOCK(tp->t_inpcb); + DPRINTF("sb_cc=%d sb_mbcnt=%d\n", so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt); @@ -1940,13 +1954,17 @@ struct cpl_rx_data_ddp *hdr; unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx; struct socket *so = toeptoso(toep); + int nomoredata = 0; + + if (__predict_false(so_no_receive(so))) { + struct inpcb *inp = sotoinpcb(so); - if (__predict_false(so_no_receive(so))) { + INP_LOCK(inp); handle_excess_rx(toep, m); + INP_UNLOCK(inp); return; } - TRACE_ENTER; tp = sototcpcb(so); q = &toep->tp_ddp_state; hdr = cplhdr(m); @@ -1971,7 +1989,7 @@ /* * Overload to store old RCV_NXT */ - m->m_pkthdr.csum_data = tp->rcv_nxt; + m->m_seq = tp->rcv_nxt; tp->rcv_nxt = rcv_nxt; /* @@ -1979,7 +1997,14 @@ * m->m_len here, we need to be very careful that nothing from now on * interprets ->len of this packet the usual way. */ - m->m_len = tp->rcv_nxt - m->m_pkthdr.csum_data; + m->m_len = m->m_pkthdr.len = tp->rcv_nxt - m->m_seq; + /* + * Length is only meaningful for kbuf + */ + if (!(bsp->flags & DDP_BF_NOCOPY)) + KASSERT(m->m_len <= bsp->gl->dgl_length, + ("length received exceeds ddp pages: len=%d dgl_length=%d", + m->m_len, bsp->gl->dgl_length)); /* * Figure out where the new data was placed in the buffer and store it @@ -1989,8 +2014,9 @@ end_offset = G_DDP_OFFSET(ddp_report) + ddp_len; m->m_cur_offset = end_offset - m->m_pkthdr.len; m->m_ddp_gl = (unsigned char *)bsp->gl; + m->m_flags |= M_DDP; bsp->cur_offset = end_offset; - + toep->tp_enqueued_bytes += m->m_pkthdr.len; /* * Bit 0 of flags stores whether the DDP buffer is completed. * Note that other parts of the code depend on this being in bit 0. @@ -1998,26 +2024,30 @@ if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) { panic("spurious ddp completion"); } else { - m->m_pkthdr.csum_flags = !!(ddp_report & F_DDP_BUF_COMPLETE); - if (m->m_pkthdr.csum_flags && !(bsp->flags & DDP_BF_NOFLIP)) + m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE); + if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP)) q->cur_buf ^= 1; /* flip buffers */ } if (bsp->flags & DDP_BF_NOCOPY) { - m->m_pkthdr.csum_flags |= (bsp->flags & DDP_BF_NOCOPY); + m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY); bsp->flags &= ~DDP_BF_NOCOPY; } if (ddp_report & F_DDP_PSH) - m->m_pkthdr.csum_flags |= DDP_BF_PSH; - + m->m_ddp_flags |= DDP_BF_PSH; + if (nomoredata) + m->m_ddp_flags |= DDP_BF_NODATA; + tp->t_rcvtime = ticks; + + SOCKBUF_LOCK(&so->so_rcv); sbappendstream_locked(&so->so_rcv, m); if ((so->so_state & SS_NOFDREF) == 0) sorwakeup_locked(so); - - TRACE_EXIT; + else + SOCKBUF_UNLOCK(&so->so_rcv); } #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\ @@ -2057,9 +2087,14 @@ struct ddp_buf_state *bsp; struct cpl_rx_ddp_complete *hdr; unsigned int ddp_report, buf_idx, when; + int nomoredata = 0; if (__predict_false(so_no_receive(so))) { + struct inpcb *inp = sotoinpcb(so); + + INP_LOCK(inp); handle_excess_rx(toep, m); + INP_UNLOCK(inp); return; } TRACE_ENTER; @@ -2070,7 +2105,7 @@ bsp = &q->buf_state[buf_idx]; when = bsp->cur_offset; - m->m_len = G_DDP_OFFSET(ddp_report) - when; + m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when; #ifdef T3_TRACE T3_TRACE5(TIDTB(sk), @@ -2082,9 +2117,12 @@ bsp->cur_offset += m->m_len; - if (!(bsp->flags & DDP_BF_NOFLIP)) + if (!(bsp->flags & DDP_BF_NOFLIP)) { q->cur_buf ^= 1; /* flip buffers */ - + if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length) + nomoredata=1; + } + #ifdef T3_TRACE T3_TRACE4(TIDTB(sk), "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u " @@ -2093,18 +2131,23 @@ G_DDP_OFFSET(ddp_report)); #endif m->m_ddp_gl = (unsigned char *)bsp->gl; - m->m_pkthdr.csum_flags = (bsp->flags & DDP_BF_NOCOPY) | 1; + m->m_flags |= M_DDP; + m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1; if (bsp->flags & DDP_BF_NOCOPY) bsp->flags &= ~DDP_BF_NOCOPY; + if (nomoredata) + m->m_ddp_flags |= DDP_BF_NODATA; + m->m_pkthdr.csum_data = tp->rcv_nxt; tp->rcv_nxt += m->m_len; tp->t_rcvtime = ticks; + SOCKBUF_LOCK(&so->so_rcv); sbappendstream_locked(&so->so_rcv, m); if ((so->so_state & SS_NOFDREF) == 0) sorwakeup_locked(so); - + SOCKBUF_UNLOCK(&so->so_rcv); TRACE_EXIT; } @@ -2184,8 +2227,9 @@ q = &toep->tp_ddp_state; bsp = &q->buf_state[q->cur_buf]; - m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; + m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; m->m_ddp_gl = (unsigned char *)bsp->gl; + m->m_flags |= M_DDP; m->m_cur_offset = bsp->cur_offset; m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; @@ -2195,7 +2239,7 @@ if (!(bsp->flags & DDP_BF_NOFLIP)) q->cur_buf ^= 1; tp->t_rcvtime = ticks; - sbappend(&so->so_rcv, m); + sbappendstream(&so->so_rcv, m); if (__predict_true((so->so_state & SS_NOFDREF) == 0)) sorwakeup(so); return (1); @@ -2918,7 +2962,8 @@ th.th_seq = req->rcv_isn; th.th_flags = TH_SYN; - toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn; + toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1; + inc.inc_isipv6 = 0; inc.inc_len = 0; @@ -3047,14 +3092,15 @@ newtoep->tp_flags = TP_SYN_RCVD; newtoep->tp_tid = tid; newtoep->tp_toedev = tdev; + tp->rcv_wnd = select_rcv_wnd(tdev, so); - printf("inserting tid=%d\n", tid); + printf("inserting tid=%d rcv_wnd=%ld\n", tid, tp->rcv_wnd); cxgb_insert_tid(cdev, d->client, newtoep, tid); SOCK_LOCK(so); LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry); SOCK_UNLOCK(so); - +#ifdef notyet if (lctx->ulp_mode) { ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); @@ -3063,7 +3109,22 @@ else newtoep->tp_ulp_mode = lctx->ulp_mode; } +#else + newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && /* !sock_flag(sk, NO_DDP) && */ + tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0; + + if (newtoep->tp_ulp_mode) { + ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); + + if (ddp_mbuf == NULL) + newtoep->tp_ulp_mode = 0; + } + + printf("ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d\n", + TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode); +#endif + set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure); DPRINTF("adding request to syn cache\n"); @@ -3346,7 +3407,8 @@ toep->tp_flags = 0; tp->t_toe = toep; reset_wr_list(toep); - tp->rcv_wnd = select_rcv_wnd(so); + tp->rcv_wnd = select_rcv_wnd(tdev, so); + tp->rcv_nxt = toep->tp_copied_seq; install_offload_ops(so); toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs); @@ -3728,7 +3790,6 @@ const struct tom_data *td = TOM_DATA(TOE_DEV(so)); unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit; - TRACE_ENTER; for (i = 0; i < nppods; ++i) { m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE); m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); @@ -3756,8 +3817,6 @@ send_or_defer(toep, m, 0); ppod_addr += PPOD_SIZE; } - - TRACE_EXIT; return (0); } @@ -3905,7 +3964,6 @@ struct cpl_set_tcb_field *req; struct ddp_state *p = &toep->tp_ddp_state; - TRACE_ENTER; wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq); m = m_gethdr_nofail(wrlen); m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); @@ -3960,7 +4018,6 @@ bufidx, tag0, tag1, len); #endif cxgb_ofld_send(TOEP_T3C_DEV(toep), m); - TRACE_EXIT; } /* @@ -3978,7 +4035,6 @@ struct work_request_hdr *wr; struct cpl_set_tcb_field *req; - TRACE_ENTER; wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) + (len1 ? sizeof(*req) : 0) + (modulate ? sizeof(struct cpl_rx_data_ack) : 0); @@ -4024,7 +4080,6 @@ #endif cxgb_ofld_send(TOEP_T3C_DEV(toep), m); - TRACE_EXIT; } void ==== //depot/projects/iwarp/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#7 (text+ko) ==== @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include +#include #include #include @@ -257,6 +259,11 @@ static int so_should_ddp(const struct toepcb *toep, int last_recv_len) { + + DPRINTF("ulp_mode=%d last_recv_len=%d ddp_thresh=%d rcv_wnd=%ld ddp_copy_limit=%d\n", + toep->tp_ulp_mode, last_recv_len, TOM_TUNABLE(toep->tp_toedev, ddp_thres), + toep->tp_tp->rcv_wnd, (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN)); + return toep->tp_ulp_mode == ULP_MODE_TCPDDP && (toep->tp_ddp_state.kbuf[0] == NULL) && last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) && toep->tp_tp->rcv_wnd > @@ -278,28 +285,37 @@ static int m_uiomove(const struct mbuf *m, int offset, int len, struct uio *uio) { - int curlen, err = 0; + int curlen, startlen, resid_init, err = 0; caddr_t buf; - + + DPRINTF("m_uiomove(m=%p, offset=%d, len=%d, ...)\n", + m, offset, len); + + startlen = len; + resid_init = uio->uio_resid; while (m && len) { buf = mtod(m, caddr_t); curlen = m->m_len; - if (offset < curlen) { + if (offset && (offset < curlen)) { curlen -= offset; buf += offset; offset = 0; - } else { + } else if (offset) { offset -= curlen; m = m->m_next; continue; } + err = uiomove(buf, min(len, curlen), uio); + if (err) { + printf("uiomove returned %d\n", err); + return (err); + } - err = uiomove_frombuf(buf, min(len, curlen), uio); - if (err) - return (err); - len -= min(len, m->m_len); + len -= min(len, curlen); m = m->m_next; } + DPRINTF("copied %d bytes - resid_init=%d uio_resid=%d\n", + startlen - len, resid_init, uio->uio_resid); return (err); } @@ -312,16 +328,20 @@ copy_data(const struct mbuf *m, int offset, int len, struct uio *uio) { struct iovec *to = uio->uio_iov; + int err; + - if (__predict_true(!is_ddp(m))) /* RX_DATA */ + if (__predict_true(!is_ddp(m))) { /* RX_DATA */ return m_uiomove(m, offset, len, uio); - if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */ + } if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */ to->iov_len -= len; to->iov_base = ((caddr_t)to->iov_base) + len; uio->uio_iov = to; + uio->uio_resid -= len; return (0); } - return t3_ddp_copy(m, offset, uio, len); /* kernel DDP */ + err = t3_ddp_copy(m, offset, uio, len); /* kernel DDP */ + return (err); } static void @@ -509,7 +529,45 @@ return pru_sosend(so, addr, uio, top, control, flags, td); } +/* + * Following replacement or removal of the first mbuf on the first mbuf chain + * of a socket buffer, push necessary state changes back into the socket + * buffer so that other consumers see the values consistently. 'nextrecord' + * is the callers locally stored value of the original value of + * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes. + * NOTE: 'nextrecord' may be NULL. + */ +#if 1 +static __inline void +sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) +{ + + SOCKBUF_LOCK_ASSERT(sb); + /* + * First, update for the new value of nextrecord. If necessary, make + * it the first record. + */ + if (sb->sb_mb != NULL) + sb->sb_mb->m_nextpkt = nextrecord; + else + sb->sb_mb = nextrecord; + /* + * Now update any dependent socket buffer fields to reflect the new + * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the + * addition of a second clause that takes care of the case where + * sb_mb has been updated, but remains the last record. + */ + if (sb->sb_mb == NULL) { + sb->sb_mbtail = NULL; + sb->sb_lastrecord = NULL; + } else if (sb->sb_mb->m_nextpkt == NULL) + sb->sb_lastrecord = sb->sb_mb; +} +#endif + +#define IS_NONBLOCKING(so) ((so)->so_state & SS_NBIO) + static int t3_soreceive(struct socket *so, int *flagsp, struct uio *uio) { @@ -517,23 +575,25 @@ struct toepcb *toep = tp->t_toe; struct mbuf *m; uint32_t offset; - int err, flags, avail, len, buffers_freed = 0, copied = 0; + int err, flags, avail, len, copied, copied_unacked; int target; /* Read at least this many bytes */ int user_ddp_ok, user_ddp_pending = 0; struct ddp_state *p; struct inpcb *inp = sotoinpcb(so); - + + avail = offset = copied = copied_unacked = 0; flags = flagsp ? (*flagsp &~ MSG_EOR) : 0; err = sblock(&so->so_rcv, SBLOCKWAIT(flags)); + if (err) return (err); - TRACE_ENTER; + SOCKBUF_LOCK(&so->so_rcv); restart: len = uio->uio_resid; m = so->so_rcv.sb_mb; - target = (flags & MSG_WAITALL) ? min(len, so->so_rcv.sb_hiwat) : so->so_rcv.sb_lowat; + target = (flags & MSG_WAITALL) ? len : so->so_rcv.sb_lowat; p = &toep->tp_ddp_state; user_ddp_ok = p->ubuf_ddp_ready; p->cancel_ubuf = 0; @@ -561,6 +621,8 @@ so->so_error = 0; goto done; } + if (so->so_rcv.sb_state & SBS_CANTRCVMORE) + goto done; >>> TRUNCATED FOR MAIL (1000 lines) <<<