Date: Mon, 12 Nov 2007 06:29:33 GMT
From: Kip Macy <kmacy@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 128974 for review
Message-ID: <200711120629.lAC6TXxi035536@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=128974 Change 128974 by kmacy@kmacy:storage:toestack on 2007/11/12 06:28:34 - fix passive establish handling by initializing tp_ulp - fix race condition in t3_push_frames by adding locking - lock calls to tcp_close - change remaining cases where ctx was still being treated as a socket - update notes Affected files ... .. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#21 edit .. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_listen.c#5 edit .. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/notes#4 edit Differences ... ==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#21 (text+ko) ==== @@ -139,12 +139,25 @@ static void t3_send_reset(struct socket *so); -static inline -int is_t3a(const struct toedev *dev) +static inline int +is_t3a(const struct toedev *dev) { return (dev->ttid == TOE_ID_CHELSIO_T3); } +static void +dump_toepcb(struct toepcb *toep) +{ + printf("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n", + toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode, + toep->tp_mtu_idx, toep->tp_tid); + + printf("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n", + toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, + toep->tp_mss_clamp, toep->tp_flags); +} + + /* * Determine whether to send a CPL message now or defer it. A message is * deferred if the connection is in SYN_SENT since we don't know the TID yet. @@ -234,27 +247,45 @@ bus_dma_segment_t segs[TX_MAX_SEGS], *segp; segp = segs; - if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) + if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) { + printf("tcp state=%d\n", tp->t_state); return (0); + } - if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) + if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) { + printf("disconnecting\n"); + return (0); + } + SOCKBUF_LOCK(&so->so_snd); + d = TOM_DATA(TOE_DEV(so)); cdev = d->cdev; last = tail = so->so_snd.sb_sndptr ? 
so->so_snd.sb_sndptr : so->so_snd.sb_mb; total_bytes = 0; + printf("tail=%p snd.cc=%d tp_last=%4\n", tail, so->so_snd.sb_cc, + toep->tp_m_last); + if (last && toep->tp_m_last == last) { KASSERT(tail, ("sbdrop error")); last = tail = tail->m_next; } + if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) { + SOCKBUF_UNLOCK(&so->so_snd); + return (0); + } + while (toep->tp_wr_avail && (tail != NULL)) { + count = bytes = 0; - if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) + if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) { + SOCKBUF_UNLOCK(&so->so_snd); return (0); - + } + while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail) && (tail != NULL) && (count < TX_MAX_SEGS)) { bytes += tail->m_len; count++; @@ -269,6 +300,8 @@ segp++; tail = tail->m_next; } + printf("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n", + toep->tp_wr_avail, count, mbuf_wrs[count], tail); if (tail) { so->so_snd.sb_sndptr = tail; toep->tp_m_last = NULL; @@ -279,6 +312,8 @@ total_bytes += bytes; toep->tp_write_seq += bytes; + + SOCKBUF_UNLOCK(&so->so_snd); /* * XXX can drop socket buffer lock here @@ -310,9 +345,11 @@ bytes, count); l2t_send(cdev, m0, toep->tp_l2t); + if (toep->tp_wr_avail && (tail != NULL)) + SOCKBUF_LOCK(&so->so_snd); } - - + + SOCKBUF_UNLOCK_ASSERT(&so->so_snd); return (total_bytes); } @@ -334,7 +371,7 @@ struct toepcb *toep = tp->t_toe; unsigned int tid = toep->tp_tid; - d = TOM_DATA(TOE_DEV(so)); + d = TOM_DATA(toep->tp_toedev); if (tp->t_state != TCPS_SYN_SENT) t3_push_frames(so, 1); @@ -494,9 +531,10 @@ cxgb_toe_send(struct tcpcb *tp) { struct socket *so; + + printf("cxgb_toe_send\n"); + dump_toepcb(tp->t_toe); - printf("cxgb_toe_send\n"); - so = tp->t_inpcb->inp_socket; t3_push_frames(so, 1); return (0); @@ -944,9 +982,13 @@ calc_opt0l(struct socket *so, int ulp_mode) { struct tcpcb *tp = sototcpcb(so); + unsigned int val; - return V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) | + val = V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ)); + + 
printf("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", SO_TOS(so), tp->rcv_wnd, val); + return (val); } static inline unsigned int @@ -984,7 +1026,7 @@ req->peer_port = inp->inp_fport; memcpy(&req->local_ip, &inp->inp_laddr, 4); memcpy(&req->peer_ip, &inp->inp_faddr, 4); - + printf("connect smt_idx=%d\n", e->smt_idx); req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode)); @@ -1070,7 +1112,9 @@ static int do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) { - struct socket *so = (struct socket *)ctx; + struct toepcb *toep = (struct toepcb *)ctx; + struct socket *so = toeptoso(toep); + #ifdef notyet struct cpl_act_open_rpl *rpl = cplhdr(m); @@ -1468,9 +1512,13 @@ * Otherwise we enter TIME_WAIT. */ t3_release_offload_resources(so); - if (toep->tp_flags & TP_ABORT_RPL_PENDING) + if (toep->tp_flags & TP_ABORT_RPL_PENDING) { + INP_INFO_WLOCK(&tcbinfo); + INP_LOCK(tp->t_inpcb); tcp_close(tp); - else + INP_INFO_WUNLOCK(&tcbinfo); + INP_UNLOCK(tp->t_inpcb); + } else enter_timewait(so); break; default: @@ -1502,7 +1550,8 @@ static int do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx) { - struct socket *so = (struct socket *)ctx; + struct toepcb *toep = (struct toepcb *)ctx; + struct socket *so = toeptoso(toep); VALIDATE_SOCK(so); @@ -1525,9 +1574,13 @@ switch (tp->t_state) { case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */ t3_release_offload_resources(so); - if (toep->tp_flags & TP_ABORT_RPL_PENDING) + if (toep->tp_flags & TP_ABORT_RPL_PENDING) { + INP_INFO_WLOCK(&tcbinfo); + INP_LOCK(tp->t_inpcb); tcp_close(tp); - else + INP_INFO_WUNLOCK(&tcbinfo); + INP_UNLOCK(tp->t_inpcb); + } else enter_timewait(so); break; case TCPS_LAST_ACK: @@ -1537,7 +1590,12 @@ * late, this close_con_rpl is the actual last message. 
*/ t3_release_offload_resources(so); + INP_INFO_WLOCK(&tcbinfo); + INP_LOCK(tp->t_inpcb); tcp_close(tp); + INP_INFO_WUNLOCK(&tcbinfo); + INP_UNLOCK(tp->t_inpcb); + break; case TCPS_FIN_WAIT_1: #ifdef notyet @@ -1576,7 +1634,8 @@ do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) { - struct socket *so = (struct socket *)ctx; + struct toepcb *toep = (struct toepcb *)ctx; + struct socket *so = toeptoso(toep); VALIDATE_SOCK(so); @@ -1613,7 +1672,11 @@ if (toep->tp_flags & TP_ABORT_REQ_RCVD) panic("TP_ABORT_REQ_RCVD set"); t3_release_offload_resources(so); + INP_INFO_WLOCK(&tcbinfo); + INP_LOCK(tp->t_inpcb); tcp_close(tp); + INP_INFO_WUNLOCK(&tcbinfo); + INP_UNLOCK(tp->t_inpcb); } } } @@ -1642,9 +1705,10 @@ return (0); } - so = (struct socket *)ctx; - - /* + toep = (struct toepcb *)ctx; + so = toeptoso(toep); + + /* * Sometimes we've already closed the socket, e.g., a post-close * abort races with ABORT_REQ_RSS, the latter frees the socket * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, @@ -1654,7 +1718,6 @@ if (!so) goto discard; - toep = sototcpcb(so)->t_toe; toepcb_hold(toep); process_abort_rpl(so, m); toepcb_release(toep); @@ -1775,6 +1838,7 @@ { struct tcpcb *parenttp = sototcpcb(parent); struct tcpcb *childtp = sototcpcb(child); + struct inpcb *inp = sotoinpcb(child); /* * If the server is still open we clean up the child connection, @@ -1784,7 +1848,11 @@ if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { cleanup_syn_rcv_conn(child, parent); t3_release_offload_resources(child); + INP_INFO_WLOCK(&tcbinfo); + INP_LOCK(inp); tcp_close(childtp); + INP_INFO_WUNLOCK(&tcbinfo); + INP_UNLOCK(inp); } } @@ -1865,7 +1933,11 @@ return; t3_release_offload_resources(so); + INP_INFO_WLOCK(&tcbinfo); + INP_LOCK(tp->t_inpcb); tcp_close(tp); + INP_INFO_WUNLOCK(&tcbinfo); + INP_UNLOCK(tp->t_inpcb); } send_abort_rpl(m, tdev, rst_status); @@ -2140,6 +2212,8 @@ toep->tp_iss = th.th_seq = req->rcv_isn; th.th_flags = TH_SYN; + toep->tp_delack_seq = 
toep->tp_rcv_wup = toep->tp_copied_seq = ntohl(req->rcv_isn); + inc.inc_isipv6 = 0; inc.inc_len = 0; inc.inc_faddr.s_addr = req->peer_ip; @@ -2293,17 +2367,35 @@ rpl = cplhdr(reply_mbuf); reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl); rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + rpl->wr.wr_lo = 0; OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); + rpl->opt2 = htonl(calc_opt2(so, tdev)); + rpl->rsvd = rpl->opt2; /* workaround for HW bug */ rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten + + printf("accept smt_idx=%d\n", e->smt_idx); - rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | V_L2T_IDX(e->idx) | - V_TX_CHANNEL(e->smt_idx)); + rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | + V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); rpl->opt0l_status = htonl(calc_opt0l(so, lctx->ulp_mode) | CPL_PASS_OPEN_ACCEPT); - rpl->opt2 = htonl(calc_opt2(so, tdev)); - rpl->rsvd = rpl->opt2; /* workaround for HW bug */ + + printf("opt0l_status=%08x\n", rpl->opt0l_status); + m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, so)); + { + int i; + + printf("rpl:\n"); + uint32_t *rplbuf = mtod(reply_mbuf, uint32_t *); + + for (i = 0; i < sizeof(*rpl)/sizeof(uint32_t); i++) + printf("[%d] %08x\n", i, rplbuf[i]); + } + + + l2t_send(cdev, reply_mbuf, e); m_free(m); #ifdef notyet @@ -2429,6 +2521,8 @@ if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10)) toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10); + dump_toepcb(toep); + #ifdef notyet /* * no clean interface for marking ARP up to date @@ -2539,9 +2633,14 @@ tp->rcv_wnd >= MIN_DDP_RCV_WIN ? 
ULP_MODE_TCPDDP : 0; toep->tp_qset_idx = 0; toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu); - + + /* + * XXX Cancel any keep alive timer + */ + make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); INP_INFO_WUNLOCK(&tcbinfo); + soisconnected(so); #ifdef notyet @@ -2587,6 +2686,8 @@ struct toepcb *toep = tp->t_toe; unsigned int tid = toep->tp_tid; + printf("fixup_and_send_ofo\n"); + while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) { /* * A variety of messages can be waiting but the fields we'll @@ -2799,6 +2900,7 @@ struct toepcb *toep = (struct toepcb *)ctx; printf("do_wr_ack\n"); + dump_toepcb(toep); VALIDATE_SOCK(so); ==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_listen.c#5 (text+ko) ==== @@ -253,7 +253,8 @@ ctx->tom_data = d; ctx->lso = so; - + ctx->ulp_mode = 0; /* DDP if the default */ + stid = cxgb_alloc_stid(d->cdev, d->client, ctx); if (stid < 0) goto free_ctx; ==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/notes#4 (text+ko) ==== @@ -1,9 +1,10 @@ -Currently untested: +Somewhat untested: - abort Currently unimplemented: - - complete listen handling + - DDP + - module unload - close for a subset of states - correct ARP failure handling - urgent data @@ -11,7 +12,7 @@ - connection retry - fragment assembly and re-tunneling is not implemented, but may work just using the native stack - not clear how credit accounting will sync up - - DDP + open questions: What attributes are inherited from the listen socket. Should we be inheriting more?
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200711120629.lAC6TXxi035536>