Date: Sat, 3 Nov 2007 04:21:18 GMT From: Kip Macy <kmacy@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 128546 for review Message-ID: <200711030421.lA34LIdQ041270@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=128546 Change 128546 by kmacy@kmacy:storage:toestack on 2007/11/03 04:20:33 Handle tx completions and (to some degree) failed connects. Don't use toe_mbuf, as it causes us to write past the end of the current mbuf. Affected files ... .. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#11 edit .. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#3 edit Differences ... ==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#11 (text+ko) ==== @@ -59,6 +59,7 @@ #include <netinet/tcp_var.h> #include <netinet/tcp_fsm.h> #include <netinet/tcp_ofld.h> +#include <netinet/tcp_seq.h> #include <net/route.h> #include <dev/cxgb/t3cdev.h> @@ -122,6 +123,9 @@ */ #define MIN_RCV_WND (24 * 1024U) +#define VALIDATE_SEQ 0 +#define VALIDATE_SOCK(so) +#define DEBUG_WR 0 extern int tcp_do_autorcvbuf; extern int tcp_do_autosndbuf; @@ -179,7 +183,7 @@ struct tcpcb *tp = sototcpcb(so); struct toepcb *toep = tp->t_toe; - struct mbuf *tail, *m0; + struct mbuf *tail, *m0, *last; struct t3cdev *cdev; struct tom_data *d; int bytes, count, total_bytes; @@ -194,9 +198,13 @@ d = TOM_DATA(TOE_DEV(so)); cdev = d->cdev; - tail = so->so_snd.sb_sndptr ? so->so_snd.sb_sndptr : so->so_snd.sb_mb; + last = tail = so->so_snd.sb_sndptr ? 
so->so_snd.sb_sndptr : so->so_snd.sb_mb; total_bytes = 0; - + if (toep->tp_m_last == last) { + KASSERT(tail, ("sbdrop error")); + last = tail = tail->m_next; + } + while (toep->tp_wr_avail && (tail != NULL)) { count = bytes = 0; @@ -206,7 +214,7 @@ while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail) && (tail != NULL) && (count < TX_MAX_SEGS)) { bytes += tail->m_len; count++; - + last = tail; /* * technically an abuse to be using this for a VA * but less gross than defining my own structure @@ -217,8 +225,12 @@ segp++; tail = tail->m_next; } - - so->so_snd.sb_sndptr = tail; + if (tail) { + so->so_snd.sb_sndptr = tail; + toep->tp_m_last = NULL; + } else + toep->tp_m_last = so->so_snd.sb_sndptr = last; + so->so_snd.sb_sndptroff += bytes; total_bytes += bytes; @@ -229,11 +241,15 @@ toep->tp_wr_avail -= mbuf_wrs[count]; toep->tp_wr_unacked += mbuf_wrs[count]; - + make_tx_data_wr(so, m0, bytes, tail); m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, so)); m_set_sgl(m0, segs); m_set_sgllen(m0, count); + /* + * remember credits used + */ + m0->m_pkthdr.csum_data = mbuf_wrs[count]; m0->m_pkthdr.len = bytes; if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) || toep->tp_wr_unacked >= toep->tp_wr_max / 2) { @@ -243,6 +259,11 @@ toep->tp_wr_unacked = 0; } + m0->m_type = MT_DONTFREE; + enqueue_wr(tp, m0); + printf("sending offload tx with %d bytes in %d segments\n", + bytes, count); + l2t_send(cdev, m0, toep->tp_l2t); } @@ -266,7 +287,12 @@ static int cxgb_toe_send(struct tcpcb *tp) { - printf("%s UNIMPLEMENTED!!!!\n", __FUNCTION__); + struct socket *so; + + printf("cxgb_toe_send\n"); + + so = tp->t_inpcb->inp_socket; + t3_push_frames(so, 1); return (0); } @@ -449,7 +475,6 @@ toep->tp_delack_mode = 0; toep->tp_mtu_idx = select_mss(so, dst->rt_ifp->if_mtu); - printf("mss selected\n"); tp->rcv_wnd = select_rcv_wnd(so); toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) && @@ -529,6 +554,29 @@ } +/* + * Convert an ACT_OPEN_RPL status to an errno. 
+ */ +static int +act_open_rpl_status_to_errno(int status) +{ + switch (status) { + case CPL_ERR_CONN_RESET: + return (ECONNREFUSED); + case CPL_ERR_ARP_MISS: + return (EHOSTUNREACH); + case CPL_ERR_CONN_TIMEDOUT: + return (ETIMEDOUT); + case CPL_ERR_TCAM_FULL: + return (ENOMEM); + case CPL_ERR_CONN_EXIST: + log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n"); + return (EADDRINUSE); + default: + return (EIO); + } +} + static void fail_act_open(struct socket *so, int errno) { @@ -544,6 +592,63 @@ } /* + * Handle active open failures. + */ +static void +active_open_failed(struct socket *so, struct mbuf *m) +{ + struct cpl_act_open_rpl *rpl = cplhdr(m); + +/* + * Don't handle connection retry for now + */ +#ifdef notyet + struct inet_connection_sock *icsk = inet_csk(sk); + + if (rpl->status == CPL_ERR_CONN_EXIST && + icsk->icsk_retransmit_timer.function != act_open_retry_timer) { + icsk->icsk_retransmit_timer.function = act_open_retry_timer; + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, + jiffies + HZ / 2); + } else +#endif + fail_act_open(so, act_open_rpl_status_to_errno(rpl->status)); + m_free(m); +} + +/* + * Return whether a failed active open has allocated a TID + */ +static inline int +act_open_has_tid(int status) +{ + return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && + status != CPL_ERR_ARP_MISS; +} + +/* + * Process an ACT_OPEN_RPL CPL message. + */ +static int +do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) +{ + struct socket *so = (struct socket *)ctx; +#ifdef notyet + struct cpl_act_open_rpl *rpl = cplhdr(m); + + if (cdev->type != T3A && act_open_has_tid(rpl->status)) + cxgb_release_tid(cdev, GET_TID(rpl)); + + cxgb3_queue_tid_release(cdev, GET_TID(rpl)); +#else + printf("%s UNIMPLEMENTED\n", __FUNCTION__); +#endif + + active_open_failed(so, m); + return (0); +} + +/* * Handle an ARP failure for an active open. XXX purge ofo queue * * XXX badly broken for crossed SYNs as the ATID is no longer valid. 
@@ -553,8 +658,7 @@ */ static void act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m) { - struct toe_mbuf *tm = (struct toe_mbuf *)m; - struct toepcb *toep = tm->m_toe.mt_toepcb; + struct toepcb *toep = m_get_toep(m); struct tcpcb *tp = toep->tp_tp; struct inpcb *inp = tp->t_inpcb; struct socket *so = toeptoso(toep); @@ -595,22 +699,23 @@ goto free_tid; m = (struct toe_mbuf *)m_gethdr(MT_DATA, M_WAITOK); + m_set_toep(m, tp->t_toe); + +#if 0 m->m_toe.mt_toepcb = tp->t_toe; set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); - +#endif if ((err = init_offload_socket(so, tdev, atid, e, dst))) return (err); install_offload_ops(so); mk_act_open_req(so, m, atid, e); + soisconnecting(so); l2t_send(d->cdev, (struct mbuf *)m, e); toep = tp->t_toe; if (toep->tp_ulp_mode) t3_enable_ddp(so, 0); - - soisconnecting(so); - return (0); free_tid: @@ -796,7 +901,117 @@ return 0; } +/* + * Process an acknowledgment of WR completion. Advance snd_una and send the + * next batch of work requests from the write queue. 
+ */ +static void +wr_ack(struct socket *so, struct mbuf *m) +{ + struct tcpcb *tp = sototcpcb(so); + struct toepcb *toep = tp->t_toe; + struct cpl_wr_ack *hdr = cplhdr(m); + unsigned int credits = ntohs(hdr->credits); + u32 snd_una = ntohl(hdr->snd_una); + int bytes = 0; + + printf("wr_ack: snd_una=%u credits=%d\n", snd_una, credits); + + toep->tp_wr_avail += credits; + if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) + toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; + + while (credits) { + struct mbuf *p = peek_wr(tp); + printf("p->credits=%d p->bytes=%d\n", p->m_pkthdr.csum_data, p->m_pkthdr.len) ; + + if (__predict_false(!p)) { + log(LOG_ERR, "%u WR_ACK credits for TID %u with " + "nothing pending, state %u\n", + credits, toep->tp_tid, tp->t_state); + break; + } + if (__predict_false(credits < p->m_pkthdr.csum_data)) { +#if DEBUG_WR > 1 + struct tx_data_wr *w = cplhdr(p); +#ifdef notyet + log(LOG_ERR, + "TID %u got %u WR credits, need %u, len %u, " + "main body %u, frags %u, seq # %u, ACK una %u," + " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n", + toep->tp_tid, credits, p->csum, p->len, + p->len - p->data_len, skb_shinfo(p)->nr_frags, + ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt), + WR_AVAIL(tp), count_pending_wrs(tp) - credits); +#endif +#endif + p->m_pkthdr.csum_data -= credits; + break; + } else { + dequeue_wr(tp); + credits -= p->m_pkthdr.csum_data; + bytes += p->m_pkthdr.len; + printf("done with wr of %d bytes\n", p->m_pkthdr.len); + + m_free(p); + } + } + +#if DEBUG_WR + check_wr_invariants(tp); +#endif + + if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { +#if VALIDATE_SEQ + struct tom_data *d = TOM_DATA(TOE_DEV(so)); + + log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK " + "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una, + toep->tp_tid, tp->snd_una); +#endif + goto out_free; + } + + if (tp->snd_una != snd_una) { + tp->snd_una = snd_una; + tp->ts_recent_age = ticks; +#ifdef notyet + /* + * Keep ARP entry "minty 
fresh" + */ + dst_confirm(sk->sk_dst_cache); +#endif + if (tp->snd_una == tp->snd_nxt) + toep->tp_flags &= ~TP_TX_WAIT_IDLE; + } + if (bytes) { + printf("sbdrop(%d)\n", bytes); + + sbdrop(&so->so_snd, bytes); + } + + if (so->so_snd.sb_sndptroff < so->so_snd.sb_cc) + t3_push_frames(so, 0); + +out_free: + m_free(m); +} + +/* + * Handler for TX_DATA_ACK CPL messages. + */ +static int do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx) +{ + struct socket *so = (struct socket *)ctx; + + printf("do_wr_ack\n"); + + VALIDATE_SOCK(so); + wr_ack(so, m); + return 0; +} + void t3_init_wr_tab(unsigned int wr_len) { @@ -832,15 +1047,15 @@ #endif t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); + t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl); + t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack); #ifdef notyet t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish); - t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl); t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req); t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify); t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data); t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp); t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); - t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack); t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); ==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#3 (text+ko) ==== @@ -78,7 +78,8 @@ #define T3C_DEV(sk) ((TOM_DATA(TOE_DEV(sk)))->cdev) #define TOM_TUNABLE(dev, param) (TOM_DATA(dev)->conf.param) -#define TP_DATASENT (1 << 0) +#define TP_DATASENT (1 << 0) +#define TP_TX_WAIT_IDLE (1 << 1) struct toepcb { struct toedev *tp_toedev; @@ -94,10 +95,13 @@ int tp_mss_clamp; int tp_qset; int tp_flags; + volatile int tp_refcount; struct tcpcb *tp_tp; + 
struct mbuf *tp_m_last; + struct mbuf_head wr_list; struct mbuf_head out_of_order_queue; }; @@ -109,7 +113,29 @@ mbufq_init(&toep->wr_list); } - +static inline void enqueue_wr(struct tcpcb *tp, struct mbuf *m) +{ + struct toepcb *toep = tp->t_toe; + + mbufq_tail(&toep->wr_list, m); +} + + +static inline struct mbuf *peek_wr(struct tcpcb *tp) +{ + struct toepcb *toep = tp->t_toe; + + return mbufq_peek(&toep->wr_list); +} + +static inline struct mbuf *dequeue_wr(struct tcpcb *tp) +{ + struct toepcb *toep = tp->t_toe; + + return mbufq_dequeue(&toep->wr_list); +} + void t3_init_tunables(struct tom_data *t); + #endif
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200711030421.lA34LIdQ041270>