Date: Wed, 26 Aug 1998 21:55:19 -0400 (EDT) From: Garrett Wollman <wollman@khavrinen.lcs.mit.edu> To: net@FreeBSD.ORG Subject: Next big network patch: specialized sosend for TCP Message-ID: <199808270155.VAA07365@khavrinen.lcs.mit.edu>
next in thread | raw e-mail | index | archive | help
Here's the next patch that's going into the TCP stack. I am running this right now, so I'm certain it's not completely bogus, but have not stress-tested it as yet. It does appear to be somewhat faster (having eliminated about a dozen branches), but -current is a very hostile environment for microbenchmarks of the sort I would usually use. Once again, any comments would be appreciated. -GAWollman Index: netinet/tcp_usrreq.c =================================================================== RCS file: /home/cvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.38 diff -u -r1.38 tcp_usrreq.c --- tcp_usrreq.c 1998/08/23 03:07:15 1.38 +++ tcp_usrreq.c 1998/08/27 01:44:26 @@ -73,6 +73,9 @@ struct proc *)); static struct tcpcb * tcp_disconnect __P((struct tcpcb *)); +static int tcp_sosend __P((struct socket *, struct sockaddr *, + struct uio *, struct mbuf *, struct mbuf *, + int, struct proc *)); static struct tcpcb * tcp_usrclosed __P((struct tcpcb *)); @@ -325,6 +328,10 @@ /* * Do a send by putting data in output queue and updating urgent * marker if URG set. Possibly send more data. + * + * XXX - this routine is really only here for the benefit of NFS. + * Somebody who knows the NFS code should figure out why NFS + * is going through here and where it should go. */ static int tcp_usr_send(struct socket *so, int flags, struct mbuf *m, @@ -336,14 +343,12 @@ struct tcpcb *tp; COMMON_START(); - if (control && control->m_len) { - m_freem(control); /* XXX shouldn't caller do this??? */ - if (m) - m_freem(m); - error = EINVAL; - goto out; - } - + /* + * We used to check for control information here, but + * tcp_sosend() doesn't call here, and any direct callers (i.e., NFS) + * should know enough to refrain from sending any since TCP + * has never supported control information. 
+ */ if(!(flags & PRUS_OOB)) { sbappend(&so->so_snd, m); if (nam && tp->t_state < TCPS_SYN_SENT) { @@ -459,7 +464,7 @@ tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach, tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd, tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, - in_setsockaddr, sosend, soreceive, sopoll + in_setsockaddr, tcp_sosend, soreceive, sopoll }; /* @@ -786,5 +791,230 @@ tp->t_timer[TCPT_2MSL] = tcp_maxidle; } return (tp); +} + +#include <sys/malloc.h> +#include <sys/proc.h> +#include <sys/resource.h> +#include <sys/resourcevar.h> +#include <sys/uio.h> + +#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) +/* + * Specialized from kern/uipc_socket.c:sosend(). This isn't + * even close to fully optimized, but it has been run through + * a round of invisible constant propagation and dead code + * elimination. The (uio != 0) and (top != 0) should probably + * be separated by a single major branch, since the code + * is well interspersed at present (which is probably bad + * for branch prediction). + * + * Returns nonzero on error, timeout or signal; callers + * must check for short counts if EINTR/ERESTART are returned. + * Data and control buffers are freed on return. + */ +static int +tcp_sosend(so, addr, uio, top, control, flags, p) + register struct socket *so; + struct sockaddr *addr; + struct uio *uio; + struct mbuf *top; + struct mbuf *control; + int flags; + struct proc *p; +{ + struct mbuf **mp; + register struct mbuf *m; + register long space, len, resid; + struct inpcb *inp; + struct tcpcb *tp; + int error, s, mlen; + TCPDEBUG0; + + /* + * I believe that a socket can never become ``bare'' + * during the execution of this routine. (I don't think + * that a socket can ever become ``bare'' except for brief + * moments during initialization and rundown, but haven't + * proven that.) (Of course, I haven't proven this, either. + * We'll see.) 
+ */ + if ((inp = sotoinpcb(so)) == 0) { + error = EINVAL; + goto release; + } + tp = intotcpcb(inp); + + if (uio) + resid = uio->uio_resid; + else + resid = top->m_pkthdr.len; + /* + * In theory resid should be unsigned. + * However, space must be signed, as it might be less than 0 + * if we over-committed, and we must use a signed comparison + * of space and resid. On the other hand, a negative resid + * causes us to loop sending 0-length segments to the protocol. + * Also bail early if we get control information -- TCP doesn't + * support that. + */ + if (resid < 0 || (control && control->m_len) || inp == 0) { + error = EINVAL; + goto out; + } + + if (p) + p->p_stats->p_ru.ru_msgsnd++; + +restart: + error = sblock(&so->so_snd, SBLOCKWAIT(flags)); + if (error) + goto out; + do { + s = splnet(); +#define snderr(errno) do { error = errno; splx(s); goto release; } while (0) + if (so->so_state & SS_CANTSENDMORE) + snderr(EPIPE); + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + splx(s); + goto release; + } + if ((so->so_state & SS_ISCONNECTED) == 0 + && addr == 0) + snderr(ENOTCONN); + + space = sbspace(&so->so_snd); + if (flags & MSG_OOB) + space += 1024; /* XXX totally arbitrary */ + if (uio == 0 && resid > so->so_snd.sb_hiwat) + snderr(EMSGSIZE); + if (space < resid && uio && space < so->so_snd.sb_lowat) { + if (so->so_state & SS_NBIO) + snderr(EWOULDBLOCK); + sbunlock(&so->so_snd); + error = sbwait(&so->so_snd); + splx(s); + if (error) + goto out; + goto restart; + } + splx(s); + mp = &top; + do { + if (uio == NULL) { + /* + * Data is prepackaged in "top".
+ */ + resid = 0; + } else do { + if (top == 0) { + MGETHDR(m, M_WAIT, MT_DATA); + mlen = MHLEN; + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + } else { + MGET(m, M_WAIT, MT_DATA); + mlen = MLEN; + } + if (resid >= MINCLSIZE) { + MCLGET(m, M_WAIT); + if ((m->m_flags & M_EXT) == 0) + goto nopages; + mlen = MCLBYTES; + len = min(min(mlen, resid), space); + } else { +nopages: + len = min(min(mlen, resid), space); + } + space -= len; + error = uiomove(mtod(m, caddr_t), (int)len, uio); + resid = uio->uio_resid; + m->m_len = len; + *mp = m; + top->m_pkthdr.len += len; + if (error) + goto release; + mp = &m->m_next; + if (resid <= 0) { + break; + } + } while (space > 0); + s = splnet(); /* XXX */ + TCPDEBUG1(); + /* + * XXX -- should be possible to perform this check + * out of the loop. + */ + if ((flags & MSG_OOB) && sbspace(&so->so_snd) < -512) { + splx(s); + snderr(ENOBUFS); + } + sbappend(&so->so_snd, top); + + /* + * Do implied connect if not yet connected, + * initialize window to default value, and + * initialize maxseg/maxopd using peer's cached + * MSS. + */ + if (addr && tp->t_state < TCPS_SYN_SENT) { + error = tcp_connect(tp, addr, p); + if (error) + goto out; + tp->snd_wnd = TTCP_CLIENT_SND_WND; + tcp_mss(tp, -1); + } + + if (flags & MSG_OOB) { + /* + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section. + * Otherwise, snd_up should be one lower. + */ + tp->snd_up = tp->snd_una + so->so_snd.sb_cc; + tp->t_force = 1; + error = tcp_output(tp); + tp->t_force = 0; + TCPDEBUG2(PRU_SEND_OOB); + } else if ((flags & MSG_EOF) && resid <= 0) { + socantsendmore(so); + tp = tcp_usrclosed(tp); +#ifdef DIAGNOSTIC + /* + * The only way tcp_usrclosed() can cause the + * tcpcb to go away entirely is if it was either + * CLOSED or LISTENing. 
In either state, we + * should never have gotten this far. + */ + if (tp == 0) + panic("tcp_sosend: socket already closed"); +#endif + error = tcp_output(tp); + TCPDEBUG2(PRU_SEND_EOF); + } else { + error = tcp_output(tp); + TCPDEBUG2(PRU_SEND); + } + splx(s); + top = 0; + mp = &top; + if (error) + goto release; + } while (resid && space > 0); + } while (resid); + +release: + sbunlock(&so->so_snd); +out: + if (top) + m_freem(top); + if (control) + m_freem(control); + return (error); } To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-net" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199808270155.VAA07365>