Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 22 Jun 2009 23:08:05 +0000 (UTC)
From:      Andre Oppermann <andre@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r194672 - in head/sys: kern netinet sys
Message-ID:  <200906222308.n5MN856I055711@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: andre
Date: Mon Jun 22 23:08:05 2009
New Revision: 194672
URL: http://svn.freebsd.org/changeset/base/194672

Log:
  Add soreceive_stream(), an optimized version of soreceive() for
  stream (TCP) sockets.
  
  It is functionally identical to generic soreceive() but has a
  number of stream specific optimizations:
  o does only one sockbuf unlock/lock per receive independent of
    the length of data to be moved into the uio compared to
    soreceive() which unlocks/locks per *mbuf*.
  o uses m_mbuftouio() instead of its own copy(out) variant.
  o much more compact code flow as a large number of special
    cases is removed.
  o much improved readability.
  
  It offers significantly reduced CPU usage and lock contention
  when receiving fast TCP streams.  Additional gains are obtained
  when the receiving application is using SO_RCVLOWAT to batch up
  some data before a read (and wakeup) is done.
  
  This function was written by "reverse engineering" and is not
  just a stripped down variant of soreceive().
  
  It is not yet enabled by default on TCP sockets.  Instead it is
  commented out in the protocol initialization in tcp_usrreq.c
  until more widespread testing has been done.
  
  Testers, especially with 10GigE gear, are welcome.
  
  MFP4:	r164817 //depot/user/andre/soreceive_stream/

Modified:
  head/sys/kern/uipc_socket.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/sys/socketvar.h

Modified: head/sys/kern/uipc_socket.c
==============================================================================
--- head/sys/kern/uipc_socket.c	Mon Jun 22 22:54:44 2009	(r194671)
+++ head/sys/kern/uipc_socket.c	Mon Jun 22 23:08:05 2009	(r194672)
@@ -1857,6 +1857,202 @@ release:
 }
 
 /*
+ * Optimized version of soreceive() for stream (TCP) sockets.
+ */
+int
+soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
+    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+{
+	int len = 0, error = 0, flags, oresid;
+	struct sockbuf *sb;
+	struct mbuf *m, *n = NULL;
+
+	/* Stream sockets only; control data is never returned (EINVAL). */
+	if (so->so_type != SOCK_STREAM)
+		return (EINVAL);
+	if (psa != NULL)
+		*psa = NULL;
+	if (controlp != NULL)
+		return (EINVAL);
+	if (flagsp != NULL)
+		flags = *flagsp &~ MSG_EOR;
+	else
+		flags = 0;
+	if (flags & MSG_OOB)
+		return (soreceive_rcvoob(so, uio, flags));
+	if (mp0 != NULL)
+		*mp0 = NULL;
+
+	sb = &so->so_rcv;
+
+	/* Serialize readers; a failed sblock() leaves nothing to unlock. */
+	error = sblock(sb, SBLOCKWAIT(flags));
+	if (error)
+		return (error);
+	SOCKBUF_LOCK(sb);
+
+	/* Easy one, no space to copyout anything. */
+	if (uio->uio_resid == 0) {
+		error = EINVAL;
+		goto out;
+	}
+	oresid = uio->uio_resid;
+
+	/* We will never ever get anything unless we are connected. */
+	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
+		/* When disconnecting there may be still some data left. */
+		if (sb->sb_cc > 0)
+			goto deliver;
+		if (!(so->so_state & SS_ISDISCONNECTED))
+			error = ENOTCONN;
+		goto out;
+	}
+
+	/* Empty buffer and non-blocking (SS_NBIO is a so_state flag). */
+	if (sb->sb_cc == 0 &&
+	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
+		error = EAGAIN;
+		goto out;
+	}
+
+restart:
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+
+	/* Abort if socket has reported problems. */
+	if (so->so_error) {
+		if (sb->sb_cc > 0)
+			goto deliver;
+		if (oresid > uio->uio_resid)
+			goto out;
+		error = so->so_error;
+		if (!(flags & MSG_PEEK))
+			so->so_error = 0;
+		goto out;
+	}
+
+	/* Door is closed.  Deliver what is left, if any. */
+	if (sb->sb_state & SBS_CANTRCVMORE) {
+		if (sb->sb_cc > 0)
+			goto deliver;
+		else
+			goto out;
+	}
+
+	/* Socket buffer got some data that we shall deliver now. */
+	if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
+	    ((so->so_state & SS_NBIO) ||
+	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
+	     sb->sb_cc >= sb->sb_lowat ||
+	     sb->sb_cc >= uio->uio_resid ||
+	     sb->sb_cc >= sb->sb_hiwat) ) {
+		goto deliver;
+	}
+
+	/* On MSG_WAITALL we must wait until all data or error arrives. */
+	if ((flags & MSG_WAITALL) &&
+	    (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat))
+		goto deliver;
+
+	/*
+	 * Wait and block until (more) data comes in.
+	 * NB: Drops the sockbuf lock during wait.
+	 */
+	error = sbwait(sb);
+	if (error)
+		goto out;
+	goto restart;
+
+deliver:
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+	KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__));
+	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
+
+	/* Statistics. */
+	if (uio->uio_td)
+		uio->uio_td->td_ru.ru_msgrcv++;
+
+	/* Fill uio until full or current end of socket buffer is reached. */
+	len = min(uio->uio_resid, sb->sb_cc);
+	if (mp0 != NULL) {
+		/* Dequeue as many mbufs as possible. */
+		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
+			for (*mp0 = m = sb->sb_mb;
+			     m != NULL && m->m_len <= len;
+			     m = m->m_next) {
+				len -= m->m_len;
+				uio->uio_resid -= m->m_len;
+				sbfree(sb, m);
+				n = m;
+			}
+			sb->sb_mb = m;
+			if (sb->sb_mb == NULL)
+				SB_EMPTY_FIXUP(sb);
+			n->m_next = NULL;
+		}
+		/* Copy the remainder; the copy may span several mbufs. */
+		if (len > 0) {
+			KASSERT(sb->sb_mb != NULL,
+			    ("%s: len > 0 && sb->sb_mb empty", __func__));
+
+			m = m_copym(sb->sb_mb, 0, len, M_DONTWAIT);
+			if (m == NULL)
+				len = 0;	/* Don't flush data from sockbuf. */
+			else
+				uio->uio_resid -= len;
+			if (*mp0 != NULL)
+				n->m_next = m;
+			else
+				*mp0 = m;
+			if (*mp0 == NULL) {
+				error = ENOBUFS;
+				goto out;
+			}
+		}
+	} else {
+		/* NB: Must unlock socket buffer as uiomove may sleep. */
+		SOCKBUF_UNLOCK(sb);
+		error = m_mbuftouio(uio, sb->sb_mb, len);
+		SOCKBUF_LOCK(sb);
+		if (error)
+			goto out;
+	}
+	SBLASTRECORDCHK(sb);
+	SBLASTMBUFCHK(sb);
+
+	/*
+	 * Remove the delivered data from the socket buffer unless we
+	 * were only peeking.
+	 */
+	if (!(flags & MSG_PEEK)) {
+		if (len > 0)
+			sbdrop_locked(sb, len);
+
+		/* Notify protocol that we drained some data. */
+		if ((so->so_proto->pr_flags & PR_WANTRCVD) &&
+		    (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
+		     !(flags & MSG_SOCALLBCK))) {
+			SOCKBUF_UNLOCK(sb);
+			(*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
+			SOCKBUF_LOCK(sb);
+		}
+	}
+
+	/*
+	 * For MSG_WAITALL we may have to loop again and wait for
+	 * more data to come in.
+	 */
+	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
+		goto restart;
+out:
+	SOCKBUF_LOCK_ASSERT(sb);
+	SBLASTRECORDCHK(sb);
+	SBLASTMBUFCHK(sb);
+	SOCKBUF_UNLOCK(sb);
+	sbunlock(sb);
+	return (error);
+}
+
+/*
  * Optimized version of soreceive() for simple datagram cases from userspace.
  * Unlike in the stream case, we're able to drop a datagram if copyout()
  * fails, and because we handle datagrams atomically, we don't need to use a

Modified: head/sys/netinet/tcp_usrreq.c
==============================================================================
--- head/sys/netinet/tcp_usrreq.c	Mon Jun 22 22:54:44 2009	(r194671)
+++ head/sys/netinet/tcp_usrreq.c	Mon Jun 22 23:08:05 2009	(r194672)
@@ -1032,6 +1032,9 @@ struct pr_usrreqs tcp_usrreqs = {
 	.pru_send =		tcp_usr_send,
 	.pru_shutdown =		tcp_usr_shutdown,
 	.pru_sockaddr =		in_getsockaddr,
+#if 0
+	.pru_soreceive =	soreceive_stream,
+#endif
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		tcp_usr_close,
 };
@@ -1053,6 +1056,9 @@ struct pr_usrreqs tcp6_usrreqs = {
 	.pru_send =		tcp_usr_send,
 	.pru_shutdown =		tcp_usr_shutdown,
 	.pru_sockaddr =		in6_mapped_sockaddr,
+#if 0
+	.pru_soreceive =	soreceive_stream,
+#endif
  	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		tcp_usr_close,
 };

Modified: head/sys/sys/socketvar.h
==============================================================================
--- head/sys/sys/socketvar.h	Mon Jun 22 22:54:44 2009	(r194671)
+++ head/sys/sys/socketvar.h	Mon Jun 22 23:08:05 2009	(r194672)
@@ -345,6 +345,9 @@ int	sopoll_generic(struct socket *so, in
 	    struct ucred *active_cred, struct thread *td);
 int	soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
 	    struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
+int	soreceive_stream(struct socket *so, struct sockaddr **paddr,
+	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
+	    int *flagsp);
 int	soreceive_dgram(struct socket *so, struct sockaddr **paddr,
 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 	    int *flagsp);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200906222308.n5MN856I055711>