Date:      Tue, 28 Oct 2003 16:32:40 -0800 (PST)
From:      Sam Leffler <sam@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 40701 for review
Message-ID:  <200310290032.h9T0We94048821@repoman.freebsd.org>

http://perforce.freebsd.org/chv.cgi?CH=40701

Change 40701 by sam@sam_ebb on 2003/10/28 16:32:15

	checkpoint: multiuser boot and ping work; TCP locks up; a
	couple of LORs remain to be dealt with
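
The SOCKBUF_*/SOCK_* locking macros used throughout the hunks below come
from the sys/sys/socketvar.h change, whose diff is not included in this
excerpt.  Roughly, and only as a sketch (the sb_mtx field is visible in
the msleep() calls, but the exact macro bodies, and keying SOCK_LOCK off
the receive buffer, are assumptions):

#define	SOCKBUF_MTX(sb)		(&(sb)->sb_mtx)
#define	SOCKBUF_LOCK_INIT(sb, name) \
	mtx_init(SOCKBUF_MTX(sb), (name), NULL, MTX_DEF)
#define	SOCKBUF_LOCK_DESTROY(sb)	mtx_destroy(SOCKBUF_MTX(sb))
#define	SOCKBUF_LOCK(sb)		mtx_lock(SOCKBUF_MTX(sb))
#define	SOCKBUF_UNLOCK(sb)		mtx_unlock(SOCKBUF_MTX(sb))
#define	SOCKBUF_OWNED(sb)		mtx_owned(SOCKBUF_MTX(sb))
#define	SOCKBUF_LOCK_ASSERT(sb)		mtx_assert(SOCKBUF_MTX(sb), MA_OWNED)

/* Socket state lock; assumed here to alias the receive buffer's mutex. */
#define	SOCK_LOCK(so)			SOCKBUF_LOCK(&(so)->so_rcv)
#define	SOCK_UNLOCK(so)			SOCKBUF_UNLOCK(&(so)->so_rcv)
#define	SOCK_OWNED(so)			SOCKBUF_OWNED(&(so)->so_rcv)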

Affected files ...

.. //depot/projects/netperf+sockets/sys/kern/sys_socket.c#2 edit
.. //depot/projects/netperf+sockets/sys/kern/uipc_socket.c#2 edit
.. //depot/projects/netperf+sockets/sys/kern/uipc_socket2.c#2 edit
.. //depot/projects/netperf+sockets/sys/kern/uipc_syscalls.c#2 edit
.. //depot/projects/netperf+sockets/sys/kern/uipc_usrreq.c#2 edit
.. //depot/projects/netperf+sockets/sys/kern/vfs_aio.c#3 edit
.. //depot/projects/netperf+sockets/sys/net/raw_usrreq.c#2 edit
.. //depot/projects/netperf+sockets/sys/netatalk/ddp_usrreq.c#2 edit
.. //depot/projects/netperf+sockets/sys/netinet6/in6_proto.c#2 edit
.. //depot/projects/netperf+sockets/sys/netinet6/in6_rmx.c#2 edit
.. //depot/projects/netperf+sockets/sys/netipx/ipx_pcb.c#2 edit
.. //depot/projects/netperf+sockets/sys/netipx/spx_usrreq.c#2 edit
.. //depot/projects/netperf+sockets/sys/netnatm/natm.c#2 edit
.. //depot/projects/netperf+sockets/sys/netsmb/smb_conn.c#2 edit
.. //depot/projects/netperf+sockets/sys/netsmb/smb_trantcp.c#2 edit
.. //depot/projects/netperf+sockets/sys/nfsclient/nfs_vfsops.c#2 edit
.. //depot/projects/netperf+sockets/sys/nfsserver/nfs_srvcache.c#2 edit
.. //depot/projects/netperf+sockets/sys/sys/socketvar.h#2 edit
.. //depot/projects/netperf+sockets/sys/sys/unpcb.h#2 edit

Differences ...

==== //depot/projects/netperf+sockets/sys/kern/sys_socket.c#2 (text+ko) ====

@@ -77,19 +77,12 @@
 	int flags;
 {
 	struct socket *so = fp->f_data;
-	int error;
-
-	mtx_lock(&Giant);
 #ifdef MAC
-	error = mac_check_socket_receive(active_cred, so);
-	if (error) {
-		mtx_unlock(&Giant);
+	int error = mac_check_socket_receive(active_cred, so);
+	if (error)
 		return (error);
-	}
 #endif
-	error = so->so_proto->pr_usrreqs->pru_soreceive(so, 0, uio, 0, 0, 0);
-	mtx_unlock(&Giant);
-	return (error);
+	return (so->so_proto->pr_usrreqs->pru_soreceive(so, 0, uio, 0, 0, 0));
 }
 
 /* ARGSUSED */
@@ -102,20 +95,13 @@
 	int flags;
 {
 	struct socket *so = fp->f_data;
-	int error;
-
-	mtx_lock(&Giant);
 #ifdef MAC
-	error = mac_check_socket_send(active_cred, so);
-	if (error) {
-		mtx_unlock(&Giant);
+	int error = mac_check_socket_send(active_cred, so);
+	if (error)
 		return (error);
-	}
 #endif
-	error = so->so_proto->pr_usrreqs->pru_sosend(so, 0, uio, 0, 0, 0,
-						    uio->uio_td);
-	mtx_unlock(&Giant);
-	return (error);
+	return (so->so_proto->pr_usrreqs->pru_sosend(so, 0, uio, 0, 0, 0,
+						    uio->uio_td));
 }
 
 int
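
With Giant no longer taken in soo_read()/soo_write(), serialization moves
down into the generic soreceive()/sosend() paths, which now take the
per-sockbuf mutex themselves (see the uipc_socket.c hunks below).  After
the hunk above, soo_read() reduces to roughly the following (a sketch;
the file itself keeps its K&R-style parameter declarations):

static int
soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct socket *so = fp->f_data;
#ifdef MAC
	int error = mac_check_socket_receive(active_cred, so);

	if (error)
		return (error);
#endif
	/* Locking is now the responsibility of pru_soreceive itself. */
	return (so->so_proto->pr_usrreqs->pru_soreceive(so, 0, uio, 0, 0, 0));
}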

==== //depot/projects/netperf+sockets/sys/kern/uipc_socket.c#2 (text+ko) ====

@@ -128,21 +128,14 @@
  * soalloc() returns a socket with a ref count of 0.
  */
 struct socket *
-soalloc(waitok)
-	int waitok;
+soalloc(int mflags)
 {
 	struct socket *so;
 #ifdef MAC
 	int error;
 #endif
-	int flag;
 
-	if (waitok == 1)
-		flag = M_WAITOK;
-	else
-		flag = M_NOWAIT;
-	flag |= M_ZERO;
-	so = uma_zalloc(socket_zone, flag);
+	so = uma_zalloc(socket_zone, mflags | M_ZERO);
 	if (so) {
 #ifdef MAC
 		error = mac_init_socket(so, flag);
@@ -152,6 +145,8 @@
 			return so;
 		}
 #endif
+		SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
+		SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
 		/* XXX race condition for reentrant kernel */
 		so->so_gencnt = ++so_gencnt;
 		/* sx_init(&so->so_sxlock, "socket sxlock"); */
@@ -195,7 +190,7 @@
 
 	if (prp->pr_type != type)
 		return (EPROTOTYPE);
-	so = soalloc(1);
+	so = soalloc(M_WAITOK);
 	if (so == NULL)
 		return (ENOBUFS);
 
@@ -210,6 +205,7 @@
 	soref(so);
 	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
 	if (error) {
+		SOCK_LOCK(so);
 		so->so_state |= SS_NOFDREF;
 		sorele(so);
 		return (error);
@@ -253,6 +249,8 @@
 	mac_destroy_socket(so);
 #endif
 	crfree(so->so_cred);
+	SOCKBUF_LOCK_DESTROY(&so->so_snd);
+	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 	/* sx_destroy(&so->so_sxlock); */
 	uma_zfree(socket_zone, so);
 	--numopensockets;
@@ -277,11 +275,13 @@
 		splx(s);
 		return (error);
 	}
+	SOCKBUF_LOCK(&so->so_rcv);
 	if (TAILQ_EMPTY(&so->so_comp))
 		so->so_options |= SO_ACCEPTCONN;
 	if (backlog < 0 || backlog > somaxconn)
 		backlog = somaxconn;
 	so->so_qlimit = backlog;
+	SOCKBUF_UNLOCK(&so->so_rcv);
 	splx(s);
 	return (0);
 }
@@ -290,13 +290,16 @@
 sofree(so)
 	struct socket *so;
 {
-	struct socket *head = so->so_head;
-
 	KASSERT(so->so_count == 0, ("socket %p so_count not 0", so));
 
-	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
+	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
+		SOCK_UNLOCK(so);
 		return;
-	if (head != NULL) {
+	}
+	SOCK_UNLOCK(so);
+	SOCKBUF_LOCK(&so->so_rcv);
+	if (so->so_head != NULL) {
+		struct socket *head = so->so_head;
 		if (so->so_state & SS_INCOMP) {
 			TAILQ_REMOVE(&head->so_incomp, so, so_list);
 			head->so_incqlen--;
@@ -307,6 +310,7 @@
 			 * accept(2) may hang after select(2) indicated
 			 * that the listening socket was ready.
 			 */
+			/* XXX SOCKBUF_UNLOCK(&so->so_rcv); */
 			return;
 		} else {
 			panic("sofree: not queued");
@@ -314,7 +318,10 @@
 		so->so_state &= ~SS_INCOMP;
 		so->so_head = NULL;
 	}
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
 	sbrelease(&so->so_snd, so);
+	SOCKBUF_UNLOCK(&so->so_snd);
 	sorflush(so);
 	sodealloc(so);
 }
@@ -354,11 +361,14 @@
 			(void) soabort(sp);
 		}
 	}
+	SOCK_LOCK(so);
 	if (so->so_pcb == 0)
 		goto discard;
 	if (so->so_state & SS_ISCONNECTED) {
 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
+			SOCK_UNLOCK(so);
 			error = sodisconnect(so);
+			SOCK_LOCK(so);
 			if (error)
 				goto drop;
 		}
@@ -376,7 +386,10 @@
 	}
 drop:
 	if (so->so_pcb) {
-		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
+		int error2;
+		SOCK_UNLOCK(so);
+		error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
+		SOCK_LOCK(so);
 		if (error == 0)
 			error = error2;
 	}
@@ -411,14 +424,12 @@
 	struct socket *so;
 	struct sockaddr **nam;
 {
-	int s = splnet();
 	int error;
 
 	if ((so->so_state & SS_NOFDREF) == 0)
 		panic("soaccept: !NOFDREF");
 	so->so_state &= ~SS_NOFDREF;
 	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
-	splx(s);
 	return (error);
 }
 
@@ -428,12 +439,10 @@
 	struct sockaddr *nam;
 	struct thread *td;
 {
-	int s;
 	int error;
 
 	if (so->so_options & SO_ACCEPTCONN)
 		return (EOPNOTSUPP);
-	s = splnet();
 	/*
 	 * If protocol is connection-based, can only connect once.
 	 * Otherwise, if connected, try to disconnect first.
@@ -446,7 +455,6 @@
 		error = EISCONN;
 	else
 		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
-	splx(s);
 	return (error);
 }
 
@@ -455,11 +463,9 @@
 	struct socket *so1;
 	struct socket *so2;
 {
-	int s = splnet();
 	int error;
 
 	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
-	splx(s);
 	return (error);
 }
 
@@ -467,20 +473,13 @@
 sodisconnect(so)
 	struct socket *so;
 {
-	int s = splnet();
 	int error;
 
-	if ((so->so_state & SS_ISCONNECTED) == 0) {
-		error = ENOTCONN;
-		goto bad;
-	}
-	if (so->so_state & SS_ISDISCONNECTING) {
-		error = EALREADY;
-		goto bad;
-	}
+	if ((so->so_state & SS_ISCONNECTED) == 0)
+		return ENOTCONN;
+	if (so->so_state & SS_ISDISCONNECTING)
+		return EALREADY;
 	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
-bad:
-	splx(s);
 	return (error);
 }
 
@@ -565,7 +564,7 @@
 		clen = control->m_len;
 #define	snderr(errno)	{ error = (errno); splx(s); goto release; }
 
-restart:
+	SOCKBUF_LOCK(&so->so_snd);
 	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
 	if (error)
 		goto out;
@@ -605,12 +604,11 @@
 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
 			if (so->so_state & SS_NBIO)
 				snderr(EWOULDBLOCK);
-			sbunlock(&so->so_snd);
 			error = sbwait(&so->so_snd);
 			splx(s);
 			if (error)
-				goto out;
-			goto restart;
+				goto release;
+			continue;
 		}
 		splx(s);
 		mp = &top;
@@ -627,10 +625,12 @@
 #ifdef ZERO_COPY_SOCKETS
 			cow_send = 0;
 #endif /* ZERO_COPY_SOCKETS */
+			SOCKBUF_UNLOCK(&so->so_snd);
 			if (top == 0) {
 				MGETHDR(m, M_TRYWAIT, MT_DATA);
 				if (m == NULL) {
 					error = ENOBUFS;
+					SOCKBUF_LOCK(&so->so_snd); /* XXX */
 					goto release;
 				}
 				mlen = MHLEN;
@@ -640,6 +640,7 @@
 				MGET(m, M_TRYWAIT, MT_DATA);
 				if (m == NULL) {
 					error = ENOBUFS;
+					SOCKBUF_LOCK(&so->so_snd); /* XXX */
 					goto release;
 				}
 				mlen = MLEN;
@@ -687,6 +688,7 @@
 			else
 #endif /* ZERO_COPY_SOCKETS */
 			error = uiomove(mtod(m, void *), (int)len, uio);
+			SOCKBUF_LOCK(&so->so_snd);
 			resid = uio->uio_resid;
 			m->m_len = len;
 			*mp = m;
@@ -741,6 +743,7 @@
 release:
 	sbunlock(&so->so_snd);
 out:
+	SOCKBUF_UNLOCK(&so->so_snd);
 	if (top)
 		m_freem(top);
 	if (control)
@@ -832,12 +835,13 @@
 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
 		(*pr->pr_usrreqs->pru_rcvd)(so, 0);
 
-restart:
+	SOCKBUF_LOCK(&so->so_rcv);
 	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
 	if (error)
-		return (error);
+		goto out;
 	s = splnet();
 
+restart:
 	m = so->so_rcv.sb_mb;
 	/*
 	 * If we have less data than requested, block awaiting more
@@ -855,9 +859,8 @@
 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
 	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
-		KASSERT(m != 0 || !so->so_rcv.sb_cc,
-		    ("receive: m == %p so->so_rcv.sb_cc == %u",
-		    m, so->so_rcv.sb_cc));
+		KASSERT(!(m == 0 && so->so_rcv.sb_cc),
+		    ("m %p so->so_rcv.sb_cc %u", m, so->so_rcv.sb_cc));
 		if (so->so_error) {
 			if (m)
 				goto dontblock;
@@ -890,11 +893,10 @@
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
-		sbunlock(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
 		splx(s);
 		if (error)
-			return (error);
+			goto out;
 		goto restart;
 	}
 dontblock:
@@ -908,8 +910,8 @@
 		    ("m->m_type == %d", m->m_type));
 		orig_resid = 0;
 		if (psa)
-			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
-					    mp0 == 0);
+			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
+					M_NOWAIT);	/* XXX */
 		if (flags & MSG_PEEK) {
 			m = m->m_next;
 		} else {
@@ -998,6 +1000,7 @@
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			splx(s);
+			SOCKBUF_UNLOCK(&so->so_rcv);
 #ifdef ZERO_COPY_SOCKETS
 			if (so_zero_copy_receive) {
 				vm_page_t pg;
@@ -1021,6 +1024,7 @@
 			} else
 #endif /* ZERO_COPY_SOCKETS */
 			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
+			SOCKBUF_LOCK(&so->so_rcv);
 			s = splnet();
 			if (error)
 				goto release;
@@ -1102,9 +1106,8 @@
 			SBLASTMBUFCHK(&so->so_rcv);
 			error = sbwait(&so->so_rcv);
 			if (error) {
-				sbunlock(&so->so_rcv);
-				splx(s);
-				return (0);
+				error = 0;
+				goto release;
 			}
 			m = so->so_rcv.sb_mb;
 			if (m)
@@ -1148,6 +1151,8 @@
 release:
 	sbunlock(&so->so_rcv);
 	splx(s);
+out:
+	SOCKBUF_UNLOCK(&so->so_rcv);
 	return (error);
 }
 
@@ -1174,22 +1179,21 @@
 {
 	struct sockbuf *sb = &so->so_rcv;
 	struct protosw *pr = so->so_proto;
-	int s;
 	struct sockbuf asb;
 
+	SOCKBUF_LOCK(sb);
 	sb->sb_flags |= SB_NOINTR;
 	(void) sblock(sb, M_WAITOK);
-	s = splimp();
-	socantrcvmore(so);
+	socantrcvmore_locked(so);
 	sbunlock(sb);
 	asb = *sb;
 	/*
-	 * Invalidate/clear most of the sockbuf structure, but keep
-	 * its selinfo structure valid.
+	 * Invalidate/clear most of the sockbuf structure, but leave
+	 * selinfo and mutex data unchanged.
 	 */
 	bzero(&sb->sb_startzero,
 	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
-	splx(s);
+	SOCKBUF_UNLOCK(sb);
 
 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
 		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
@@ -1207,6 +1211,7 @@
 	struct so_accf	*af = so->so_accf;
 	int	error = 0;
 
+/* XXX locking */
 	/* do not set/remove accept filters on non listen sockets */
 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
 		error = EINVAL;
@@ -1794,7 +1799,6 @@
 {
 	struct socket *so = kn->kn_fp->f_data;
 	struct sockbuf *sb;
-	int s;
 
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
@@ -1812,10 +1816,10 @@
 		return (1);
 	}
 
-	s = splnet();
+	SOCKBUF_LOCK(sb);
 	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
 	sb->sb_flags |= SB_KNOTE;
-	splx(s);
+	SOCKBUF_UNLOCK(sb);
 	return (0);
 }
 
@@ -1823,12 +1827,12 @@
 filt_sordetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int s = splnet();
 
+	SOCKBUF_LOCK(&so->so_rcv);
 	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
-	splx(s);
+	SOCKBUF_UNLOCK(&so->so_rcv);
 }
 
 /*ARGSUSED*/
@@ -1836,30 +1840,35 @@
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
+	int result;
 
+	SOCKBUF_LOCK(&so->so_rcv);	/* XXX too conservative? */
 	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
 	if (so->so_state & SS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
-		return (1);
+		result = 1;
+	} else if (so->so_error) {	/* temporary udp error */
+		result = 1;
+	} else if (kn->kn_sfflags & NOTE_LOWAT) {
+		result = (kn->kn_data >= kn->kn_sdata);
+	} else {
+		result = (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
 	}
-	if (so->so_error)	/* temporary udp error */
-		return (1);
-	if (kn->kn_sfflags & NOTE_LOWAT)
-		return (kn->kn_data >= kn->kn_sdata);
-	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	return (result);
 }
 
 static void
 filt_sowdetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int s = splnet();
 
+	SOCKBUF_LOCK(&so->so_snd);
 	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
 		so->so_snd.sb_flags &= ~SB_KNOTE;
-	splx(s);
+	SOCKBUF_UNLOCK(&so->so_snd);
 }
 
 /*ARGSUSED*/
@@ -1867,21 +1876,26 @@
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
+	int result;
 
+	SOCKBUF_LOCK(&so->so_snd);	/* XXX too conservative? */
 	kn->kn_data = sbspace(&so->so_snd);
 	if (so->so_state & SS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
-		return (1);
+		result = 1;
+	} else if (so->so_error) {	/* temporary udp error */
+		result = 1;
+	} else if (((so->so_state & SS_ISCONNECTED) == 0) &&
+	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+		result = 0;
+	} else if (kn->kn_sfflags & NOTE_LOWAT) {
+		result = (kn->kn_data >= kn->kn_sdata);
+	} else {
+		result = (kn->kn_data >= so->so_snd.sb_lowat);
 	}
-	if (so->so_error)	/* temporary udp error */
-		return (1);
-	if (((so->so_state & SS_ISCONNECTED) == 0) &&
-	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
-		return (0);
-	if (kn->kn_sfflags & NOTE_LOWAT)
-		return (kn->kn_data >= kn->kn_sdata);
-	return (kn->kn_data >= so->so_snd.sb_lowat);
+	SOCKBUF_UNLOCK(&so->so_snd);
+	return (result);
 }
 
 /*ARGSUSED*/
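
The recurring discipline in the sosend()/soreceive() and soclose() hunks
above is to drop the sockbuf or socket lock across anything that may
sleep or fault (uiomove() on user memory, MGET(..., M_TRYWAIT, ...), the
pru_* calls into the protocol) and to retake it afterwards; the XXX
comments mark places where the buffer state still needs to be revalidated
after relocking.  In skeleton form (a sketch of the shape, not a literal
excerpt):

	SOCKBUF_LOCK(&so->so_rcv);
	/* ... examine sb_mb/sb_cc only while holding the lock ... */
	SOCKBUF_UNLOCK(&so->so_rcv);	/* uiomove may fault and sleep */
	error = uiomove(mtod(m, char *) + moff, (int)len, uio);
	SOCKBUF_LOCK(&so->so_rcv);	/* sb state may have changed */
	if (error)
		goto release;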

==== //depot/projects/netperf+sockets/sys/kern/uipc_socket2.c#2 (text+ko) ====

@@ -108,9 +108,14 @@
 soisconnecting(so)
 	register struct socket *so;
 {
+	int need_lock = !SOCK_OWNED(so);
 
+	if (need_lock)
+		SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= SS_ISCONNECTING;
+	if (need_lock)
+		SOCK_UNLOCK(so);
 }
 
 void
@@ -118,56 +123,79 @@
 	struct socket *so;
 {
 	struct socket *head = so->so_head;
+	int need_lock = !SOCK_OWNED(so);
 
+	if (need_lock)
+		SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
 	so->so_state |= SS_ISCONNECTED;
 	if (head && (so->so_state & SS_INCOMP)) {
-		if ((so->so_options & SO_ACCEPTFILTER) != 0) {
+		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+			if (need_lock)
+				SOCK_UNLOCK(so);
+			SOCK_LOCK(head);
+			TAILQ_REMOVE(&head->so_incomp, so, so_list);
+			head->so_incqlen--;
+			so->so_state &= ~SS_INCOMP;
+			TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
+			head->so_qlen++;
+			so->so_state |= SS_COMP;
+			sorwakeup_locked(head);
+			wakeup_one(&head->so_timeo);
+			SOCK_UNLOCK(head);
+		} else {
+/* XXX locking */
 			so->so_upcall = head->so_accf->so_accept_filter->accf_callback;
 			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
 			so->so_rcv.sb_flags |= SB_UPCALL;
 			so->so_options &= ~SO_ACCEPTFILTER;
 			so->so_upcall(so, so->so_upcallarg, M_TRYWAIT);
-			return;
 		}
-		TAILQ_REMOVE(&head->so_incomp, so, so_list);
-		head->so_incqlen--;
-		so->so_state &= ~SS_INCOMP;
-		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
-		head->so_qlen++;
-		so->so_state |= SS_COMP;
-		sorwakeup(head);
-		wakeup_one(&head->so_timeo);
 	} else {
 		wakeup(&so->so_timeo);
+		SOCK_UNLOCK(so);
 		sorwakeup(so);
 		sowwakeup(so);
 	}
+	if (!need_lock)
+		SOCK_LOCK(so);
 }
 
 void
 soisdisconnecting(so)
 	register struct socket *so;
 {
+	int need_lock = !SOCK_OWNED(so);
 
+	if (need_lock)
+		SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTING;
 	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
 	wakeup(&so->so_timeo);
+	SOCK_UNLOCK(so);
 	sowwakeup(so);
 	sorwakeup(so);
+	if (!need_lock)
+		SOCK_LOCK(so);
 }
 
 void
 soisdisconnected(so)
 	register struct socket *so;
 {
+	int need_lock = !SOCK_OWNED(so);
 
+	if (need_lock)
+		SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
 	wakeup(&so->so_timeo);
+	SOCK_UNLOCK(so);
 	sbdrop(&so->so_snd, so->so_snd.sb_cc);
 	sowwakeup(so);
 	sorwakeup(so);
+	if (!need_lock)
+		SOCK_LOCK(so);
 }
 
 /*
@@ -186,10 +214,14 @@
 	int connstatus;
 {
 	register struct socket *so;
+	int over;
 
-	if (head->so_qlen > 3 * head->so_qlimit / 2)
+	SOCK_LOCK(head);
+	over = (head->so_qlen > 3 * head->so_qlimit / 2);
+	SOCK_UNLOCK(head);
+	if (over)
 		return ((struct socket *)0);
-	so = soalloc(0);
+	so = soalloc(M_NOWAIT);
 	if (so == NULL)
 		return ((struct socket *)0);
 	if ((head->so_options & SO_ACCEPTFILTER) != 0)
@@ -205,12 +237,13 @@
 #ifdef MAC
 	mac_create_socket_from_socket(head, so);
 #endif
+
 	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
 	    (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
 		sodealloc(so);
 		return ((struct socket *)0);
 	}
-
+	SOCKBUF_LOCK(&head->so_rcv);
 	if (connstatus) {
 		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
 		so->so_state |= SS_COMP;
@@ -225,10 +258,11 @@
 		so->so_state |= SS_INCOMP;
 		head->so_incqlen++;
 	}
+	SOCKBUF_UNLOCK(&head->so_rcv);
 	if (connstatus) {
+		so->so_state |= connstatus;
 		sorwakeup(head);
 		wakeup(&head->so_timeo);
-		so->so_state |= connstatus;
 	}
 	return (so);
 }
@@ -253,6 +287,16 @@
 }
 
 void
+socantsendmore_locked(so)
+	struct socket *so;
+{
+	SOCKBUF_LOCK_ASSERT(&so->so_snd);
+
+	so->so_state |= SS_CANTSENDMORE;
+	sowwakeup(so);
+}
+
+void
 socantrcvmore(so)
 	struct socket *so;
 {
@@ -261,6 +305,16 @@
 	sorwakeup(so);
 }
 
+void
+socantrcvmore_locked(so)
+	struct socket *so;
+{
+	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+
+	so->so_state |= SS_CANTRCVMORE;
+	sorwakeup_locked(so);
+}
+
 /*
  * Wait for data to arrive at/drain from a socket buffer.
  */
@@ -268,9 +322,10 @@
 sbwait(sb)
 	struct sockbuf *sb;
 {
+	SOCKBUF_LOCK_ASSERT(sb);
 
 	sb->sb_flags |= SB_WAIT;
-	return (tsleep(&sb->sb_cc,
+	return (msleep(&sb->sb_cc, &sb->sb_mtx,
 	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
 	    sb->sb_timeo));
 }
@@ -285,9 +340,11 @@
 {
 	int error;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	while (sb->sb_flags & SB_LOCK) {
 		sb->sb_flags |= SB_WANT;
-		error = tsleep(&sb->sb_flags,
+		error = msleep(&sb->sb_flags, &sb->sb_mtx,
 		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
 		    "sblock", 0);
 		if (error)
@@ -298,29 +355,71 @@
 }
 
 /*
+ * The part of sowakeup that must be done while
+ * holding the sockbuf lock.
+ */
+static __inline void
+sowakeup_under_lock(struct socket *so, struct sockbuf *sb)
+{
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	selwakeup(&sb->sb_sel);
+	sb->sb_flags &= ~SB_SEL;
+	if (sb->sb_flags & SB_WAIT) {
+		sb->sb_flags &= ~SB_WAIT;
+		wakeup(&sb->sb_cc);
+	}
+}
+
+/*
  * Wakeup processes waiting on a socket buffer.
  * Do asynchronous notification via SIGIO
  * if the socket has the SS_ASYNC flag set.
+ *
+ * The caller is assumed to hold the necessary
+ * sockbuf lock.
  */
 void
+sowakeup_locked(so, sb)
+	register struct socket *so;
+	register struct sockbuf *sb;
+{
+
+	sowakeup_under_lock(so, sb);
+
+	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
+		pgsigio(&so->so_sigio, SIGIO, 0);
+	if (sb->sb_flags & SB_UPCALL)
+		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
+	if (sb->sb_flags & SB_AIO)		/* XXX locking */
+		aio_swake(so, sb);
+	KNOTE(&sb->sb_sel.si_note, 0);		/* XXX locking? */
+}
+
+/*
+ * Wakeup processes waiting on a socket buffer.
+ * Do asynchronous notification via SIGIO
+ * if the socket has the SS_ASYNC flag set.
+ *
+ * The caller does not hold the sockbuf lock.
+ */
+void
 sowakeup(so, sb)
 	register struct socket *so;
 	register struct sockbuf *sb;
 {
 
-	selwakeup(&sb->sb_sel);
-	sb->sb_flags &= ~SB_SEL;
-	if (sb->sb_flags & SB_WAIT) {
-		sb->sb_flags &= ~SB_WAIT;
-		wakeup(&sb->sb_cc);
-	}
+	SOCKBUF_LOCK(sb);
+	sowakeup_under_lock(so, sb);
+	SOCKBUF_UNLOCK(sb);
+
 	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
 		pgsigio(&so->so_sigio, SIGIO, 0);
 	if (sb->sb_flags & SB_UPCALL)
 		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
-	if (sb->sb_flags & SB_AIO)
+	if (sb->sb_flags & SB_AIO)		/* XXX locking */
 		aio_swake(so, sb);
-	KNOTE(&sb->sb_sel.si_note, 0);
+	KNOTE(&sb->sb_sel.si_note, 0);		/* XXX locking? */
 }
 
 /*
@@ -474,6 +573,8 @@
 {
 	struct mbuf *m = sb->sb_mb;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	while (m && m->m_nextpkt)
 		m = m->m_nextpkt;
 
@@ -493,6 +594,8 @@
 	struct mbuf *m = sb->sb_mb;
 	struct mbuf *n;
 
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	while (m && m->m_nextpkt)
 		m = m->m_nextpkt;
 
@@ -529,7 +632,7 @@
  * discarded and mbufs are compacted where possible.
  */
 void
-sbappend(sb, m)
+sbappend_locked(sb, m)
 	struct sockbuf *sb;
 	struct mbuf *m;
 {
@@ -537,6 +640,9 @@
 
 	if (m == 0)
 		return;
+
+	SOCKBUF_LOCK_ASSERT(sb);
+
 	SBLASTRECORDCHK(sb);
 	n = sb->sb_mb;
 	if (n) {
@@ -544,7 +650,7 @@
 			n = n->m_nextpkt;
 		do {
 			if (n->m_flags & M_EOR) {
-				sbappendrecord(sb, m); /* XXXXXX!!!! */
+				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 				return;
 			}
 		} while (n->m_next && (n = n->m_next));
@@ -557,7 +663,7 @@
 		if ((n = sb->sb_lastrecord) != NULL) {
 			do {
 				if (n->m_flags & M_EOR) {
-					sbappendrecord(sb, m); /* XXXXXX!!!! */
+					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 					return;
 				}
 			} while (n->m_next && (n = n->m_next));
@@ -574,13 +680,33 @@
 }
 
 /*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb.  The additional space associated
+ * the mbuf chain is recorded in sb.  Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+void
+sbappend(sb, m)
+	struct sockbuf *sb;
+	struct mbuf *m;
+{
+	if (!SOCKBUF_OWNED(sb)) {
+		SOCKBUF_LOCK(sb);
+		sbappend_locked(sb, m);
+		SOCKBUF_UNLOCK(sb);
+	} else
+		sbappend_locked(sb, m);
+}
+
+/*
  * This version of sbappend() should only be used when the caller
  * absolutely knows that there will never be more than one record
  * in the socket buffer, that is, a stream protocol (such as TCP).
  */
 void
-sbappendstream(struct sockbuf *sb, struct mbuf *m)
+sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
 {
+	SOCKBUF_LOCK_ASSERT(sb);
 
 	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
 	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
@@ -590,13 +716,28 @@
 #ifdef MBUFTRACE
 	m_claim(m, sb->sb_mowner);
 #endif
-
 	sbcompress(sb, m, sb->sb_mbtail);
 
 	sb->sb_lastrecord = sb->sb_mb;
 	SBLASTRECORDCHK(sb);
 }
 
+/*
+ * This version of sbappend() should only be used when the caller
+ * absolutely knows that there will never be more than one record
+ * in the socket buffer, that is, a stream protocol (such as TCP).
+ */
+void
+sbappendstream(struct sockbuf *sb, struct mbuf *m)
+{
+	if (!SOCKBUF_OWNED(sb)) {
+		SOCKBUF_LOCK(sb);

>>> TRUNCATED FOR MAIL (1000 lines) <<<
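
With sbwait() and sblock() switched from tsleep() to msleep() on sb_mtx,
a waiter now releases the buffer lock atomically while sleeping, and the
wakeup side splits into sowakeup_locked() (caller already holds sb_mtx)
and sowakeup() (takes and releases it internally).  A protocol input path
that already holds the receive buffer lock would then look roughly like
this (a usage sketch, not part of the change itself):

	SOCKBUF_LOCK(&so->so_rcv);
	sbappend_locked(&so->so_rcv, m);	/* queue incoming data */
	sorwakeup_locked(so);			/* wake readers, lock held */
	SOCKBUF_UNLOCK(&so->so_rcv);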


