Skip site navigation (1) | Skip section navigation (2)
Date:      Sun, 18 Jan 2004 16:32:36 -0800 (PST)
From:      Scott Long <scottl@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 45562 for review
Message-ID:  <200401190032.i0J0WanH023063@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=45562

Change 45562 by scottl@scottl_netperf_socket on 2004/01/18 16:31:40

	Merge over the socket locking and MT_TAG changes from sam_socket
	and sam_netperf so we have a base to work from.

Affected files ...

.. //depot/projects/netperf_socket/sys/kern/kern_descrip.c#2 integrate
.. //depot/projects/netperf_socket/sys/kern/sys_socket.c#2 integrate
.. //depot/projects/netperf_socket/sys/kern/uipc_domain.c#2 integrate
.. //depot/projects/netperf_socket/sys/kern/uipc_socket.c#2 integrate
.. //depot/projects/netperf_socket/sys/kern/uipc_socket2.c#2 integrate
.. //depot/projects/netperf_socket/sys/kern/uipc_usrreq.c#2 integrate
.. //depot/projects/netperf_socket/sys/net/bpf.c#2 integrate
.. //depot/projects/netperf_socket/sys/net/bridge.c#2 integrate
.. //depot/projects/netperf_socket/sys/net/raw_cb.c#2 integrate
.. //depot/projects/netperf_socket/sys/net/raw_usrreq.c#2 integrate
.. //depot/projects/netperf_socket/sys/net/route.h#2 integrate
.. //depot/projects/netperf_socket/sys/netatalk/ddp_usrreq.c#2 integrate
.. //depot/projects/netperf_socket/sys/netatm/atm_socket.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/in_pcb.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/in_proto.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_divert.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_dummynet.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_dummynet.h#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_fastfwd.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_fw.h#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_fw2.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_icmp.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_input.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_mroute.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_output.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/ip_var.h#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/tcp_debug.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/tcp_input.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/tcp_subr.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/tcp_syncache.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/tcp_usrreq.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/tcp_var.h#2 integrate
.. //depot/projects/netperf_socket/sys/netinet/udp_usrreq.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet6/in6_pcb.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet6/in6_proto.c#2 integrate
.. //depot/projects/netperf_socket/sys/netinet6/in6_rmx.c#2 integrate
.. //depot/projects/netperf_socket/sys/netipx/ipx_pcb.c#2 integrate
.. //depot/projects/netperf_socket/sys/netipx/ipx_usrreq.c#2 integrate
.. //depot/projects/netperf_socket/sys/netnatm/natm.c#2 integrate
.. //depot/projects/netperf_socket/sys/netsmb/smb_conn.c#2 integrate
.. //depot/projects/netperf_socket/sys/netsmb/smb_trantcp.c#2 integrate
.. //depot/projects/netperf_socket/sys/nfsclient/bootp_subr.c#2 integrate
.. //depot/projects/netperf_socket/sys/nfsclient/krpc_subr.c#2 integrate
.. //depot/projects/netperf_socket/sys/nfsclient/nfs_socket.c#2 integrate
.. //depot/projects/netperf_socket/sys/nfsclient/nfs_vfsops.c#2 integrate
.. //depot/projects/netperf_socket/sys/nfsserver/nfs_srvcache.c#2 integrate
.. //depot/projects/netperf_socket/sys/nfsserver/nfs_srvsock.c#2 integrate
.. //depot/projects/netperf_socket/sys/nfsserver/nfs_syscalls.c#2 integrate
.. //depot/projects/netperf_socket/sys/rpc/rpcclnt.c#2 integrate
.. //depot/projects/netperf_socket/sys/sys/mbuf.h#2 integrate
.. //depot/projects/netperf_socket/sys/sys/socketvar.h#2 integrate
.. //depot/projects/netperf_socket/sys/sys/unpcb.h#2 integrate

Differences ...

==== //depot/projects/netperf_socket/sys/kern/kern_descrip.c#2 (text+ko) ====

@@ -2032,7 +2032,7 @@
 void
 fputsock(struct socket *so)
 {
-
+	SOCK_LOCK(so);
 	sorele(so);
 }
 

==== //depot/projects/netperf_socket/sys/kern/sys_socket.c#2 (text+ko) ====

@@ -77,19 +77,12 @@
 	int flags;
 {
 	struct socket *so = fp->f_data;
-	int error;
-
-	mtx_lock(&Giant);
 #ifdef MAC
-	error = mac_check_socket_receive(active_cred, so);
-	if (error) {
-		mtx_unlock(&Giant);
+	int error = mac_check_socket_receive(active_cred, so);
+	if (error)
 		return (error);
-	}
 #endif
-	error = so->so_proto->pr_usrreqs->pru_soreceive(so, 0, uio, 0, 0, 0);
-	mtx_unlock(&Giant);
-	return (error);
+	return (so->so_proto->pr_usrreqs->pru_soreceive(so, 0, uio, 0, 0, 0));
 }
 
 /* ARGSUSED */
@@ -102,20 +95,13 @@
 	int flags;
 {
 	struct socket *so = fp->f_data;
-	int error;
-
-	mtx_lock(&Giant);
 #ifdef MAC
-	error = mac_check_socket_send(active_cred, so);
-	if (error) {
-		mtx_unlock(&Giant);
+	int error = mac_check_socket_send(active_cred, so);
+	if (error)
 		return (error);
-	}
 #endif
-	error = so->so_proto->pr_usrreqs->pru_sosend(so, 0, uio, 0, 0, 0,
-						    uio->uio_td);
-	mtx_unlock(&Giant);
-	return (error);
+	return (so->so_proto->pr_usrreqs->pru_sosend(so, 0, uio, 0, 0, 0,
+						    uio->uio_td));
 }
 
 int

==== //depot/projects/netperf_socket/sys/kern/uipc_domain.c#2 (text+ko) ====


==== //depot/projects/netperf_socket/sys/kern/uipc_socket.c#2 (text+ko) ====

@@ -128,21 +128,14 @@
  * soalloc() returns a socket with a ref count of 0.
  */
 struct socket *
-soalloc(waitok)
-	int waitok;
+soalloc(int mflags)
 {
 	struct socket *so;
 #ifdef MAC
 	int error;
 #endif
-	int flag;
 
-	if (waitok == 1)
-		flag = M_WAITOK;
-	else
-		flag = M_NOWAIT;
-	flag |= M_ZERO;
-	so = uma_zalloc(socket_zone, flag);
+	so = uma_zalloc(socket_zone, mflags | M_ZERO);
 	if (so) {
 #ifdef MAC
 		error = mac_init_socket(so, flag);
@@ -152,6 +145,8 @@
 			return so;
 		}
 #endif
+		SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
+		SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
 		/* XXX race condition for reentrant kernel */
 		so->so_gencnt = ++so_gencnt;
 		/* sx_init(&so->so_sxlock, "socket sxlock"); */
@@ -195,7 +190,7 @@
 
 	if (prp->pr_type != type)
 		return (EPROTOTYPE);
-	so = soalloc(1);
+	so = soalloc(M_WAITOK);
 	if (so == NULL)
 		return (ENOBUFS);
 
@@ -210,6 +205,7 @@
 	soref(so);
 	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
 	if (error) {
+		SOCK_LOCK(so);
 		so->so_state |= SS_NOFDREF;
 		sorele(so);
 		return (error);
@@ -253,6 +249,8 @@
 	mac_destroy_socket(so);
 #endif
 	crfree(so->so_cred);
+	SOCKBUF_LOCK_DESTROY(&so->so_snd);
+	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
 	/* sx_destroy(&so->so_sxlock); */
 	uma_zfree(socket_zone, so);
 	--numopensockets;
@@ -277,11 +275,13 @@
 		splx(s);
 		return (error);
 	}
+	SOCKBUF_LOCK(&so->so_rcv);
 	if (TAILQ_EMPTY(&so->so_comp))
 		so->so_options |= SO_ACCEPTCONN;
 	if (backlog < 0 || backlog > somaxconn)
 		backlog = somaxconn;
 	so->so_qlimit = backlog;
+	SOCKBUF_UNLOCK(&so->so_rcv);
 	splx(s);
 	return (0);
 }
@@ -290,13 +290,16 @@
 sofree(so)
 	struct socket *so;
 {
-	struct socket *head = so->so_head;
-
 	KASSERT(so->so_count == 0, ("socket %p so_count not 0", so));
 
-	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
+	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
+		SOCK_UNLOCK(so);
 		return;
-	if (head != NULL) {
+	}
+	SOCK_UNLOCK(so);
+	SOCKBUF_LOCK(&so->so_rcv);
+	if (so->so_head != NULL) {
+		struct socket *head = so->so_head;
 		if (so->so_state & SS_INCOMP) {
 			TAILQ_REMOVE(&head->so_incomp, so, so_list);
 			head->so_incqlen--;
@@ -307,6 +310,7 @@
 			 * accept(2) may hang after select(2) indicated
 			 * that the listening socket was ready.
 			 */
+			/* XXX SOCKBUF_UNLOCK(&so->so_rcv); */
 			return;
 		} else {
 			panic("sofree: not queued");
@@ -314,7 +318,10 @@
 		so->so_state &= ~SS_INCOMP;
 		so->so_head = NULL;
 	}
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCKBUF_LOCK(&so->so_snd);
 	sbrelease(&so->so_snd, so);
+	SOCKBUF_UNLOCK(&so->so_snd);
 	sorflush(so);
 	sodealloc(so);
 }
@@ -354,11 +361,14 @@
 			(void) soabort(sp);
 		}
 	}
+	SOCK_LOCK(so);
 	if (so->so_pcb == 0)
 		goto discard;
 	if (so->so_state & SS_ISCONNECTED) {
 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
+			SOCK_UNLOCK(so);
 			error = sodisconnect(so);
+			SOCK_LOCK(so);
 			if (error)
 				goto drop;
 		}
@@ -367,7 +377,7 @@
 			    (so->so_state & SS_NBIO))
 				goto drop;
 			while (so->so_state & SS_ISCONNECTED) {
-				error = tsleep(&so->so_timeo,
+				error = msleep(&so->so_timeo, SOCK_MTX(so),
 				    PSOCK | PCATCH, "soclos", so->so_linger * hz);
 				if (error)
 					break;
@@ -376,7 +386,10 @@
 	}
 drop:
 	if (so->so_pcb) {
-		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
+		int error2;
+		SOCK_UNLOCK(so);
+		error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
+		SOCK_LOCK(so);
 		if (error == 0)
 			error = error2;
 	}
@@ -400,6 +413,7 @@
 
 	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
 	if (error) {
+		SOCK_LOCK(so);
 		sotryfree(so);	/* note: does not decrement the ref count */
 		return error;
 	}
@@ -411,14 +425,12 @@
 	struct socket *so;
 	struct sockaddr **nam;
 {
-	int s = splnet();
 	int error;
 
 	if ((so->so_state & SS_NOFDREF) == 0)
 		panic("soaccept: !NOFDREF");
 	so->so_state &= ~SS_NOFDREF;
 	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
-	splx(s);
 	return (error);
 }
 
@@ -428,12 +440,10 @@
 	struct sockaddr *nam;
 	struct thread *td;
 {
-	int s;
 	int error;
 
 	if (so->so_options & SO_ACCEPTCONN)
 		return (EOPNOTSUPP);
-	s = splnet();
 	/*
 	 * If protocol is connection-based, can only connect once.
 	 * Otherwise, if connected, try to disconnect first.
@@ -446,7 +456,6 @@
 		error = EISCONN;
 	else
 		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
-	splx(s);
 	return (error);
 }
 
@@ -455,11 +464,9 @@
 	struct socket *so1;
 	struct socket *so2;
 {
-	int s = splnet();
 	int error;
 
 	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
-	splx(s);
 	return (error);
 }
 
@@ -467,20 +474,13 @@
 sodisconnect(so)
 	struct socket *so;
 {
-	int s = splnet();
 	int error;
 
-	if ((so->so_state & SS_ISCONNECTED) == 0) {
-		error = ENOTCONN;
-		goto bad;
-	}
-	if (so->so_state & SS_ISDISCONNECTING) {
-		error = EALREADY;
-		goto bad;
-	}
+	if ((so->so_state & SS_ISCONNECTED) == 0)
+		return ENOTCONN;
+	if (so->so_state & SS_ISDISCONNECTING)
+		return EALREADY;
 	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
-bad:
-	splx(s);
 	return (error);
 }
 
@@ -531,7 +531,7 @@
 	struct mbuf **mp;
 	struct mbuf *m;
 	long space, len, resid;
-	int clen = 0, error, s, dontroute, mlen;
+	int clen = 0, error, dontroute, mlen;
 	int atomic = sosendallatonce(so) || top;
 #ifdef ZERO_COPY_SOCKETS
 	int cow_send;
@@ -563,20 +563,18 @@
 		td->td_proc->p_stats->p_ru.ru_msgsnd++;
 	if (control)
 		clen = control->m_len;
-#define	snderr(errno)	{ error = (errno); splx(s); goto release; }
+#define	snderr(errno)	{ error = (errno); goto release; }
 
-restart:
+	SOCKBUF_LOCK(&so->so_snd);
 	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
 	if (error)
 		goto out;
 	do {
-		s = splnet();
 		if (so->so_state & SS_CANTSENDMORE)
 			snderr(EPIPE);
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
-			splx(s);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
@@ -605,14 +603,11 @@
 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
 			if (so->so_state & SS_NBIO)
 				snderr(EWOULDBLOCK);
-			sbunlock(&so->so_snd);
 			error = sbwait(&so->so_snd);
-			splx(s);
 			if (error)
-				goto out;
-			goto restart;
+				goto release;
+			continue;
 		}
-		splx(s);
 		mp = &top;
 		space -= clen;
 		do {
@@ -627,10 +622,12 @@
 #ifdef ZERO_COPY_SOCKETS
 			cow_send = 0;
 #endif /* ZERO_COPY_SOCKETS */
+			SOCKBUF_UNLOCK(&so->so_snd);
 			if (top == 0) {
 				MGETHDR(m, M_TRYWAIT, MT_DATA);
 				if (m == NULL) {
 					error = ENOBUFS;
+					SOCKBUF_LOCK(&so->so_snd); /* XXX */
 					goto release;
 				}
 				mlen = MHLEN;
@@ -640,6 +637,7 @@
 				MGET(m, M_TRYWAIT, MT_DATA);
 				if (m == NULL) {
 					error = ENOBUFS;
+					SOCKBUF_LOCK(&so->so_snd); /* XXX */
 					goto release;
 				}
 				mlen = MLEN;
@@ -687,6 +685,7 @@
 			else
 #endif /* ZERO_COPY_SOCKETS */
 			error = uiomove(mtod(m, void *), (int)len, uio);
+			SOCKBUF_LOCK(&so->so_snd);
 			resid = uio->uio_resid;
 			m->m_len = len;
 			*mp = m;
@@ -702,13 +701,12 @@
 		    } while (space > 0 && atomic);
 		    if (dontroute)
 			    so->so_options |= SO_DONTROUTE;
-		    s = splnet();				/* XXX */
 		    /*
 		     * XXX all the SS_CANTSENDMORE checks previously
 		     * done could be out of date.  We could have recieved
 		     * a reset packet in an interrupt or maybe we slept
 		     * while doing page faults in uiomove() etc. We could
-		     * probably recheck again inside the splnet() protection
+		     * probably recheck again inside the locking protection
 		     * here, but there are probably other places that this
 		     * also happens.  We must rethink this.
 		     */
@@ -726,7 +724,6 @@
 			/* If there is more to send set PRUS_MORETOCOME */
 			(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 			top, addr, control, td);
-		    splx(s);
 		    if (dontroute)
 			    so->so_options &= ~SO_DONTROUTE;
 		    clen = 0;
@@ -741,6 +738,7 @@
 release:
 	sbunlock(&so->so_snd);
 out:
+	SOCKBUF_UNLOCK(&so->so_snd);
 	if (top)
 		m_freem(top);
 	if (control)
@@ -774,7 +772,7 @@
 	int *flagsp;
 {
 	struct mbuf *m, **mp;
-	int flags, len, error, s, offset;
+	int flags, len, error, offset;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 	int moff, type = 0;
@@ -832,12 +830,12 @@
 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
 		(*pr->pr_usrreqs->pru_rcvd)(so, 0);
 
-restart:
+	SOCKBUF_LOCK(&so->so_rcv);
 	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
 	if (error)
-		return (error);
-	s = splnet();
+		goto out;
 
+restart:
 	m = so->so_rcv.sb_mb;
 	/*
 	 * If we have less data than requested, block awaiting more
@@ -855,9 +853,8 @@
 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
 	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
-		KASSERT(m != 0 || !so->so_rcv.sb_cc,
-		    ("receive: m == %p so->so_rcv.sb_cc == %u",
-		    m, so->so_rcv.sb_cc));
+		KASSERT(!(m == 0 && so->so_rcv.sb_cc),
+		    ("m %p so->so_rcv.sb_cc %u", m, so->so_rcv.sb_cc));
 		if (so->so_error) {
 			if (m)
 				goto dontblock;
@@ -890,14 +887,13 @@
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
-		sbunlock(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
-		splx(s);
 		if (error)
-			return (error);
+			goto release;
 		goto restart;
 	}
 dontblock:
+	KASSERT(error == 0, ("unexpected state, error %u", error));
 	if (uio->uio_td)
 		uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
 	SBLASTRECORDCHK(&so->so_rcv);
@@ -906,10 +902,14 @@
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
-		orig_resid = 0;
-		if (psa)
-			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
-					    mp0 == 0);
+		if (psa) {
+			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
+					M_NOWAIT);
+			if (*psa == NULL) {
+				error = ENOMEM;
+				goto release;
+			}
+		}
 		if (flags & MSG_PEEK) {
 			m = m->m_next;
 		} else {
@@ -917,30 +917,56 @@
 			so->so_rcv.sb_mb = m_free(m);
 			m = so->so_rcv.sb_mb;
 		}
+		orig_resid = 0;
 	}
-	while (m && m->m_type == MT_CONTROL && error == 0) {
-		if (flags & MSG_PEEK) {
-			if (controlp)
-				*controlp = m_copy(m, 0, m->m_len);
-			m = m->m_next;
-		} else {
-			sbfree(&so->so_rcv, m);
-			so->so_rcv.sb_mb = m->m_next;
-			m->m_next = NULL;
-			if (pr->pr_domain->dom_externalize)
-				error =
-				(*pr->pr_domain->dom_externalize)(m, controlp);
-			else if (controlp)
-				*controlp = m;
-			else
-				m_freem(m);
-			m = so->so_rcv.sb_mb;
+	if (m && m->m_type == MT_CONTROL) {
+		struct mbuf *cm = NULL;
+		struct mbuf **cme = &cm;
+
+		do {
+			if (flags & MSG_PEEK) {
+				if (controlp) {
+					SOCKBUF_UNLOCK(&so->so_rcv);
+					*controlp = m_copym(m, 0, m->m_len,
+						M_TRYWAIT);
+					SOCKBUF_LOCK(&so->so_rcv);
+					if (*controlp == NULL) {
+						error = ENOBUFS;
+						goto release;
+					}
+					controlp = &(*controlp)->m_next;
+				}
+				m = m->m_next;
+			} else {
+				sbfree(&so->so_rcv, m);
+				so->so_rcv.sb_mb = m->m_next;
+				m->m_next = NULL;
+				if (controlp) {
+					/*
+					 * Collect mbufs for processing below.
+					 */
+					*cme = m;
+					cme = &(*cme)->m_next;
+				} else
+					m_free(m);
+				m = so->so_rcv.sb_mb;
+			}
+		} while (m && m->m_type == MT_CONTROL);
+		if (cm != NULL) {
+			if (pr->pr_domain->dom_externalize) {
+				/*
+				 * NB: drop the lock to avoid potential LORs;
+				 * in particular unix domain sockets grab the
+				 * file descriptor lock which would be a LOR.
+				 */
+				SOCKBUF_UNLOCK(&so->so_rcv);
+				error = (*pr->pr_domain->dom_externalize)
+						(cm, controlp);
+				SOCKBUF_LOCK(&so->so_rcv);
+			} else
+				m_freem(cm);
 		}
-		if (controlp) {
-			orig_resid = 0;
-			while (*controlp != NULL)
-				controlp = &(*controlp)->m_next;
-		}
+		orig_resid = 0;
 	}
 	if (m) {
 		if ((flags & MSG_PEEK) == 0) {
@@ -997,7 +1023,7 @@
 		if (mp == 0) {
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
-			splx(s);
+			SOCKBUF_UNLOCK(&so->so_rcv);
 #ifdef ZERO_COPY_SOCKETS
 			if (so_zero_copy_receive) {
 				vm_page_t pg;
@@ -1021,7 +1047,7 @@
 			} else
 #endif /* ZERO_COPY_SOCKETS */
 			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
-			s = splnet();
+			SOCKBUF_LOCK(&so->so_rcv);
 			if (error)
 				goto release;
 		} else
@@ -1102,9 +1128,8 @@
 			SBLASTMBUFCHK(&so->so_rcv);
 			error = sbwait(&so->so_rcv);
 			if (error) {
-				sbunlock(&so->so_rcv);
-				splx(s);
-				return (0);
+				error = 0;
+				goto release;
 			}
 			m = so->so_rcv.sb_mb;
 			if (m)
@@ -1137,17 +1162,15 @@
 			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
 	}
 	if (orig_resid == uio->uio_resid && orig_resid &&
-	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
-		sbunlock(&so->so_rcv);
-		splx(s);
-		goto restart;
-	}
+	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0)
+		goto restart;		/* XXX multi-counts msgs */
 
 	if (flagsp)
 		*flagsp |= flags;
 release:
 	sbunlock(&so->so_rcv);
-	splx(s);
+out:
+	SOCKBUF_UNLOCK(&so->so_rcv);
 	return (error);
 }
 
@@ -1174,22 +1197,21 @@
 {
 	struct sockbuf *sb = &so->so_rcv;
 	struct protosw *pr = so->so_proto;
-	int s;
 	struct sockbuf asb;
 
+	SOCKBUF_LOCK(sb);
 	sb->sb_flags |= SB_NOINTR;
 	(void) sblock(sb, M_WAITOK);
-	s = splimp();
-	socantrcvmore(so);
+	socantrcvmore_locked(so);
 	sbunlock(sb);
 	asb = *sb;
 	/*
-	 * Invalidate/clear most of the sockbuf structure, but keep
-	 * its selinfo structure valid.
+	 * Invalidate/clear most of the sockbuf structure, but leave
+	 * selinfo and mutex data unchanged.
 	 */
 	bzero(&sb->sb_startzero,
 	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
-	splx(s);
+	SOCKBUF_UNLOCK(sb);
 
 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
 		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
@@ -1207,6 +1229,7 @@
 	struct so_accf	*af = so->so_accf;
 	int	error = 0;
 
+/* XXX locking */
 	/* do not set/remove accept filters on non listen sockets */
 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
 		error = EINVAL;
@@ -1801,7 +1824,6 @@
 {
 	struct socket *so = kn->kn_fp->f_data;
 	struct sockbuf *sb;
-	int s;
 
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
@@ -1819,10 +1841,10 @@
 		return (1);
 	}
 
-	s = splnet();
+	SOCKBUF_LOCK(sb);
 	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
 	sb->sb_flags |= SB_KNOTE;
-	splx(s);
+	SOCKBUF_UNLOCK(sb);
 	return (0);
 }
 
@@ -1830,12 +1852,12 @@
 filt_sordetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int s = splnet();
 
+	SOCKBUF_LOCK(&so->so_rcv);
 	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
-	splx(s);
+	SOCKBUF_UNLOCK(&so->so_rcv);
 }
 
 /*ARGSUSED*/
@@ -1843,30 +1865,35 @@
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
+	int result;
 
+	SOCKBUF_LOCK(&so->so_rcv);	/* XXX too conservative? */
 	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
 	if (so->so_state & SS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
-		return (1);
+		result = 1;
+	} else if (so->so_error) {	/* temporary udp error */
+		result = 1;
+	} else if (kn->kn_sfflags & NOTE_LOWAT) {
+		result = (kn->kn_data >= kn->kn_sdata);
+	} else {
+		result = (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
 	}
-	if (so->so_error)	/* temporary udp error */
-		return (1);
-	if (kn->kn_sfflags & NOTE_LOWAT)
-		return (kn->kn_data >= kn->kn_sdata);
-	return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	return (result);
 }
 
 static void
 filt_sowdetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int s = splnet();
 
+	SOCKBUF_LOCK(&so->so_snd);
 	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
 	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
 		so->so_snd.sb_flags &= ~SB_KNOTE;
-	splx(s);
+	SOCKBUF_UNLOCK(&so->so_snd);
 }
 
 /*ARGSUSED*/
@@ -1874,21 +1901,26 @@
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
+	int result;
 
+	SOCKBUF_LOCK(&so->so_snd);	/* XXX too conservative? */
 	kn->kn_data = sbspace(&so->so_snd);
 	if (so->so_state & SS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
-		return (1);
+		result = 1;
+	} else if (so->so_error) {	/* temporary udp error */
+		result = 1;
+	} else if (((so->so_state & SS_ISCONNECTED) == 0) &&
+	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
+		result = 0;
+	} else if (kn->kn_sfflags & NOTE_LOWAT) {
+		result = (kn->kn_data >= kn->kn_sdata);
+	} else {
+		result = (kn->kn_data >= so->so_snd.sb_lowat);
 	}
-	if (so->so_error)	/* temporary udp error */
-		return (1);
-	if (((so->so_state & SS_ISCONNECTED) == 0) &&
-	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
-		return (0);
-	if (kn->kn_sfflags & NOTE_LOWAT)
-		return (kn->kn_data >= kn->kn_sdata);
-	return (kn->kn_data >= so->so_snd.sb_lowat);
+	SOCKBUF_UNLOCK(&so->so_snd);
+	return (result);
 }
 
 /*ARGSUSED*/

==== //depot/projects/netperf_socket/sys/kern/uipc_socket2.c#2 (text+ko) ====

@@ -108,9 +108,14 @@
 soisconnecting(so)
 	register struct socket *so;
 {
+	int need_lock = !SOCK_OWNED(so);
 
+	if (need_lock)
+		SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= SS_ISCONNECTING;
+	if (need_lock)
+		SOCK_UNLOCK(so);
 }
 
 void
@@ -118,56 +123,79 @@
 	struct socket *so;
 {
 	struct socket *head = so->so_head;
+	int need_lock = !SOCK_OWNED(so);
 
+	if (need_lock)
+		SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
 	so->so_state |= SS_ISCONNECTED;
 	if (head && (so->so_state & SS_INCOMP)) {
-		if ((so->so_options & SO_ACCEPTFILTER) != 0) {
+		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+			if (need_lock)
+				SOCK_UNLOCK(so);
+			SOCK_LOCK(head);
+			TAILQ_REMOVE(&head->so_incomp, so, so_list);
+			head->so_incqlen--;
+			so->so_state &= ~SS_INCOMP;
+			TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
+			head->so_qlen++;
+			so->so_state |= SS_COMP;
+			sorwakeup_locked(head);
+			wakeup_one(&head->so_timeo);
+			SOCK_UNLOCK(head);
+		} else {
+/* XXX locking */
 			so->so_upcall = head->so_accf->so_accept_filter->accf_callback;
 			so->so_upcallarg = head->so_accf->so_accept_filter_arg;
 			so->so_rcv.sb_flags |= SB_UPCALL;
 			so->so_options &= ~SO_ACCEPTFILTER;
 			so->so_upcall(so, so->so_upcallarg, M_TRYWAIT);
-			return;
 		}
-		TAILQ_REMOVE(&head->so_incomp, so, so_list);
-		head->so_incqlen--;
-		so->so_state &= ~SS_INCOMP;
-		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
-		head->so_qlen++;
-		so->so_state |= SS_COMP;
-		sorwakeup(head);
-		wakeup_one(&head->so_timeo);
 	} else {
 		wakeup(&so->so_timeo);
+		SOCK_UNLOCK(so);
 		sorwakeup(so);
 		sowwakeup(so);
 	}
+	if (!need_lock)
+		SOCK_LOCK(so);
 }
 
 void
 soisdisconnecting(so)
 	register struct socket *so;
 {
+	int need_lock = !SOCK_OWNED(so);
 
+	if (need_lock)
+		SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTING;
 	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
 	wakeup(&so->so_timeo);
+	SOCK_UNLOCK(so);
 	sowwakeup(so);
 	sorwakeup(so);
+	if (!need_lock)
+		SOCK_LOCK(so);
 }
 
 void
 soisdisconnected(so)
 	register struct socket *so;
 {
+	int need_lock = !SOCK_OWNED(so);
 
+	if (need_lock)
+		SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
 	wakeup(&so->so_timeo);
+	SOCK_UNLOCK(so);
 	sbdrop(&so->so_snd, so->so_snd.sb_cc);
 	sowwakeup(so);
 	sorwakeup(so);
+	if (!need_lock)
+		SOCK_LOCK(so);
 }
 
 /*
@@ -186,10 +214,14 @@
 	int connstatus;
 {
 	register struct socket *so;
+	int over;
 
-	if (head->so_qlen > 3 * head->so_qlimit / 2)
+	SOCK_LOCK(head);
+	over = (head->so_qlen > 3 * head->so_qlimit / 2);
+	SOCK_UNLOCK(head);
+	if (over)
 		return ((struct socket *)0);
-	so = soalloc(0);
+	so = soalloc(M_NOWAIT);
 	if (so == NULL)
 		return ((struct socket *)0);
 	if ((head->so_options & SO_ACCEPTFILTER) != 0)
@@ -205,12 +237,13 @@
 #ifdef MAC
 	mac_create_socket_from_socket(head, so);
 #endif
+
 	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
 	    (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
 		sodealloc(so);
 		return ((struct socket *)0);
 	}
-
+	SOCKBUF_LOCK(&head->so_rcv);
 	if (connstatus) {
 		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
 		so->so_state |= SS_COMP;
@@ -225,10 +258,11 @@
 		so->so_state |= SS_INCOMP;
 		head->so_incqlen++;
 	}
+	SOCKBUF_UNLOCK(&head->so_rcv);
 	if (connstatus) {
+		so->so_state |= connstatus;
 		sorwakeup(head);
 		wakeup(&head->so_timeo);
-		so->so_state |= connstatus;
 	}
 	return (so);
 }
@@ -253,6 +287,16 @@
 }
 
 void
+socantsendmore_locked(so)
+	struct socket *so;
+{
+	SOCKBUF_LOCK_ASSERT(&so->so_snd);
+
+	so->so_state |= SS_CANTSENDMORE;
+	sowwakeup(so);
+}
+
+void
 socantrcvmore(so)
 	struct socket *so;
 {
@@ -261,6 +305,16 @@
 	sorwakeup(so);
 }
 
+void

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200401190032.i0J0WanH023063>