Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 14 Nov 2001 16:15:47 -0800 (PST)
From:      Matthew Dillon <dillon@apollo.backplane.com>
To:        freebsd-arch@freebsd.org
Subject:   Need review - patch for socket locking and ref counting
Message-ID:  <200111150015.fAF0Flb09186@apollo.backplane.com>

next in thread | raw e-mail | index | archive | help
    This patch adds a reference count to the socket structure
    and cleans up & encapsulates the API calls.  I do not yet
    attempt to use sxlocks to lock the socket structure (to allow
    us to multi-thread the network stack), but that is the
    direction I am headed.

    soalloc()/sofree() - no reference counter adjustments
			 (so_count must be 0 or sofree() panics)
			 (soalloc initializes so_count to 0)
 
    socreate()/soclose() - socreate inits ref counter to 1,
			   soclose decrements ref counter.

    soref()		- bump ref counter

    sorele()		- decrement ref counter, calls sofree()
			  when the ref counter hits 0

    sotryfree()		- calls sofree() if the ref counter is
			  already 0; does not decrement it

    holdsock() removed; fgetsock() and its counterpart fputsock() added
    in a manner similar to fget() and fgetvp().

    I would like a review. 

    Also, I noticed there are two calls to soisdisconnected()
    *AFTER* the code (originally) calls sofree(), which sounds
    bogus to me.  Could someone review the original code and
    give me an opinion?  (see the last two XXX's in the patch
    set).

					Thanks,

					-Matt
					Matthew Dillon 
					<dillon@backplane.com>


Index: compat/svr4/svr4_stream.c
===================================================================
RCS file: /home/ncvs/src/sys/compat/svr4/svr4_stream.c,v
retrieving revision 1.22
diff -u -r1.22 svr4_stream.c
--- compat/svr4/svr4_stream.c	2001/09/12 08:36:58	1.22
+++ compat/svr4/svr4_stream.c	2001/11/14 22:10:24
@@ -150,7 +150,6 @@
 	register struct msghdr *mp;
 	int flags;
 {
-	struct file *fp;
 	struct uio auio;
 	register struct iovec *iov;
 	register int i;
@@ -163,8 +162,7 @@
 	struct uio ktruio;
 #endif
 
-	error = holdsock(td->td_proc->p_fd, s, &fp);
-	if (error)
+	if ((error = fgetsock(td, s, &so, NULL)) != 0)
 		return (error);
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
@@ -176,16 +174,14 @@
 	iov = mp->msg_iov;
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
-			fdrop(fp, td);
-			return (EINVAL);
+			error = EINVAL;
+			goto done1;
 		}
 	}
 	if (mp->msg_name) {
 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
-		if (error) {
-			fdrop(fp, td);
-			return (error);
-		}
+		if (error)
+			goto done1;
 	} else {
 		to = 0;
 	}
@@ -211,7 +207,6 @@
 	}
 #endif
 	len = auio.uio_resid;
-	so = (struct socket *)fp->f_data;
 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
 						     flags, td);
 	if (error) {
@@ -239,7 +234,8 @@
 bad:
 	if (to)
 		FREE(to, M_SONAME);
-	fdrop(fp, td);
+done1:
+	fputsock(so);
 	return (error);
 }
 
@@ -250,7 +246,6 @@
 	register struct msghdr *mp;
 	caddr_t namelenp;
 {
-	struct file *fp;
 	struct uio auio;
 	register struct iovec *iov;
 	register int i;
@@ -264,8 +259,7 @@
 	struct uio ktruio;
 #endif
 
-	error = holdsock(td->td_proc->p_fd, s, &fp);
-	if (error)
+	if ((error = fgetsock(td, s, &so, NULL)) != 0)
 		return (error);
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
@@ -277,8 +271,8 @@
 	iov = mp->msg_iov;
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
-			fdrop(fp, td);
-			return (EINVAL);
+			error = EINVAL;
+			goto done1;
 		}
 	}
 #ifdef KTRACE
@@ -365,7 +359,8 @@
 		FREE(fromsa, M_SONAME);
 	if (control)
 		m_freem(control);
-	fdrop(fp, td);
+done1:
+	fputsock(so);
 	return (error);
 }
 
Index: kern/kern_descrip.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.111
diff -u -r1.111 kern_descrip.c
--- kern/kern_descrip.c	2001/11/14 06:30:35	1.111
+++ kern/kern_descrip.c	2001/11/14 23:42:17
@@ -60,6 +60,8 @@
 #include <sys/unistd.h>
 #include <sys/resourcevar.h>
 #include <sys/event.h>
+#include <sys/sx.h>
+#include <sys/socketvar.h>
 
 #include <machine/limits.h>
 
@@ -1423,6 +1425,51 @@
 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
 {
 	return(_fgetvp(td, fd, vpp, FWRITE));
+}
+
+/*
+ * Like fget() but loads the underlying socket, or returns an error if
+ * the descriptor does not represent a socket.
+ *
+ * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
+ * the future.
+ */
+int
+fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
+{
+	struct filedesc *fdp;
+	struct file *fp;
+	struct socket *so;
+
+	GIANT_REQUIRED;
+	fdp = td->td_proc->p_fd;
+	*spp = NULL;
+	if (fflagp)
+		*fflagp = 0;
+	if ((u_int)fd >= fdp->fd_nfiles)
+		return(EBADF);
+	if ((fp = fdp->fd_ofiles[fd]) == NULL)
+		return(EBADF);
+	if (fp->f_type != DTYPE_SOCKET)
+		return(ENOTSOCK);
+	if (fp->f_data == NULL)
+		return(EINVAL);
+	so = (struct socket *)fp->f_data;
+	if (fflagp)
+		*fflagp = fp->f_flag;
+	soref(so);
+	*spp = so;
+	return(0);
+}
+
+/*
+ * Drop the reference count on the socket and XXX release the SX lock in
+ * the future.  Dropping the last reference frees the socket structure.
+ */
+void
+fputsock(struct socket *so)
+{
+	sorele(so);
 }
 
 int
Index: kern/kern_mtxpool.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_mtxpool.c,v
retrieving revision 1.1
diff -u -r1.1 kern_mtxpool.c
--- kern/kern_mtxpool.c	2001/11/13 21:55:12	1.1
+++ kern/kern_mtxpool.c	2001/11/14 04:06:48
@@ -35,9 +35,10 @@
 #include <sys/systm.h>
 
 #ifndef MTX_POOL_SIZE
-#define MTX_POOL_SIZE	128
+#define MTX_POOL_SIZE	128	/* must be a multiple of 4 */
 #endif
-#define MTX_POOL_MASK	(MTX_POOL_SIZE-1)
+#define MTX_POOL_MASK	(MTX_POOL_SIZE - 1)
+#define MTX_POOL_XMASK	(MTX_POOL_MASK & ~3)
 
 static struct mtx mtx_pool_ary[MTX_POOL_SIZE];
 
@@ -54,6 +55,34 @@
     return(&mtx_pool_ary[((int)ptr ^ ((int)ptr >> 6)) & MTX_POOL_MASK]);
 }
 
+static __inline
+struct mtx *
+_mtx_pool1_find(void *ptr)
+{
+    return(&mtx_pool_ary[(((int)ptr ^ ((int)ptr >> 6)) & MTX_POOL_XMASK) | 0]);
+}
+
+static __inline
+struct mtx *
+_mtx_pool2_find(void *ptr)
+{
+    return(&mtx_pool_ary[(((int)ptr ^ ((int)ptr >> 6)) & MTX_POOL_XMASK) | 1]);
+}
+
+static __inline
+struct mtx *
+_mtx_pool3_find(void *ptr)
+{
+    return(&mtx_pool_ary[(((int)ptr ^ ((int)ptr >> 6)) & MTX_POOL_XMASK) | 2]);
+}
+
+static __inline
+struct mtx *
+_mtx_pool4_find(void *ptr)
+{
+    return(&mtx_pool_ary[(((int)ptr ^ ((int)ptr >> 6)) & MTX_POOL_XMASK) | 3]);
+}
+
 static void
 mtx_pool_setup(void *dummy __unused)
 {
@@ -88,6 +117,30 @@
     return(_mtx_pool_find(ptr));
 }
 
+struct mtx *
+mtx_pool1_find(void *ptr)
+{
+    return(_mtx_pool1_find(ptr));
+}
+
+struct mtx *
+mtx_pool2_find(void *ptr)
+{
+    return(_mtx_pool2_find(ptr));
+}
+
+struct mtx *
+mtx_pool3_find(void *ptr)
+{
+    return(_mtx_pool3_find(ptr));
+}
+
+struct mtx *
+mtx_pool4_find(void *ptr)
+{
+    return(_mtx_pool4_find(ptr));
+}
+
 /*
  * Combined find/lock operation.  Lock the pool mutex associated with
  * the specified address.
@@ -98,6 +151,30 @@
     mtx_lock(_mtx_pool_find(ptr));
 }
 
+void 
+mtx_pool1_lock(void *ptr)
+{
+    mtx_lock(_mtx_pool1_find(ptr));
+}
+
+void 
+mtx_pool2_lock(void *ptr)
+{
+    mtx_lock(_mtx_pool2_find(ptr));
+}
+
+void 
+mtx_pool3_lock(void *ptr)
+{
+    mtx_lock(_mtx_pool3_find(ptr));
+}
+
+void 
+mtx_pool4_lock(void *ptr)
+{
+    mtx_lock(_mtx_pool4_find(ptr));
+}
+
 /*
  * Combined find/unlock operation.  Unlock the pool mutex associated with
  * the specified address.
@@ -106,6 +183,30 @@
 mtx_pool_unlock(void *ptr)
 {
     mtx_unlock(_mtx_pool_find(ptr));
+}
+
+void
+mtx_pool1_unlock(void *ptr)
+{
+    mtx_unlock(_mtx_pool1_find(ptr));
+}
+
+void
+mtx_pool2_unlock(void *ptr)
+{
+    mtx_unlock(_mtx_pool2_find(ptr));
+}
+
+void
+mtx_pool3_unlock(void *ptr)
+{
+    mtx_unlock(_mtx_pool3_find(ptr));
+}
+
+void
+mtx_pool4_unlock(void *ptr)
+{
+    mtx_unlock(_mtx_pool4_find(ptr));
 }
 
 SYSINIT(mtxpooli, SI_SUB_MUTEX, SI_ORDER_FIRST, mtx_pool_setup, NULL)   
Index: kern/sys_socket.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.35
diff -u -r1.35 sys_socket.c
--- kern/sys_socket.c	2001/09/12 08:37:46	1.35
+++ kern/sys_socket.c	2001/11/14 23:48:45
@@ -182,6 +182,12 @@
 	return ((*so->so_proto->pr_usrreqs->pru_sense)(so, ub));
 }
 
+/*
+ * API socket close on file pointer.  We call soclose() to close the 
+ * socket (including initiating closing protocols).  soclose() will
+ * sorele() the file reference but the actual socket will not go away
+ * until the socket's ref count hits 0.
+ */
 /* ARGSUSED */
 int
 soo_close(fp, td)
@@ -189,10 +195,12 @@
 	struct thread *td;
 {
 	int error = 0;
+	struct socket *so;
 
 	fp->f_ops = &badfileops;
-	if (fp->f_data)
-		error = soclose((struct socket *)fp->f_data);
-	fp->f_data = 0;
+	if ((so = fp->f_data) != NULL) {
+		fp->f_data = NULL;
+		error = soclose(so);
+	}
 	return (error);
 }
Index: kern/uipc_socket.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.105
diff -u -r1.105 uipc_socket.c
--- kern/uipc_socket.c	2001/11/12 20:51:40	1.105
+++ kern/uipc_socket.c	2001/11/15 00:03:25
@@ -106,6 +106,8 @@
  * Note that it would probably be better to allocate socket
  * and PCB at the same time, but I'm not convinced that all
  * the protocols can be easily modified to do this.
+ *
+ * soalloc() returns a socket with a ref count of 0.
  */
 struct socket *
 soalloc(waitok)
@@ -119,11 +121,16 @@
 		bzero(so, sizeof *so);
 		so->so_gencnt = ++so_gencnt;
 		so->so_zone = socket_zone;
+		/* sx_init(&so->so_sxlock, "socket sxlock"); */
 		TAILQ_INIT(&so->so_aiojobq);
 	}
 	return so;
 }
 
+/*
+ * socreate returns a socket with a ref count of 1.  The socket should be
+ * closed with soclose().
+ */
 int
 socreate(dom, aso, type, proto, td)
 	int dom;
@@ -162,10 +169,11 @@
 	so->so_type = type;
 	so->so_cred = crhold(td->td_proc->p_ucred);
 	so->so_proto = prp;
+	soref(so);
 	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
 	if (error) {
 		so->so_state |= SS_NOFDREF;
-		sofree(so);
+		sorele(so);
 		return (error);
 	}
 	*aso = so;
@@ -186,11 +194,12 @@
 	return (error);
 }
 
-void
-sodealloc(so)
-	struct socket *so;
+static void
+sodealloc(struct socket *so)
 {
 
+	KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
+	so->so_count = 0;
 	so->so_gencnt = ++so_gencnt;
 	if (so->so_rcv.sb_hiwat)
 		(void)chgsbsize(so->so_cred->cr_uidinfo,
@@ -210,6 +219,7 @@
 	}
 #endif
 	crfree(so->so_cred);
+	/* sx_destroy(&so->so_sxlock); */
 	zfree(so->so_zone, so);
 }
 
@@ -242,6 +252,8 @@
 {
 	struct socket *head = so->so_head;
 
+	KASSERT(so->so_count == 0, ("socket %p so_count not 0", so));
+
 	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
 		return;
 	if (head != NULL) {
@@ -272,6 +284,10 @@
  * Close a socket on last file table reference removal.
  * Initiate disconnect if connected.
  * Free socket when disconnect complete.
+ *
+ * This function will sorele() the socket.  Note that soclose() may be
+ * called prior to the ref count reaching zero.  The actual socket
+ * structure will not be freed until the ref count reaches zero.
  */
 int
 soclose(so)
@@ -329,7 +345,7 @@
 	if (so->so_state & SS_NOFDREF)
 		panic("soclose: NOFDREF");
 	so->so_state |= SS_NOFDREF;
-	sofree(so);
+	sorele(so);
 	splx(s);
 	return (error);
 }
@@ -345,7 +361,7 @@
 
 	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
 	if (error) {
-		sofree(so);
+		sotryfree(so);	/* note: does not decrement the ref count */
 		return error;
 	}
 	return (0);
Index: kern/uipc_socket2.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.76
diff -u -r1.76 uipc_socket2.c
--- kern/uipc_socket2.c	2001/10/11 23:38:15	1.76
+++ kern/uipc_socket2.c	2001/11/14 23:59:33
@@ -210,6 +210,8 @@
  * then we allocate a new structure, propoerly linked into the
  * data structure of the original socket, and return this.
  * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
+ *
+ * note: the ref count on the socket is 0 on return
  */
 struct socket *
 sonewconn(head, connstatus)
@@ -246,7 +248,7 @@
 		so->so_cred = crhold(head->so_cred);
 	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
 	    (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
-		sodealloc(so);
+		sotryfree(so);
 		return ((struct socket *)0);
 	}
 
Index: kern/uipc_syscalls.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.98
diff -u -r1.98 uipc_syscalls.c
--- kern/uipc_syscalls.c	2001/11/14 06:30:35	1.98
+++ kern/uipc_syscalls.c	2001/11/14 23:09:34
@@ -139,7 +139,7 @@
 			fdrop(fp, td);
 		}
 	} else {
-		fp->f_data = (caddr_t)so;
+		fp->f_data = (caddr_t)so;	/* already has ref count */
 		fp->f_flag = FREAD|FWRITE;
 		fp->f_ops = &socketops;
 		fp->f_type = DTYPE_SOCKET;
@@ -164,22 +164,19 @@
 		int	namelen;
 	} */ *uap;
 {
-	struct file *fp;
 	struct sockaddr *sa;
+	struct socket *sp;
 	int error;
 
 	mtx_lock(&Giant);
-	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
-	if (error)
+	if ((error = fgetsock(td, uap->s, &sp, NULL)) != 0)
 		goto done2;
-	error = getsockaddr(&sa, uap->name, uap->namelen);
-	if (error) {
-		fdrop(fp, td);
-		goto done2;
-	}
-	error = sobind((struct socket *)fp->f_data, sa, td);
+	if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
+		goto done1;
+	error = sobind(sp, sa, td);
 	FREE(sa, M_SONAME);
-	fdrop(fp, td);
+done1:
+	fputsock(sp);
 done2:
 	mtx_unlock(&Giant);
 	return (error);
@@ -197,14 +194,13 @@
 		int	backlog;
 	} */ *uap;
 {
-	struct file *fp;
+	struct socket *sp;
 	int error;
 
 	mtx_lock(&Giant);
-	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
-	if (error == 0) {
-		error = solisten((struct socket *)fp->f_data, uap->backlog, td);
-		fdrop(fp, td);
+	if ((error = fgetsock(td, uap->s, &sp, NULL)) == 0) {
+		error = solisten(sp, uap->backlog, td);
+		fputsock(sp);
 	}
 	mtx_unlock(&Giant);
 	return(error);
@@ -225,13 +221,12 @@
 	int compat;
 {
 	struct filedesc *fdp;
-	struct file *lfp = NULL;
 	struct file *nfp = NULL;
 	struct sockaddr *sa;
 	int namelen, error, s;
 	struct socket *head, *so;
 	int fd;
-	short fflag;		/* type must match fp->f_flag */
+	u_int fflag;
 
 	mtx_lock(&Giant);
 	fdp = td->td_proc->p_fd;
@@ -241,11 +236,10 @@
 		if(error)
 			goto done2;
 	}
-	error = holdsock(fdp, uap->s, &lfp);
+	error = fgetsock(td, uap->s, &head, &fflag);
 	if (error)
 		goto done2;
 	s = splnet();
-	head = (struct socket *)lfp->f_data;
 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
 		splx(s);
 		error = EINVAL;
@@ -286,7 +280,6 @@
 	TAILQ_REMOVE(&head->so_comp, so, so_list);
 	head->so_qlen--;
 
-	fflag = lfp->f_flag;
 	error = falloc(td, &nfp, &fd);
 	if (error) {
 		/*
@@ -312,7 +305,7 @@
 	if (head->so_sigio != NULL)
 		fsetown(fgetown(head->so_sigio), &so->so_sigio);
 
-	nfp->f_data = (caddr_t)so;
+	nfp->f_data = (caddr_t)so;	/* already has ref count */
 	nfp->f_flag = fflag;
 	nfp->f_ops = &socketops;
 	nfp->f_type = DTYPE_SOCKET;
@@ -375,7 +368,7 @@
 done:
 	if (nfp != NULL)
 		fdrop(nfp, td);
-	fdrop(lfp, td);
+	fputsock(head);
 done2:
 	mtx_unlock(&Giant);
 	return (error);
@@ -420,35 +413,31 @@
 		int	namelen;
 	} */ *uap;
 {
-	struct file *fp;
-	register struct socket *so;
+	struct socket *so;
 	struct sockaddr *sa;
 	int error, s;
 
 	mtx_lock(&Giant);
-	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
-	if (error)
+	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
 		goto done2;
-	so = (struct socket *)fp->f_data;
 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
 		error = EALREADY;
-		goto done;
+		goto done1;
 	}
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error)
-		goto done;
+		goto done1;
 	error = soconnect(so, sa, td);
 	if (error)
 		goto bad;
 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
 		FREE(sa, M_SONAME);
 		error = EINPROGRESS;
-		goto done;
+		goto done1;
 	}
 	s = splnet();
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-		error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
-		    "connec", 0);
+		error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "connec", 0);
 		if (error)
 			break;
 	}
@@ -462,8 +451,8 @@
 	FREE(sa, M_SONAME);
 	if (error == ERESTART)
 		error = EINTR;
-done:
-	fdrop(fp, td);
+done1:
+	fputsock(so);
 done2:
 	mtx_unlock(&Giant);
 	return (error);
@@ -499,12 +488,12 @@
 		goto free2;
 	fhold(fp1);
 	sv[0] = fd;
-	fp1->f_data = (caddr_t)so1;
+	fp1->f_data = (caddr_t)so1;	/* so1 already has ref count */
 	error = falloc(td, &fp2, &fd);
 	if (error)
 		goto free3;
 	fhold(fp2);
-	fp2->f_data = (caddr_t)so2;
+	fp2->f_data = (caddr_t)so2;	/* so2 already has ref count */
 	sv[1] = fd;
 	error = soconnect2(so1, so2);
 	if (error)
@@ -552,12 +541,11 @@
 	register struct msghdr *mp;
 	int flags;
 {
-	struct file *fp;
 	struct uio auio;
 	register struct iovec *iov;
 	register int i;
 	struct mbuf *control;
-	struct sockaddr *to;
+	struct sockaddr *to = NULL;
 	int len, error;
 	struct socket *so;
 #ifdef KTRACE
@@ -565,8 +553,7 @@
 	struct uio ktruio;
 #endif
 
-	error = holdsock(td->td_proc->p_fd, s, &fp);
-	if (error)
+	if ((error = fgetsock(td, s, &so, NULL)) != 0)
 		return (error);
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
@@ -578,18 +565,14 @@
 	iov = mp->msg_iov;
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
-			fdrop(fp, td);
-			return (EINVAL);
+			error = EINVAL;
+			goto bad;
 		}
 	}
 	if (mp->msg_name) {
 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
-		if (error) {
-			fdrop(fp, td);
-			return (error);
-		}
-	} else {
-		to = 0;
+		if (error)
+			goto bad;
 	}
 	if (mp->msg_control) {
 		if (mp->msg_controllen < sizeof(struct cmsghdr)
@@ -633,7 +616,6 @@
 	}
 #endif
 	len = auio.uio_resid;
-	so = (struct socket *)fp->f_data;
 	error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
 						     flags, td);
 	if (error) {
@@ -659,7 +641,7 @@
 	}
 #endif
 bad:
-	fdrop(fp, td);
+	fputsock(so);
 	if (to)
 		FREE(to, M_SONAME);
 	return (error);
@@ -834,7 +816,6 @@
 	register struct msghdr *mp;
 	caddr_t namelenp;
 {
-	struct file *fp;
 	struct uio auio;
 	register struct iovec *iov;
 	register int i;
@@ -848,8 +829,7 @@
 	struct uio ktruio;
 #endif
 
-	error = holdsock(td->td_proc->p_fd, s, &fp);
-	if (error)
+	if ((error = fgetsock(td, s, &so, NULL)) != 0)
 		return (error);
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
@@ -861,7 +841,7 @@
 	iov = mp->msg_iov;
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
-			fdrop(fp, td);
+			fputsock(so);
 			return (EINVAL);
 		}
 	}
@@ -875,7 +855,6 @@
 	}
 #endif
 	len = auio.uio_resid;
-	so = (struct socket *)fp->f_data;
 	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
 	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
 	    &mp->msg_flags);
@@ -975,7 +954,7 @@
 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 	}
 out:
-	fdrop(fp, td);
+	fputsock(so);
 	if (fromsa)
 		FREE(fromsa, M_SONAME);
 	if (control)
@@ -1196,14 +1175,13 @@
 		int	how;
 	} */ *uap;
 {
-	struct file *fp;
+	struct socket *so;
 	int error;
 
 	mtx_lock(&Giant);
-	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
-	if (error == 0) {
-		error = soshutdown((struct socket *)fp->f_data, uap->how);
-		fdrop(fp, td);
+	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
+		error = soshutdown(so, uap->how);
+		fputsock(so);
 	}
 	mtx_unlock(&Giant);
 	return(error);
@@ -1224,7 +1202,7 @@
 		int	valsize;
 	} */ *uap;
 {
-	struct file *fp;
+	struct socket *so;
 	struct sockopt sopt;
 	int error;
 
@@ -1234,16 +1212,15 @@
 		return (EINVAL);
 
 	mtx_lock(&Giant);
-	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
-	if (error == 0) {
+	if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
 		sopt.sopt_dir = SOPT_SET;
 		sopt.sopt_level = uap->level;
 		sopt.sopt_name = uap->name;
 		sopt.sopt_val = uap->val;
 		sopt.sopt_valsize = uap->valsize;
 		sopt.sopt_td = td;
-		error = sosetopt((struct socket *)fp->f_data, &sopt);
-		fdrop(fp, td);
+		error = sosetopt(so, &sopt);
+		fputsock(so);
 	}
 	mtx_unlock(&Giant);
 	return(error);
@@ -1265,24 +1242,20 @@
 	} */ *uap;
 {
 	int	valsize, error;
-	struct	file *fp;
+	struct  socket *so;
 	struct	sockopt sopt;
 
 	mtx_lock(&Giant);
-	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
-	if (error)
+	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
 		goto done2;
 	if (uap->val) {
 		error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
 		    sizeof (valsize));
-		if (error) {
-			fdrop(fp, td);
-			goto done2;
-		}
+		if (error)
+			goto done1;
 		if (valsize < 0) {
-			fdrop(fp, td);
 			error = EINVAL;
-			goto done2;
+			goto done1;
 		}
 	} else {
 		valsize = 0;
@@ -1295,13 +1268,14 @@
 	sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
 	sopt.sopt_td = td;
 
-	error = sogetopt((struct socket *)fp->f_data, &sopt);
+	error = sogetopt(so, &sopt);
 	if (error == 0) {
 		valsize = sopt.sopt_valsize;
 		error = copyout((caddr_t)&valsize,
 				(caddr_t)uap->avalsize, sizeof (valsize));
 	}
-	fdrop(fp, td);
+done1:
+	fputsock(so);
 done2:
 	mtx_unlock(&Giant);
 	return (error);
@@ -1323,21 +1297,16 @@
 	} */ *uap;
 	int compat;
 {
-	struct file *fp;
-	register struct socket *so;
+	struct socket *so;
 	struct sockaddr *sa;
 	int len, error;
 
 	mtx_lock(&Giant);
-	error = holdsock(td->td_proc->p_fd, uap->fdes, &fp);
-	if (error)
+	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
 		goto done2;
 	error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
-	if (error) {
-		fdrop(fp, td);
-		goto done2;
-	}
-	so = (struct socket *)fp->f_data;
+	if (error)
+		goto done1;
 	sa = 0;
 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
 	if (error)
@@ -1360,7 +1329,8 @@
 bad:
 	if (sa)
 		FREE(sa, M_SONAME);
-	fdrop(fp, td);
+done1:
+	fputsock(so);
 done2:
 	mtx_unlock(&Giant);
 	return (error);
@@ -1408,26 +1378,20 @@
 	} */ *uap;
 	int compat;
 {
-	struct file *fp;
-	register struct socket *so;
+	struct socket *so;
 	struct sockaddr *sa;
 	int len, error;
 
 	mtx_lock(&Giant);
-	error = holdsock(td->td_proc->p_fd, uap->fdes, &fp);
-	if (error)
+	if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
 		goto done2;
-	so = (struct socket *)fp->f_data;
 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
-		fdrop(fp, td);
 		error = ENOTCONN;
-		goto done2;
+		goto done1;
 	}
 	error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
-	if (error) {
-		fdrop(fp, td);
-		goto done2;
-	}
+	if (error)
+		goto done1;
 	sa = 0;
 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
 	if (error)
@@ -1450,7 +1414,8 @@
 bad:
 	if (sa)
 		FREE(sa, M_SONAME);
-	fdrop(fp, td);
+done1:
+	fputsock(so);
 done2:
 	mtx_unlock(&Giant);
 	return (error);
@@ -1550,33 +1515,6 @@
 }
 
 /*
- * holdsock() - load the struct file pointer associated
- * with a socket into *fpp.  If an error occurs, non-zero
- * will be returned and *fpp will be set to NULL.
- */
-int
-holdsock(fdp, fdes, fpp)
-	struct filedesc *fdp;
-	int fdes;
-	struct file **fpp;
-{
-	register struct file *fp = NULL;
-	int error = 0;
-
-	if ((unsigned)fdes >= fdp->fd_nfiles ||
-	    (fp = fdp->fd_ofiles[fdes]) == NULL) {
-		error = EBADF;
-	} else if (fp->f_type != DTYPE_SOCKET) {
-		error = ENOTSOCK;
-		fp = NULL;
-	} else {
-		fhold(fp);
-	}
-	*fpp = fp;
-	return(error);
-}
-
-/*
  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
  * XXX - The sf_buf functions are currently private to sendfile(2), so have
  * been made static, but may be useful in the future for doing zero-copy in
@@ -1678,10 +1616,9 @@
 int
 sendfile(struct thread *td, struct sendfile_args *uap)
 {
-	struct file *fp = NULL;
 	struct vnode *vp;
 	struct vm_object *obj;
-	struct socket *so;
+	struct socket *so = NULL;
 	struct mbuf *m;
 	struct sf_buf *sf;
 	struct vm_page *pg;
@@ -1701,10 +1638,8 @@
 		error = EINVAL;
 		goto done;
 	}
-	error = holdsock(td->td_proc->p_fd, uap->s, &fp);
-	if (error)
+	if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
 		goto done;
-	so = (struct socket *)fp->f_data;
 	if (so->so_type != SOCK_STREAM) {
 		error = EINVAL;
 		goto done;
@@ -1988,8 +1923,9 @@
 	}
 	if (vp)
 		vrele(vp);
-	if (fp)
-		fdrop(fp, td);
+	if (so)
+		fputsock(so);
 	mtx_unlock(&Giant);
 	return (error);
 }
+
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.76
diff -u -r1.76 uipc_usrreq.c
--- kern/uipc_usrreq.c	2001/11/08 02:13:16	1.76
+++ kern/uipc_usrreq.c	2001/11/14 23:59:42
@@ -935,7 +935,7 @@
 		if (unp->unp_addr)
 			FREE(unp->unp_addr, M_SONAME);
 		zfree(unp_zone, unp);
-		sofree(so);
+		sotryfree(so);
 	}
 }
 
Index: net/raw_cb.c
===================================================================
RCS file: /home/ncvs/src/sys/net/raw_cb.c,v
retrieving revision 1.16
diff -u -r1.16 raw_cb.c
--- net/raw_cb.c	1999/08/28 00:48:27	1.16
+++ net/raw_cb.c	2001/11/14 23:59:49
@@ -97,7 +97,7 @@
 	struct socket *so = rp->rcb_socket;
 
 	so->so_pcb = 0;
-	sofree(so);
+	sotryfree(so);
 	LIST_REMOVE(rp, list);
 #ifdef notdef
 	if (rp->rcb_laddr)
Index: net/raw_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/net/raw_usrreq.c,v
retrieving revision 1.20
diff -u -r1.20 raw_usrreq.c
--- net/raw_usrreq.c	2001/09/12 08:37:51	1.20
+++ net/raw_usrreq.c	2001/11/14 23:59:56
@@ -142,8 +142,8 @@
 	if (rp == 0)
 		return EINVAL;
 	raw_disconnect(rp);
-	sofree(so);
-	soisdisconnected(so);
+	sotryfree(so);
+	soisdisconnected(so);	/* XXX huh? called after the sofree()? */
 	return 0;
 }
 
Index: netatalk/ddp_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netatalk/ddp_usrreq.c,v
retrieving revision 1.21
diff -u -r1.21 ddp_usrreq.c
--- netatalk/ddp_usrreq.c	2001/09/12 08:37:52	1.21
+++ netatalk/ddp_usrreq.c	2001/11/15 00:00:03
@@ -441,7 +441,7 @@
 {
     soisdisconnected( so );
     so->so_pcb = 0;
-    sofree( so );
+    sotryfree(so);
 
     /* remove ddp from ddp_ports list */
     if ( ddp->ddp_lsat.sat_port != ATADDR_ANYPORT &&
Index: netatm/atm_socket.c
===================================================================
RCS file: /home/ncvs/src/sys/netatm/atm_socket.c,v
retrieving revision 1.8
diff -u -r1.8 atm_socket.c
--- netatm/atm_socket.c	2000/12/07 22:19:04	1.8
+++ netatm/atm_socket.c	2001/11/14 23:58:01
@@ -176,7 +176,7 @@
 	 * Break links and free control blocks
 	 */
 	so->so_pcb = NULL;
-	sofree(so);
+	sotryfree(so);
 
 	atm_free((caddr_t)atp);
 
Index: netinet/in_pcb.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.92
diff -u -r1.92 in_pcb.c
--- netinet/in_pcb.c	2001/11/06 00:48:01	1.92
+++ netinet/in_pcb.c	2001/11/14 23:58:11
@@ -563,7 +563,7 @@
 	inp->inp_gencnt = ++ipi->ipi_gencnt;
 	in_pcbremlists(inp);
 	so->so_pcb = 0;
-	sofree(so);
+	sotryfree(so);
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
 	if (rt) {
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet6/in6_pcb.c,v
retrieving revision 1.21
diff -u -r1.21 in6_pcb.c
--- netinet6/in6_pcb.c	2001/10/17 18:07:05	1.21
+++ netinet6/in6_pcb.c	2001/11/14 23:58:15
@@ -606,7 +606,7 @@
 	inp->inp_gencnt = ++ipi->ipi_gencnt;
 	in_pcbremlists(inp);
 	sotoinpcb(so) = 0;
-	sofree(so);
+	sotryfree(so);
 
 	if (inp->in6p_options)
 		m_freem(inp->in6p_options);
Index: netipx/ipx_pcb.c
===================================================================
RCS file: /home/ncvs/src/sys/netipx/ipx_pcb.c,v
retrieving revision 1.21
diff -u -r1.21 ipx_pcb.c
--- netipx/ipx_pcb.c	2001/09/12 08:37:56	1.21
+++ netipx/ipx_pcb.c	2001/11/14 23:58:22
@@ -268,7 +268,7 @@
 	struct socket *so = ipxp->ipxp_socket;
 
 	so->so_pcb = 0;
-	sofree(so);
+	sotryfree(so);
 	if (ipxp->ipxp_route.ro_rt != NULL)
 		rtfree(ipxp->ipxp_route.ro_rt);
 	remque(ipxp);
Index: netipx/ipx_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netipx/ipx_usrreq.c,v
retrieving revision 1.29
diff -u -r1.29 ipx_usrreq.c
--- netipx/ipx_usrreq.c	2001/09/12 08:37:56	1.29
+++ netipx/ipx_usrreq.c	2001/11/14 23:58:25
@@ -426,7 +426,7 @@
 	s = splnet();
 	ipx_pcbdetach(ipxp);
 	splx(s);
-	sofree(so);
+	sotryfree(so);
 	soisdisconnected(so);
 	return (0);
 }
Index: netnatm/natm.c
===================================================================
RCS file: /home/ncvs/src/sys/netnatm/natm.c,v
retrieving revision 1.13
diff -u -r1.13 natm.c
--- netnatm/natm.c	2001/04/05 04:20:48	1.13
+++ netnatm/natm.c	2001/11/14 23:58:41
@@ -133,7 +133,7 @@
      */
     npcb_free(npcb, NPCB_DESTROY);	/* drain */
     so->so_pcb = NULL;
-    sofree(so);
+    sotryfree(so);
  out:
     splx(s);
     return (error);
@@ -481,7 +481,7 @@
 
       npcb_free(npcb, NPCB_DESTROY);	/* drain */
       so->so_pcb = NULL;
-      sofree(so);
+      sotryfree(so);
 
       break;
 
Index: netns/idp_usrreq.c
===================================================================
RCS file: /home/ncvs/src/sys/netns/idp_usrreq.c,v
retrieving revision 1.9
diff -u -r1.9 idp_usrreq.c
--- netns/idp_usrreq.c	1999/08/28 00:49:47	1.9
+++ netns/idp_usrreq.c	2001/11/14 23:58:57
@@ -491,8 +491,8 @@
 
 	case PRU_ABORT:
 		ns_pcbdetach(nsp);
-		sofree(so);
-		soisdisconnected(so);
+		sotryfree(so);
+		soisdisconnected(so);	/* XXX huh, called after sofree()? */
 		break;
 
 	case PRU_SOCKADDR:
Index: netns/ns_pcb.c
===================================================================
RCS file: /home/ncvs/src/sys/netns/ns_pcb.c,v
retrieving revision 1.9
diff -u -r1.9 ns_pcb.c
--- netns/ns_pcb.c	1999/08/28 00:49:51	1.9
+++ netns/ns_pcb.c	2001/11/14 23:59:03
@@ -232,7 +232,7 @@
 	struct socket *so = nsp->nsp_socket;
 
 	so->so_pcb = 0;
-	sofree(so);
+	sotryfree(so);
 	if (nsp->nsp_route.ro_rt)
 		rtfree(nsp->nsp_route.ro_rt);
 	remque(nsp);
Index: nfsserver/nfs_syscalls.c
===================================================================
RCS file: /home/ncvs/src/sys/nfsserver/nfs_syscalls.c,v
retrieving revision 1.72
diff -u -r1.72 nfs_syscalls.c
--- nfsserver/nfs_syscalls.c	2001/09/28 04:37:08	1.72
+++ nfsserver/nfs_syscalls.c	2001/11/14 22:30:42
@@ -143,9 +143,12 @@
 		error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
 		if (error)
 			goto done2;
-		error = holdsock(td->td_proc->p_fd, nfsdarg.sock, &fp);
-		if (error)
+		if ((error = fget(td, nfsdarg.sock, &fp)) != 0)
 			goto done2;
+		if (fp->f_type != DTYPE_SOCKET) {
+			fdrop(fp, td);
+			goto done2;
+		}
 		/*
 		 * Get the client address for connected sockets.
 		 */
Index: sys/file.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/file.h,v
retrieving revision 1.32
diff -u -r1.32 file.h
--- sys/file.h	2001/11/14 06:30:36	1.32
+++ sys/file.h	2001/11/14 21:57:21
@@ -50,6 +50,7 @@
 struct uio;
 struct knote;
 struct vnode;
+struct socket;
 
 /*
  * Kernel descriptor table.
@@ -118,6 +119,9 @@
 int fgetvp __P((struct thread *td, int fd, struct vnode **vpp));
 int fgetvp_read __P((struct thread *td, int fd, struct vnode **vpp));
 int fgetvp_write __P((struct thread *td, int fd, struct vnode **vpp));
+
+int fgetsock __P((struct thread *td, int fd, struct socket **spp, u_int *fflagp));
+void fputsock __P((struct socket *sp));
 
 static __inline void
 fhold(fp)
Index: sys/socketvar.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/socketvar.h,v
retrieving revision 1.63
diff -u -r1.63 socketvar.h
--- sys/socketvar.h	2001/10/25 02:03:37	1.63
+++ sys/socketvar.h	2001/11/15 00:07:07
@@ -38,6 +38,7 @@
 #define _SYS_SOCKETVAR_H_
 
 #include <sys/queue.h>			/* for TAILQ macros */
+#include <sys/sx.h>			/* SX locks */
 #include <sys/selinfo.h>		/* for struct selinfo */
 
 /*
@@ -52,6 +53,7 @@
 
 struct socket {
 	struct	vm_zone *so_zone;	/* zone we were allocated from */
+	int	so_count;		/* reference count */
 	short	so_type;		/* generic type, see socket.h */
 	short	so_options;		/* from socket call, see socket.h */
 	short	so_linger;		/* time to linger while closing */
@@ -244,6 +246,24 @@
 	} \
 }
 
+/*
+ * soref()/sorele() ref-count the socket structure.  Note that you must
+ * still explicitly close the socket, but the last ref count will free
+ * the structure.
+ */
+
+#define soref(so)	++so->so_count
+
+#define sorele(so)	do {				\
+				if (--so->so_count == 0)\
+					sofree(so);	\
+			} while (0)
+
+#define sotryfree(so)	do {				\
+				if (so->so_count == 0)	\
+					sofree(so);	\
+			} while(0)
+
 #define	sorwakeup(so)	do { \
 			  if (sb_notify(&(so)->so_rcv)) \
 			    sowakeup((so), &(so)->so_rcv); \
@@ -360,7 +380,7 @@
 int	soconnect2 __P((struct socket *so1, struct socket *so2));
 int	socreate __P((int dom, struct socket **aso, int type, int proto,
 	    struct thread *td));
-void	sodealloc __P((struct socket *so));
+/*void	sodealloc __P((struct socket *so));*/
 int	sodisconnect __P((struct socket *so));
 void	sofree __P((struct socket *so));
 int	sogetopt __P((struct socket *so, struct sockopt *sopt));

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-arch" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200111150015.fAF0Flb09186>