Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 11 Sep 2013 06:41:16 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r255467 - head/sys/kern
Message-ID:  <201309110641.r8B6fGQU018859@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Wed Sep 11 06:41:15 2013
New Revision: 255467
URL: http://svnweb.freebsd.org/changeset/base/255467

Log:
  Implement sendfile(2) for the posix shared memory segment file descriptor,
  in addition to the regular files.
  
  Requested by:	alc
  Discussed with:	emaste
  Tested by:	pho (previous version)
  Sponsored by:	The FreeBSD Foundation
  Approved by:	re (hrs)

Modified:
  head/sys/kern/uipc_shm.c
  head/sys/kern/uipc_syscalls.c

Modified: head/sys/kern/uipc_shm.c
==============================================================================
--- head/sys/kern/uipc_shm.c	Wed Sep 11 06:16:12 2013	(r255466)
+++ head/sys/kern/uipc_shm.c	Wed Sep 11 06:41:15 2013	(r255467)
@@ -134,7 +134,7 @@ static struct fileops shm_ops = {
 	.fo_close = shm_close,
 	.fo_chmod = shm_chmod,
 	.fo_chown = shm_chown,
-	.fo_sendfile = invfo_sendfile,
+	.fo_sendfile = vn_sendfile,
 	.fo_seek = shm_seek,
 	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
 };

Modified: head/sys/kern/uipc_syscalls.c
==============================================================================
--- head/sys/kern/uipc_syscalls.c	Wed Sep 11 06:16:12 2013	(r255466)
+++ head/sys/kern/uipc_syscalls.c	Wed Sep 11 06:41:15 2013	(r255467)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capability.h>
+#include <sys/condvar.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
@@ -57,6 +58,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/file.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
+#include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
@@ -86,7 +88,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 
@@ -1850,8 +1852,6 @@ getsockaddr(namp, uaddr, len)
 	return (error);
 }
 
-#include <sys/condvar.h>
-
 struct sendfile_sync {
 	struct mtx	mtx;
 	struct cv	cv;
@@ -1917,6 +1917,10 @@ do_sendfile(struct thread *td, struct se
 	cap_rights_t rights;
 	int error;
 
+	/*
+	 * File offset must be non-negative.  If it goes beyond EOF
+	 * we send only the header/trailer and no payload data.
+	 */
 	if (uap->offset < 0)
 		return (EINVAL);
 
@@ -1978,79 +1982,240 @@ freebsd4_sendfile(struct thread *td, str
 }
 #endif /* COMPAT_FREEBSD4 */
 
-int
-vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
-    struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
-    int kflags, struct thread *td)
+static int
+sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd,
+    off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res)
 {
-	struct vnode *vp = fp->f_vnode;
-	struct file *sock_fp;
-	struct vm_object *obj = NULL;
-	struct socket *so = NULL;
-	struct mbuf *m = NULL;
-	struct sf_buf *sf;
-	struct vm_page *pg;
-	struct vattr va;
-	struct sendfile_sync *sfs = NULL;
-	cap_rights_t rights;
-	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
-	int bsize, error, hdrlen = 0, mnw = 0;
+	vm_page_t m;
+	vm_pindex_t pindex;
+	ssize_t resid;
+	int error, readahead, rv;
+
+	pindex = OFF_TO_IDX(off);
+	VM_OBJECT_WLOCK(obj);
+	m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY |
+	    VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL);
 
-	vn_lock(vp, LK_SHARED | LK_RETRY);
-	if (vp->v_type == VREG) {
-		bsize = vp->v_mount->mnt_stat.f_iosize;
-		if (nbytes == 0) {
-			error = VOP_GETATTR(vp, &va, td->td_ucred);
-			if (error != 0) {
-				VOP_UNLOCK(vp, 0);
-				obj = NULL;
-				goto out;
+	/*
+	 * Check if page is valid for what we need, otherwise initiate I/O.
+	 *
+	 * A non-zero nd argument prevents disk I/O; instead we
+	 * return to the caller the value specified in nd.  In particular,
+	 * if we already turned some pages into mbufs, nd == EAGAIN
+	 * and the main function sends those pages before we come
+	 * here again and block.
+	 */
+	if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) {
+		if (vp == NULL)
+			vm_page_xunbusy(m);
+		VM_OBJECT_WUNLOCK(obj);
+		*res = m;
+		return (0);
+	} else if (nd != 0) {
+		if (vp == NULL)
+			vm_page_xunbusy(m);
+		error = nd;
+		goto free_page;
+	}
+
+	/*
+	 * Get the page from backing store.
+	 */
+	error = 0;
+	if (vp != NULL) {
+		VM_OBJECT_WUNLOCK(obj);
+		readahead = sfreadahead * MAXBSIZE;
+
+		/*
+		 * Use vn_rdwr() instead of the pager interface for
+		 * the vnode, to allow the read-ahead.
+		 *
+		 * XXXMAC: Because we don't have fp->f_cred here, we
+		 * pass in NOCRED.  This is probably wrong, but is
+		 * consistent with our original implementation.
+		 */
+		error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off),
+		    UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead /
+		    bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td);
+		SFSTAT_INC(sf_iocnt);
+		VM_OBJECT_WLOCK(obj);
+	} else {
+		if (vm_pager_has_page(obj, pindex, NULL, NULL)) {
+			rv = vm_pager_get_pages(obj, &m, 1, 0);
+			SFSTAT_INC(sf_iocnt);
+			m = vm_page_lookup(obj, pindex);
+			if (m == NULL)
+				error = EIO;
+			else if (rv != VM_PAGER_OK) {
+				vm_page_lock(m);
+				vm_page_free(m);
+				vm_page_unlock(m);
+				m = NULL;
+				error = EIO;
 			}
-			rem = va.va_size;
-		} else
-			rem = nbytes;
+		} else {
+			pmap_zero_page(m);
+			m->valid = VM_PAGE_BITS_ALL;
+			m->dirty = 0;
+		}
+		if (m != NULL)
+			vm_page_xunbusy(m);
+	}
+	if (error == 0) {
+		*res = m;
+	} else if (m != NULL) {
+free_page:
+		vm_page_lock(m);
+		vm_page_unwire(m, 0);
+
+		/*
+		 * See if anyone else might know about this page.  If
+		 * not and it is not valid, then free it.
+		 */
+		if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m))
+			vm_page_free(m);
+		vm_page_unlock(m);
+	}
+	VM_OBJECT_WUNLOCK(obj);
+	KASSERT(error != 0 || (m->wire_count > 0 && m->valid ==
+	    VM_PAGE_BITS_ALL),
+	    ("wrong page state m %p", m));
+	return (error);
+}
+
+static int
+sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res,
+    struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size,
+    int *bsize)
+{
+	struct vattr va;
+	vm_object_t obj;
+	struct vnode *vp;
+	struct shmfd *shmfd;
+	int error;
+
+	vp = *vp_res = NULL;
+	obj = NULL;
+	shmfd = *shmfd_res = NULL;
+	*bsize = 0;
+
+	/*
+	 * The file descriptor must be a regular file with a backing
+	 * VM object, or a posix shared memory segment.
+	 */
+	if (fp->f_type == DTYPE_VNODE) {
+		vp = fp->f_vnode;
+		vn_lock(vp, LK_SHARED | LK_RETRY);
+		if (vp->v_type != VREG) {
+			error = EINVAL;
+			goto out;
+		}
+		*bsize = vp->v_mount->mnt_stat.f_iosize;
+		error = VOP_GETATTR(vp, &va, td->td_ucred);
+		if (error != 0)
+			goto out;
+		*obj_size = va.va_size;
 		obj = vp->v_object;
-		if (obj != NULL) {
-			/*
-			 * Temporarily increase the backing VM
-			 * object's reference count so that a forced
-			 * reclamation of its vnode does not
-			 * immediately destroy it.
-			 */
-			VM_OBJECT_WLOCK(obj);
-			if ((obj->flags & OBJ_DEAD) == 0) {
-				vm_object_reference_locked(obj);
-				VM_OBJECT_WUNLOCK(obj);
-			} else {
-				VM_OBJECT_WUNLOCK(obj);
-				obj = NULL;
-			}
+		if (obj == NULL) {
+			error = EINVAL;
+			goto out;
 		}
-	} else
-		bsize = 0;	/* silence gcc */
-	VOP_UNLOCK(vp, 0);
-	if (obj == NULL) {
+	} else if (fp->f_type == DTYPE_SHM) {
+		shmfd = fp->f_data;
+		obj = shmfd->shm_object;
+		*obj_size = shmfd->shm_size;
+	} else {
 		error = EINVAL;
 		goto out;
 	}
 
+	VM_OBJECT_WLOCK(obj);
+	if ((obj->flags & OBJ_DEAD) != 0) {
+		VM_OBJECT_WUNLOCK(obj);
+		error = EBADF;
+		goto out;
+	}
+
+	/*
+	 * Temporarily increase the backing VM object's reference
+	 * count so that a forced reclamation of its vnode does not
+	 * immediately destroy it.
+	 */
+	vm_object_reference_locked(obj);
+	VM_OBJECT_WUNLOCK(obj);
+	*obj_res = obj;
+	*vp_res = vp;
+	*shmfd_res = shmfd;
+
+out:
+	if (vp != NULL)
+		VOP_UNLOCK(vp, 0);
+	return (error);
+}
+
+static int
+kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp,
+    struct socket **so)
+{
+	cap_rights_t rights;
+	int error;
+
+	*sock_fp = NULL;
+	*so = NULL;
+
 	/*
 	 * The socket must be a stream socket and connected.
-	 * Remember if it a blocking or non-blocking socket.
 	 */
-	error = getsock_cap(td->td_proc->p_fd, sockfd,
-	    cap_rights_init(&rights, CAP_SEND), &sock_fp, NULL);
+	error = getsock_cap(td->td_proc->p_fd, s, cap_rights_init(&rights,
+	    CAP_SEND), sock_fp, NULL);
+	if (error != 0)
+		return (error);
+	*so = (*sock_fp)->f_data;
+	if ((*so)->so_type != SOCK_STREAM)
+		return (EINVAL);
+	if (((*so)->so_state & SS_ISCONNECTED) == 0)
+		return (ENOTCONN);
+	return (0);
+}
+
+int
+vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
+    struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
+    int kflags, struct thread *td)
+{
+	struct file *sock_fp;
+	struct vnode *vp;
+	struct vm_object *obj;
+	struct socket *so;
+	struct mbuf *m;
+	struct sf_buf *sf;
+	struct vm_page *pg;
+	struct shmfd *shmfd;
+	struct sendfile_sync *sfs;
+	struct vattr va;
+	off_t off, xfsize, fsbytes, sbytes, rem, obj_size;
+	int error, bsize, nd, hdrlen, mnw;
+	bool inflight_called;
+
+	obj = NULL;
+	so = NULL;
+	m = NULL;
+	sfs = NULL;
+	fsbytes = sbytes = 0;
+	hdrlen = mnw = 0;
+	rem = nbytes;
+	inflight_called = false;
+
+	error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
+	if (error != 0)
+		return (error);
+	if (rem == 0)
+		rem = obj_size;
+
+	error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so);
 	if (error != 0)
 		goto out;
-	so = sock_fp->f_data;
-	if (so->so_type != SOCK_STREAM) {
-		error = EINVAL;
-		goto out;
-	}
-	if ((so->so_state & SS_ISCONNECTED) == 0) {
-		error = ENOTCONN;
-		goto out;
-	}
+
 	/*
 	 * Do not wait on memory allocations but return ENOMEM for
 	 * caller to retry later.
@@ -2123,7 +2288,7 @@ vn_sendfile(struct file *fp, int sockfd,
 		int done;
 
 		if ((nbytes != 0 && nbytes == fsbytes) ||
-		    (nbytes == 0 && va.va_size == fsbytes))
+		    (nbytes == 0 && obj_size == fsbytes))
 			break;
 
 		mtail = NULL;
@@ -2197,13 +2362,16 @@ retry_space:
 		 */
 		space -= hdrlen;
 
-		error = vn_lock(vp, LK_SHARED);
-		if (error != 0)
-			goto done;
-		error = VOP_GETATTR(vp, &va, td->td_ucred);
-		if (error != 0 || off >= va.va_size) {
-			VOP_UNLOCK(vp, 0);
-			goto done;
+		if (vp != NULL) {
+			error = vn_lock(vp, LK_SHARED);
+			if (error != 0)
+				goto done;
+			error = VOP_GETATTR(vp, &va, td->td_ucred);
+			if (error != 0 || off >= va.va_size) {
+				VOP_UNLOCK(vp, 0);
+				goto done;
+			}
+			obj_size = va.va_size;
 		}
 
 		/*
@@ -2211,7 +2379,6 @@ retry_space:
 		 * dumped into socket buffer.
 		 */
 		while (space > loopbytes) {
-			vm_pindex_t pindex;
 			vm_offset_t pgoff;
 			struct mbuf *m0;
 
@@ -2221,7 +2388,7 @@ retry_space:
 			 * or the passed in nbytes.
 			 */
 			pgoff = (vm_offset_t)(off & PAGE_MASK);
-			rem = va.va_size - offset;
+			rem = obj_size - offset;
 			if (nbytes != 0)
 				rem = omin(rem, nbytes);
 			rem -= fsbytes + loopbytes;
@@ -2236,59 +2403,15 @@ retry_space:
 			 * Attempt to look up the page.  Allocate
 			 * if not found or wait and loop if busy.
 			 */
-			pindex = OFF_TO_IDX(off);
-			VM_OBJECT_WLOCK(obj);
-			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
-			    VM_ALLOC_IGN_SBUSY | VM_ALLOC_NORMAL |
-			    VM_ALLOC_WIRED);
-
-			/*
-			 * Check if page is valid for what we need,
-			 * otherwise initiate I/O.
-			 * If we already turned some pages into mbufs,
-			 * send them off before we come here again and
-			 * block.
-			 */
-			if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
-				VM_OBJECT_WUNLOCK(obj);
-			else if (m != NULL)
-				error = EAGAIN;	/* send what we already got */
-			else if (flags & SF_NODISKIO)
-				error = EBUSY;
-			else {
-				ssize_t resid;
-				int readahead = sfreadahead * MAXBSIZE;
-
-				VM_OBJECT_WUNLOCK(obj);
-
-				/*
-				 * Get the page from backing store.
-				 * XXXMAC: Because we don't have fp->f_cred
-				 * here, we pass in NOCRED.  This is probably
-				 * wrong, but is consistent with our original
-				 * implementation.
-				 */
-				error = vn_rdwr(UIO_READ, vp, NULL, readahead,
-				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
-				    IO_VMIO | ((readahead / bsize) << IO_SEQSHIFT),
-				    td->td_ucred, NOCRED, &resid, td);
-				SFSTAT_INC(sf_iocnt);
-				if (error != 0)
-					VM_OBJECT_WLOCK(obj);
-			}
+			if (m != NULL)
+				nd = EAGAIN; /* send what we already got */
+			else if ((flags & SF_NODISKIO) != 0)
+				nd = EBUSY;
+			else
+				nd = 0;
+			error = sendfile_readpage(obj, vp, nd, off,
+			    xfsize, bsize, td, &pg);
 			if (error != 0) {
-				vm_page_lock(pg);
-				vm_page_unwire(pg, 0);
-				/*
-				 * See if anyone else might know about
-				 * this page.  If not and it is not valid,
-				 * then free it.
-				 */
-				if (pg->wire_count == 0 && pg->valid == 0 &&
-				    !vm_page_busied(pg))
-					vm_page_free(pg);
-				vm_page_unlock(pg);
-				VM_OBJECT_WUNLOCK(obj);
 				if (error == EAGAIN)
 					error = 0;	/* not a real error */
 				break;
@@ -2358,7 +2481,8 @@ retry_space:
 			}
 		}
 
-		VOP_UNLOCK(vp, 0);
+		if (vp != NULL)
+			VOP_UNLOCK(vp, 0);
 
 		/* Add the buffer chain to the socket buffer. */
 		if (m != NULL) {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201309110641.r8B6fGQU018859>