Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 10 Sep 2013 19:00:32 +0000 (UTC)
From:      Glen Barber <gjb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-releng@freebsd.org
Subject:   svn commit: r255456 - releng/9.2/sys/kern
Message-ID:  <201309101900.r8AJ0WnI030043@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gjb
Date: Tue Sep 10 19:00:32 2013
New Revision: 255456
URL: http://svnweb.freebsd.org/changeset/base/255456

Log:
  Reintegrate r250907 (previously reverted as r254754) plus MFS r254947,
  r255443:
  
  r250907 (scottl):
    MFC r248830, r250027, r250409: Several fixes and improvements
    to sendfile()
  
  r254947 (kib):
    NFS implementation of VOP_READ() sometimes upgrades the vnode lock,
    which causes drop of the shared lock and sleep for exclusive.  As
    result, busying of the page before the call to vn_rdwr() makes NFS
    code to wait for vnode lock while page is busy, which contradicts the
    proper order of vnode lock -> busy.
  
  r255443 (des):
    Fix the length calculation for the final block of a sendfile(2)
    transmission which could be tricked into rounding up to the nearest
    page size, leaking up to a page of kernel memory.  [13:11]
  
  Approved by:	re (delphij)
  Sponsored by:	The FreeBSD Foundation

Modified:
  releng/9.2/sys/kern/uipc_syscalls.c
Directory Properties:
  releng/9.2/sys/   (props changed)

Modified: releng/9.2/sys/kern/uipc_syscalls.c
==============================================================================
--- releng/9.2/sys/kern/uipc_syscalls.c	Tue Sep 10 18:40:43 2013	(r255455)
+++ releng/9.2/sys/kern/uipc_syscalls.c	Tue Sep 10 19:00:32 2013	(r255456)
@@ -1835,9 +1835,11 @@ kern_sendfile(struct thread *td, struct 
 	struct mbuf *m = NULL;
 	struct sf_buf *sf;
 	struct vm_page *pg;
+	struct vattr va;
 	off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
 	int error, hdrlen = 0, mnw = 0;
 	int vfslocked;
+	int bsize;
 	struct sendfile_sync *sfs = NULL;
 
 	/*
@@ -1852,6 +1854,18 @@ kern_sendfile(struct thread *td, struct 
 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	if (vp->v_type == VREG) {
+		bsize = vp->v_mount->mnt_stat.f_iosize;
+		if (uap->nbytes == 0) {
+			error = VOP_GETATTR(vp, &va, td->td_ucred);
+			if (error != 0) {
+				VOP_UNLOCK(vp, 0);
+				VFS_UNLOCK_GIANT(vfslocked);
+				obj = NULL;
+				goto out;
+			}
+			rem = va.va_size;
+		} else
+			rem = uap->nbytes;
 		obj = vp->v_object;
 		if (obj != NULL) {
 			/*
@@ -1869,7 +1883,8 @@ kern_sendfile(struct thread *td, struct 
 				obj = NULL;
 			}
 		}
-	}
+	} else
+		bsize = 0;	/* silence gcc */
 	VOP_UNLOCK(vp, 0);
 	VFS_UNLOCK_GIANT(vfslocked);
 	if (obj == NULL) {
@@ -1962,11 +1977,20 @@ kern_sendfile(struct thread *td, struct 
 	 * The outer loop checks the state and available space of the socket
 	 * and takes care of the overall progress.
 	 */
-	for (off = uap->offset, rem = uap->nbytes; ; ) {
-		struct mbuf *mtail = NULL;
-		int loopbytes = 0;
-		int space = 0;
-		int done = 0;
+	for (off = uap->offset; ; ) {
+		struct mbuf *mtail;
+		int loopbytes;
+		int space;
+		int done;
+
+		if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
+		    (uap->nbytes == 0 && va.va_size == fsbytes))
+			break;
+
+		mtail = NULL;
+		loopbytes = 0;
+		space = 0;
+		done = 0;
 
 		/*
 		 * Check the socket state for ongoing connection,
@@ -2034,6 +2058,20 @@ retry_space:
 		 */
 		space -= hdrlen;
 
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		error = vn_lock(vp, LK_SHARED);
+		if (error != 0) {
+			VFS_UNLOCK_GIANT(vfslocked);
+			goto done;
+		}
+		error = VOP_GETATTR(vp, &va, td->td_ucred);
+		if (error != 0 || off >= va.va_size) {
+			VOP_UNLOCK(vp, 0);
+			VFS_UNLOCK_GIANT(vfslocked);
+			goto done;
+		}
+		VFS_UNLOCK_GIANT(vfslocked);
+
 		/*
 		 * Loop and construct maximum sized mbuf chain to be bulk
 		 * dumped into socket buffer.
@@ -2043,25 +2081,19 @@ retry_space:
 			vm_offset_t pgoff;
 			struct mbuf *m0;
 
-			VM_OBJECT_LOCK(obj);
 			/*
 			 * Calculate the amount to transfer.
 			 * Not to exceed a page, the EOF,
 			 * or the passed in nbytes.
 			 */
 			pgoff = (vm_offset_t)(off & PAGE_MASK);
-			xfsize = omin(PAGE_SIZE - pgoff,
-			    obj->un_pager.vnp.vnp_size - uap->offset -
-			    fsbytes - loopbytes);
-			if (uap->nbytes)
-				rem = (uap->nbytes - fsbytes - loopbytes);
-			else
-				rem = obj->un_pager.vnp.vnp_size -
-				    uap->offset - fsbytes - loopbytes;
-			xfsize = omin(rem, xfsize);
+			rem = va.va_size - uap->offset;
+			if (uap->nbytes != 0)
+				rem = omin(rem, uap->nbytes);
+			rem -= fsbytes + loopbytes;
+			xfsize = omin(PAGE_SIZE - pgoff, rem);
 			xfsize = omin(space - loopbytes, xfsize);
 			if (xfsize <= 0) {
-				VM_OBJECT_UNLOCK(obj);
 				done = 1;		/* all data sent */
 				break;
 			}
@@ -2071,6 +2103,7 @@ retry_space:
 			 * if not found or wait and loop if busy.
 			 */
 			pindex = OFF_TO_IDX(off);
+			VM_OBJECT_LOCK(obj);
 			pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
 			    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 
@@ -2088,42 +2121,25 @@ retry_space:
 			else if (uap->flags & SF_NODISKIO)
 				error = EBUSY;
 			else {
-				int bsize;
 				ssize_t resid;
 
-				/*
-				 * Ensure that our page is still around
-				 * when the I/O completes.
-				 */
-				vm_page_io_start(pg);
 				VM_OBJECT_UNLOCK(obj);
 
 				/*
 				 * Get the page from backing store.
-				 */
-				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
-				error = vn_lock(vp, LK_SHARED);
-				if (error != 0)
-					goto after_read;
-				bsize = vp->v_mount->mnt_stat.f_iosize;
-
-				/*
 				 * XXXMAC: Because we don't have fp->f_cred
 				 * here, we pass in NOCRED.  This is probably
 				 * wrong, but is consistent with our original
 				 * implementation.
 				 */
+				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 				error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 				    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 				    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 				    td->td_ucred, NOCRED, &resid, td);
-				VOP_UNLOCK(vp, 0);
-			after_read:
 				VFS_UNLOCK_GIANT(vfslocked);
-				VM_OBJECT_LOCK(obj);
-				vm_page_io_finish(pg);
-				if (!error)
-					VM_OBJECT_UNLOCK(obj);
+				if (error)
+					VM_OBJECT_LOCK(obj);
 				mbstat.sf_iocnt++;
 			}
 			if (error) {
@@ -2174,7 +2190,7 @@ retry_space:
 			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
 			if (m0 == NULL) {
 				error = (mnw ? EAGAIN : ENOBUFS);
-				sf_buf_mext((void *)sf_buf_kva(sf), sf);
+				sf_buf_mext(NULL, sf);
 				break;
 			}
 			MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
@@ -2202,6 +2218,8 @@ retry_space:
 			}
 		}
 
+		VOP_UNLOCK(vp, 0);
+
 		/* Add the buffer chain to the socket buffer. */
 		if (m != NULL) {
 			int mlen, err;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201309101900.r8AJ0WnI030043>