Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 21 Aug 2019 19:35:04 +0000 (UTC)
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r351348 - head/sys/kern
Message-ID:  <201908211935.x7LJZ4dd008616@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: markj
Date: Wed Aug 21 19:35:04 2019
New Revision: 351348
URL: https://svnweb.freebsd.org/changeset/base/351348

Log:
  Modify pipe_poll() to properly check for pending direct writes.
  
  With r349546, it is a responsibility of the writer to clear PIPE_DIRECTW
  after pinned data has been read.  In particular, once a reader has
  drained this data, there is a small window where the pipe is empty but
  PIPE_DIRECTW is set.  pipe_poll() was using the presence of PIPE_DIRECTW
  to determine whether to return POLLIN, so in this window it would
  claim that data was available to read when this was not the case.
  
  Fix this by modifying several checks for PIPE_DIRECTW to instead look
  at the number of residual bytes in data pinned by a direct writer.  In
  some cases we really do want to check for PIPE_DIRECTW, since the
  presence of this flag indicates that any attempt to write to the pipe
  will block on the existing direct writer.
  
  Bisected and test case provided by:	mav
  Tested by:	pho
  Reviewed by:	kib
  MFC after:	3 days
  Sponsored by:	The FreeBSD Foundation
  Differential Revision:	https://reviews.freebsd.org/D21333

Modified:
  head/sys/kern/sys_pipe.c

Modified: head/sys/kern/sys_pipe.c
==============================================================================
--- head/sys/kern/sys_pipe.c	Wed Aug 21 19:09:40 2019	(r351347)
+++ head/sys/kern/sys_pipe.c	Wed Aug 21 19:35:04 2019	(r351348)
@@ -709,11 +709,9 @@ pipe_read(struct file *fp, struct uio *uio, struct ucr
 		/*
 		 * Direct copy, bypassing a kernel buffer.
 		 */
-		} else if ((size = rpipe->pipe_map.cnt) &&
-			   (rpipe->pipe_state & PIPE_DIRECTW)) {
+		} else if ((size = rpipe->pipe_map.cnt) != 0) {
 			if (size > uio->uio_resid)
 				size = (u_int) uio->uio_resid;
-
 			PIPE_UNLOCK(rpipe);
 			error = uiomove_fromphys(rpipe->pipe_map.ms,
 			    rpipe->pipe_map.pos, size, uio);
@@ -819,32 +817,33 @@ pipe_build_write_buffer(struct pipe *wpipe, struct uio
 	u_int size;
 	int i;
 
-	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
-	KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
-		("Clone attempt on non-direct write pipe!"));
+	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
+	KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0,
+	    ("%s: PIPE_DIRECTW set on %p", __func__, wpipe));
+	KASSERT(wpipe->pipe_map.cnt == 0,
+	    ("%s: pipe map for %p contains residual data", __func__, wpipe));
 
 	if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size)
                 size = wpipe->pipe_buffer.size;
 	else
                 size = uio->uio_iov->iov_len;
 
-	if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+	wpipe->pipe_state |= PIPE_DIRECTW;
+	PIPE_UNLOCK(wpipe);
+	i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
 	    (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
-	    wpipe->pipe_map.ms, PIPENPAGES)) < 0)
+	    wpipe->pipe_map.ms, PIPENPAGES);
+	PIPE_LOCK(wpipe);
+	if (i < 0) {
+		wpipe->pipe_state &= ~PIPE_DIRECTW;
 		return (EFAULT);
+	}
 
-/*
- * set up the control block
- */
 	wpipe->pipe_map.npages = i;
 	wpipe->pipe_map.pos =
 	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
 	wpipe->pipe_map.cnt = size;
 
-/*
- * and update the uio data
- */
-
 	uio->uio_iov->iov_len -= size;
 	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
 	if (uio->uio_iov->iov_len == 0)
@@ -864,6 +863,8 @@ pipe_destroy_write_buffer(struct pipe *wpipe)
 	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
 	KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0,
 	    ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe));
+	KASSERT(wpipe->pipe_map.cnt == 0,
+	    ("%s: pipe map for %p contains residual data", __func__, wpipe));
 
 	wpipe->pipe_state &= ~PIPE_DIRECTW;
 	vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages);
@@ -889,6 +890,7 @@ pipe_clone_write_buffer(struct pipe *wpipe)
 
 	size = wpipe->pipe_map.cnt;
 	pos = wpipe->pipe_map.pos;
+	wpipe->pipe_map.cnt = 0;
 
 	wpipe->pipe_buffer.in = size;
 	wpipe->pipe_buffer.out = 0;
@@ -946,7 +948,6 @@ retry:
 		else
 			goto retry;
 	}
-	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
 	if (wpipe->pipe_buffer.cnt > 0) {
 		if (wpipe->pipe_state & PIPE_WANTR) {
 			wpipe->pipe_state &= ~PIPE_WANTR;
@@ -963,19 +964,15 @@ retry:
 			goto retry;
 	}
 
-	wpipe->pipe_state |= PIPE_DIRECTW;
-
-	PIPE_UNLOCK(wpipe);
 	error = pipe_build_write_buffer(wpipe, uio);
-	PIPE_LOCK(wpipe);
 	if (error) {
-		wpipe->pipe_state &= ~PIPE_DIRECTW;
 		pipeunlock(wpipe);
 		goto error1;
 	}
 
 	while (wpipe->pipe_map.cnt != 0) {
 		if (wpipe->pipe_state & PIPE_EOF) {
+			wpipe->pipe_map.cnt = 0;
 			pipe_destroy_write_buffer(wpipe);
 			pipeselwakeup(wpipe);
 			pipeunlock(wpipe);
@@ -1129,7 +1126,7 @@ pipe_write(struct file *fp, struct uio *uio, struct uc
 		 * pipe buffer.  We break out if a signal occurs or the
 		 * reader goes away.
 		 */
-		if (wpipe->pipe_state & PIPE_DIRECTW) {
+		if (wpipe->pipe_map.cnt != 0) {
 			if (wpipe->pipe_state & PIPE_WANTR) {
 				wpipe->pipe_state &= ~PIPE_WANTR;
 				wakeup(wpipe);
@@ -1347,7 +1344,7 @@ pipe_ioctl(struct file *fp, u_long cmd, void *data, st
 			PIPE_UNLOCK(mpipe);
 			return (0);
 		}
-		if (mpipe->pipe_state & PIPE_DIRECTW)
+		if (mpipe->pipe_map.cnt != 0)
 			*(int *)data = mpipe->pipe_map.cnt;
 		else
 			*(int *)data = mpipe->pipe_buffer.cnt;
@@ -1403,14 +1400,13 @@ pipe_poll(struct file *fp, int events, struct ucred *a
 		goto locked_error;
 #endif
 	if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM))
-		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
-		    (rpipe->pipe_buffer.cnt > 0))
+		if (rpipe->pipe_map.cnt > 0 || rpipe->pipe_buffer.cnt > 0)
 			revents |= events & (POLLIN | POLLRDNORM);
 
 	if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM))
 		if (wpipe->pipe_present != PIPE_ACTIVE ||
 		    (wpipe->pipe_state & PIPE_EOF) ||
-		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
+		    ((wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
 		     ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF ||
 			 wpipe->pipe_buffer.size == 0)))
 			revents |= events & (POLLOUT | POLLWRNORM);
@@ -1485,7 +1481,7 @@ pipe_stat(struct file *fp, struct stat *ub, struct ucr
 	bzero(ub, sizeof(*ub));
 	ub->st_mode = S_IFIFO;
 	ub->st_blksize = PAGE_SIZE;
-	if (pipe->pipe_state & PIPE_DIRECTW)
+	if (pipe->pipe_map.cnt != 0)
 		ub->st_size = pipe->pipe_map.cnt;
 	else
 		ub->st_size = pipe->pipe_buffer.cnt;
@@ -1727,7 +1723,7 @@ filt_piperead(struct knote *kn, long hint)
 
 	PIPE_LOCK_ASSERT(rpipe, MA_OWNED);
 	kn->kn_data = rpipe->pipe_buffer.cnt;
-	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
+	if (kn->kn_data == 0)
 		kn->kn_data = rpipe->pipe_map.cnt;
 
 	if ((rpipe->pipe_state & PIPE_EOF) ||



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201908211935.x7LJZ4dd008616>