Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 3 Jun 2009 18:07:10 +0300
From:      Kostik Belousov <kostikbel@gmail.com>
To:        Vlad Galu <dudu@dudu.ro>
Cc:        freebsd-stable@freebsd.org, Oliver Fromme <olli@freebsd.org>, bde@freebsd.org
Subject:   Re: poll()-ing a pipe descriptor, watching for POLLHUP
Message-ID:  <20090603150710.GN1927@deviant.kiev.zoral.com.ua>
In-Reply-To: <20090603143051.GM1927@deviant.kiev.zoral.com.ua>
References:  <ad79ad6b0906030515k2e41f4b9t25f752af8ef3866c@mail.gmail.com> <20090603123208.GK1927@deviant.kiev.zoral.com.ua> <ad79ad6b0906030535o4b1a959ev6bc2b34af4e7304e@mail.gmail.com> <ad79ad6b0906030610y7e3beb05w5a3a39eaf7ebe2be@mail.gmail.com> <20090603143051.GM1927@deviant.kiev.zoral.com.ua>

next in thread | previous in thread | raw e-mail | index | archive | help

--1UNnSMTKiCAppeLf
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Wed, Jun 03, 2009 at 05:30:51PM +0300, Kostik Belousov wrote:
> On Wed, Jun 03, 2009 at 04:10:34PM +0300, Vlad Galu wrote:
> > Hm, I was having an issue with an internal piece of software, but
> > never checked what kind of pipe caused the problem. Turns out it was a
> > FIFO, and I got bitten by the same bug described here:
> > http://lists.freebsd.org/pipermail/freebsd-bugs/2006-March/017591.html
> >=20
> > The problem is that the reader process isn't notified when the writer
> > process exits or closes the FIFO fd...
>=20
> So you did found the relevant PR with long audit trail and patches
> attached. You obviously should contact the author of the patches,
> Oliver Fromme, who is FreeBSD committer for some time (CCed).
>=20
> I agree that the thing shall be fixed finally. Skimming over the
> patches in kern/94772, I have some doubts about removal of
> POLLINIGNEOF flag. The reason is that we are generally do not
> remove exposed user interfaces.

I forward-ported Bruce' patch to the CURRENT. It passes the tests
from tools/regression/fifo and a test from kern/94772.
For my liking, I did not removed POLLINIGNEOF.

diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
index 66963bc..7e279ca 100644
--- a/sys/fs/fifofs/fifo_vnops.c
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -226,11 +226,47 @@ fail1:
 	if (ap->a_mode & FREAD) {
 		fip->fi_readers++;
 		if (fip->fi_readers =3D=3D 1) {
+			SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
+			if (fip->fi_writers > 0)
+				fip->fi_readsock->so_rcv.sb_state |=3D
+				    SBS_COULDRCV;
+			else
+				/*
+				 * Sloppy?  Might be necessary to clear it
+				 * in all the places where fi_readers is
+				 * decremented to 0.  I think only writers
+				 * polling for input could be confused by
+				 * having it not set, and there is a problem
+				 * with these anyway now that we have
+				 * reversed the sense of the flag -- they
+				 * now block (?), but shouldn't.
+				 */
+				fip->fi_readsock->so_rcv.sb_state &=3D
+				    ~SBS_COULDRCV;
+			SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
 			SOCKBUF_LOCK(&fip->fi_writesock->so_snd);
 			fip->fi_writesock->so_snd.sb_state &=3D ~SBS_CANTSENDMORE;
 			SOCKBUF_UNLOCK(&fip->fi_writesock->so_snd);
 			if (fip->fi_writers > 0) {
 				wakeup(&fip->fi_writers);
+			SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
+			if (fip->fi_writers > 0)
+				fip->fi_readsock->so_rcv.sb_state |=3D
+				    SBS_COULDRCV;
+			else
+				/*
+				 * Sloppy?  Might be necessary to clear it
+				 * in all the places where fi_readers is
+				 * decremented to 0.  I think only writers
+				 * polling for input could be confused by
+				 * having it not set, and there is a problem
+				 * with these anyway now that we have
+				 * reversed the sense of the flag -- they
+				 * now block (?), but shouldn't.
+				 */
+				fip->fi_readsock->so_rcv.sb_state &=3D
+				    ~SBS_COULDRCV;
+			SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
 				sowwakeup(fip->fi_writesock);
 			}
 		}
@@ -244,6 +280,7 @@ fail1:
 		if (fip->fi_writers =3D=3D 1) {
 			SOCKBUF_LOCK(&fip->fi_readsock->so_rcv);
 			fip->fi_readsock->so_rcv.sb_state &=3D ~SBS_CANTRCVMORE;
+			fip->fi_readsock->so_rcv.sb_state |=3D SBS_COULDRCV;
 			SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv);
 			if (fip->fi_readers > 0) {
 				wakeup(&fip->fi_readers);
@@ -672,28 +709,10 @@ fifo_poll_f(struct file *fp, int events, struct ucred=
 *cred, struct thread *td)
 	levents =3D events &
 	    (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
 	if ((fp->f_flag & FREAD) && levents) {
-		/*
-		 * If POLLIN or POLLRDNORM is requested and POLLINIGNEOF is
-		 * not, then convert the first two to the last one.  This
-		 * tells the socket poll function to ignore EOF so that we
-		 * block if there is no writer (and no data).  Callers can
-		 * set POLLINIGNEOF to get non-blocking behavior.
-		 */
-		if (levents & (POLLIN | POLLRDNORM) &&
-		    !(levents & POLLINIGNEOF)) {
-			levents &=3D ~(POLLIN | POLLRDNORM);
-			levents |=3D POLLINIGNEOF;
-		}
-
 		filetmp.f_data =3D fip->fi_readsock;
 		filetmp.f_cred =3D cred;
-		revents |=3D soo_poll(&filetmp, levents, cred, td);
-
-		/* Reverse the above conversion. */
-		if ((revents & POLLINIGNEOF) && !(events & POLLINIGNEOF)) {
-			revents |=3D (events & (POLLIN | POLLRDNORM));
-			revents &=3D ~POLLINIGNEOF;
-		}
+		revents |=3D soo_poll(&filetmp, levents | (events & POLLPOLL),
+		    cred, td);
 	}
 	levents =3D events & (POLLOUT | POLLWRNORM | POLLWRBAND);
 	if ((fp->f_flag & FWRITE) && levents) {
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 616c5b7..98ccc9e 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -1226,8 +1226,8 @@ pollscan(td, fds, nfd)
 				 * POLLERR if appropriate.
 				 */
 				selfdalloc(td, fds);
-				fds->revents =3D fo_poll(fp, fds->events,
-				    td->td_ucred, td);
+				fds->revents =3D fo_poll(fp,
+				    fds->events | POLLPOLL, td->td_ucred, td);
 				if (fds->revents !=3D 0)
 					n++;
 			}
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 7341d3f..a13d648 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -2706,6 +2706,42 @@ sopoll_generic(struct socket *so, int events, struct=
 ucred *active_cred,
 		if (sowriteable(so))
 			revents |=3D events & (POLLOUT | POLLWRNORM);
=20
+	/*
+	 * SBS_CANTRCVMORE (which is checked by soreadable()) normally
+	 * implies EOF (and thus readable) and hung up, but for
+	 * compatibility with other systems and to obtain behavior that
+	 * is otherwise unavailable we make the case of poll() on a fifo
+	 * that has never had any writers during the lifetime of any
+	 * current reader special: then we pretend that the fifo is
+	 * unreadable unless it contains non-null data, and that it is
+	 * not hung up.  The POLLPOLL flag is set by poll() to identify
+	 * poll() here, and the SBS_COULDRCV flag is set by the fifo
+	 * layer to indicate a fifo that is not in the special state.
+	 */
+	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+		if (!(events & POLLPOLL) || so->so_rcv.sb_state & SBS_COULDRCV)
+			revents |=3D POLLHUP;	/* finish settings */
+		else if (!(so->so_rcv.sb_cc >=3D so->so_rcv.sb_lowat ||
+		    !TAILQ_EMPTY(&so->so_comp) || so->so_error))
+			revents &=3D ~(POLLIN | POLLRDNORM); /* undo settings */
+	}
+
+	/*
+	 * Testing of hangup for writers could be optimized by combining
+	 * it with testing for writeability, but we keep the test separate
+	 * and with the same organization as the test for readers for
+	 * clarity.  Note that writeable implies not hung up, so if POLLHUP
+	 * is set here then (POLLOUT | POLLWRNORM) is not set above, as
+	 * standards require.  Less obviously, if POLLHUP was set above for
+	 * a reader, then the output flags cannot have been set above for
+	 * a writer.  Even less obviously, we cannot end up with both
+	 * POLLHUP output flags set in revents after ORing the revents for
+	 * the read and write socket in fifo_poll().
+	 */
+	if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+		revents |=3D POLLHUP;
+
+
 	if (events & (POLLPRI | POLLRDBAND))
 		if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
 			revents |=3D events & (POLLPRI | POLLRDBAND);
diff --git a/sys/sys/poll.h b/sys/sys/poll.h
index c955f32..cfd5f01 100644
--- a/sys/sys/poll.h
+++ b/sys/sys/poll.h
@@ -71,6 +71,10 @@ struct pollfd {
 #define	POLLINIGNEOF	0x2000		/* like POLLIN, except ignore EOF */
 #endif
=20
+#ifdef _KERNEL
+#define	POLLPOLL	0x8000		/* system call is actually poll() */
+#endif
+
 /*
  * These events are set if they occur regardless of whether they were
  * requested.
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index b8e6699..0da4fa1 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -56,6 +56,7 @@
 #define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
 #define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
 #define	SBS_RCVATMARK		0x0040	/* at mark on input */
+#define	SBS_COULDRCV		0x0080	/* could receive previously (or now) */
=20
 struct mbuf;
 struct sockaddr;

--1UNnSMTKiCAppeLf
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (FreeBSD)

iEYEARECAAYFAkomkZ0ACgkQC3+MBN1Mb4gr2gCdGq4WE9des8oKJ7Ha1JNLfWsV
elMAoM5Kwu6d2xeufKLw8uPAp81MGOR3
=w4Un
-----END PGP SIGNATURE-----

--1UNnSMTKiCAppeLf--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20090603150710.GN1927>