Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 24 Sep 2014 18:57:28 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Peter Holm <peter@holm.cc>
Cc:        FreeBSD FS <freebsd-fs@freebsd.org>
Subject:   Re: Deadlock with umount -f involving tmpfs on top of ZFS on r271170
Message-ID:  <20140924155728.GN8870@kib.kiev.ua>
In-Reply-To: <20140924153045.GA15685@x2.osted.lan>
References:  <5420D5FC.4030600@FreeBSD.org> <20140923131244.GC8870@kib.kiev.ua> <5422240F.4080003@FreeBSD.org> <20140924102758.GH8870@kib.kiev.ua> <20140924132605.GA11772@x2.osted.lan> <20140924134725.GI8870@kib.kiev.ua> <20140924153045.GA15685@x2.osted.lan>

next in thread | previous in thread | raw e-mail | index | archive | help
On Wed, Sep 24, 2014 at 05:30:45PM +0200, Peter Holm wrote:
> On Wed, Sep 24, 2014 at 04:47:25PM +0300, Konstantin Belousov wrote:
> > On Wed, Sep 24, 2014 at 03:26:05PM +0200, Peter Holm wrote:
> > > The patch is an improvement, but:
> > > 
> > > http://people.freebsd.org/~pho/stress/log/kostik718.txt
> > 
> > Does you load included both rename and link, or only one of those
> > syscalls ?  I see a bug in the rename part of the patch, below is
> > the update.
> > 
> 
> Both. I have split the tests in two now. Uptime is by now one hour.
> I'll let that run for a few hours more, before switching to random
> tests.
> 
> I did get this page fault once:
> http://people.freebsd.org/~pho/stress/log/kostik719.txt
> but I guess it's unrelated? I have recompiled uma_core.c and
> vm_pageout with "-O0" in case it shows up again.

This looks unrelated.  But, in the log, I see user-controllable LOR
caused by my patch.  Please use the following update instead.

diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index b3b7ed5..a4aa19e 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1551,10 +1551,10 @@ kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
 	cap_rights_t rights;
 	int error;
 
+again:
 	bwillwrite();
 	NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td);
 
-again:
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -1563,50 +1563,65 @@ again:
 		vrele(vp);
 		return (EPERM);		/* POSIX */
 	}
-	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
-		vrele(vp);
-		return (error);
-	}
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2,
 	    segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td);
 	if ((error = namei(&nd)) == 0) {
 		if (nd.ni_vp != NULL) {
+			NDFREE(&nd, NDF_ONLY_PNBUF);
 			if (nd.ni_dvp == nd.ni_vp)
 				vrele(nd.ni_dvp);
 			else
 				vput(nd.ni_dvp);
 			vrele(nd.ni_vp);
-			error = EEXIST;
-		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
+			vrele(vp);
+			return (EEXIST);
+		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
 			/*
-			 * Check for cross-device links.  No need to
-			 * recheck vp->v_type, since it cannot change
-			 * for non-doomed vnode.
+			 * Cross-device link.  No need to recheck
+			 * vp->v_type, since it cannot change, except
+			 * to VBAD.
 			 */
-			if (nd.ni_dvp->v_mount != vp->v_mount)
-				error = EXDEV;
-			else
-				error = can_hardlink(vp, td->td_ucred);
-			if (error == 0)
+			NDFREE(&nd, NDF_ONLY_PNBUF);
+			vput(nd.ni_dvp);
+			vrele(vp);
+			return (EXDEV);
+		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
+			error = can_hardlink(vp, td->td_ucred);
 #ifdef MAC
+			if (error == 0)
 				error = mac_vnode_check_link(td->td_ucred,
 				    nd.ni_dvp, vp, &nd.ni_cnd);
-			if (error == 0)
 #endif
-				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+			if (error != 0) {
+				vput(vp);
+				vput(nd.ni_dvp);
+				NDFREE(&nd, NDF_ONLY_PNBUF);
+				return (error);
+			}
+			error = vn_start_write(vp, &mp, V_NOWAIT);
+			if (error != 0) {
+				vput(vp);
+				vput(nd.ni_dvp);
+				NDFREE(&nd, NDF_ONLY_PNBUF);
+				error = vn_start_write(NULL, &mp,
+				    V_XSLEEP | PCATCH);
+				if (error != 0)
+					return (error);
+				goto again;
+			}
+			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 			VOP_UNLOCK(vp, 0);
 			vput(nd.ni_dvp);
+			vn_finished_write(mp);
+			NDFREE(&nd, NDF_ONLY_PNBUF);
 		} else {
 			vput(nd.ni_dvp);
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			vrele(vp);
-			vn_finished_write(mp);
 			goto again;
 		}
-		NDFREE(&nd, NDF_ONLY_PNBUF);
 	}
 	vrele(vp);
-	vn_finished_write(mp);
 	return (error);
 }
 
@@ -3519,6 +3534,7 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 	cap_rights_t rights;
 	int error;
 
+again:
 	bwillwrite();
 #ifdef MAC
 	NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
@@ -3539,14 +3555,6 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 		VOP_UNLOCK(fromnd.ni_vp, 0);
 #endif
 	fvp = fromnd.ni_vp;
-	if (error == 0)
-		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
-	if (error != 0) {
-		NDFREE(&fromnd, NDF_ONLY_PNBUF);
-		vrele(fromnd.ni_dvp);
-		vrele(fvp);
-		goto out1;
-	}
 	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 	    SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 	    cap_rights_init(&rights, CAP_LINKAT), td);
@@ -3559,11 +3567,28 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
-		vn_finished_write(mp);
 		goto out1;
 	}
 	tdvp = tond.ni_dvp;
 	tvp = tond.ni_vp;
+	error = vn_start_write(fvp, &mp, V_NOWAIT);
+	if (error != 0) {
+		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		NDFREE(&tond, NDF_ONLY_PNBUF);
+		if (tvp != NULL)
+			vput(tvp);
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		vrele(tond.ni_startdir);
+		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
+		if (error != 0)
+			return (error);
+		goto again;
+	}
 	if (tvp != NULL) {
 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 			error = ENOTDIR;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20140924155728.GN8870>