Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 2 Oct 2001 12:35:10 -0700 (PDT)
From:      Matt Dillon <dillon@earth.backplane.com>
To:        Yevgeniy Aleynikov <eugenea@infospace.com>
Cc:        peter@FreeBSD.ORG, ache@FreeBSD.ORG, mckusick@mckusick.com, Ken Pizzini <kenp@infospace.com>, Ian Dowse <iedowse@maths.tcd.ie>, hackers@FreeBSD.ORG
Subject:   Re: bleh.  Re: ufs_rename panic
Message-ID:  <200110021935.f92JZAm59707@earth.backplane.com>
References:  <200110020506.f9256BJ55255@earth.backplane.com> <3BBA0699.29F589BE@infospace.com>

next in thread | previous in thread | raw e-mail | index | archive | help

    Ok, I'm adding -hackers... another email thread got going in -committers.

    Here is a patch set for -stable.  It isn't perfect but it does appear
    to solve the problem.  The one case I don't handle right is if you have
    a hardlinked file and two renames in two different directories on the same
    file occur at the same time... that will (improperly) return an error 
    code when, in fact, it's perfectly acceptable to do that.

    This patch appears to fix the utterly trivial crash reproduction that
    Yevgeniy was able to produce with a couple of simple race scripts running
    in the background.

    What I've done is add a SOFTLOCKLEAF capability to namei().  If set, and
    the file/directory exists, namei() will generate an extra VREF() on 
    the vnode and set the VSOFTLOCK flag in vp->v_flag.  If the vnode already
    has VSOFTLOCK set, namei() will return EINVAL.

    Then in rename() and rmdir() I set SOFTLOCKLEAF for the namei resolution
    and, of course, clean things up when everything is done.

    The ufs_rename() and ufs_rmdir() code no longer have to do the IN_RENAME
    hack at all, because it's now handled.

    This patch set does not yet include fixes to unionfs or the nfs server
    and is for informational purposes only.  Comments?

						-Matt

Index: kern/vfs_lookup.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_lookup.c,v
retrieving revision 1.38.2.3
diff -u -r1.38.2.3 vfs_lookup.c
--- kern/vfs_lookup.c	2001/08/31 19:36:49	1.38.2.3
+++ kern/vfs_lookup.c	2001/10/02 19:04:21
@@ -372,11 +372,20 @@
 			error = EISDIR;
 			goto bad;
 		}
+		if ((cnp->cn_flags & SOFTLOCKLEAF) &&
+		    (dp->v_flag & VSOFTLOCK)) {
+			error = EINVAL;
+			goto bad;
+		}
 		if (wantparent) {
 			ndp->ni_dvp = dp;
 			VREF(dp);
 		}
 		ndp->ni_vp = dp;
+		if (cnp->cn_flags & SOFTLOCKLEAF) {
+			VREF(dp);
+			vsetsoftlock(dp);
+		}
 		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
 			VOP_UNLOCK(dp, 0, p);
 		/* XXX This should probably move to the top of function. */
@@ -565,13 +574,20 @@
 		error = EROFS;
 		goto bad2;
 	}
+	if ((cnp->cn_flags & SOFTLOCKLEAF) && (dp->v_flag & VSOFTLOCK)) {
+		error = EINVAL;
+		goto bad2;
+	}
 	if (cnp->cn_flags & SAVESTART) {
 		ndp->ni_startdir = ndp->ni_dvp;
 		VREF(ndp->ni_startdir);
 	}
 	if (!wantparent)
 		vrele(ndp->ni_dvp);
-
+	if (cnp->cn_flags & SOFTLOCKLEAF) {
+		VREF(dp);
+		vsetsoftlock(dp);
+	}
 	if ((cnp->cn_flags & LOCKLEAF) == 0)
 		VOP_UNLOCK(dp, 0, p);
 	return (0);
@@ -654,6 +670,15 @@
 			error = ENOTDIR;
 			goto bad;
 		}
+		if ((cnp->cn_flags & SOFTLOCKLEAF) &&
+		    (dp->v_flag & VSOFTLOCK)) {
+			error = EINVAL;
+			goto bad;
+		}
+		if (cnp->cn_flags & SOFTLOCKLEAF) {
+			VREF(dp);
+			vsetsoftlock(dp);
+		}
 		if (!(cnp->cn_flags & LOCKLEAF))
 			VOP_UNLOCK(dp, 0, p);
 		*vpp = dp;
@@ -707,6 +732,10 @@
 		error = EROFS;
 		goto bad2;
 	}
+	if ((cnp->cn_flags & SOFTLOCKLEAF) && (dp->v_flag & VSOFTLOCK)) {
+		error = EINVAL;
+		goto bad2;
+	}
 	/* ASSERT(dvp == ndp->ni_startdir) */
 	if (cnp->cn_flags & SAVESTART)
 		VREF(dvp);
@@ -718,6 +747,10 @@
 		((cnp->cn_flags & (NOOBJ|LOCKLEAF)) == LOCKLEAF))
 		vfs_object_create(dp, cnp->cn_proc, cnp->cn_cred);
 
+	if (cnp->cn_flags & SOFTLOCKLEAF) {
+		VREF(dp);
+		vsetsoftlock(dp);
+	}
 	if ((cnp->cn_flags & LOCKLEAF) == 0)
 		VOP_UNLOCK(dp, 0, p);
 	return (0);
Index: kern/vfs_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.249.2.11
diff -u -r1.249.2.11 vfs_subr.c
--- kern/vfs_subr.c	2001/09/11 09:49:53	1.249.2.11
+++ kern/vfs_subr.c	2001/10/02 18:45:55
@@ -2927,6 +2961,12 @@
      struct nameidata *ndp;
      const uint flags;
 {
+	if (!(flags & NDF_NO_FREE_SOFTLOCKLEAF) &&
+	    (ndp->ni_cnd.cn_flags & SOFTLOCKLEAF) &&
+	    ndp->ni_vp) {
+		vclearsoftlock(ndp->ni_vp);
+		vrele(ndp->ni_vp);
+	}
 	if (!(flags & NDF_NO_FREE_PNBUF) &&
 	    (ndp->ni_cnd.cn_flags & HASBUF)) {
 		zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
@@ -2955,3 +2995,24 @@
 		ndp->ni_startdir = NULL;
 	}
 }
+
+void
+vsetsoftlock(struct vnode *vp)
+{
+	int s;
+
+	s = splbio();
+	vp->v_flag |= VSOFTLOCK;
+	splx(s);
+}
+
+void
+vclearsoftlock(struct vnode *vp)
+{
+	int s;
+
+	s = splbio();
+	vp->v_flag &= ~VSOFTLOCK;
+	splx(s);
+}
+
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.151.2.9
diff -u -r1.151.2.9 vfs_syscalls.c
--- kern/vfs_syscalls.c	2001/08/12 10:48:00	1.151.2.9
+++ kern/vfs_syscalls.c	2001/10/02 19:18:06
@@ -2633,8 +2633,8 @@
 	int error;
 
 	bwillwrite();
-	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
-	    SCARG(uap, from), p);
+	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | SOFTLOCKLEAF,
+	    UIO_USERSPACE, SCARG(uap, from), p);
 	if ((error = namei(&fromnd)) != 0)
 		return (error);
 	fvp = fromnd.ni_vp;
@@ -2646,7 +2646,7 @@
 		/* Translate error code for rename("dir1", "dir2/."). */
 		if (error == EISDIR && fvp->v_type == VDIR)
 			error = EINVAL;
-		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		NDFREE(&fromnd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 		goto out1;
@@ -2683,12 +2683,14 @@
 		if (tvp) {
 			VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
 		}
+		fromnd.ni_cnd.cn_flags &= ~SOFTLOCKLEAF;	/* XXX hack */
 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
-		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		fromnd.ni_cnd.cn_flags |= SOFTLOCKLEAF;
+		NDFREE(&fromnd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF);
 		NDFREE(&tond, NDF_ONLY_PNBUF);
 	} else {
-		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		NDFREE(&fromnd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF);
 		NDFREE(&tond, NDF_ONLY_PNBUF);
 		if (tdvp == tvp)
 			vrele(tdvp);
@@ -2785,8 +2787,8 @@
 	struct nameidata nd;
 
 	bwillwrite();
-	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
-	    SCARG(uap, path), p);
+	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | SOFTLOCKLEAF,
+	    UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
@@ -2812,7 +2814,7 @@
 		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 	}
 out:
-	NDFREE(&nd, NDF_ONLY_PNBUF);
+	NDFREE(&nd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF);
 	if (nd.ni_dvp == vp)
 		vrele(nd.ni_dvp);
 	else
Index: sys/namei.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/namei.h,v
retrieving revision 1.29.2.2
diff -u -r1.29.2.2 namei.h
--- sys/namei.h	2001/09/30 21:12:54	1.29.2.2
+++ sys/namei.h	2001/10/02 18:43:34
@@ -114,7 +114,7 @@
 #define	FOLLOW		0x0040	/* follow symbolic links */
 #define	NOOBJ		0x0080	/* don't create object */
 #define	NOFOLLOW	0x0000	/* do not follow symbolic links (pseudo) */
-#define	MODMASK		0x00fc	/* mask of operational modifiers */
+#define	MODMASK		(0x00fc|SOFTLOCKLEAF)
 /*
  * Namei parameter descriptors.
  *
@@ -143,7 +143,7 @@
 #define	WILLBEDIR	0x080000 /* new files will be dirs; allow trailing / */
 #define	ISUNICODE	0x100000 /* current component name is unicode*/
 #define	PDIRUNLOCK	0x200000 /* file system lookup() unlocked parent dir */
-#define PARAMASK	0x1fff00 /* mask of parameter descriptors */
+#define SOFTLOCKLEAF	0x400000 /* set VSOFTLOCK in vnode */
 /*
  * Initialization of an nameidata structure.
  */
@@ -180,7 +180,9 @@
 #define NDF_NO_VP_PUT		0x0000000c
 #define NDF_NO_STARTDIR_RELE	0x00000010
 #define NDF_NO_FREE_PNBUF	0x00000020
+#define NDF_NO_FREE_SOFTLOCKLEAF 0x00000040
 #define NDF_ONLY_PNBUF		(~NDF_NO_FREE_PNBUF)
+#define NDF_ONLY_SOFTLOCKLEAF	(~NDF_NO_FREE_SOFTLOCKLEAF)
 
 void NDFREE __P((struct nameidata *, const uint));
 
Index: sys/vnode.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/vnode.h,v
retrieving revision 1.111.2.12
diff -u -r1.111.2.12 vnode.h
--- sys/vnode.h	2001/09/22 09:21:48	1.111.2.12
+++ sys/vnode.h	2001/10/02 18:46:10
@@ -169,6 +169,7 @@
 #define	VTBFREE		0x100000 /* This vnode is on the to-be-freelist */
 #define	VONWORKLST	0x200000 /* On syncer work-list */
 #define	VMOUNT		0x400000 /* Mount in progress */
+#define VSOFTLOCK	0x800000 /* Soft lock on vnode (directory entry) */
 
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value
@@ -630,6 +632,8 @@
 void 	vrele __P((struct vnode *vp));
 void	vref __P((struct vnode *vp));
 void	vbusy __P((struct vnode *vp));
+void	vsetsoftlock __P((struct vnode *vp));
+void	vclearsoftlock __P((struct vnode *vp));
 
 extern	vop_t	**default_vnodeop_p;
 extern	vop_t **spec_vnodeop_p;
Index: ufs/ufs/ufs_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ufs/ufs_vnops.c,v
retrieving revision 1.131.2.4
diff -u -r1.131.2.4 ufs_vnops.c
--- ufs/ufs/ufs_vnops.c	2001/08/28 17:28:49	1.131.2.4
+++ ufs/ufs/ufs_vnops.c	2001/10/02 19:16:40
@@ -997,13 +997,11 @@
 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
 		 */
 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
-		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
-		    (ip->i_flag & IN_RENAME)) {
+		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
 			VOP_UNLOCK(fvp, 0, p);
 			error = EINVAL;
 			goto abortit;
 		}
-		ip->i_flag |= IN_RENAME;
 		oldparent = dp->i_number;
 		doingdirectory = 1;
 	}
@@ -1224,13 +1222,12 @@
 		return (0);
 	}
 	/*
-	 * Ensure that the directory entry still exists and has not
-	 * changed while the new name has been entered. If the source is
-	 * a file then the entry may have been unlinked or renamed. In
-	 * either case there is no further work to be done. If the source
-	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME
-	 * flag ensures that it cannot be moved by another rename or removed
-	 * by a rmdir.
+	 * The source has been VSOFTLOCKd by the caller, preventing rename
+	 * or rmdir, but not prevent unlink.  At the moment we allow the unlink
+	 * race and so do not panic if relookup fails or the inodes do not
+	 * match, because files can be hardlinked and our flag is on the vnode
+	 * rather then the directory entry in the namei cache.  But source
+	 * directories must still exist.
 	 */
 	if (xp != ip) {
 		if (doingdirectory)
@@ -1248,7 +1245,6 @@
 			cache_purge(fdvp);
 		}
 		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
-		xp->i_flag &= ~IN_RENAME;
 	}
 	VN_KNOTE(fvp, NOTE_RENAME);
 	if (dp)
@@ -1263,13 +1259,10 @@
 		vput(ITOV(xp));
 	vput(ITOV(dp));
 out:
-	if (doingdirectory)
-		ip->i_flag &= ~IN_RENAME;
 	if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) {
 		ip->i_effnlink--;
 		ip->i_nlink--;
 		ip->i_flag |= IN_CHANGE;
-		ip->i_flag &= ~IN_RENAME;
 		if (DOINGSOFTDEP(fvp))
 			softdep_change_linkcnt(ip);
 		vput(fvp);
@@ -1503,18 +1496,16 @@
 	dp = VTOI(dvp);
 
 	/*
-	 * Do not remove a directory that is in the process of being renamed.
 	 * Verify the directory is empty (and valid). Rmdir ".." will not be
 	 * valid since ".." will contain a reference to the current directory
 	 * and thus be non-empty. Do not allow the removal of mounted on
 	 * directories (this can happen when an NFS exported filesystem
 	 * tries to remove a locally mounted on directory).
+	 *
+	 * The syscall wrapper already protects us from attempting to remove
+	 * a directory that is undergoing a rename.
 	 */
 	error = 0;
-	if (ip->i_flag & IN_RENAME) {
-		error = EINVAL;
-		goto out;
-	}
 	if (ip->i_effnlink != 2 ||
 	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
 		error = ENOTEMPTY;

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200110021935.f92JZAm59707>