Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 4 Mar 2004 00:40:01 +1100
From:      Tim Robbins <tjr@freebsd.org>
To:        Pawel Malachowski <pawmal-posting@freebsd.lublin.pl>
Cc:        FreeBSD-gnats-submit@freebsd.org
Subject:   Re: Using read-only NULLFS leads to panic. gdb output included, easy to reproduce.
Message-ID:  <20040303134001.GA63144@cat.robbins.dropbear.id.au>
In-Reply-To: <20040303110930.GA35449@shellma.zin.lublin.pl>
References:  <20040302213936.216CB5F103@shellma.zin.lublin.pl> <20040303002536.GA59500@cat.robbins.dropbear.id.au> <20040303110930.GA35449@shellma.zin.lublin.pl>

next in thread | previous in thread | raw e-mail | index | archive | help
On Wed, Mar 03, 2004 at 12:09:30PM +0100, Pawel Malachowski wrote:

> On Wed, Mar 03, 2004 at 11:25:36AM +1100, Tim Robbins wrote:
> 
> > There are known bugs in nullfs in all 4.x releases to date, and in 5.0.
> > If I have time, I may MFC the fixes some time before 4.10 is released.
> 
> Sounds interesting.
> 
> > Can you reproduce these problems on 5.1 or 5.2?
> 
> Procedure:
> mount_null -o ro /usr/ports /mnt/1
> find /usr/ports -type f -perm -u+s &
> find /mnt/1 -type f -perm -u+s &
> ...
> <about 30-40 processess of find(1) was enough>
> 
> 
> I was not able to reproduce panic on 5.1-RELEASE system.
> 
> However, I've reproduced second problem described in this PR,
> which happened to me on 4.9-STABLE once.
[...]

I'm sorry - the commit that fixed the deadlock issue you're describing
here ocurred between 5.1 and 5.2, not between 5.0 and 5.1 like I
had previously thought. Try 5.2, or try this patch on 5.1. If it solves
your problem, I'll backport it to 4.x for you later this week.


Fix two bugs causing possible deadlocks or panics, and one nit:
- Emulate lock draining (LK_DRAIN) in null_lock() to avoid deadlocks
  when the vnode is being recycled.
- Don't allow null_nodeget() to return a nullfs vnode from the wrong
  mount when multiple nullfs's are mounted. It's unclear why these checks
  were removed in null_subr.c 1.35, but they are definitely necessary.
  Without the checks, trying to unmount a nullfs mount will erroneously
  return EBUSY, and forcibly unmounting with -f will cause a panic.
- Bump LOG2_SIZEVNODE up to 8, since vnodes are >256 bytes now. The old
  value (7) didn't cause any problems, but made the hash algorithm
  suboptimal.

These changes fix nullfs enough that a parallel buildworld succeeds.

Submitted by:   tegge (partially; LK_DRAIN)
Tested by:      kris

Index: null.h
===================================================================
RCS file: /home/ncvs/src/sys/fs/nullfs/null.h,v
retrieving revision 1.18
diff -u -r1.18 null.h
--- null.h	13 Jun 2002 21:49:09 -0000	1.18
+++ null.h	3 Mar 2004 13:28:18 -0000
@@ -35,7 +35,7 @@
  *
  *	@(#)null.h	8.3 (Berkeley) 8/20/94
  *
- * $FreeBSD: src/sys/fs/nullfs/null.h,v 1.18 2002/06/13 21:49:09 semenu Exp $
+ * $FreeBSD: src/sys/fs/nullfs/null.h,v 1.19 2003/06/17 08:52:45 tjr Exp $
  */
 
 struct null_mount {
@@ -51,6 +51,8 @@
 	LIST_ENTRY(null_node)	null_hash;	/* Hash list */
 	struct vnode	        *null_lowervp;	/* VREFed once */
 	struct vnode		*null_vnode;	/* Back pointer */
+	int			null_pending_locks;
+	int			null_drain_wakeup;
 };
 
 #define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
Index: null_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/fs/nullfs/null_subr.c,v
retrieving revision 1.40
diff -u -r1.40 null_subr.c
--- null_subr.c	19 Feb 2003 05:47:18 -0000	1.40
+++ null_subr.c	3 Mar 2004 13:28:18 -0000
@@ -35,7 +35,7 @@
  *
  *	@(#)null_subr.c	8.7 (Berkeley) 5/14/95
  *
- * $FreeBSD: src/sys/fs/nullfs/null_subr.c,v 1.40 2003/02/19 05:47:18 imp Exp $
+ * $FreeBSD: src/sys/fs/nullfs/null_subr.c,v 1.41 2003/06/17 08:52:45 tjr Exp $
  */
 
 #include <sys/param.h>
@@ -50,7 +50,7 @@
 
 #include <fs/nullfs/null.h>
 
-#define LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode) */
+#define LOG2_SIZEVNODE 8		/* log2(sizeof struct vnode) */
 #define	NNULLNODECACHE 16
 
 /*
@@ -71,8 +71,8 @@
 static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
 MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
 
-static struct vnode * null_hashget(struct vnode *);
-static struct vnode * null_hashins(struct null_node *);
+static struct vnode * null_hashget(struct mount *, struct vnode *);
+static struct vnode * null_hashins(struct mount *, struct null_node *);
 
 /*
  * Initialise cache headers
@@ -103,7 +103,8 @@
  * Lower vnode should be locked on entry and will be left locked on exit.
  */
 static struct vnode *
-null_hashget(lowervp)
+null_hashget(mp, lowervp)
+	struct mount *mp;
 	struct vnode *lowervp;
 {
 	struct thread *td = curthread;	/* XXX */
@@ -121,9 +122,20 @@
 loop:
 	mtx_lock(&null_hashmtx);
 	LIST_FOREACH(a, hd, null_hash) {
-		if (a->null_lowervp == lowervp) {
+		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
 			vp = NULLTOV(a);
 			mtx_lock(&vp->v_interlock);
+			/*
+			 * Don't block if nullfs vnode is being recycled.
+			 * We already hold a lock on the lower vnode, thus
+			 * waiting might deadlock against the thread
+			 * recycling the nullfs vnode or another thread
+			 * in vrele() waiting for the vnode lock.
+			 */
+			if ((vp->v_iflag & VI_XLOCK) != 0) {
+				VI_UNLOCK(vp);
+				continue;
+			}
 			mtx_unlock(&null_hashmtx);
 			/*
 			 * We need vget for the VXLOCK
@@ -145,7 +157,8 @@
  * node found.
  */
 static struct vnode *
-null_hashins(xp)
+null_hashins(mp, xp)
+	struct mount *mp;
 	struct null_node *xp;
 {
 	struct thread *td = curthread;	/* XXX */
@@ -157,9 +170,21 @@
 loop:
 	mtx_lock(&null_hashmtx);
 	LIST_FOREACH(oxp, hd, null_hash) {
-		if (oxp->null_lowervp == xp->null_lowervp) {
+		if (oxp->null_lowervp == xp->null_lowervp &&
+		    NULLTOV(oxp)->v_mount == mp) {
 			ovp = NULLTOV(oxp);
 			mtx_lock(&ovp->v_interlock);
+			/*
+			 * Don't block if nullfs vnode is being recycled.
+			 * We already hold a lock on the lower vnode, thus
+			 * waiting might deadlock against the thread
+			 * recycling the nullfs vnode or another thread
+			 * in vrele() waiting for the vnode lock.
+			 */
+			if ((ovp->v_iflag & VI_XLOCK) != 0) {
+				VI_UNLOCK(ovp);
+				continue;
+			}
 			mtx_unlock(&null_hashmtx);
 			if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td))
 				goto loop;
@@ -192,7 +217,7 @@
 	int error;
 
 	/* Lookup the hash firstly */
-	*vpp = null_hashget(lowervp);
+	*vpp = null_hashget(mp, lowervp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
 		return (0);
@@ -222,6 +247,8 @@
 
 	xp->null_vnode = vp;
 	xp->null_lowervp = lowervp;
+	xp->null_pending_locks = 0;
+	xp->null_drain_wakeup = 0;
 
 	vp->v_type = lowervp->v_type;
 	vp->v_data = xp;
@@ -244,7 +271,7 @@
 	 * Atomically insert our new node into the hash or vget existing 
 	 * if someone else has beaten us to it.
 	 */
-	*vpp = null_hashins(xp);
+	*vpp = null_hashins(mp, xp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
 		VOP_UNLOCK(vp, LK_THISLAYER, td);
Index: null_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/fs/nullfs/null_vnops.c,v
retrieving revision 1.62
diff -u -r1.62 null_vnops.c
--- null_vnops.c	3 Mar 2003 19:15:38 -0000	1.62
+++ null_vnops.c	3 Mar 2004 13:28:26 -0000
@@ -40,7 +40,7 @@
  *	...and...
  *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
  *
- * $FreeBSD: src/sys/fs/nullfs/null_vnops.c,v 1.62 2003/03/03 19:15:38 njl Exp $
+ * $FreeBSD: src/sys/fs/nullfs/null_vnops.c,v 1.63 2003/06/17 08:52:45 tjr Exp $
  */
 
 /*
@@ -592,6 +592,7 @@
 	struct thread *td = ap->a_td;
 	struct vnode *lvp;
 	int error;
+	struct null_node *nn;
 
 	if (flags & LK_THISLAYER) {
 		if (vp->v_vnlock != NULL) {
@@ -614,13 +615,65 @@
 		 * going away doesn't mean the struct lock below us is.
 		 * LK_EXCLUSIVE is fine.
 		 */
+		if ((flags & LK_INTERLOCK) == 0) {
+			VI_LOCK(vp);
+			flags |= LK_INTERLOCK;
+		}
+		nn = VTONULL(vp);
 		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
 			NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
-			return(lockmgr(vp->v_vnlock,
-				(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
-				&vp->v_interlock, td));
+			/*
+			 * Emulate lock draining by waiting for all other
+			 * pending locks to complete.  Afterwards the
+			 * lockmgr call might block, but no other threads
+			 * will attempt to use this nullfs vnode due to the
+			 * VI_XLOCK flag.
+			 */
+			while (nn->null_pending_locks > 0) {
+				nn->null_drain_wakeup = 1;
+				msleep(&nn->null_pending_locks,
+				       VI_MTX(vp),
+				       PVFS,
+				       "nuldr", 0);
+			}
+			error = lockmgr(vp->v_vnlock,
+					(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
+					VI_MTX(vp), td);
+			return error;
+		}
+		nn->null_pending_locks++;
+		error = lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td);
+		VI_LOCK(vp);
+		/*
+		 * If we're called from vrele then v_usecount can have been 0
+		 * and another process might have initiated a recycle 
+		 * operation.  When that happens, just back out.
+		 */
+		if (error == 0 && (vp->v_iflag & VI_XLOCK) != 0 &&
+		    td != vp->v_vxproc) {
+			lockmgr(vp->v_vnlock,
+				(flags & ~LK_TYPE_MASK) | LK_RELEASE,
+				VI_MTX(vp), td);
+			VI_LOCK(vp);
+			error = ENOENT;
+		}
+		nn->null_pending_locks--;
+		/*
+		 * Wakeup the process draining the vnode after all
+		 * pending lock attempts has been failed.
+		 */
+		if (nn->null_pending_locks == 0 &&
+		    nn->null_drain_wakeup != 0) {
+			nn->null_drain_wakeup = 0;
+			wakeup(&nn->null_pending_locks);
+		}
+		if (error == ENOENT && (vp->v_iflag & VI_XLOCK) != 0 &&
+		    vp->v_vxproc != curthread) {
+			vp->v_iflag |= VI_XWANT;
+			msleep(vp, VI_MTX(vp), PINOD, "nulbo", 0);
 		}
-		return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td));
+		VI_UNLOCK(vp);
+		return error;
 	} else {
 		/*
 		 * To prevent race conditions involving doing a lookup



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20040303134001.GA63144>