Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 20 Apr 2002 14:30:30 +1000 (EST)
From:      Joshua Goodall <joshua@roughtrade.net>
To:        FreeBSD-gnats-submit@FreeBSD.org
Subject:   kern/37270: nullfs broken by locking changes in -current
Message-ID:  <20020420043030.C873A3E2A@green.shallow.net>

next in thread | raw e-mail | index | archive | help

>Number:         37270
>Category:       kern
>Synopsis:       nullfs broken by locking changes in -current
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Fri Apr 19 21:40:01 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator:     Joshua Goodall
>Release:        FreeBSD 5.0-CURRENT i386
>Organization:
>Environment:
System: FreeBSD toxic.myrrh.net 5.0-CURRENT FreeBSD 5.0-CURRENT #1: Sat Apr 20 03:25:54 EST 2002     joshua@green.shallow.net:/usr/obj/usr/current/sys/TOXIC  i386
>Description:

The change to default to shared looks during a namei lookup causes
a deadlock in nullfs, which doesn't propogate the flags to functions
in null_subr.c

Thus nullfs tries to get an exclusive recursive lock on a v_vnlock
that is already locked shared during the same traversal, and hangs.

>How-To-Repeat:

# mount -t nullfs /var /null
# cd /null/tmp
(never returns, deadlocked in wchan "inode")

reproduced on a) a sony vaio and b) vmware

>Fix:

This was fun[tm] to debug.

The ugly workaround is "options LOOKUP_EXCLUSIVE".

A suggested fix is below. It passes the flags down during appropriate
operations, defaulting to exclusive (flags = 0). This fixes the
problem on both crashboxes and survives a basic fs stressing (multiple
parallel finds, cp -R's and postmarks to/from a given nullmount for
an hour).

A similar fix works for my WIP morphfs layer.

Joshua

--- nullfs-locking.diff begins here ---
Index: null.h
===================================================================
RCS file: /cvs/src/sys/fs/nullfs/null.h,v
retrieving revision 1.15
diff -u -r1.15 null.h
--- null.h	5 Sep 2000 09:02:07 -0000	1.15
+++ null.h	19 Apr 2002 16:05:12 -0000
@@ -63,7 +63,8 @@
 
 int nullfs_init(struct vfsconf *vfsp);
 int nullfs_uninit(struct vfsconf *vfsp);
-int null_node_create(struct mount *mp, struct vnode *target, struct vnode **vpp);
+int null_node_create(struct mount *mp, struct vnode *target,
+			struct vnode **vpp, int flags);
 int null_bypass(struct vop_generic_args *ap);
 
 #ifdef DIAGNOSTIC
Index: null_subr.c
===================================================================
RCS file: /cvs/src/sys/fs/nullfs/null_subr.c,v
retrieving revision 1.32
diff -u -r1.32 null_subr.c
--- null_subr.c	12 Sep 2001 08:37:19 -0000	1.32
+++ null_subr.c	19 Apr 2002 17:50:08 -0000
@@ -46,6 +46,7 @@
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
+#include <sys/namei.h>
 
 #include <fs/nullfs/null.h>
 
@@ -71,9 +72,9 @@
 MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
 
 static int	null_node_alloc(struct mount *mp, struct vnode *lowervp,
-				     struct vnode **vpp);
+				     struct vnode **vpp, int flags);
 static struct vnode *
-		null_node_find(struct mount *mp, struct vnode *lowervp);
+		null_node_find(struct mount *mp, struct vnode *lowervp, int flags);
 
 /*
  * Initialise cache headers
@@ -106,14 +107,16 @@
  * Lower vnode should be locked on entry and will be left locked on exit.
  */
 static struct vnode *
-null_node_find(mp, lowervp)
+null_node_find(mp, lowervp, flags)
 	struct mount *mp;
 	struct vnode *lowervp;
+	int flags;
 {
 	struct thread *td = curthread;	/* XXX */
 	struct null_node_hashhead *hd;
 	struct null_node *a;
 	struct vnode *vp;
+	int error;
 
 	/*
 	 * Find hash base, and then search the (two-way) linked
@@ -133,7 +136,15 @@
 			 * stuff, but we don't want to lock
 			 * the lower node.
 			 */
-			if (vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, td)) {
+#ifndef LOOKUP_EXCLUSIVE
+			if ((flags & ISLASTCN) && (flags & LOCKSHARED))
+				error = vget(vp, LK_SHARED, td);
+			else
+				error = vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, td);
+#else
+			error = vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, td);
+#endif
+			if (error) {
 				printf ("null_node_find: vget failed.\n");
 				goto loop;
 			};
@@ -157,10 +168,11 @@
  * Maintain a reference to (lowervp).
  */
 static int
-null_node_alloc(mp, lowervp, vpp)
+null_node_alloc(mp, lowervp, vpp, flags)
 	struct mount *mp;
 	struct vnode *lowervp;
 	struct vnode **vpp;
+	int flags;
 {
 	struct thread *td = curthread;	/* XXX */
 	struct null_node_hashhead *hd;
@@ -192,7 +204,7 @@
 	 * check to see if someone else has beaten us to it.
 	 * (We could have slept in MALLOC.)
 	 */
-	othervp = null_node_find(mp, lowervp);
+	othervp = null_node_find(mp, lowervp, flags);
 	if (othervp) {
 		vp->v_data = NULL;
 		FREE(xp, M_NULLFSNODE);
@@ -213,7 +225,14 @@
 
 	lockmgr(&null_hashlock, LK_EXCLUSIVE, NULL, td);
 	vp->v_vnlock = lowervp->v_vnlock;
+#ifndef LOOKUP_EXCLUSIVE
+	if ((flags & ISLASTCN) && (flags & LOCKSHARED))
+		error = VOP_LOCK(vp, LK_SHARED | LK_THISLAYER, td);
+	else
+		error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, td);
+#else
 	error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, td);
+#endif
 	if (error)
 		panic("null_node_alloc: can't lock new vnode\n");
 
@@ -231,14 +250,15 @@
  * vnode which contains a reference to the lower vnode.
  */
 int
-null_node_create(mp, lowervp, newvpp)
+null_node_create(mp, lowervp, newvpp, flags)
 	struct mount *mp;
 	struct vnode *lowervp;
 	struct vnode **newvpp;
+	int flags;
 {
 	struct vnode *aliasvp;
 
-	aliasvp = null_node_find(mp, lowervp);
+	aliasvp = null_node_find(mp, lowervp, flags);
 	if (aliasvp) {
 		/*
 		 * null_node_find has taken another reference
@@ -259,7 +279,7 @@
 		/*
 		 * Make new vnode reference the null_node.
 		 */
-		error = null_node_alloc(mp, lowervp, &aliasvp);
+		error = null_node_alloc(mp, lowervp, &aliasvp, flags);
 		if (error)
 			return error;
 
Index: null_vfsops.c
===================================================================
RCS file: /cvs/src/sys/fs/nullfs/null_vfsops.c,v
retrieving revision 1.51
diff -u -r1.51 null_vfsops.c
--- null_vfsops.c	17 Mar 2002 01:25:41 -0000	1.51
+++ null_vfsops.c	19 Apr 2002 15:59:32 -0000
@@ -171,7 +171,7 @@
 	 * Save reference.  Each mount also holds
 	 * a reference on the root vnode.
 	 */
-	error = null_node_create(mp, lowerrootvp, &vp);
+	error = null_node_create(mp, lowerrootvp, &vp, 0);
 	/*
 	 * Unlock the node (either the lower or the alias)
 	 */
@@ -356,7 +356,7 @@
 	if (error)
 		return (error);
 
-	return (null_node_create(mp, *vpp, vpp));
+	return (null_node_create(mp, *vpp, vpp, flags));
 }
 
 static int
@@ -370,7 +370,7 @@
 	if (error)
 		return (error);
 
-	return (null_node_create(mp, *vpp, vpp));
+	return (null_node_create(mp, *vpp, vpp, 0));
 }
 
 static int
Index: null_vnops.c
===================================================================
RCS file: /cvs/src/sys/fs/nullfs/null_vnops.c,v
retrieving revision 1.50
diff -u -r1.50 null_vnops.c
--- null_vnops.c	12 Sep 2001 08:37:19 -0000	1.50
+++ null_vnops.c	19 Apr 2002 17:00:42 -0000
@@ -346,7 +346,7 @@
 		vppp = VOPARG_OFFSETTO(struct vnode***,
 				 descp->vdesc_vpp_offset,ap);
 		if (*vppp)
-			error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+			error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp, 0);
 	}
 
  out:
@@ -400,7 +400,7 @@
 			VREF(dvp);
 			vrele(lvp);
 		} else {
-			error = null_node_create(dvp->v_mount, lvp, &vp);
+			error = null_node_create(dvp->v_mount, lvp, &vp, flags);
 			if (error == 0)
 				*ap->a_vpp = vp;
 		}
--- nullfs-locking.diff ends here ---


>Release-Note:
>Audit-Trail:
>Unformatted:

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020420043030.C873A3E2A>