From owner-freebsd-current  Wed Sep 11  4:25:17 2002
Delivered-To: freebsd-current@freebsd.org
Received: from mx1.FreeBSD.org (mx1.FreeBSD.org [216.136.204.125])
	by hub.freebsd.org (Postfix) with ESMTP
	id 3D11037B437; Wed, 11 Sep 2002 04:25:01 -0700 (PDT)
Received: from salmon.maths.tcd.ie (salmon.maths.tcd.ie [134.226.81.11])
	by mx1.FreeBSD.org (Postfix) with SMTP
	id F11C643E4A; Wed, 11 Sep 2002 04:24:59 -0700 (PDT)
	(envelope-from iedowse@maths.tcd.ie)
Received: from walton.maths.tcd.ie by salmon.maths.tcd.ie with SMTP
          id <aa37675@salmon>; 11 Sep 2002 12:24:58 +0100 (BST)
To: Don Lewis <dl-freebsd@catspoiler.org>
Cc: current@FreeBSD.ORG, jeff@FreeBSD.ORG
Subject: Re: nfs_inactive() bug? -> panic: lockmgr: locking against myself 
In-Reply-To: Your message of "Tue, 10 Sep 2002 23:58:02 PDT."
             <200209110658.g8B6w2wr097059@gw.catspoiler.org> 
Date: Wed, 11 Sep 2002 12:24:58 +0100
From: Ian Dowse <iedowse@maths.tcd.ie>
Message-ID:  <200209111224.aa37675@salmon.maths.tcd.ie>
Sender: owner-freebsd-current@FreeBSD.ORG
Precedence: bulk
List-ID: <freebsd-current.FreeBSD.ORG>
List-Archive: <http://docs.freebsd.org/mail/> (Web Archive)
List-Help: <mailto:majordomo@FreeBSD.ORG?subject=help> (List Instructions)
List-Subscribe: <mailto:majordomo@FreeBSD.ORG?subject=subscribe%20freebsd-current>
List-Unsubscribe: <mailto:majordomo@FreeBSD.ORG?subject=unsubscribe%20freebsd-current>
X-Loop: FreeBSD.ORG

In message <200209110658.g8B6w2wr097059@gw.catspoiler.org>, Don Lewis writes:
>
>A potentially better solution just occurred to me.  It looks like it
>would be better if vrele() waited to decrement v_usecount until *after*
>the call to VOP_INACTIVE() (and after the call to VI_LOCK()).  If that
>were done, nfs_inactive() wouldn't need to muck with v_usecount at all.

I've looked at this before; I think some filesystems (ufs anyway)
depend on v_usecount being 0 when VOP_INACTIVE() is called. The
patch I have had lying around for quite a while is below. It adds
a vnode flag to avoid recursion into the last-reference handling
code in vrele/vput, which is the real problem.

It also guarantees that a vnode will not be recycled during
VOP_INACTIVE(), so the nfs code no longer needs to hold an extra
reference in the first place. The flag manipulation code got a bit
messy after Jeff's vnode flag splitting work, so the patch could
probably be improved.

Whatever way this is done, we should try to avoid adding more hacks
to the nfs_inactive() code anyway.

Ian

Index: sys/vnode.h
===================================================================
RCS file: /home/iedowse/CVS/src/sys/sys/vnode.h,v
retrieving revision 1.206
diff -u -r1.206 vnode.h
--- sys/vnode.h	1 Sep 2002 20:37:21 -0000	1.206
+++ sys/vnode.h	11 Sep 2002 11:06:46 -0000
@@ -220,6 +220,7 @@
 #define	VI_DOOMED	0x0080	/* This vnode is being recycled */
 #define	VI_FREE		0x0100	/* This vnode is on the freelist */
 #define	VI_OBJDIRTY	0x0400	/* object might be dirty */
+#define	VI_INACTIVE	0x0800	/* VOP_INACTIVE is in progress */
 /*
  * XXX VI_ONWORKLST could be replaced with a check for NULL list elements
  * in v_synclist.
@@ -377,14 +378,14 @@
 
 /* Requires interlock */
 #define	VSHOULDFREE(vp)	\
-	(!((vp)->v_iflag & (VI_FREE|VI_DOOMED)) && \
+	(!((vp)->v_iflag & (VI_FREE|VI_DOOMED|VI_INACTIVE)) && \
 	 !(vp)->v_holdcnt && !(vp)->v_usecount && \
 	 (!(vp)->v_object || \
 	  !((vp)->v_object->ref_count || (vp)->v_object->resident_page_count)))
 
 /* Requires interlock */
 #define VMIGHTFREE(vp) \
-	(!((vp)->v_iflag & (VI_FREE|VI_DOOMED|VI_XLOCK)) &&	\
+	(!((vp)->v_iflag & (VI_FREE|VI_DOOMED|VI_XLOCK|VI_INACTIVE)) &&	\
 	 LIST_EMPTY(&(vp)->v_cache_src) && !(vp)->v_usecount)
 
 /* Requires interlock */
Index: nfsclient/nfs_node.c
===================================================================
RCS file: /home/iedowse/CVS/src/sys/nfsclient/nfs_node.c,v
retrieving revision 1.55
diff -u -r1.55 nfs_node.c
--- nfsclient/nfs_node.c	11 Jul 2002 17:54:58 -0000	1.55
+++ nfsclient/nfs_node.c	11 Sep 2002 11:06:46 -0000
@@ -289,21 +289,7 @@
 	} else
 		sp = NULL;
 	if (sp) {
-		/*
-		 * We need a reference to keep the vnode from being
-		 * recycled by getnewvnode while we do the I/O
-		 * associated with discarding the buffers unless we
-		 * are being forcibly unmounted in which case we already
-		 * have our own reference.
-		 */
-		if (ap->a_vp->v_usecount > 0)
-			(void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, td, 1);
-		else if (vget(ap->a_vp, 0, td))
-			panic("nfs_inactive: lost vnode");
-		else {
-			(void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, td, 1);
-			vrele(ap->a_vp);
-		}
+		(void)nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, td, 1);
 		/*
 		 * Remove the silly file that was rename'd earlier
 		 */
Index: kern/vfs_subr.c
===================================================================
RCS file: /home/iedowse/CVS/src/sys/kern/vfs_subr.c,v
retrieving revision 1.401
diff -u -r1.401 vfs_subr.c
--- kern/vfs_subr.c	5 Sep 2002 20:46:19 -0000	1.401
+++ kern/vfs_subr.c	11 Sep 2002 11:06:46 -0000
@@ -829,7 +829,8 @@
 		for (count = 0; count < freevnodes; count++) {
 			vp = TAILQ_FIRST(&vnode_free_list);
 
-			KASSERT(vp->v_usecount == 0, 
+			KASSERT(vp->v_usecount == 0 &&
+			    (vp->v_iflag & VI_INACTIVE) == 0,
 			    ("getnewvnode: free vnode isn't"));
 
 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
@@ -1980,8 +1981,8 @@
 	KASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1,
 	    ("vrele: missed vn_close"));
 
-	if (vp->v_usecount > 1) {
-
+	if (vp->v_usecount > 1 ||
+	    ((vp->v_iflag & VI_INACTIVE) && vp->v_usecount == 1)) {
 		vp->v_usecount--;
 		VI_UNLOCK(vp);
 
@@ -1991,13 +1992,20 @@
 	if (vp->v_usecount == 1) {
 		vp->v_usecount--;
 		/*
-		 * We must call VOP_INACTIVE with the node locked.
-		 * If we are doing a vput, the node is already locked,
-		 * but, in the case of vrele, we must explicitly lock
-		 * the vnode before calling VOP_INACTIVE.
+		 * We must call VOP_INACTIVE with the node locked. Mark
+		 * as VI_INACTIVE to avoid recursion.
 		 */
-		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td) == 0)
+		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td) == 0) {
+			VI_LOCK(vp);
+			vp->v_iflag |= VI_INACTIVE;
+			VI_UNLOCK(vp);
 			VOP_INACTIVE(vp, td);
+			VI_LOCK(vp);
+			KASSERT(vp->v_iflag & VI_INACTIVE,
+			    ("vrele: lost VI_INACTIVE"));
+			vp->v_iflag &= ~VI_INACTIVE;
+			VI_UNLOCK(vp);
+		}
 		VI_LOCK(vp);
 		if (VSHOULDFREE(vp))
 			vfree(vp);
@@ -2033,7 +2041,8 @@
 	KASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1,
 	    ("vput: missed vn_close"));
 
-	if (vp->v_usecount > 1) {
+	if (vp->v_usecount > 1 ||
+	    ((vp->v_iflag & VI_INACTIVE) && vp->v_usecount == 1)) {
 		vp->v_usecount--;
 		VOP_UNLOCK(vp, LK_INTERLOCK, td);
 		return;
@@ -2042,13 +2051,16 @@
 	if (vp->v_usecount == 1) {
 		vp->v_usecount--;
 		/*
-		 * We must call VOP_INACTIVE with the node locked.
-		 * If we are doing a vput, the node is already locked,
-		 * so we just need to release the vnode mutex.
+		 * We must call VOP_INACTIVE with the node locked, so
+		 * we just need to release the vnode mutex. Mark as
+		 * as VI_INACTIVE to avoid recursion.
 		 */
+		vp->v_iflag |= VI_INACTIVE;
 		VI_UNLOCK(vp);
 		VOP_INACTIVE(vp, td);
 		VI_LOCK(vp);
+		KASSERT(vp->v_iflag & VI_INACTIVE, ("vput: lost VI_INACTIVE"));
+		vp->v_iflag &= ~VI_INACTIVE;
 		if (VSHOULDFREE(vp))
 			vfree(vp);
 		else
@@ -2331,12 +2343,16 @@
 	 * deactivated before being reclaimed. Note that the
 	 * VOP_INACTIVE will unlock the vnode.
 	 */
-	if (active) {
-		if (flags & DOCLOSE)
-			VOP_CLOSE(vp, FNONBLOCK, NOCRED, td);
+	if (active && (flags & DOCLOSE))
+		VOP_CLOSE(vp, FNONBLOCK, NOCRED, td);
+	if (active && (vp->v_iflag & VI_INACTIVE) == 0) {
+		vp->v_iflag |= VI_INACTIVE;
 		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT, td) != 0)
 			panic("vclean: cannot relock.");
 		VOP_INACTIVE(vp, td);
+		KASSERT(vp->v_iflag & VI_INACTIVE,
+		    ("vclean: lost VI_INACTIVE"));
+		vp->v_iflag &= ~VI_INACTIVE;
 	}
 
 	/*


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message