Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 18 Oct 2002 10:56:18 -0700
From:      Paul Saab <ps@mu.org>
To:        sjs <sjs@mail2000.com.tw>
Cc:        freebsd-isp@FreeBSD.ORG, freebsd-fs@FreeBSD.ORG
Subject:   Re: NAS via NFS crashes with vinvalbuf: flush failed
Message-ID:  <20021018175618.GA97335@elvis.mu.org>
In-Reply-To: <1034955118.86889.sjs@mail2000.com.tw>
References:  <1034955118.86889.sjs@mail2000.com.tw>

next in thread | previous in thread | raw e-mail | index | archive | help

--k1lZvvs/B4yU6o8G
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

sjs (sjs@mail2000.com.tw) wrote:
> Hi,
> 
>   I have two FreeBSD servers that mount a NetApp NAS via NFS and run
> behind a layer 4 switch for redundancy. I upgraded FreeBSD
> from 4.2R to 4.6.1-RC2, but the panic still happens very often.
> The messages I get from every crash are all the same:
> "vinvalbuf: flush failed". Once server-1 crashes, server-2 will
> crash very soon after. I guess there is some problem with accessing NFS
> files on the NAS, so when server-1 crashes, server-2 keeps accessing
> the same file and then crashes, too.

Try this patch. We've been running with it at Yahoo for about 3-4
weeks now.  It's a hack, but something like this will eventually get
committed to FreeBSD.

--k1lZvvs/B4yU6o8G
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=xxx

Index: kern/vfs_subr.c
===========================================================================
--- kern/vfs_subr.c	2002/10/18 08:38:40	#23
+++ kern/vfs_subr.c	2002/10/18 08:38:40
@@ -781,6 +781,10 @@
 	struct buf *nbp, *blist;
 	int s, error;
 	vm_object_t object;
+	int retrycount;
+
+	retrycount = 0;
+restart:
 
 	if (flags & V_SAVE) {
 		s = splbio();
@@ -884,8 +888,30 @@
 	}
 	simple_unlock(&vp->v_interlock);
 
-	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
-		panic("vinvalbuf: flush failed");
+	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)){
+		/*
+		 * NFS calls vinvalbuf on *live* vnodes.  This kind of
+		 * failure is to be expected.  Retry a few times and give
+		 * up if we are not getting anywhere.  It isn't really
+		 * important in this case anyway as long as we flushed
+		 * everything that existed before we were called.
+		 */
+		if (flags & V_NFSFLUSH) {
+			retrycount++;
+			if (retrycount <= 5) {
+				printf(
+	"vinvalbuf: lost flush race #%d on NFS live vnode; restarting\n",
+	retrycount);
+				goto restart;
+			} else {
+				printf(
+	"vinvalbuf: lost flush race #%d on NFS live vnode; giving up\n",
+	retrycount);
+			}
+		} else {
+			panic("vinvalbuf: flush failed");
+		}
+	}
 	return (0);
 }
 
Index: nfs/nfs_vnops.c
===========================================================================
--- nfs/nfs_vnops.c	2002/10/18 08:38:40	#5
+++ nfs/nfs_vnops.c	2002/10/18 08:38:40
@@ -492,16 +492,16 @@
 			return (error);
 		    if (np->n_lrev != np->n_brev ||
 			(np->n_flag & NQNFSNONCACHE)) {
-			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
-				ap->a_p, 1)) == EINTR)
+			if ((error = nfs_vinvalbuf(vp, V_SAVE | V_NFSFLUSH,
+				ap->a_cred, ap->a_p, 1)) == EINTR)
 				return (error);
 			np->n_brev = np->n_lrev;
 		    }
 		}
 	} else {
 		if (np->n_flag & NMODIFIED) {
-			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
-				ap->a_p, 1)) == EINTR)
+			if ((error = nfs_vinvalbuf(vp, V_SAVE | V_NFSFLUSH,
+				ap->a_cred, ap->a_p, 1)) == EINTR)
 				return (error);
 			np->n_attrstamp = 0;
 			if (vp->v_type == VDIR)
@@ -517,7 +517,8 @@
 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
 				if (vp->v_type == VDIR)
 					np->n_direofoffset = 0;
-				if ((error = nfs_vinvalbuf(vp, V_SAVE,
+				if ((error = nfs_vinvalbuf(vp,
+					V_SAVE | V_NFSFLUSH,
 					ap->a_cred, ap->a_p, 1)) == EINTR)
 					return (error);
 				np->n_mtime = vattr.va_mtime.tv_sec;
@@ -595,7 +596,7 @@
 		    error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, cm);
 		    /* np->n_flag &= ~NMODIFIED; */
 		} else {
-		    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		    error = nfs_vinvalbuf(vp, V_SAVE | V_NFSFLUSH, ap->a_cred, ap->a_p, 1);
 		}
 		np->n_attrstamp = 0;
 	    }
@@ -3450,7 +3451,7 @@
 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_INVAL|OBJPC_SYNC);
 		}
 		VOP_UNLOCK(vp, 0, p);
-		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, p, 1);
+		error = nfs_vinvalbuf(vp, V_SAVE | V_NFSFLUSH, ap->a_cred, p, 1);
 		if (error) {
 			return (error);
 		}
Index: sys/vnode.h
===========================================================================
--- sys/vnode.h	2002/10/18 08:38:40	#15
+++ sys/vnode.h	2002/10/18 08:38:40
@@ -263,6 +263,7 @@
 #define	WRITECLOSE	0x0004		/* vflush: only close writable files */
 #define	DOCLOSE		0x0008		/* vclean: close active files */
 #define	V_SAVE		0x0001		/* vinvalbuf: sync file first */
+#define	V_NFSFLUSH	0x0002		/* vinvalbuf: live vnode via nfs */
 #define	REVOKEALL	0x0001		/* vop_revoke: revoke all aliases */
 
 #define	VREF(vp)	vref(vp)

--k1lZvvs/B4yU6o8G--

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-isp" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20021018175618.GA97335>