Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 22 Jul 2011 16:11:59 +0300
From:      Kostik Belousov <kostikbel@gmail.com>
To:        John Baldwin <jhb@freebsd.org>
Cc:        freebsd-net@freebsd.org, rmacklem@freebsd.org, Jeremiah Lott <jlott@averesystems.com>
Subject:   Re: LOR with nfsclient "sillyrename"
Message-ID:  <20110722131159.GR17489@deviant.kiev.zoral.com.ua>
In-Reply-To: <201107220855.10774.jhb@freebsd.org>
References:  <44626428-CF14-4B20-AB57-6D4E8F4678AE@averesystems.com> <201107220855.10774.jhb@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--Fcn+O7u6afXSKWdN
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Fri, Jul 22, 2011 at 08:55:10AM -0400, John Baldwin wrote:
> On Thursday, July 21, 2011 4:19:59 pm Jeremiah Lott wrote:
> > We're seeing nfsclient deadlocks with what looks like lock order revers=
al after removing a "silly rename".  It is fairly rare, but we've seen it=
=20
> happen a few times.  I included relevant back traces from an occurrence. =
 From what I can see, nfs_inactive() is called with the vnode locked.  If=
=20
> there is a silly-rename, it will call vrele() on its parent directory, wh=
ich can potentially try to lock the parent directory.  Since this is the=20
> opposite order of the lock acquisition in lookup, it can deadlock.  This =
happened in a FreeBSD7 build, but I looked through freebsd head and=20
> didn't see any change that addressed this.  Anyone seen this before?
>=20
> I haven't seen this before, but your analysis looks correct to me.
>=20
> Perhaps the best fix would be to defer the actual freeing of the sillyren=
ame
> to an asynchronous task?  Maybe something like this (untested, uncompiled=
):
>=20
> Index: nfsclient/nfsnode.h
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- nfsclient/nfsnode.h	(revision 224254)
> +++ nfsclient/nfsnode.h	(working copy)
> @@ -36,6 +36,7 @@
>  #ifndef _NFSCLIENT_NFSNODE_H_
>  #define _NFSCLIENT_NFSNODE_H_
> =20
> +#include <sys/_task.h>
>  #if !defined(_NFSCLIENT_NFS_H_) && !defined(_KERNEL)
>  #include <nfs/nfs.h>
>  #endif
> @@ -45,8 +46,10 @@
>   * can be removed by nfs_inactive()
>   */
>  struct sillyrename {
> +	struct	task s_task;
>  	struct	ucred *s_cred;
>  	struct	vnode *s_dvp;
> +	struct	vnode *s_vp;
>  	int	(*s_removeit)(struct sillyrename *sp);
>  	long	s_namlen;
>  	char	s_name[32];
> Index: nfsclient/nfs_vnops.c
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- nfsclient/nfs_vnops.c	(revision 224254)
> +++ nfsclient/nfs_vnops.c	(working copy)
> @@ -1757,7 +1757,6 @@
>  {
>  	/*
>  	 * Make sure that the directory vnode is still valid.
> -	 * XXX we should lock sp->s_dvp here.
>  	 */
>  	if (sp->s_dvp->v_type =3D=3D VBAD)
>  		return (0);
> @@ -2754,8 +2753,10 @@
>  		M_NFSREQ, M_WAITOK);
>  	sp->s_cred =3D crhold(cnp->cn_cred);
>  	sp->s_dvp =3D dvp;
> +	sp->s_vp =3D vp;
>  	sp->s_removeit =3D nfs_removeit;
>  	VREF(dvp);
> +	vhold(vp);
> =20
>  	/*=20
>  	 * Fudge together a funny name.
> Index: nfsclient/nfs_node.c
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- nfsclient/nfs_node.c	(revision 224254)
> +++ nfsclient/nfs_node.c	(working copy)
> @@ -47,6 +47,7 @@
>  #include <sys/proc.h>
>  #include <sys/socket.h>
>  #include <sys/sysctl.h>
> +#include <sys/taskqueue.h>
>  #include <sys/vnode.h>
> =20
>  #include <vm/uma.h>
> @@ -185,6 +186,26 @@
>  	return (0);
>  }
> =20
> +static void
> +nfs_freesillyrename(void *arg, int pending)
> +{
> +	struct sillyrename *sp;
> +
> +	sp =3D arg;
> +	vn_lock(sp->s_dvp, LK_SHARED | LK_RETRY);
I think taking an exclusive lock is somewhat more clean.
> +	vn_lock(sp->s_vp, LK_EXCLUSIVE | LK_RETRY);
I believe that you have to verify that at least dvp is not doomed.

Due to this, I propose to only move the vrele() call to taskqueue.
> +	(void)nfs_vinvalbuf(ap->a_vp, 0, td, 1);
> +	/*
> +	 * Remove the silly file that was rename'd earlier
> +	 */
> +	(sp->s_removeit)(sp);
> +	crfree(sp->s_cred);
> +	vput(sp->s_dvp);
> +	VOP_UNLOCK(sp->s_vp, 0);
> +	vdrop(sp->s_vp);
> +	free((caddr_t)sp, M_NFSREQ);
> +}
> +
>  int
>  nfs_inactive(struct vop_inactive_args *ap)
>  {
> @@ -200,15 +221,9 @@
>  	} else
>  		sp =3D NULL;
>  	if (sp) {
> +		TASK_INIT(&sp->task, 0, nfs_freesillyrename, sp);
> +		taskqueue_enqueue(taskqueue_thread, &sp->task);
>  		mtx_unlock(&np->n_mtx);
> -		(void)nfs_vinvalbuf(ap->a_vp, 0, td, 1);
> -		/*
> -		 * Remove the silly file that was rename'd earlier
> -		 */
> -		(sp->s_removeit)(sp);
> -		crfree(sp->s_cred);
> -		vrele(sp->s_dvp);
> -		free((caddr_t)sp, M_NFSREQ);
>  		mtx_lock(&np->n_mtx);
>  	}
>  	np->n_flag &=3D NMODIFIED;
>=20
> --=20
> John Baldwin
> _______________________________________________
> freebsd-net@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-net
> To unsubscribe, send any mail to "freebsd-net-unsubscribe@freebsd.org"

--Fcn+O7u6afXSKWdN
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (FreeBSD)

iEYEARECAAYFAk4pdx8ACgkQC3+MBN1Mb4gYwwCePg47CM8VYF1BG3zKgf3VPQ68
BnAAoOW3ff1Se7g+T3VgjdC+6zDGKRkY
=pZXt
-----END PGP SIGNATURE-----

--Fcn+O7u6afXSKWdN--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20110722131159.GR17489>