Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 09 Jun 2009 13:11:30 -0500
From:      Robert Noland <rnoland@FreeBSD.org>
To:        John Baldwin <jhb@freebsd.org>
Cc:        Kostik Belousov <kostikbel@gmail.com>, Yuri Pankov <yuri.pankov@gmail.com>, freebsd-current@freebsd.org, Paul Saab <ps@freebsd.org>, Matthew Fleming <matthew.fleming@isilon.com>
Subject:   Re: panic: knlist not locked, but should be
Message-ID:  <1244571090.60347.1792.camel@balrog.2hip.net>
In-Reply-To: <200906091325.08293.jhb@freebsd.org>
References:  <20090609163005.GD75569@deviant.kiev.zoral.com.ua> <06D5F9F6F655AD4C92E28B662F7F853E02CC8A29@seaxch09.desktop.isilon.com> <20090609170025.GE75569@deviant.kiev.zoral.com.ua> <200906091325.08293.jhb@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--=-0/nt3EDdlKLgcd+NSTo5
Content-Type: text/plain
Content-Transfer-Encoding: quoted-printable

On Tue, 2009-06-09 at 13:25 -0400, John Baldwin wrote:
> On Tuesday 09 June 2009 1:00:25 pm Kostik Belousov wrote:
> > On Tue, Jun 09, 2009 at 09:45:49AM -0700, Matthew Fleming wrote:
> > >=20
> > > > This appears to be an interaction with the recent changes to use=20
> > > > shared vnode locks for writes on ZFS.  Hmm, I think it may be ok to=
=20
> > > > use a shared vnode lock for kevents on vnodes though.  The vnode=20
> > > > interlock should be sufficient locking for what little work the kev=
ent
> > >=20
> > > > filters do.  As a quick hack for now the MNT_SHARED_WRITES() stuff=20
> > > > could avoid using shared locks 'if (!VN_KNLIST_EMPTY(vp))', but I=20
> > > > think the longer term fix is to not use the vnode locks for vnode
> > > kevents, but use the interlock instead.
> > >=20
> > > I tried (briefly) using the interlock since Isilon's vnode lock is
> > > cluster wide (in our 6.1 based code we got away with using Giant).  T=
his
> > > got me a LOR report on the interlock:
> > >=20
> > > 	/*
> > > 	 * kqueue/VFS interaction
> > > 	 */
> > > 	{ "kqueue", &lock_class_mtx_sleep },
> > > 	{ "struct mount mtx", &lock_class_mtx_sleep },
> > > 	{ "vnode interlock", &lock_class_mtx_sleep },
> > > 	{ NULL, NULL },
> > >=20
> > > since knote() will take first the list->kl_lock and then the kqueue
> > > lock.  I didn't spend any time on it, and switched to using the vnode
> > > v_lock for my purposes.  But someone added that lock ordering (r16642=
1)
> > > for a reason.
> >=20
> > That was me, I actually looked for the reversed order that was reported
> > several times on the list in 6.1-6.2 timeframe. Unfortunately, nothing
> > was found.
> >=20
> > I noted in the separate letter that read filter for vnodes needs
> > shared vnode lock anyway.
>=20
> So my current idea is to allow shared vnode locks and use the vnode inter=
lock=20
> in the filt_vfs_* routines to protect access to the kn_* member variables=
.
>=20
> --- //depot/projects/smpng/sys/kern/vfs_subr.c	2009/06/09 15:15:22
> +++ //depot/user/jhb/lock/kern/vfs_subr.c	2009/06/09 17:20:33
> @@ -4103,8 +4103,10 @@
>  vfs_knllocked(void *arg)
>  {
>  	struct vnode *vp =3D arg;
> +	int islocked;
> =20
> -	return (VOP_ISLOCKED(vp) =3D=3D LK_EXCLUSIVE);
> +	islocked =3D VOP_ISLOCKED(vp);
> +	return (islocked =3D=3D LK_SHARED || islocked =3D=3D LK_EXCLUSIVE);
>  }
> =20
>  int
> @@ -4114,6 +4116,7 @@
>  	struct knote *kn =3D ap->a_kn;
>  	struct knlist *knl;
> =20
> +	ASSERT_VOP_ELOCKED(vp, "vfs_kqfilter");
>  	switch (kn->kn_filter) {
>  	case EVFILT_READ:
>  		kn->kn_fop =3D &vfsread_filtops;
> @@ -4147,6 +4150,7 @@
>  {
>  	struct vnode *vp =3D (struct vnode *)kn->kn_hook;
> =20
> +	ASSERT_VOP_ELOCKED(vp, "filt_vfsdetach");
>  	KASSERT(vp->v_pollinfo !=3D NULL, ("Missing v_pollinfo"));
>  	knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0);
>  }
> @@ -4157,48 +4161,65 @@
>  {
>  	struct vnode *vp =3D (struct vnode *)kn->kn_hook;
>  	struct vattr va;
> +	int retval;
> =20
>  	/*
>  	 * filesystem is gone, so set the EOF flag and schedule
>  	 * the knote for deletion.
>  	 */
>  	if (hint =3D=3D NOTE_REVOKE) {
> +		VI_LOCK(vp);
>  		kn->kn_flags |=3D (EV_EOF | EV_ONESHOT);
> +		VI_UNLOCK(vp);
>  		return (1);
>  	}
> =20
>  	if (VOP_GETATTR(vp, &va, curthread->td_ucred))
>  		return (0);
> =20
> +	VI_LOCK(vp);
>  	kn->kn_data =3D va.va_size - kn->kn_fp->f_offset;
> -	return (kn->kn_data !=3D 0);
> +	retval =3D (kn->kn_data !=3D 0);
> +	VI_UNLOCK(vp);
> +	return (retval);
>  }
> =20
>  /*ARGSUSED*/
>  static int
>  filt_vfswrite(struct knote *kn, long hint)
>  {
> +	struct vnode *vp =3D (struct vnode *)kn->kn_hook;
> +
>  	/*
>  	 * filesystem is gone, so set the EOF flag and schedule
>  	 * the knote for deletion.
>  	 */
> +	VI_LOCK(vp);
>  	if (hint =3D=3D NOTE_REVOKE)
>  		kn->kn_flags |=3D (EV_EOF | EV_ONESHOT);
> =20
>  	kn->kn_data =3D 0;
> +	VI_UNLOCK(vp);
>  	return (1);
>  }
> =20
>  static int
>  filt_vfsvnode(struct knote *kn, long hint)
>  {
> +	struct vnode *vp =3D (struct vnode *)kn->kn_hook;
> +	int retval;
> +
> +	VI_LOCK(vp);
>  	if (kn->kn_sfflags & hint)
>  		kn->kn_fflags |=3D hint;
>  	if (hint =3D=3D NOTE_REVOKE) {
>  		kn->kn_flags |=3D EV_EOF;
> +		VI_UNLOCK(vp);
>  		return (1);
>  	}
> -	return (kn->kn_fflags !=3D 0);
> +	retval =3D (kn->kn_fflags !=3D 0);
> +	VI_UNLOCK(vp);
> +	return (retval);
>  }
> =20
>  int

Getting this panic with the patch.  I have core files if I can get you
anything.

(kgdb) #0  doadump () at pcpu.h:223
#1  0xffffffff80590503 in boot (howto=3D260)
    at /usr/src/sys/kern/kern_shutdown.c:419
#2  0xffffffff8059098c in panic (fmt=3DVariable "fmt" is not available.
)
    at /usr/src/sys/kern/kern_shutdown.c:575
#3  0xffffffff80560115 in knlist_add (knl=3D0xffffff0067190718,=20
    kn=3D0xffffff00671be528, islocked=3D0)
at /usr/src/sys/kern/kern_event.c:1656
#4  0xffffffff8061bc14 in vfs_kqfilter (ap=3DVariable "ap" is not
available.
)
    at /usr/src/sys/kern/vfs_subr.c:4140
#5  0xffffffff808c1e97 in VOP_KQFILTER_APV (vop=3D0xffffffff80bc4580,=20
    a=3D0xffffff803e661820) at vnode_if.c:1141
#6  0xffffffff8062a241 in vn_kqfilter (fp=3D0xffffff0056f5f820,=20
    kn=3D0xffffff00671be528) at vnode_if.h:500
#7  0xffffffff80562084 in kqueue_register (kq=3D0xffffff0005adf000,=20
    kev=3D0xffffff803e661920, td=3D0xffffff004054d720, waitok=3D1)
    at /usr/src/sys/kern/kern_event.c:968
#8  0xffffffff805623f2 in kern_kevent (td=3D0xffffff004054d720,
fd=3DVariable "fd" is not available.
)
    at /usr/src/sys/kern/kern_event.c:721
#9  0xffffffff80562e10 in kevent (td=3D0xffffff004054d720,=20
    uap=3D0xffffff803e661c00) at /usr/src/sys/kern/kern_event.c:642
#10 0xffffffff80877e5d in syscall (frame=3D0xffffff803e661c90)
    at /usr/src/sys/amd64/amd64/trap.c:984
#11 0xffffffff808527d0 in Xfast_syscall ()
    at /usr/src/sys/amd64/amd64/exception.S:364
#12 0x0000000800e2672c in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb)=20

robert.

>=20
> --=20
> John Baldwin
> _______________________________________________
> freebsd-current@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-current
> To unsubscribe, send any mail to "freebsd-current-unsubscribe@freebsd.org=
"
--=20
Robert Noland <rnoland@FreeBSD.org>
FreeBSD

--=-0/nt3EDdlKLgcd+NSTo5
Content-Type: application/pgp-signature; name="signature.asc"
Content-Description: This is a digitally signed message part

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.11 (FreeBSD)

iEYEABECAAYFAkoupdIACgkQM4TrQ4qfROP+LACeJLsW2lQNdYdTXedXmHtkfm93
9PYAn2oM1tFzKUWgQA1oJjMKgtIOTv4e
=mWRR
-----END PGP SIGNATURE-----

--=-0/nt3EDdlKLgcd+NSTo5--




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1244571090.60347.1792.camel>