From owner-svn-src-stable-9@FreeBSD.ORG Sat May 25 11:05:01 2013 Return-Path: Delivered-To: svn-src-stable-9@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by hub.freebsd.org (Postfix) with ESMTP id EB1C8FA7; Sat, 25 May 2013 11:05:01 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id CB63DB5C; Sat, 25 May 2013 11:05:01 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.6/8.14.6) with ESMTP id r4PB51n2082281; Sat, 25 May 2013 11:05:01 GMT (envelope-from kib@svn.freebsd.org) Received: (from kib@localhost) by svn.freebsd.org (8.14.6/8.14.5/Submit) id r4PB50hs082255; Sat, 25 May 2013 11:05:00 GMT (envelope-from kib@svn.freebsd.org) Message-Id: <201305251105.r4PB50hs082255@svn.freebsd.org> From: Konstantin Belousov Date: Sat, 25 May 2013 11:05:00 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r250978 - in stable/9/sys: fs/nullfs kern sys X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable-9@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for only the 9-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 25 May 2013 11:05:02 -0000 Author: kib Date: Sat May 25 11:05:00 2013 New Revision: 250978 URL: http://svnweb.freebsd.org/changeset/base/250978 Log: MFC r250505: - Fix nullfs vnode reference leak in nullfs_reclaim_lowervp(). The null_hashget() obtains the reference on the nullfs vnode, which must be dropped. - Fix a wart which existed from the introduction of the nullfs caching, do not unlock lower vnode in the nullfs_reclaim_lowervp(). It should be innocent, but now it is also formally safe. Inform the nullfs_reclaim() about this using the NULLV_NOUNLOCK flag set on nullfs inode. - Add a callback to the upper filesystems for the lower vnode unlinking. When inactivating a nullfs vnode, check if the lower vnode was unlinked, indicated by nullfs flag NULLV_DROP or VV_NOSYNC on the lower vnode, and reclaim upper vnode if so. This allows nullfs to purge cached vnodes for the unlinked lower vnode, avoiding excessive caching. MFC r250852: Do not leak the NULLV_NOUNLOCK flag from the nullfs_unlink_lowervp(), for the case when the nullfs vnode is not reclaimed. Otherwise, later reclamation would not unlock the lower vnode. Modified: stable/9/sys/fs/nullfs/null.h stable/9/sys/fs/nullfs/null_subr.c stable/9/sys/fs/nullfs/null_vfsops.c stable/9/sys/fs/nullfs/null_vnops.c stable/9/sys/kern/vfs_subr.c stable/9/sys/kern/vfs_syscalls.c stable/9/sys/sys/mount.h Directory Properties: stable/9/sys/ (props changed) stable/9/sys/fs/ (props changed) stable/9/sys/sys/ (props changed) Modified: stable/9/sys/fs/nullfs/null.h ============================================================================== --- stable/9/sys/fs/nullfs/null.h Sat May 25 10:35:05 2013 (r250977) +++ stable/9/sys/fs/nullfs/null.h Sat May 25 11:05:00 2013 (r250978) @@ -53,8 +53,12 @@ struct null_node { LIST_ENTRY(null_node) null_hash; /* Hash list */ struct vnode *null_lowervp; /* VREFed once */ struct vnode *null_vnode; /* Back pointer */ + u_int null_flags; }; +#define NULLV_NOUNLOCK 0x0001 +#define NULLV_DROP 0x0002 + #define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data)) #define VTONULL(vp) ((struct null_node *)(vp)->v_data) #define NULLTOV(xp) ((xp)->null_vnode) Modified: stable/9/sys/fs/nullfs/null_subr.c ============================================================================== --- stable/9/sys/fs/nullfs/null_subr.c Sat May 25 10:35:05 2013 (r250977) +++ stable/9/sys/fs/nullfs/null_subr.c Sat May 25 11:05:00 2013 (r250978) @@ -251,6 +251,7 @@ null_nodeget(mp, lowervp, vpp) xp->null_vnode = vp; xp->null_lowervp = lowervp; + xp->null_flags = 0; vp->v_type = lowervp->v_type; vp->v_data = xp; vp->v_vnlock = lowervp->v_vnlock; Modified: stable/9/sys/fs/nullfs/null_vfsops.c ============================================================================== --- stable/9/sys/fs/nullfs/null_vfsops.c Sat May 25 10:35:05 2013 (r250977) +++ stable/9/sys/fs/nullfs/null_vfsops.c Sat May 25 11:05:00 2013 (r250978) @@ -65,7 +65,6 @@ static vfs_statfs_t nullfs_statfs; static vfs_unmount_t nullfs_unmount; static vfs_vget_t nullfs_vget; static vfs_extattrctl_t nullfs_extattrctl; -static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp; /* * Mount null layer @@ -390,8 +389,49 @@ nullfs_reclaim_lowervp(struct mount *mp, vp = null_hashget(mp, lowervp); if (vp == NULL) return; + VTONULL(vp)->null_flags |= NULLV_NOUNLOCK; vgone(vp); - vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY); + vput(vp); +} + +static void +nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp) +{ + struct vnode *vp; + struct null_node *xp; + + vp = null_hashget(mp, lowervp); + if (vp == NULL) + return; + xp = VTONULL(vp); + xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK; + vhold(vp); + vunref(vp); + + if (vp->v_usecount == 0) { + /* + * If vunref() dropped the last use reference on the + * nullfs vnode, it must be reclaimed, and its lock + * was split from the lower vnode lock. Need to do + * extra unlock before allowing the final vdrop() to + * free the vnode. + */ + KASSERT((vp->v_iflag & VI_DOOMED) != 0, + ("not reclaimed nullfs vnode %p", vp)); + VOP_UNLOCK(vp, 0); + } else { + /* + * Otherwise, the nullfs vnode still shares the lock + * with the lower vnode, and must not be unlocked. + * Also clear the NULLV_NOUNLOCK, the flag is not + * relevant for future reclamations. + */ + ASSERT_VOP_ELOCKED(vp, "unlink_lowervp"); + KASSERT((vp->v_iflag & VI_DOOMED) == 0, + ("reclaimed nullfs vnode %p", vp)); + xp->null_flags &= ~NULLV_NOUNLOCK; + } + vdrop(vp); } static struct vfsops null_vfsops = { @@ -407,6 +447,7 @@ static struct vfsops null_vfsops = { .vfs_unmount = nullfs_unmount, .vfs_vget = nullfs_vget, .vfs_reclaim_lowervp = nullfs_reclaim_lowervp, + .vfs_unlink_lowervp = nullfs_unlink_lowervp, }; VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL); Modified: stable/9/sys/fs/nullfs/null_vnops.c ============================================================================== --- stable/9/sys/fs/nullfs/null_vnops.c Sat May 25 10:35:05 2013 (r250977) +++ stable/9/sys/fs/nullfs/null_vnops.c Sat May 25 11:05:00 2013 (r250978) @@ -692,18 +692,24 @@ null_unlock(struct vop_unlock_args *ap) static int null_inactive(struct vop_inactive_args *ap __unused) { - struct vnode *vp; + struct vnode *vp, *lvp; + struct null_node *xp; struct mount *mp; struct null_mount *xmp; vp = ap->a_vp; + xp = VTONULL(vp); + lvp = NULLVPTOLOWERVP(vp); mp = vp->v_mount; xmp = MOUNTTONULLMOUNT(mp); - if ((xmp->nullm_flags & NULLM_CACHE) == 0) { + if ((xmp->nullm_flags & NULLM_CACHE) == 0 || + (xp->null_flags & NULLV_DROP) != 0 || + (lvp->v_vflag & VV_NOSYNC) != 0) { /* * If this is the last reference and caching of the - * nullfs vnodes is not enabled, then free up the - * vnode so as not to tie up the lower vnodes. + * nullfs vnodes is not enabled, or the lower vnode is + * deleted, then free up the vnode so as not to tie up + * the lower vnodes. */ vp->v_object = NULL; vrecycle(vp, curthread); @@ -748,7 +754,10 @@ null_reclaim(struct vop_reclaim_args *ap */ if (vp->v_writecount > 0) VOP_ADD_WRITECOUNT(lowervp, -1); - vput(lowervp); + if ((xp->null_flags & NULLV_NOUNLOCK) != 0) + vunref(lowervp); + else + vput(lowervp); free(xp, M_NULLFSNODE); return (0); Modified: stable/9/sys/kern/vfs_subr.c ============================================================================== --- stable/9/sys/kern/vfs_subr.c Sat May 25 10:35:05 2013 (r250977) +++ stable/9/sys/kern/vfs_subr.c Sat May 25 11:05:00 2013 (r250978) @@ -2756,19 +2756,20 @@ vgone(struct vnode *vp) } static void -vgonel_reclaim_lowervp_vfs(struct mount *mp __unused, +notify_lowervp_vfs_dummy(struct mount *mp __unused, struct vnode *lowervp __unused) { } /* - * Notify upper mounts about reclaimed vnode. + * Notify upper mounts about reclaimed or unlinked vnode. */ -static void -vgonel_reclaim_lowervp(struct vnode *vp) +void +vfs_notify_upper(struct vnode *vp, int event) { static struct vfsops vgonel_vfsops = { - .vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs + .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, + .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, }; struct mount *mp, *ump, *mmp; @@ -2792,7 +2793,17 @@ vgonel_reclaim_lowervp(struct vnode *vp) } TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); MNT_IUNLOCK(mp); - VFS_RECLAIM_LOWERVP(ump, vp); + switch (event) { + case VFS_NOTIFY_UPPER_RECLAIM: + VFS_RECLAIM_LOWERVP(ump, vp); + break; + case VFS_NOTIFY_UPPER_UNLINK: + VFS_UNLINK_LOWERVP(ump, vp); + break; + default: + KASSERT(0, ("invalid event %d", event)); + break; + } MNT_ILOCK(mp); ump = TAILQ_NEXT(mmp, mnt_upper_link); TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); @@ -2839,7 +2850,7 @@ vgonel(struct vnode *vp) active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); - vgonel_reclaim_lowervp(vp); + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); /* * Clean out any buffers associated with the vnode. Modified: stable/9/sys/kern/vfs_syscalls.c ============================================================================== --- stable/9/sys/kern/vfs_syscalls.c Sat May 25 10:35:05 2013 (r250977) +++ stable/9/sys/kern/vfs_syscalls.c Sat May 25 11:05:00 2013 (r250978) @@ -1949,6 +1949,7 @@ restart: if (error) goto out; #endif + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: @@ -3947,6 +3948,7 @@ restart: return (error); goto restart; } + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: Modified: stable/9/sys/sys/mount.h ============================================================================== --- stable/9/sys/sys/mount.h Sat May 25 10:35:05 2013 (r250977) +++ stable/9/sys/sys/mount.h Sat May 25 11:05:00 2013 (r250978) @@ -630,7 +630,7 @@ typedef int vfs_mount_t(struct mount *mp typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op, struct sysctl_req *req); typedef void vfs_susp_clean_t(struct mount *mp); -typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode *lowervp); +typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp); struct vfsops { vfs_mount_t *vfs_mount; @@ -648,7 +648,8 @@ struct vfsops { vfs_extattrctl_t *vfs_extattrctl; vfs_sysctl_t *vfs_sysctl; vfs_susp_clean_t *vfs_susp_clean; - vfs_reclaim_lowervp_t *vfs_reclaim_lowervp; + vfs_notify_lowervp_t *vfs_reclaim_lowervp; + vfs_notify_lowervp_t *vfs_unlink_lowervp; }; vfs_statfs_t __vfs_statfs; @@ -713,6 +714,12 @@ vfs_statfs_t __vfs_statfs; mtx_assert(&Giant, MA_OWNED); \ } while (0) +#define VFS_UNLINK_LOWERVP(MP, VP) do { \ + if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \ + (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \ + } \ +} while (0) + #define VFS_KNOTE_LOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ @@ -725,6 +732,9 @@ vfs_statfs_t __vfs_statfs; VN_KNOTE((vp), (hint), 0); \ } while (0) +#define VFS_NOTIFY_UPPER_RECLAIM 1 +#define VFS_NOTIFY_UPPER_UNLINK 2 + #include /* @@ -803,6 +813,7 @@ int vfs_modevent(module_t, int, void *); void vfs_mount_error(struct mount *, const char *, ...); void vfs_mountroot(void); /* mount our root filesystem */ void vfs_mountedfrom(struct mount *, const char *from); +void vfs_notify_upper(struct vnode *, int); void vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp); void vfs_ref(struct mount *);