From owner-svn-src-stable-10@freebsd.org Tue Aug 23 07:55:07 2016 Return-Path: Delivered-To: svn-src-stable-10@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id A6F02BC2FF0; Tue, 23 Aug 2016 07:55:07 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 6D29319A1; Tue, 23 Aug 2016 07:55:07 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u7N7t6Wh059844; Tue, 23 Aug 2016 07:55:06 GMT (envelope-from avg@FreeBSD.org) Received: (from avg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u7N7t6Nj059837; Tue, 23 Aug 2016 07:55:06 GMT (envelope-from avg@FreeBSD.org) Message-Id: <201608230755.u7N7t6Nj059837@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: avg set sender to avg@FreeBSD.org using -f From: Andriy Gapon Date: Tue, 23 Aug 2016 07:55:06 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r304671 - in stable/10/sys/cddl: compat/opensolaris/sys contrib/opensolaris/uts/common/fs/zfs contrib/opensolaris/uts/common/fs/zfs/sys X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable-10@freebsd.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: SVN commit messages for only the 10-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 23 Aug 2016 07:55:07 -0000 Author: avg Date: Tue Aug 23 07:55:05 2016 New Revision: 304671 URL: https://svnweb.freebsd.org/changeset/base/304671 Log: MFC r303763,303791,303869: zfs: honour and make use of vfs vnode locking protocol PR: 209158 Modified: stable/10/sys/cddl/compat/opensolaris/sys/vnode.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c Directory Properties: stable/10/ (props changed) Modified: stable/10/sys/cddl/compat/opensolaris/sys/vnode.h ============================================================================== --- stable/10/sys/cddl/compat/opensolaris/sys/vnode.h Tue Aug 23 07:54:14 2016 (r304670) +++ stable/10/sys/cddl/compat/opensolaris/sys/vnode.h Tue Aug 23 07:55:05 2016 (r304671) @@ -87,8 +87,6 @@ vn_is_readonly(vnode_t *vp) #define VN_RELE(v) vrele(v) #define VN_URELE(v) vput(v) -#define VOP_REALVP(vp, vpp, ct) (*(vpp) = (vp), 0) - #define vnevent_create(vp, ct) do { } while (0) #define vnevent_link(vp, ct) do { } while (0) #define vnevent_remove(vp, dvp, name, ct) do { } while (0) Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h ============================================================================== --- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h Tue Aug 23 07:54:14 2016 (r304670) +++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_dir.h Tue Aug 23 07:55:05 2016 (r304671) @@ -48,18 +48,18 @@ extern "C" { #define IS_ROOT_NODE 0x01 /* create a root node */ #define IS_XATTR 0x02 /* create an extended attribute node */ -extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **, - int, int *, pathname_t *); -extern void zfs_dirent_unlock(zfs_dirlock_t *); -extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int); -extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int, +extern int zfs_dirent_lookup(znode_t *, const char *, znode_t **, int); +extern int zfs_link_create(znode_t *, const char *, znode_t *, dmu_tx_t *, int); +extern int zfs_link_destroy(znode_t *, const char *, znode_t *, dmu_tx_t *, int, boolean_t *); -extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *, - pathname_t *); +#if 0 +extern int zfs_dirlook(vnode_t *, const char *, vnode_t **, int); +#else +extern int zfs_dirlook(znode_t *, const char *name, znode_t **); +#endif extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *, uint_t, znode_t **, zfs_acl_ids_t *); extern void zfs_rmnode(znode_t *); -extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old); extern boolean_t zfs_dirempty(znode_t *); extern void zfs_unlinked_add(znode_t *, dmu_tx_t *); extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs); Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h ============================================================================== --- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Tue Aug 23 07:54:14 2016 (r304670) +++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Tue Aug 23 07:55:05 2016 (r304671) @@ -75,6 +75,7 @@ struct zfsvfs { boolean_t z_use_fuids; /* version allows fuids */ boolean_t z_replay; /* set during ZIL replay */ boolean_t z_use_sa; /* version allow system attributes */ + boolean_t z_use_namecache;/* make use of FreeBSD name cache */ uint64_t z_version; /* ZPL version */ uint64_t z_shares_dir; /* hidden shares dir */ kmutex_t z_lock; Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h ============================================================================== --- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h Tue Aug 23 07:54:14 2016 (r304670) +++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h Tue Aug 23 07:55:05 2016 (r304671) @@ -181,10 +181,12 @@ typedef struct znode { struct zfsvfs *z_zfsvfs; vnode_t *z_vnode; uint64_t z_id; /* object ID for this znode */ +#ifdef illumos kmutex_t z_lock; /* znode modification lock */ krwlock_t z_parent_lock; /* parent lock for directories */ krwlock_t z_name_lock; /* "master" lock for dirent locks */ zfs_dirlock_t *z_dirlocks; /* directory entry lock list */ +#endif kmutex_t z_range_lock; /* protects changes to z_range_avl */ avl_tree_t z_range_avl; /* avl tree of file range locks */ uint8_t z_unlinked; /* file has been unlinked */ Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c ============================================================================== --- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c Tue Aug 23 07:54:14 2016 (r304670) +++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c Tue Aug 23 07:55:05 2016 (r304671) @@ -1058,8 +1058,7 @@ zfs_mode_compute(uint64_t fmode, zfs_acl * create a new acl and leave any cached acl in place. */ static int -zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp, - boolean_t will_modify) +zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify) { zfs_acl_t *aclp; int aclsize; @@ -1068,26 +1067,15 @@ zfs_acl_node_read(znode_t *zp, boolean_t zfs_acl_phys_t znode_acl; int version; int error; - boolean_t drop_lock = B_FALSE; ASSERT(MUTEX_HELD(&zp->z_acl_lock)); + ASSERT_VOP_LOCKED(ZTOV(zp), __func__); if (zp->z_acl_cached && !will_modify) { *aclpp = zp->z_acl_cached; return (0); } - /* - * close race where znode could be upgrade while trying to - * read the znode attributes. - * - * But this could only happen if the file isn't already an SA - * znode - */ - if (!zp->z_is_sa && !have_lock) { - mutex_enter(&zp->z_lock); - drop_lock = B_TRUE; - } version = zfs_znode_acl_version(zp); if ((error = zfs_acl_znode_info(zp, &aclsize, @@ -1133,8 +1121,6 @@ zfs_acl_node_read(znode_t *zp, boolean_t if (!will_modify) zp->z_acl_cached = aclp; done: - if (drop_lock) - mutex_exit(&zp->z_lock); return (error); } @@ -1161,10 +1147,10 @@ zfs_acl_chown_setattr(znode_t *zp) int error; zfs_acl_t *aclp; - ASSERT(MUTEX_HELD(&zp->z_lock)); + ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); ASSERT(MUTEX_HELD(&zp->z_acl_lock)); - if ((error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE)) == 0) + if ((error = zfs_acl_node_read(zp, &aclp, B_FALSE)) == 0) zp->z_mode = zfs_mode_compute(zp->z_mode, aclp, &zp->z_pflags, zp->z_uid, zp->z_gid); return (error); @@ -1445,18 +1431,17 @@ zfs_acl_chmod_setattr(znode_t *zp, zfs_a int error = 0; mutex_enter(&zp->z_acl_lock); - mutex_enter(&zp->z_lock); + ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD) *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp)); else - error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE); + error = zfs_acl_node_read(zp, aclp, B_TRUE); if (error == 0) { (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS; zfs_acl_chmod(ZTOV(zp)->v_type, mode, (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp); } - mutex_exit(&zp->z_lock); mutex_exit(&zp->z_acl_lock); return (error); @@ -1627,6 +1612,7 @@ zfs_acl_ids_create(znode_t *dzp, int fla boolean_t need_chmod = B_TRUE; boolean_t inherited = B_FALSE; + ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); bzero(acl_ids, sizeof (zfs_acl_ids_t)); acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode); @@ -1710,12 +1696,10 @@ zfs_acl_ids_create(znode_t *dzp, int fla if (acl_ids->z_aclp == NULL) { mutex_enter(&dzp->z_acl_lock); - mutex_enter(&dzp->z_lock); if (!(flag & IS_ROOT_NODE) && (dzp->z_pflags & ZFS_INHERIT_ACE) && !(dzp->z_pflags & ZFS_XATTR)) { - VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE, - &paclp, B_FALSE)); + VERIFY(0 == zfs_acl_node_read(dzp, &paclp, B_FALSE)); acl_ids->z_aclp = zfs_acl_inherit(zfsvfs, vap->va_type, paclp, acl_ids->z_mode, &need_chmod); inherited = B_TRUE; @@ -1724,7 +1708,6 @@ zfs_acl_ids_create(znode_t *dzp, int fla zfs_acl_alloc(zfs_acl_version_zp(dzp)); acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; } - mutex_exit(&dzp->z_lock); mutex_exit(&dzp->z_acl_lock); if (need_chmod) { acl_ids->z_aclp->z_hints |= (vap->va_type == VDIR) ? @@ -1790,7 +1773,8 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsec mutex_enter(&zp->z_acl_lock); - error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); + ASSERT_VOP_LOCKED(ZTOV(zp), __func__); + error = zfs_acl_node_read(zp, &aclp, B_FALSE); if (error != 0) { mutex_exit(&zp->z_acl_lock); return (error); @@ -1938,6 +1922,7 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsec boolean_t fuid_dirtied; uint64_t acl_obj; + ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); if (mask == 0) return (SET_ERROR(ENOSYS)); @@ -1962,7 +1947,6 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsec } top: mutex_enter(&zp->z_acl_lock); - mutex_enter(&zp->z_lock); tx = dmu_tx_create(zfsvfs->z_os); @@ -1994,7 +1978,6 @@ top: zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { - mutex_exit(&zp->z_lock); mutex_exit(&zp->z_acl_lock); if (error == ERESTART) { @@ -2020,7 +2003,6 @@ top: if (fuidp) zfs_fuid_info_free(fuidp); dmu_tx_commit(tx); - mutex_exit(&zp->z_lock); mutex_exit(&zp->z_acl_lock); return (error); @@ -2124,7 +2106,8 @@ zfs_zaccess_aces_check(znode_t *zp, uint mutex_enter(&zp->z_acl_lock); - error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); + ASSERT_VOP_LOCKED(ZTOV(zp), __func__); + error = zfs_acl_node_read(zp, &aclp, B_FALSE); if (error != 0) { mutex_exit(&zp->z_acl_lock); return (error); Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c ============================================================================== --- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c Tue Aug 23 07:54:14 2016 (r304670) +++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c Tue Aug 23 07:55:05 2016 (r304671) @@ -58,96 +58,64 @@ #include /* - * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups + * zfs_match_find() is used by zfs_dirent_lookup() to peform zap lookups * of names after deciding which is the appropriate lookup interface. */ static int -zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact, - boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid) +zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name, + boolean_t exact, uint64_t *zoid) { int error; if (zfsvfs->z_norm) { - matchtype_t mt = MT_FIRST; - boolean_t conflict = B_FALSE; - size_t bufsz = 0; - char *buf = NULL; - - if (rpnp) { - buf = rpnp->pn_buf; - bufsz = rpnp->pn_bufsize; - } - if (exact) - mt = MT_EXACT; + matchtype_t mt = exact? MT_EXACT : MT_FIRST; + /* * In the non-mixed case we only expect there would ever * be one match, but we need to use the normalizing lookup. */ error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1, - zoid, mt, buf, bufsz, &conflict); - if (!error && deflags) - *deflags = conflict ? ED_CASE_CONFLICT : 0; + zoid, mt, NULL, 0, NULL); } else { error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid); } *zoid = ZFS_DIRENT_OBJ(*zoid); - if (error == ENOENT && update) - dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE); - return (error); } /* - * Lock a directory entry. A dirlock on protects that name - * in dzp's directory zap object. As long as you hold a dirlock, you can - * assume two things: (1) dzp cannot be reaped, and (2) no other thread - * can change the zap entry for (i.e. link or unlink) this name. + * Look up a directory entry under a locked vnode. + * dvp being locked gives us a guarantee that there are no concurrent + * modification of the directory and, thus, if a node can be found in + * the directory, then it must not be unlinked. * * Input arguments: * dzp - znode for directory * name - name of entry to lock * flag - ZNEW: if the entry already exists, fail with EEXIST. * ZEXISTS: if the entry does not exist, fail with ENOENT. - * ZSHARED: allow concurrent access with other ZSHARED callers. * ZXATTR: we want dzp's xattr directory - * ZCILOOK: On a mixed sensitivity file system, - * this lookup should be case-insensitive. - * ZCIEXACT: On a purely case-insensitive file system, - * this lookup should be case-sensitive. - * ZRENAMING: we are locking for renaming, force narrow locks - * ZHAVELOCK: Don't grab the z_name_lock for this call. The - * current thread already holds it. * * Output arguments: * zpp - pointer to the znode for the entry (NULL if there isn't one) - * dlpp - pointer to the dirlock for this entry (NULL on error) - * direntflags - (case-insensitive lookup only) - * flags if multiple case-sensitive matches exist in directory - * realpnp - (case-insensitive lookup only) - * actual name matched within the directory * * Return value: 0 on success or errno on failure. * * NOTE: Always checks for, and rejects, '.' and '..'. - * NOTE: For case-insensitive file systems we take wide locks (see below), - * but return znode pointers to a single match. */ int -zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, - int flag, int *direntflags, pathname_t *realpnp) +zfs_dirent_lookup(znode_t *dzp, const char *name, znode_t **zpp, int flag) { zfsvfs_t *zfsvfs = dzp->z_zfsvfs; - zfs_dirlock_t *dl; - boolean_t update; boolean_t exact; uint64_t zoid; vnode_t *vp = NULL; int error = 0; - int cmpflags; + + ASSERT_VOP_LOCKED(ZTOV(dzp), __func__); *zpp = NULL; - *dlpp = NULL; /* * Verify that we are not trying to lock '.', '..', or '.zfs' @@ -161,280 +129,93 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, zn * Case sensitivity and normalization preferences are set when * the file system is created. These are stored in the * zfsvfs->z_case and zfsvfs->z_norm fields. These choices - * affect what vnodes can be cached in the DNLC, how we - * perform zap lookups, and the "width" of our dirlocks. + * affect how we perform zap lookups. * - * A normal dirlock locks a single name. Note that with - * normalization a name can be composed multiple ways, but - * when normalized, these names all compare equal. A wide - * dirlock locks multiple names. We need these when the file - * system is supporting mixed-mode access. It is sometimes - * necessary to lock all case permutations of file name at - * once so that simultaneous case-insensitive/case-sensitive - * behaves as rationally as possible. - */ - - /* * Decide if exact matches should be requested when performing * a zap lookup on file systems supporting case-insensitive * access. - */ - exact = - ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) || - ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK)); - - /* - * Only look in or update the DNLC if we are looking for the - * name on a file system that does not require normalization - * or case folding. We can also look there if we happen to be - * on a non-normalizing, mixed sensitivity file system IF we - * are looking for the exact name. * - * Maybe can add TO-UPPERed version of name to dnlc in ci-only - * case for performance improvement? - */ - update = !zfsvfs->z_norm || - ((zfsvfs->z_case == ZFS_CASE_MIXED) && - !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); - - /* - * ZRENAMING indicates we are in a situation where we should - * take narrow locks regardless of the file system's - * preferences for normalizing and case folding. This will - * prevent us deadlocking trying to grab the same wide lock - * twice if the two names happen to be case-insensitive - * matches. - */ - if (flag & ZRENAMING) - cmpflags = 0; - else - cmpflags = zfsvfs->z_norm; - - /* - * Wait until there are no locks on this name. - * - * Don't grab the the lock if it is already held. However, cannot - * have both ZSHARED and ZHAVELOCK together. - */ - ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); - if (!(flag & ZHAVELOCK)) - rw_enter(&dzp->z_name_lock, RW_READER); - - mutex_enter(&dzp->z_lock); - for (;;) { - if (dzp->z_unlinked && !(flag & ZXATTR)) { - mutex_exit(&dzp->z_lock); - if (!(flag & ZHAVELOCK)) - rw_exit(&dzp->z_name_lock); - return (SET_ERROR(ENOENT)); - } - for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) { - if ((u8_strcmp(name, dl->dl_name, 0, cmpflags, - U8_UNICODE_LATEST, &error) == 0) || error != 0) - break; - } - if (error != 0) { - mutex_exit(&dzp->z_lock); - if (!(flag & ZHAVELOCK)) - rw_exit(&dzp->z_name_lock); - return (SET_ERROR(ENOENT)); - } - if (dl == NULL) { - size_t namesize; - - /* - * Allocate a new dirlock and add it to the list. - */ - namesize = strlen(name) + 1; - dl = kmem_alloc(sizeof (zfs_dirlock_t) + namesize, - KM_SLEEP); - cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); - dl->dl_name = (char *)(dl + 1); - bcopy(name, dl->dl_name, namesize); - dl->dl_sharecnt = 0; - dl->dl_namelock = 0; - dl->dl_namesize = namesize; - dl->dl_dzp = dzp; - dl->dl_next = dzp->z_dirlocks; - dzp->z_dirlocks = dl; - break; - } - if ((flag & ZSHARED) && dl->dl_sharecnt != 0) - break; - cv_wait(&dl->dl_cv, &dzp->z_lock); - } - - /* - * If the z_name_lock was NOT held for this dirlock record it. + * NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE + * because in that case MT_EXACT and MT_FIRST should produce exactly + * the same result. */ - if (flag & ZHAVELOCK) - dl->dl_namelock = 1; + exact = zfsvfs->z_case == ZFS_CASE_MIXED; - if (flag & ZSHARED) - dl->dl_sharecnt++; - - mutex_exit(&dzp->z_lock); - - /* - * We have a dirlock on the name. (Note that it is the dirlock, - * not the dzp's z_lock, that protects the name in the zap object.) - * See if there's an object by this name; if so, put a hold on it. - */ + if (dzp->z_unlinked && !(flag & ZXATTR)) + return (ENOENT); if (flag & ZXATTR) { error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, sizeof (zoid)); if (error == 0) error = (zoid == 0 ? ENOENT : 0); } else { - if (update) - vp = dnlc_lookup(ZTOV(dzp), name); - if (vp == DNLC_NO_VNODE) { - VN_RELE(vp); - error = SET_ERROR(ENOENT); - } else if (vp) { - if (flag & ZNEW) { - zfs_dirent_unlock(dl); - VN_RELE(vp); - return (SET_ERROR(EEXIST)); - } - *dlpp = dl; - *zpp = VTOZ(vp); - return (0); - } else { - error = zfs_match_find(zfsvfs, dzp, name, exact, - update, direntflags, realpnp, &zoid); - } + error = zfs_match_find(zfsvfs, dzp, name, exact, &zoid); } if (error) { if (error != ENOENT || (flag & ZEXISTS)) { - zfs_dirent_unlock(dl); return (error); } } else { if (flag & ZNEW) { - zfs_dirent_unlock(dl); return (SET_ERROR(EEXIST)); } error = zfs_zget(zfsvfs, zoid, zpp); - if (error) { - zfs_dirent_unlock(dl); + if (error) return (error); - } - if (!(flag & ZXATTR) && update) - dnlc_update(ZTOV(dzp), name, ZTOV(*zpp)); + ASSERT(!(*zpp)->z_unlinked); } - *dlpp = dl; - return (0); } -/* - * Unlock this directory entry and wake anyone who was waiting for it. - */ -void -zfs_dirent_unlock(zfs_dirlock_t *dl) +static int +zfs_dd_lookup(znode_t *dzp, znode_t **zpp) { - znode_t *dzp = dl->dl_dzp; - zfs_dirlock_t **prev_dl, *cur_dl; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + znode_t *zp; + uint64_t parent; + int error; - mutex_enter(&dzp->z_lock); + ASSERT_VOP_LOCKED(ZTOV(dzp), __func__); + ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock)); - if (!dl->dl_namelock) - rw_exit(&dzp->z_name_lock); + if (dzp->z_unlinked) + return (ENOENT); - if (dl->dl_sharecnt > 1) { - dl->dl_sharecnt--; - mutex_exit(&dzp->z_lock); - return; - } - prev_dl = &dzp->z_dirlocks; - while ((cur_dl = *prev_dl) != dl) - prev_dl = &cur_dl->dl_next; - *prev_dl = dl->dl_next; - cv_broadcast(&dl->dl_cv); - mutex_exit(&dzp->z_lock); + if ((error = sa_lookup(dzp->z_sa_hdl, + SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) + return (error); - cv_destroy(&dl->dl_cv); - kmem_free(dl, sizeof (*dl) + dl->dl_namesize); + error = zfs_zget(zfsvfs, parent, &zp); + if (error == 0) + *zpp = zp; + return (error); } -/* - * Look up an entry in a directory. - * - * NOTE: '.' and '..' are handled as special cases because - * no directory entries are actually stored for them. If this is - * the root of a filesystem, then '.zfs' is also treated as a - * special pseudo-directory. - */ int -zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags, - int *deflg, pathname_t *rpnp) +zfs_dirlook(znode_t *dzp, const char *name, znode_t **zpp) { - zfs_dirlock_t *dl; + zfsvfs_t *zfsvfs = dzp->z_zfsvfs; znode_t *zp; int error = 0; - uint64_t parent; - int unlinked; - - if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { - mutex_enter(&dzp->z_lock); - unlinked = dzp->z_unlinked; - mutex_exit(&dzp->z_lock); - if (unlinked) - return (ENOENT); - - *vpp = ZTOV(dzp); - VN_HOLD(*vpp); - } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; - /* - * If we are a snapshot mounted under .zfs, return - * the vp for the snapshot directory. - */ - if ((error = sa_lookup(dzp->z_sa_hdl, - SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) - return (error); - if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) { - error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, - "snapshot", vpp, NULL, 0, NULL, kcred, - NULL, NULL, NULL); - return (error); - } + ASSERT_VOP_LOCKED(ZTOV(dzp), __func__); + ASSERT(RRM_READ_HELD(&zfsvfs->z_teardown_lock)); - mutex_enter(&dzp->z_lock); - unlinked = dzp->z_unlinked; - mutex_exit(&dzp->z_lock); - if (unlinked) - return (ENOENT); + if (dzp->z_unlinked) + return (SET_ERROR(ENOENT)); - rw_enter(&dzp->z_parent_lock, RW_READER); - error = zfs_zget(zfsvfs, parent, &zp); - if (error == 0) - *vpp = ZTOV(zp); - rw_exit(&dzp->z_parent_lock); - } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { - *vpp = zfsctl_root(dzp); + if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { + *zpp = dzp; + } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { + error = zfs_dd_lookup(dzp, zpp); } else { - int zf; - - zf = ZEXISTS | ZSHARED; - if (flags & FIGNORECASE) - zf |= ZCILOOK; - - error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp); + error = zfs_dirent_lookup(dzp, name, &zp, ZEXISTS); if (error == 0) { - *vpp = ZTOV(zp); - zfs_dirent_unlock(dl); dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ + *zpp = zp; } - rpnp = NULL; } - - if ((flags & FIGNORECASE) && rpnp && !error) - (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize); - return (error); } @@ -510,8 +291,9 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) if (error != 0) continue; + vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY); zp->z_unlinked = B_TRUE; - VN_RELE(ZTOV(zp)); + vput(ZTOV(zp)); } zap_cursor_fini(&zc); } @@ -535,7 +317,6 @@ zfs_purgedir(znode_t *dzp) znode_t *xzp; dmu_tx_t *tx; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; - zfs_dirlock_t dl; int skipped = 0; int error; @@ -549,6 +330,7 @@ zfs_purgedir(znode_t *dzp) continue; } + vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY); ASSERT((ZTOV(xzp)->v_type == VREG) || (ZTOV(xzp)->v_type == VLNK)); @@ -563,20 +345,17 @@ zfs_purgedir(znode_t *dzp) error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - VN_RELE(ZTOV(xzp)); + vput(ZTOV(xzp)); skipped += 1; continue; } - bzero(&dl, sizeof (dl)); - dl.dl_dzp = dzp; - dl.dl_name = zap.za_name; - error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); + error = zfs_link_destroy(dzp, zap.za_name, xzp, tx, 0, NULL); if (error) skipped += 1; dmu_tx_commit(tx); - VN_RELE(ZTOV(xzp)); + vput(ZTOV(xzp)); } zap_cursor_fini(&zc); if (error != ENOENT) @@ -596,6 +375,7 @@ zfs_rmnode(znode_t *zp) int error; ASSERT(zp->z_links == 0); + ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); /* * If this is an attribute directory, purge its contents. @@ -634,7 +414,8 @@ zfs_rmnode(znode_t *zp) &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { error = zfs_zget(zfsvfs, xattr_obj, &xzp); - ASSERT(error == 0); + ASSERT3S(error, ==, 0); + vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY); } acl_obj = zfs_external_acl(zp); @@ -668,12 +449,10 @@ zfs_rmnode(znode_t *zp) if (xzp) { ASSERT(error == 0); - mutex_enter(&xzp->z_lock); xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ xzp->z_links = 0; /* no more links to it */ VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), &xzp->z_links, sizeof (xzp->z_links), tx)); - mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); } @@ -686,7 +465,7 @@ zfs_rmnode(znode_t *zp) dmu_tx_commit(tx); out: if (xzp) - VN_RELE(ZTOV(xzp)); + vput(ZTOV(xzp)); } static uint64_t @@ -700,12 +479,12 @@ zfs_dirent(znode_t *zp, uint64_t mode) } /* - * Link zp into dl. Can only fail if zp has been unlinked. + * Link zp into dzp. Can only fail if zp has been unlinked. */ int -zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) +zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, + int flag) { - znode_t *dzp = dl->dl_dzp; zfsvfs_t *zfsvfs = zp->z_zfsvfs; vnode_t *vp = ZTOV(zp); uint64_t value; @@ -715,18 +494,32 @@ zfs_link_create(zfs_dirlock_t *dl, znode int count = 0; int error; - mutex_enter(&zp->z_lock); - + ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); + ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); +#if 0 + if (zp_is_dir) { + error = 0; + if (dzp->z_links >= LINK_MAX) + error = SET_ERROR(EMLINK); + return (error); + } +#endif if (!(flag & ZRENAMING)) { if (zp->z_unlinked) { /* no new links to unlinked zp */ ASSERT(!(flag & (ZNEW | ZEXISTS))); - mutex_exit(&zp->z_lock); return (SET_ERROR(ENOENT)); } +#if 0 + if (zp->z_links >= LINK_MAX) { + return (SET_ERROR(EMLINK)); + } +#endif zp->z_links++; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &zp->z_links, sizeof (zp->z_links)); + } else { + ASSERT(zp->z_unlinked == 0); } SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &dzp->z_id, sizeof (dzp->z_id)); @@ -740,11 +533,8 @@ zfs_link_create(zfs_dirlock_t *dl, znode ctime, B_TRUE); } error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); - - mutex_exit(&zp->z_lock); + ASSERT0(error); - mutex_enter(&dzp->z_lock); dzp->z_size++; dzp->z_links += zp_is_dir; count = 0; @@ -760,55 +550,48 @@ zfs_link_create(zfs_dirlock_t *dl, znode &dzp->z_pflags, sizeof (dzp->z_pflags)); zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); - mutex_exit(&dzp->z_lock); + ASSERT0(error); value = zfs_dirent(zp, zp->z_mode); - error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, + error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, name, 8, 1, &value, tx); - ASSERT(error == 0); - - dnlc_update(ZTOV(dzp), dl->dl_name, vp); + VERIFY0(error); return (0); } static int -zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, +zfs_dropname(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, int flag) { int error; if (zp->z_zfsvfs->z_norm) { - if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && - (flag & ZCIEXACT)) || - ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) && - !(flag & ZCILOOK))) + if (zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) error = zap_remove_norm(zp->z_zfsvfs->z_os, - dzp->z_id, dl->dl_name, MT_EXACT, tx); + dzp->z_id, name, MT_EXACT, tx); else error = zap_remove_norm(zp->z_zfsvfs->z_os, - dzp->z_id, dl->dl_name, MT_FIRST, tx); + dzp->z_id, name, MT_FIRST, tx); } else { error = zap_remove(zp->z_zfsvfs->z_os, - dzp->z_id, dl->dl_name, tx); + dzp->z_id, name, tx); } return (error); } /* - * Unlink zp from dl, and mark zp for deletion if this was the last link. + * Unlink zp from dzp, and mark zp for deletion if this was the last link. * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST). * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. * If it's non-NULL, we use it to indicate whether the znode needs deletion, * and it's the caller's job to do it. */ int -zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, - boolean_t *unlinkedp) +zfs_link_destroy(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, + int flag, boolean_t *unlinkedp) { - znode_t *dzp = dl->dl_dzp; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; vnode_t *vp = ZTOV(zp); int zp_is_dir = (vp->v_type == VDIR); @@ -818,22 +601,12 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod int count = 0; int error; - dnlc_remove(ZTOV(dzp), dl->dl_name); + ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); + ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); if (!(flag & ZRENAMING)) { - if (vn_vfswlock(vp)) /* prevent new mounts on zp */ - return (SET_ERROR(EBUSY)); - - if (vn_ismntpt(vp)) { /* don't remove mount point */ - vn_vfsunlock(vp); - return (SET_ERROR(EBUSY)); - } - - mutex_enter(&zp->z_lock); if (zp_is_dir && !zfs_dirempty(zp)) { - mutex_exit(&zp->z_lock); - vn_vfsunlock(vp); #ifdef illumos return (SET_ERROR(EEXIST)); #else @@ -846,10 +619,8 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod * First try removing the name from the directory; if that * fails, return the error. */ - error = zfs_dropname(dl, zp, dzp, tx, flag); + error = zfs_dropname(dzp, name, zp, tx, flag); if (error != 0) { - mutex_exit(&zp->z_lock); - vn_vfsunlock(vp); return (error); } @@ -876,16 +647,14 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod NULL, &zp->z_links, sizeof (zp->z_links)); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); count = 0; - ASSERT(error == 0); - mutex_exit(&zp->z_lock); - vn_vfsunlock(vp); + ASSERT0(error); } else { - error = zfs_dropname(dl, zp, dzp, tx, flag); + ASSERT(zp->z_unlinked == 0); + error = zfs_dropname(dzp, name, zp, tx, flag); if (error != 0) return (error); } - mutex_enter(&dzp->z_lock); dzp->z_size--; /* one dirent removed */ dzp->z_links -= zp_is_dir; /* ".." link from zp */ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), @@ -900,8 +669,7 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); - mutex_exit(&dzp->z_lock); + ASSERT0(error); if (unlinkedp != NULL) *unlinkedp = unlinked; @@ -912,14 +680,12 @@ zfs_link_destroy(zfs_dirlock_t *dl, znod } /* - * Indicate whether the directory is empty. Works with or without z_lock - * held, but can only be consider a hint in the latter case. Returns true - * if only "." and ".." remain and there's no work in progress. + * Indicate whether the directory is empty. */ boolean_t zfs_dirempty(znode_t *dzp) { - return (dzp->z_size == 2 && dzp->z_dirlocks == 0); + return (dzp->z_size == 2); } int @@ -1013,23 +779,20 @@ zfs_get_xattrdir(znode_t *zp, vnode_t ** { zfsvfs_t *zfsvfs = zp->z_zfsvfs; znode_t *xzp; - zfs_dirlock_t *dl; vattr_t va; int error; top: - error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL); + error = zfs_dirent_lookup(zp, "", &xzp, ZXATTR); if (error) return (error); if (xzp != NULL) { *xvpp = ZTOV(xzp); - zfs_dirent_unlock(dl); return (0); } if (!(flags & CREATE_XATTR_DIR)) { - zfs_dirent_unlock(dl); #ifdef illumos return (SET_ERROR(ENOENT)); #else @@ -1038,7 +801,6 @@ top: } if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { - zfs_dirent_unlock(dl); return (SET_ERROR(EROFS)); *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***