Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 21 Feb 2017 17:47:09 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r314048 - in head/sys/cddl: compat/opensolaris/kern contrib/opensolaris/uts/common/fs/zfs contrib/opensolaris/uts/common/fs/zfs/sys
Message-ID:  <201702211747.v1LHl9t1094779@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: avg
Date: Tue Feb 21 17:47:08 2017
New Revision: 314048
URL: https://svnweb.freebsd.org/changeset/base/314048

Log:
  reimplement zfsctl (.zfs) support
  
  The current code is written on top of GFS, a library with the generic
  support for writing filesystems, which was ported from illumos.
  Because of significant differences between illumos VFS and FreeBSD
  VFS models, both the GFS and zfsctl code were heavily modified to
  work on FreeBSD.  Nonetheless, they still contain quite a few ugly
  hacks and bugs.
  
  This is a reimplementation of the zfsctl code where the VFS-specific
  bits are written from scratch and only the code that interacts with
  the rest of ZFS is reused.
  
  Some highlights.
  
  We use two types of nodes, static and on-demand. The static nodes
  are used for permanent directories like .zfs, .zfs/snapshot, etc. The
  on-demand nodes are used for ephemeral directories that act as snapshot
  mount points.
  Initially only static nodes are created. Their vnodes are instantiated
  when they are looked up. The on-demand nodes and vnodes are instantiated
  as needed and the nodes are destroyed as soon as the corresponding
  vnodes are reclaimed.
  We also try very hard to ensure that uncovered snapshot vnodes do not
  linger.  They are supposed to become inactive as soon as they are
  uncovered and we try to recycle them immediately.
  When a filesystem is unmounted all snapshots under .zfs are unmounted
  first, then all vnodes are flushed and finally the static .zfs nodes
  are destroyed.
  
  There are some changes outside of zfsctl code too.
  z_ctldir is never used directly (as it is an opaque pointer),
  zfsctl_root() has to be used instead.  The function returns a locked
  vnode now, so it accepts a lock flags parameter.  The function can
  also fail now, e.g. during force unmounting, whereas previously it
  was infallible.
  zfsctl_root_lookup() is retired, instead of it VOP_LOOKUP() on the .zfs
  vnode (obtained with zfsctl_root) is used.
  
  Some ideas are picked from an independent work by will.
  
  Reviewed by:	asomers, smh
  MFC after:	1 month
  Relnotes:	maybe
  Differential Revision: https://reviews.freebsd.org/D7421

Modified:
  head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c

Modified: head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
==============================================================================
--- head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c	Tue Feb 21 17:34:27 2017	(r314047)
+++ head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c	Tue Feb 21 17:47:08 2017	(r314048)
@@ -196,10 +196,17 @@ mount_snapshot(kthread_t *td, vnode_t **
 	td->td_ucred = cr;
 
 	if (error != 0) {
+		/*
+		 * Clear VI_MOUNT and decrement the use count "atomically",
+		 * under the vnode lock.  This is not strictly required,
+		 * but makes it easier to reason about the life-cycle and
+		 * ownership of the covered vnode.
+		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		VI_LOCK(vp);
 		vp->v_iflag &= ~VI_MOUNT;
 		VI_UNLOCK(vp);
-		vrele(vp);
+		vput(vp);
 		vfs_unbusy(mp);
 		vfs_freeopts(mp->mnt_optnew);
 		vfs_mount_destroy(mp);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h	Tue Feb 21 17:34:27 2017	(r314047)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h	Tue Feb 21 17:47:08 2017	(r314048)
@@ -44,7 +44,7 @@ extern "C" {
 
 void zfsctl_create(zfsvfs_t *);
 void zfsctl_destroy(zfsvfs_t *);
-vnode_t *zfsctl_root(znode_t *);
+int zfsctl_root(zfsvfs_t *, int, vnode_t **);
 void zfsctl_init(void);
 void zfsctl_fini(void);
 boolean_t zfsctl_is_node(vnode_t *);
@@ -53,10 +53,6 @@ int zfsctl_rename_snapshot(const char *f
 int zfsctl_destroy_snapshot(const char *snapname, int force);
 int zfsctl_umount_snapshots(vfs_t *, int, cred_t *);
 
-int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
-    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
-    int *direntflags, pathname_t *realpnp);
-
 int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
 
 #define	ZFSCTL_INO_ROOT		0x1

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h	Tue Feb 21 17:34:27 2017	(r314047)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h	Tue Feb 21 17:47:08 2017	(r314048)
@@ -68,7 +68,7 @@ struct zfsvfs {
 	krwlock_t	z_teardown_inactive_lock;
 	list_t		z_all_znodes;	/* all vnodes in the fs */
 	kmutex_t	z_znodes_lock;	/* lock for z_all_znodes */
-	vnode_t		*z_ctldir;	/* .zfs directory pointer */
+	struct zfsctl_root	*z_ctldir;	/* .zfs directory pointer */
 	boolean_t	z_show_ctldir;	/* expose .zfs in the root dir */
 	boolean_t	z_issnap;	/* true if this is a snapshot */
 	boolean_t	z_vscan;	/* virus scan on/off */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c	Tue Feb 21 17:34:27 2017	(r314047)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c	Tue Feb 21 17:47:08 2017	(r314048)
@@ -70,136 +70,249 @@
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/namei.h>
-#include <sys/gfs.h>
 #include <sys/stat.h>
 #include <sys/dmu.h>
+#include <sys/dsl_dataset.h>
 #include <sys/dsl_destroy.h>
 #include <sys/dsl_deleg.h>
 #include <sys/mount.h>
-#include <sys/sunddi.h>
+#include <sys/zap.h>
 
 #include "zfs_namecheck.h"
 
-typedef struct zfsctl_node {
-	gfs_dir_t	zc_gfs_private;
-	uint64_t	zc_id;
-	timestruc_t	zc_cmtime;	/* ctime and mtime, always the same */
-} zfsctl_node_t;
-
-typedef struct zfsctl_snapdir {
-	zfsctl_node_t	sd_node;
-	kmutex_t	sd_lock;
-	avl_tree_t	sd_snaps;
-} zfsctl_snapdir_t;
+/*
+ * "Synthetic" filesystem implementation.
+ */
 
-typedef struct {
-	char		*se_name;
-	vnode_t		*se_root;
-	avl_node_t	se_node;
-} zfs_snapentry_t;
-
-static int
-snapentry_compare(const void *a, const void *b)
-{
-	const zfs_snapentry_t *sa = a;
-	const zfs_snapentry_t *sb = b;
-	int ret = strcmp(sa->se_name, sb->se_name);
-
-	if (ret < 0)
-		return (-1);
-	else if (ret > 0)
-		return (1);
-	else
-		return (0);
-}
-
-#ifdef illumos
-vnodeops_t *zfsctl_ops_root;
-vnodeops_t *zfsctl_ops_snapdir;
-vnodeops_t *zfsctl_ops_snapshot;
-vnodeops_t *zfsctl_ops_shares;
-vnodeops_t *zfsctl_ops_shares_dir;
-
-static const fs_operation_def_t zfsctl_tops_root[];
-static const fs_operation_def_t zfsctl_tops_snapdir[];
-static const fs_operation_def_t zfsctl_tops_snapshot[];
-static const fs_operation_def_t zfsctl_tops_shares[];
-#else
-static struct vop_vector zfsctl_ops_root;
-static struct vop_vector zfsctl_ops_snapdir;
-static struct vop_vector zfsctl_ops_snapshot;
-static struct vop_vector zfsctl_ops_shares;
-static struct vop_vector zfsctl_ops_shares_dir;
-#endif
+/*
+ * Assert that A implies B.
+ */
+#define KASSERT_IMPLY(A, B, msg)	KASSERT(!(A) || (B), (msg));
 
-static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
-static vnode_t *zfsctl_mknode_shares(vnode_t *);
-static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
-static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);
-
-#ifdef illumos
-static gfs_opsvec_t zfsctl_opsvec[] = {
-	{ ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
-	{ ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
-	{ ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
-	{ ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
-	{ ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
-	{ NULL }
-};
-#endif
+static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes");
+
+typedef struct sfs_node {
+	char		sn_name[ZFS_MAX_DATASET_NAME_LEN];
+	uint64_t	sn_parent_id;
+	uint64_t	sn_id;
+} sfs_node_t;
 
 /*
- * Root directory elements.  We only have two entries
- * snapshot and shares.
+ * Check the parent's ID as well as the node's to account for a chance
+ * that IDs originating from different domains (snapshot IDs, artifical
+ * IDs, znode IDs) may clash.
  */
-static gfs_dirent_t zfsctl_root_entries[] = {
-	{ "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
-	{ "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
-	{ NULL }
-};
+static int
+sfs_compare_ids(struct vnode *vp, void *arg)
+{
+	sfs_node_t *n1 = vp->v_data;
+	sfs_node_t *n2 = arg;
+	bool equal;
+
+	equal = n1->sn_id == n2->sn_id &&
+	    n1->sn_parent_id == n2->sn_parent_id;
+
+	/* Zero means equality. */
+	return (!equal);
+}
+
+static int
+sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id,
+   uint64_t id, struct vnode **vpp)
+{
+	sfs_node_t search;
+	int err;
+
+	search.sn_id = id;
+	search.sn_parent_id = parent_id;
+	err = vfs_hash_get(mp, (u_int)id, flags, curthread, vpp,
+	    sfs_compare_ids, &search);
+	return (err);
+}
+
+static int
+sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id,
+   uint64_t id, struct vnode **vpp)
+{
+	int err;
+
+	KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data"));
+	err = vfs_hash_insert(vp, (u_int)id, flags, curthread, vpp,
+	    sfs_compare_ids, vp->v_data);
+	return (err);
+}
+
+static void
+sfs_vnode_remove(struct vnode *vp)
+{
+	vfs_hash_remove(vp);
+}
+
+typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg);
+
+static int
+sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
+    const char *tag, struct vop_vector *vops,
+    sfs_vnode_setup_fn setup, void *arg,
+    struct vnode **vpp)
+{
+	struct vnode *vp;
+	int error;
+
+	error = sfs_vnode_get(mp, flags, parent_id, id, vpp);
+	if (error != 0 || *vpp != NULL) {
+		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
+		    "sfs vnode with no data");
+		return (error);
+	}
+
+	/* Allocate a new vnode/inode. */
+	error = getnewvnode(tag, mp, vops, &vp);
+	if (error != 0) {
+		*vpp = NULL;
+		return (error);
+	}
+
+	/*
+	 * Exclusively lock the vnode vnode while it's being constructed.
+	 */
+	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
+	error = insmntque(vp, mp);
+	if (error != 0) {
+		*vpp = NULL;
+		return (error);
+	}
+
+	setup(vp, arg);
+
+	error = sfs_vnode_insert(vp, flags, parent_id, id, vpp);
+	if (error != 0 || *vpp != NULL) {
+		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
+		    "sfs vnode with no data");
+		return (error);
+	}
+
+	*vpp = vp;
+	return (0);
+}
+
+static void
+sfs_print_node(sfs_node_t *node)
+{
+	printf("\tname = %s\n", node->sn_name);
+	printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id);
+	printf("\tid = %ju\n", (uintmax_t)node->sn_id);
+}
+
+static sfs_node_t *
+sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id)
+{
+	struct sfs_node *node;
+
+	KASSERT(strlen(name) < sizeof(node->sn_name),
+	    ("sfs node name is too long"));
+	KASSERT(size >= sizeof(*node), ("sfs node size is too small"));
+	node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO);
+	strlcpy(node->sn_name, name, sizeof(node->sn_name));
+	node->sn_parent_id = parent_id;
+	node->sn_id = id;
+
+	return (node);
+}
+
+static void
+sfs_destroy_node(sfs_node_t *node)
+{
+	free(node, M_SFSNODES);
+}
+
+static void *
+sfs_reclaim_vnode(vnode_t *vp)
+{
+	sfs_node_t *node;
+	void *data;
+
+	sfs_vnode_remove(vp);
+	data = vp->v_data;
+	vp->v_data = NULL;
+	return (data);
+}
+
+static int
+sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
+    uio_t *uio, off_t *offp)
+{
+	struct dirent entry;
+	int error;
+
+	/* Reset ncookies for subsequent use of vfs_read_dirent. */
+	if (ap->a_ncookies != NULL)
+		*ap->a_ncookies = 0;
+
+	if (uio->uio_resid < sizeof(entry))
+		return (SET_ERROR(EINVAL));
+
+	if (uio->uio_offset < 0)
+		return (SET_ERROR(EINVAL));
+	if (uio->uio_offset == 0) {
+		entry.d_fileno = id;
+		entry.d_type = DT_DIR;
+		entry.d_name[0] = '.';
+		entry.d_name[1] = '\0';
+		entry.d_namlen = 1;
+		entry.d_reclen = sizeof(entry);
+		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
+		if (error != 0)
+			return (SET_ERROR(error));
+	}
+
+	if (uio->uio_offset < sizeof(entry))
+		return (SET_ERROR(EINVAL));
+	if (uio->uio_offset == sizeof(entry)) {
+		entry.d_fileno = parent_id;
+		entry.d_type = DT_DIR;
+		entry.d_name[0] = '.';
+		entry.d_name[1] = '.';
+		entry.d_name[2] = '\0';
+		entry.d_namlen = 2;
+		entry.d_reclen = sizeof(entry);
+		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
+		if (error != 0)
+			return (SET_ERROR(error));
+	}
 
-/* include . and .. in the calculation */
-#define	NROOT_ENTRIES	((sizeof (zfsctl_root_entries) / \
-    sizeof (gfs_dirent_t)) + 1)
+	if (offp != NULL)
+		*offp = 2 * sizeof(entry);
+	return (0);
+}
 
 
 /*
- * Initialize the various GFS pieces we'll need to create and manipulate .zfs
- * directories.  This is called from the ZFS init routine, and initializes the
- * vnode ops vectors that we'll be using.
+ * .zfs inode namespace
+ *
+ * We need to generate unique inode numbers for all files and directories
+ * within the .zfs pseudo-filesystem.  We use the following scheme:
+ *
+ * 	ENTRY			ZFSCTL_INODE
+ * 	.zfs			1
+ * 	.zfs/snapshot		2
+ * 	.zfs/snapshot/<snap>	objectid(snap)
  */
+#define	ZFSCTL_INO_SNAP(id)	(id)
+
+static struct vop_vector zfsctl_ops_root;
+static struct vop_vector zfsctl_ops_snapdir;
+static struct vop_vector zfsctl_ops_snapshot;
+static struct vop_vector zfsctl_ops_shares_dir;
+
 void
 zfsctl_init(void)
 {
-#ifdef illumos
-	VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
-#endif
 }
 
 void
 zfsctl_fini(void)
 {
-#ifdef illumos
-	/*
-	 * Remove vfsctl vnode ops
-	 */
-	if (zfsctl_ops_root)
-		vn_freevnodeops(zfsctl_ops_root);
-	if (zfsctl_ops_snapdir)
-		vn_freevnodeops(zfsctl_ops_snapdir);
-	if (zfsctl_ops_snapshot)
-		vn_freevnodeops(zfsctl_ops_snapshot);
-	if (zfsctl_ops_shares)
-		vn_freevnodeops(zfsctl_ops_shares);
-	if (zfsctl_ops_shares_dir)
-		vn_freevnodeops(zfsctl_ops_shares_dir);
-
-	zfsctl_ops_root = NULL;
-	zfsctl_ops_snapdir = NULL;
-	zfsctl_ops_snapshot = NULL;
-	zfsctl_ops_shares = NULL;
-	zfsctl_ops_shares_dir = NULL;
-#endif	/* illumos */
 }
 
 boolean_t
@@ -208,106 +321,114 @@ zfsctl_is_node(vnode_t *vp)
 	return (vn_matchops(vp, zfsctl_ops_root) ||
 	    vn_matchops(vp, zfsctl_ops_snapdir) ||
 	    vn_matchops(vp, zfsctl_ops_snapshot) ||
-	    vn_matchops(vp, zfsctl_ops_shares) ||
 	    vn_matchops(vp, zfsctl_ops_shares_dir));
 
 }
 
-/*
- * Return the inode number associated with the 'snapshot' or
- * 'shares' directory.
- */
-/* ARGSUSED */
-static ino64_t
-zfsctl_root_inode_cb(vnode_t *vp, int index)
-{
-	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
-
-	ASSERT(index < 2);
-
-	if (index == 0)
-		return (ZFSCTL_INO_SNAPDIR);
+typedef struct zfsctl_root {
+	sfs_node_t	node;
+	sfs_node_t	*snapdir;
+	timestruc_t	cmtime;
+} zfsctl_root_t;
 
-	return (zfsvfs->z_shares_dir);
-}
 
 /*
- * Create the '.zfs' directory.  This directory is cached as part of the VFS
- * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
- * therefore checks against a vfs_count of 2 instead of 1.  This reference
- * is removed when the ctldir is destroyed in the unmount.
+ * Create the '.zfs' directory.
  */
 void
 zfsctl_create(zfsvfs_t *zfsvfs)
 {
-	vnode_t *vp, *rvp;
-	zfsctl_node_t *zcp;
+	zfsctl_root_t *dot_zfs;
+	sfs_node_t *snapdir;
+	vnode_t *rvp;
 	uint64_t crtime[2];
 
 	ASSERT(zfsvfs->z_ctldir == NULL);
 
-	vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
-	    &zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
-	    zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
-	zcp = vp->v_data;
-	zcp->zc_id = ZFSCTL_INO_ROOT;
+	snapdir = sfs_alloc_node(sizeof(*snapdir), "snapshot", ZFSCTL_INO_ROOT,
+	    ZFSCTL_INO_SNAPDIR);
+	dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof(*dot_zfs), ".zfs", 0,
+	    ZFSCTL_INO_ROOT);
+	dot_zfs->snapdir = snapdir;
 
 	VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0);
 	VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
-	    &crtime, sizeof (crtime)));
-	ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);
-	VN_URELE(rvp);
-
-	/*
-	 * We're only faking the fact that we have a root of a filesystem for
-	 * the sake of the GFS interfaces.  Undo the flag manipulation it did
-	 * for us.
-	 */
-	vp->v_vflag &= ~VV_ROOT;
+	    &crtime, sizeof(crtime)));
+	ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime);
+	vput(rvp);
 
-	zfsvfs->z_ctldir = vp;
-
-	VOP_UNLOCK(vp, 0);
+	zfsvfs->z_ctldir = dot_zfs;
 }
 
 /*
  * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
- * There might still be more references if we were force unmounted, but only
- * new zfs_inactive() calls can occur and they don't reference .zfs
+ * The nodes must not have any associated vnodes by now as they should be
+ * vflush-ed.
  */
 void
 zfsctl_destroy(zfsvfs_t *zfsvfs)
 {
-	VN_RELE(zfsvfs->z_ctldir);
+	sfs_destroy_node(zfsvfs->z_ctldir->snapdir);
+	sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir);
 	zfsvfs->z_ctldir = NULL;
 }
 
-/*
- * Given a root znode, retrieve the associated .zfs directory.
- * Add a hold to the vnode and return it.
- */
-vnode_t *
-zfsctl_root(znode_t *zp)
+static int
+zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags,
+    struct vnode **vpp)
+{
+	return (VFS_ROOT(mp, flags, vpp));
+}
+
+static void
+zfsctl_common_vnode_setup(vnode_t *vp, void *arg)
 {
-	ASSERT(zfs_has_ctldir(zp));
-	VN_HOLD(zp->z_zfsvfs->z_ctldir);
-	return (zp->z_zfsvfs->z_ctldir);
+	ASSERT_VOP_ELOCKED(vp, __func__);
+
+	/* We support shared locking. */
+	VN_LOCK_ASHARE(vp);
+	vp->v_type = VDIR;
+	vp->v_data = arg;
 }
 
 static int
-zfsctl_common_print(ap)
-	struct vop_print_args /* {
-		struct vnode *a_vp;
-	} */ *ap;
+zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags,
+    struct vnode **vpp)
 {
-	vnode_t *vp = ap->a_vp;
-	gfs_file_t *fp = vp->v_data;
+	void *node;
+	int err;
 
-	printf("    parent = %p\n", fp->gfs_parent);
-	printf("    type = %d\n", fp->gfs_type);
-	printf("    index = %d\n", fp->gfs_index);
-	printf("    ino = %ju\n", (uintmax_t)fp->gfs_ino);
-	return (0);
+	node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir;
+	err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root,
+	    zfsctl_common_vnode_setup, node, vpp);
+	return (err);
+}
+
+static int
+zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags,
+    struct vnode **vpp)
+{
+	void *node;
+	int err;
+
+	node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir->snapdir;
+	err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs",
+	   &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp);
+	return (err);
+}
+
+/*
+ * Given a root znode, retrieve the associated .zfs directory.
+ * Add a hold to the vnode and return it.
+ */
+int
+zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp)
+{
+	vnode_t *vp;
+	int error;
+
+	error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp);
+	return (error);
 }
 
 /*
@@ -350,18 +471,8 @@ zfsctl_common_access(ap)
 {
 	accmode_t accmode = ap->a_accmode;
 
-#ifdef TODO
-	if (flags & V_ACE_MASK) {
-		if (accmode & ACE_ALL_WRITE_PERMS)
-			return (SET_ERROR(EACCES));
-	} else {
-#endif
-		if (accmode & VWRITE)
-			return (SET_ERROR(EACCES));
-#ifdef TODO
-	}
-#endif
-
+	if (accmode & VWRITE)
+		return (SET_ERROR(EACCES));
 	return (0);
 }
 
@@ -372,6 +483,9 @@ static void
 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
 {
 	timestruc_t	now;
+	sfs_node_t *node;
+
+	node = vp->v_data;
 
 	vap->va_uid = 0;
 	vap->va_gid = 0;
@@ -394,6 +508,11 @@ zfsctl_common_getattr(vnode_t *vp, vattr
 	vap->va_atime = now;
 	/* FreeBSD: Reset chflags(2) flags. */
 	vap->va_flags = 0;
+
+	vap->va_nodeid = node->sn_id;
+
+	/* At least '.' and '..'. */
+	vap->va_nlink = 2;
 }
 
 /*ARGSUSED*/
@@ -406,81 +525,46 @@ zfsctl_common_fid(ap)
 {
 	vnode_t		*vp = ap->a_vp;
 	fid_t		*fidp = (void *)ap->a_fid;
-	zfsvfs_t	*zfsvfs = vp->v_vfsp->vfs_data;
-	zfsctl_node_t	*zcp = vp->v_data;
-	uint64_t	object = zcp->zc_id;
+	sfs_node_t	*node = vp->v_data;
+	uint64_t	object = node->sn_id;
 	zfid_short_t	*zfid;
 	int		i;
 
-	ZFS_ENTER(zfsvfs);
-
-#ifdef illumos
-	if (fidp->fid_len < SHORT_FID_LEN) {
-		fidp->fid_len = SHORT_FID_LEN;
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(ENOSPC));
-	}
-#endif
-
 	zfid = (zfid_short_t *)fidp;
-
 	zfid->zf_len = SHORT_FID_LEN;
 
-	for (i = 0; i < sizeof (zfid->zf_object); i++)
+	for (i = 0; i < sizeof(zfid->zf_object); i++)
 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 
-	/* .zfs znodes always have a generation number of 0 */
-	for (i = 0; i < sizeof (zfid->zf_gen); i++)
+	/* .zfs nodes always have a generation number of 0 */
+	for (i = 0; i < sizeof(zfid->zf_gen); i++)
 		zfid->zf_gen[i] = 0;
 
-	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
-
-/*ARGSUSED*/
 static int
-zfsctl_shares_fid(ap)
-	struct vop_fid_args /* {
+zfsctl_common_reclaim(ap)
+	struct vop_reclaim_args /* {
 		struct vnode *a_vp;
-		struct fid *a_fid;
+		struct thread *a_td;
 	} */ *ap;
 {
-	vnode_t		*vp = ap->a_vp;
-	fid_t		*fidp = (void *)ap->a_fid;
-	zfsvfs_t	*zfsvfs = vp->v_vfsp->vfs_data;
-	znode_t		*dzp;
-	int		error;
-
-	ZFS_ENTER(zfsvfs);
-
-	if (zfsvfs->z_shares_dir == 0) {
-		ZFS_EXIT(zfsvfs);
-		return (SET_ERROR(ENOTSUP));
-	}
-
-	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
-		error = VOP_FID(ZTOV(dzp), fidp);
-		VN_RELE(ZTOV(dzp));
-	}
+	vnode_t *vp = ap->a_vp;
 
-	ZFS_EXIT(zfsvfs);
-	return (error);
+	(void) sfs_reclaim_vnode(vp);
+	return (0);
 }
 
-/*
- * .zfs inode namespace
- *
- * We need to generate unique inode numbers for all files and directories
- * within the .zfs pseudo-filesystem.  We use the following scheme:
- *
- * 	ENTRY			ZFSCTL_INODE
- * 	.zfs			1
- * 	.zfs/snapshot		2
- * 	.zfs/snapshot/<snap>	objectid(snap)
- */
-
-#define	ZFSCTL_INO_SNAP(id)	(id)
+static int
+zfsctl_common_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	sfs_print_node(ap->a_vp->v_data);
+	return (0);
+}
 
 /*
  * Get root directory attributes.
@@ -496,156 +580,132 @@ zfsctl_root_getattr(ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
-	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
-	zfsctl_node_t *zcp = vp->v_data;
-
-	ZFS_ENTER(zfsvfs);
-	vap->va_nodeid = ZFSCTL_INO_ROOT;
-	vap->va_nlink = vap->va_size = NROOT_ENTRIES;
-	vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
-	vap->va_birthtime = vap->va_ctime;
+	zfsctl_root_t *node = vp->v_data;
 
 	zfsctl_common_getattr(vp, vap);
-	ZFS_EXIT(zfsvfs);
-
+	vap->va_ctime = node->cmtime;
+	vap->va_mtime = vap->va_ctime;
+	vap->va_birthtime = vap->va_ctime;
+	vap->va_nlink += 1; /* snapdir */
+	vap->va_size = vap->va_nlink;
 	return (0);
 }
 
 /*
- * Special case the handling of "..".
+ * When we lookup "." we still can be asked to lock it
+ * differently, can't we?
  */
-/* ARGSUSED */
 int
-zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
-    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
-    int *direntflags, pathname_t *realpnp)
+zfsctl_relock_dot(vnode_t *dvp, int ltype)
 {
-	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
-	int err;
-
-	/*
-	 * No extended attributes allowed under .zfs
-	 */
-	if (flags & LOOKUP_XATTR)
-		return (SET_ERROR(EINVAL));
-
-	ZFS_ENTER(zfsvfs);
-
-	if (strcmp(nm, "..") == 0) {
-#ifdef illumos
-		err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp);
-#else
-		/*
-		 * NB: can not use VFS_ROOT here as it would acquire
-		 * the vnode lock of the parent (root) vnode while
-		 * holding the child's (.zfs) lock.
-		 */
-		znode_t *rootzp;
-
-		err = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
-		if (err == 0)
-			*vpp = ZTOV(rootzp);
-#endif
-	} else {
-		err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
-		    cr, ct, direntflags, realpnp);
+	vref(dvp);
+	if (ltype != VOP_ISLOCKED(dvp)) {
+		if (ltype == LK_EXCLUSIVE)
+			vn_lock(dvp, LK_UPGRADE | LK_RETRY);
+		else /* if (ltype == LK_SHARED) */
+			vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
+
+		/* Relock for the "." case may left us with reclaimed vnode. */
+		if ((dvp->v_iflag & VI_DOOMED) != 0) {
+			vrele(dvp);
+			return (SET_ERROR(ENOENT));
+		}
 	}
-
-	ZFS_EXIT(zfsvfs);
-
-	return (err);
+	return (0);
 }
 
-static int
-zfsctl_freebsd_root_lookup(ap)
+/*
+ * Special case the handling of "..".
+ */
+int
+zfsctl_root_lookup(ap)
 	struct vop_lookup_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
+	struct componentname *cnp = ap->a_cnp;
 	vnode_t *dvp = ap->a_dvp;
 	vnode_t **vpp = ap->a_vpp;
 	cred_t *cr = ap->a_cnp->cn_cred;
 	int flags = ap->a_cnp->cn_flags;
 	int lkflags = ap->a_cnp->cn_lkflags;
 	int nameiop = ap->a_cnp->cn_nameiop;
-	char nm[NAME_MAX + 1];
 	int err;
+	int ltype;
 
-	if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE))
-		return (EOPNOTSUPP);
+	ASSERT(dvp->v_type == VDIR);
 
-	ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
-	strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
-relookup:
-	err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL);
-	if (err == 0 && (nm[0] != '.' || nm[1] != '\0')) {
-		if (flags & ISDOTDOT) {
-			VOP_UNLOCK(dvp, 0);
-			err = vn_lock(*vpp, lkflags);
-			if (err != 0) {
-				vrele(*vpp);
-				*vpp = NULL;
-			}
-			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
-		} else {
-			err = vn_lock(*vpp, LK_EXCLUSIVE);
-			if (err != 0) {
-				VERIFY3S(err, ==, ENOENT);
-				goto relookup;
-			}
-		}
-	}
-	return (err);
-}
+	if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
+		return (SET_ERROR(ENOTSUP));
 
-static int
-zfsctl_root_print(ap)
-	struct vop_print_args /* {
+	if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
+		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
+		if (err == 0)
+			*vpp = dvp;
+	} else if ((flags & ISDOTDOT) != 0) {
+		err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL,
+		    lkflags, vpp);
+	} else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) {
+		err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp);
+	} else {
+		err = SET_ERROR(ENOENT);
+	}
+	if (err != 0)
+		*vpp = NULL;
+	return (err);
+}
+
+static int
+zfsctl_root_readdir(ap)
+	struct vop_readdir_args /* {
 		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		int *ncookies;
+		u_long **a_cookies;
 	} */ *ap;
 {
-	printf("    .zfs node\n");
-	zfsctl_common_print(ap);
+	struct dirent entry;
+	vnode_t *vp = ap->a_vp;
+	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+	zfsctl_root_t *node = vp->v_data;
+	uio_t *uio = ap->a_uio;
+	int *eofp = ap->a_eofflag;
+	off_t dots_offset;
+	int error;
+
+	ASSERT(vp->v_type == VDIR);
+
+	error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, uio,
+	    &dots_offset);
+	if (error != 0) {
+		if (error == ENAMETOOLONG) /* ran out of destination space */
+			error = 0;
+		return (error);
+	}
+	if (uio->uio_offset != dots_offset)
+		return (SET_ERROR(EINVAL));
+
+	CTASSERT(sizeof(node->snapdir->sn_name) <= sizeof(entry.d_name));
+	entry.d_fileno = node->snapdir->sn_id;
+	entry.d_type = DT_DIR;
+	strcpy(entry.d_name, node->snapdir->sn_name);
+	entry.d_namlen = strlen(entry.d_name);
+	entry.d_reclen = sizeof(entry);
+	error = vfs_read_dirent(ap, &entry, uio->uio_offset);
+	if (error != 0) {
+		if (error == ENAMETOOLONG)
+			error = 0;
+		return (SET_ERROR(error));
+	}
+	if (eofp != NULL)
+		*eofp = 1;
 	return (0);
 }
 
-#ifdef illumos
-static int
-zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
-    caller_context_t *ct)
-{
-	/*
-	 * We only care about ACL_ENABLED so that libsec can
-	 * display ACL correctly and not default to POSIX draft.
-	 */
-	if (cmd == _PC_ACL_ENABLED) {
-		*valp = _ACL_ACE_ENABLED;
-		return (0);
-	}
-
-	return (fs_pathconf(vp, cmd, valp, cr, ct));
-}
-#endif	/* illumos */
-
-#ifdef illumos
-static const fs_operation_def_t zfsctl_tops_root[] = {
-	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open }	},
-	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close }	},
-	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
-	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_root_getattr }	},
-	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access }	},
-	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir } 	},
-	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_root_lookup }	},
-	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
-	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive }	},
-	{ VOPNAME_PATHCONF,	{ .vop_pathconf = zfsctl_pathconf }	},
-	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid	}	},
-	{ NULL }
-};
-#endif	/* illumos */
-
 static struct vop_vector zfsctl_ops_root = {
 	.vop_default =	&default_vnodeops,
 	.vop_open =	zfsctl_common_open,
@@ -653,29 +713,19 @@ static struct vop_vector zfsctl_ops_root
 	.vop_ioctl =	VOP_EINVAL,
 	.vop_getattr =	zfsctl_root_getattr,
 	.vop_access =	zfsctl_common_access,
-	.vop_readdir =	gfs_vop_readdir,
-	.vop_lookup =	zfsctl_freebsd_root_lookup,
+	.vop_readdir =	zfsctl_root_readdir,
+	.vop_lookup =	zfsctl_root_lookup,
 	.vop_inactive =	VOP_NULL,
-	.vop_reclaim =	gfs_vop_reclaim,
-#ifdef TODO
-	.vop_pathconf =	zfsctl_pathconf,
-#endif
+	.vop_reclaim =	zfsctl_common_reclaim,
 	.vop_fid =	zfsctl_common_fid,
-	.vop_print =	zfsctl_root_print,
+	.vop_print =	zfsctl_common_print,

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201702211747.v1LHl9t1094779>