Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 19 Jan 2016 17:00:25 +0000 (UTC)
From:      Alan Somers <asomers@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r294329 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID:  <201601191700.u0JH0P6k061610@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: asomers
Date: Tue Jan 19 17:00:25 2016
New Revision: 294329
URL: https://svnweb.freebsd.org/changeset/base/294329

Log:
  Disallow zvol-backed ZFS pools
  
  Using zvols as backing devices for ZFS pools is fraught with panics and
  deadlocks. For example, attempting to online a missing device in the
  presence of a zvol can cause a panic when vdev_geom tastes the zvol.  Better
  to completely disable vdev_geom from ever opening a zvol. The solution
  relies on setting a thread-local variable during vdev_geom_open, and
  returning EOPNOTSUPP during zvol_open if that thread-local variable is set.
  
  Remove the check for MUTEX_HELD(&zfsdev_state_lock) in zvol_open. Its intent
  was to prevent a recursive mutex acquisition panic. However, the new check
  for the thread-local variable also fixes that problem.
  
  Also, fix a panic in vdev_geom_taste_orphan. For an unknown reason, this
  function was set to panic. But it can occur that a device disappears during
  tasting, and it causes no problems to ignore this departure.
  
  Reviewed by:	delphij
  MFC after:	1 week
  Relnotes:	yes
  Sponsored by:	Spectra Logic Corp
  Differential Revision:	https://reviews.freebsd.org/D4986

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Tue Jan 19 16:18:26 2016	(r294328)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Tue Jan 19 17:00:25 2016	(r294329)
@@ -367,6 +367,7 @@ extern void vdev_set_min_asize(vdev_t *v
  */
 /* zdb uses this tunable, so it must be declared here to make lint happy. */
 extern int zfs_vdev_cache_size;
+extern uint_t zfs_geom_probe_vdev_key;
 
 #ifdef illumos
 /*

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Tue Jan 19 16:18:26 2016	(r294328)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Tue Jan 19 17:00:25 2016	(r294329)
@@ -61,6 +61,13 @@ static int vdev_geom_bio_delete_disable;
 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
     &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
 
+/*
+ * Thread local storage used to indicate when a thread is probing geoms
+ * for their guids.  If NULL, this thread is not tasting geoms.  If non NULL,
+ * it is looking for a replacement for the vdev_t* that is its value.
+ */
+uint_t zfs_geom_probe_vdev_key;
+
 static void
 vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
 { 
@@ -326,9 +333,8 @@ vdev_geom_io(struct g_consumer *cp, int 
 static void
 vdev_geom_taste_orphan(struct g_consumer *cp)
 {
-
-	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
-	    cp->provider->name));
+	ZFS_LOG(0, "WARNING: Orphan %s while tasting its VDev GUID.",
+	    cp->provider->name);
 }
 
 static int
@@ -575,7 +581,6 @@ vdev_geom_attach_by_guids(vdev_t *vd)
 	g_topology_assert();
 
 	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
-	/* This orphan function should be never called. */
 	zgp->orphan = vdev_geom_taste_orphan;
 	zcp = g_new_consumer(zgp);
 
@@ -702,6 +707,9 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
 	size_t bufsize;
 	int error;
 
+	/* Set the TLS to indicate downstack that we should not access zvols*/
+	VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);
+
 	/*
 	 * We must have a pathname, and it must be absolute.
 	 */
@@ -751,6 +759,9 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
 		}
 	}
 
+	/* Clear the TLS now that tasting is done */
+	VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);
+
 	if (cp == NULL) {
 		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
 		error = ENOENT;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	Tue Jan 19 16:18:26 2016	(r294328)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	Tue Jan 19 17:00:25 2016	(r294329)
@@ -206,6 +206,7 @@ extern void zfs_fini(void);
 uint_t zfs_fsyncer_key;
 extern uint_t rrw_tsd_key;
 static uint_t zfs_allow_log_key;
+extern uint_t zfs_geom_probe_vdev_key;
 
 typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
 typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
@@ -6602,6 +6603,7 @@ zfs__init(void)
 	tsd_create(&zfs_fsyncer_key, NULL);
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
+	tsd_create(&zfs_geom_probe_vdev_key, NULL);
 
 	printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
 	root_mount_rel(zfs_root_token);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c	Tue Jan 19 16:18:26 2016	(r294328)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c	Tue Jan 19 17:00:25 2016	(r294329)
@@ -1114,36 +1114,30 @@ zvol_open(struct g_provider *pp, int fla
 		return (err);
 	}
 #else	/* !illumos */
-	boolean_t locked = B_FALSE;
-
-	/*
-	 * Protect against recursively entering spa_namespace_lock
-	 * when spa_open() is used for a pool on a (local) ZVOL(s).
-	 * This is needed since we replaced upstream zfsdev_state_lock
-	 * with spa_namespace_lock in the ZVOL code.
-	 * We are using the same trick as spa_open().
-	 * Note that calls in zvol_first_open which need to resolve
-	 * pool name to a spa object will enter spa_open()
-	 * recursively, but that function already has all the
-	 * necessary protection.
-	 */
-	if (!MUTEX_HELD(&zfsdev_state_lock)) {
-		mutex_enter(&zfsdev_state_lock);
-		locked = B_TRUE;
+	if (tsd_get(zfs_geom_probe_vdev_key) != NULL) {
+		/*
+		 * if zfs_geom_probe_vdev_key is set, that means that zfs is
+		 * attempting to probe geom providers while looking for a
+		 * replacement for a missing VDEV.  In this case, the
+		 * spa_namespace_lock will not be held, but it is still illegal
+		 * to use a zvol as a vdev.  Deadlocks can result if another
+		 * thread has spa_namespace_lock
+		 */
+		return (EOPNOTSUPP);
 	}
 
+	mutex_enter(&zfsdev_state_lock);
+
 	zv = pp->private;
 	if (zv == NULL) {
-		if (locked)
-			mutex_exit(&zfsdev_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (SET_ERROR(ENXIO));
 	}
 
 	if (zv->zv_total_opens == 0) {
 		err = zvol_first_open(zv);
 		if (err) {
-			if (locked)
-				mutex_exit(&zfsdev_state_lock);
+			mutex_exit(&zfsdev_state_lock);
 			return (err);
 		}
 		pp->mediasize = zv->zv_volsize;
@@ -1177,8 +1171,7 @@ zvol_open(struct g_provider *pp, int fla
 	mutex_exit(&zfsdev_state_lock);
 #else
 	zv->zv_total_opens += count;
-	if (locked)
-		mutex_exit(&zfsdev_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 #endif
 
 	return (err);
@@ -1188,8 +1181,7 @@ out:
 #ifdef illumos
 	mutex_exit(&zfsdev_state_lock);
 #else
-	if (locked)
-		mutex_exit(&zfsdev_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 #endif
 	return (err);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201601191700.u0JH0P6k061610>