Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 17 Feb 2014 16:30:01 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r262080 - in stable/8/sys/cddl/contrib/opensolaris/uts/common: fs/zfs fs/zfs/sys sys/fs
Message-ID:  <201402171630.s1HGU1nN035561@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: avg
Date: Mon Feb 17 16:30:01 2014
New Revision: 262080
URL: http://svnweb.freebsd.org/changeset/base/262080

Log:
  MFC r254591,255753: Enhance the ZFS vdev layer to maintain both a
  logical and a physical minimum allocation size for devices
  
  Note: on this branch the commit is adjusted for absence of U64
  type support in sysctl infrastructure.

Modified:
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/cddl/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -5167,7 +5167,7 @@ l2arc_compress_buf(l2arc_buf_hdr_t *l2hd
 	len = l2hdr->b_asize;
 	cdata = zio_data_buf_alloc(len);
 	csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata,
-	    cdata, l2hdr->b_asize);
+	    cdata, l2hdr->b_asize, (size_t)SPA_MINBLOCKSIZE);
 
 	if (csize == 0) {
 		/* zero block, indicate that there's nothing to write */
@@ -5407,6 +5407,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
 
 	ASSERT(!l2arc_vdev_present(vd));
 
+	vdev_ashift_optimize(vd);
+
 	/*
 	 * Create a new l2arc device entry.
 	 */

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -226,6 +226,27 @@ metaslab_class_space_update(metaslab_cla
 	atomic_add_64(&mc->mc_dspace, dspace_delta);
 }
 
+void
+metaslab_class_minblocksize_update(metaslab_class_t *mc)
+{
+	metaslab_group_t *mg;
+	vdev_t *vd;
+	uint64_t minashift = UINT64_MAX;
+
+	if ((mg = mc->mc_rotor) == NULL) {
+		mc->mc_minblocksize = SPA_MINBLOCKSIZE;
+		return;
+	}
+
+	do {
+		vd = mg->mg_vd;
+		if (vd->vdev_ashift < minashift)
+			minashift = vd->vdev_ashift;
+	} while ((mg = mg->mg_next) != mc->mc_rotor);
+
+	mc->mc_minblocksize = 1ULL << minashift;
+}
+
 uint64_t
 metaslab_class_get_alloc(metaslab_class_t *mc)
 {
@@ -250,6 +271,12 @@ metaslab_class_get_dspace(metaslab_class
 	return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space);
 }
 
+uint64_t
+metaslab_class_get_minblocksize(metaslab_class_t *mc)
+{
+	return (mc->mc_minblocksize);
+}
+
 /*
  * ==========================================================================
  * Metaslab groups
@@ -389,6 +416,7 @@ metaslab_group_activate(metaslab_group_t
 		mgnext->mg_prev = mg;
 	}
 	mc->mc_rotor = mg;
+	metaslab_class_minblocksize_update(mc);
 }
 
 void
@@ -420,6 +448,7 @@ metaslab_group_passivate(metaslab_group_
 
 	mg->mg_prev = NULL;
 	mg->mg_next = NULL;
+	metaslab_class_minblocksize_update(mc);
 }
 
 static void

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -3500,6 +3500,7 @@ spa_create(const char *pool, nvlist_t *n
 	    (error = spa_validate_aux(spa, nvroot, txg,
 	    VDEV_ALLOC_ADD)) == 0) {
 		for (int c = 0; c < rvd->vdev_children; c++) {
+			vdev_ashift_optimize(rvd->vdev_child[c]);
 			vdev_metaslab_set_size(rvd->vdev_child[c]);
 			vdev_expand(rvd->vdev_child[c], txg);
 		}

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -506,8 +506,10 @@ spa_config_update(spa_t *spa, int what)
 		 */
 		for (c = 0; c < rvd->vdev_children; c++) {
 			vdev_t *tvd = rvd->vdev_child[c];
-			if (tvd->vdev_ms_array == 0)
+			if (tvd->vdev_ms_array == 0) {
+				vdev_ashift_optimize(tvd);
 				vdev_metaslab_set_size(tvd);
+			}
 			vdev_expand(tvd, txg);
 		}
 	}

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h	Mon Feb 17 16:30:01 2014	(r262080)
@@ -70,6 +70,7 @@ extern uint64_t metaslab_class_get_alloc
 extern uint64_t metaslab_class_get_space(metaslab_class_t *mc);
 extern uint64_t metaslab_class_get_dspace(metaslab_class_t *mc);
 extern uint64_t metaslab_class_get_deferred(metaslab_class_t *mc);
+extern uint64_t metaslab_class_get_minblocksize(metaslab_class_t *mc);
 
 extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
     vdev_t *vd);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h	Mon Feb 17 16:30:01 2014	(r262080)
@@ -50,6 +50,7 @@ struct metaslab_class {
 	uint64_t		mc_deferred;	/* total deferred frees */
 	uint64_t		mc_space;	/* total space (alloc + free) */
 	uint64_t		mc_dspace;	/* total deflated space */
+	uint64_t		mc_minblocksize;
 };
 
 struct metaslab_group {

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h	Mon Feb 17 16:30:01 2014	(r262080)
@@ -93,6 +93,17 @@ struct dsl_dataset;
 #define	SPA_BLOCKSIZES		(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
 
 /*
+ * Maximum supported logical ashift.
+ *
+ * The current 8k allocation block size limit is due to the 8k
+ * aligned/sized operations performed by vdev_probe() on
+ * vdev_label->vl_pad2.  Using another "safe region" for these tests
+ * would allow the limit to be raised to 16k, at the expense of
+ * only having 8 available uberblocks in the label area.
+ */
+#define	SPA_MAXASHIFT		13
+
+/*
  * Size of block to hold the configuration data (a packed nvlist)
  */
 #define	SPA_CONFIG_BLOCKSIZE	(1ULL << 14)

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h	Mon Feb 17 16:30:01 2014	(r262080)
@@ -78,6 +78,7 @@ extern void vdev_rele(vdev_t *);
 extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
 extern void vdev_metaslab_fini(vdev_t *vd);
 extern void vdev_metaslab_set_size(vdev_t *);
+extern void vdev_ashift_optimize(vdev_t *);
 extern void vdev_expand(vdev_t *vd, uint64_t txg);
 extern void vdev_split(vdev_t *vd);
 extern void vdev_deadman(vdev_t *vd);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Mon Feb 17 16:30:01 2014	(r262080)
@@ -57,7 +57,7 @@ typedef struct vdev_cache_entry vdev_cac
  * Virtual device operations
  */
 typedef int	vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
-    uint64_t *ashift);
+    uint64_t *logical_ashift, uint64_t *physical_ashift);
 typedef void	vdev_close_func_t(vdev_t *vd);
 typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
 typedef int	vdev_io_start_func_t(zio_t *zio);
@@ -133,6 +133,24 @@ struct vdev {
 	uint64_t	vdev_min_asize;	/* min acceptable asize		*/
 	uint64_t	vdev_max_asize;	/* max acceptable asize		*/
 	uint64_t	vdev_ashift;	/* block alignment shift	*/
+	/*
+	 * Logical block alignment shift
+	 *
+	 * The smallest sized/aligned I/O supported by the device.
+	 */
+	uint64_t        vdev_logical_ashift;
+	/*
+	 * Physical block alignment shift
+	 *
+	 * The device supports logical I/Os with vdev_logical_ashift
+	 * size/alignment, but optimum performance will be achieved by
+	 * aligning/sizing requests to vdev_physical_ashift.  Smaller
+	 * requests may be inflated or incur device level read-modify-write
+	 * operations.
+	 *
+	 * May be 0 to indicate no preference (i.e. use vdev_logical_ashift).
+         */
+	uint64_t        vdev_physical_ashift;
 	uint64_t	vdev_state;	/* see VDEV_STATE_* #defines	*/
 	uint64_t	vdev_prevstate;	/* used when reopening a vdev	*/
 	vdev_ops_t	*vdev_ops;	/* vdev operations		*/

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h	Mon Feb 17 16:30:01 2014	(r262080)
@@ -79,7 +79,7 @@ extern int lz4_decompress(void *src, voi
  * Compress and decompress data if necessary.
  */
 extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst,
-    size_t s_len);
+    size_t s_len, size_t minblocksize);
 extern int zio_decompress_data(enum zio_compress c, void *src, void *dst,
     size_t s_len, size_t d_len);
 

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -52,6 +52,51 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CT
  * Virtual device management.
  */
 
+/**
+ * The limit for ZFS to automatically increase a top-level vdev's ashift
+ * from logical ashift to physical ashift.
+ *
+ * Example: one or more 512B emulation child vdevs
+ *          child->vdev_ashift = 9 (512 bytes)
+ *          child->vdev_physical_ashift = 12 (4096 bytes)
+ *          zfs_max_auto_ashift = 11 (2048 bytes)
+ *
+ * On pool creation or the addition of a new top-level vdev, ZFS will
+ * bump the ashift of the top-level vdev to 11 (2048 bytes).
+ *
+ * Example: one or more 512B emulation child vdevs
+ *          child->vdev_ashift = 9 (512 bytes)
+ *          child->vdev_physical_ashift = 12 (4096 bytes)
+ *          zfs_max_auto_ashift = 13 (8192 bytes)
+ *
+ * On pool creation or the addition of a new top-level vdev, ZFS will
+ * bump the ashift of the top-level vdev to 12 (4096 bytes).
+ */
+static uint64_t zfs_max_auto_ashift = SPA_MAXASHIFT;
+
+static int
+sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t val;
+	int err;
+
+	val = zfs_max_auto_ashift;
+	err = sysctl_handle_quad(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val > SPA_MAXASHIFT)
+		val = SPA_MAXASHIFT;
+
+	zfs_max_auto_ashift = val;
+
+	return (0);
+}
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
+    CTLTYPE_QUAD | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t),
+    sysctl_vfs_zfs_max_auto_ashift, "Q",
+    "Cap on logical -> physical ashift adjustment on new top-level vdevs.");
+
 static vdev_ops_t *vdev_ops_table[] = {
 	&vdev_root_ops,
 	&vdev_raidz_ops,
@@ -746,6 +791,8 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t 
 	mvd->vdev_min_asize = cvd->vdev_min_asize;
 	mvd->vdev_max_asize = cvd->vdev_max_asize;
 	mvd->vdev_ashift = cvd->vdev_ashift;
+	mvd->vdev_logical_ashift = cvd->vdev_logical_ashift;
+	mvd->vdev_physical_ashift = cvd->vdev_physical_ashift;
 	mvd->vdev_state = cvd->vdev_state;
 	mvd->vdev_crtxg = cvd->vdev_crtxg;
 
@@ -777,6 +824,8 @@ vdev_remove_parent(vdev_t *cvd)
 	    mvd->vdev_ops == &vdev_replacing_ops ||
 	    mvd->vdev_ops == &vdev_spare_ops);
 	cvd->vdev_ashift = mvd->vdev_ashift;
+	cvd->vdev_logical_ashift = mvd->vdev_logical_ashift;
+	cvd->vdev_physical_ashift = mvd->vdev_physical_ashift;
 
 	vdev_remove_child(mvd, cvd);
 	vdev_remove_child(pvd, mvd);
@@ -1120,7 +1169,8 @@ vdev_open(vdev_t *vd)
 	uint64_t osize = 0;
 	uint64_t max_osize = 0;
 	uint64_t asize, max_asize, psize;
-	uint64_t ashift = 0;
+	uint64_t logical_ashift = 0;
+	uint64_t physical_ashift = 0;
 
 	ASSERT(vd->vdev_open_thread == curthread ||
 	    spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
@@ -1150,7 +1200,8 @@ vdev_open(vdev_t *vd)
 		return (SET_ERROR(ENXIO));
 	}
 
-	error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift);
+	error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
+	    &logical_ashift, &physical_ashift);
 
 	/*
 	 * Reset the vdev_reopening flag so that we actually close
@@ -1248,6 +1299,17 @@ vdev_open(vdev_t *vd)
 		return (SET_ERROR(EINVAL));
 	}
 
+	vd->vdev_physical_ashift =
+	    MAX(physical_ashift, vd->vdev_physical_ashift);
+	vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift);
+	vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift);
+
+	if (vd->vdev_logical_ashift > SPA_MAXASHIFT) {
+		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_ASHIFT_TOO_BIG);
+		return (EINVAL);
+	}
+
 	if (vd->vdev_asize == 0) {
 		/*
 		 * This is the first-ever open, so use the computed values.
@@ -1255,19 +1317,15 @@ vdev_open(vdev_t *vd)
 		 */
 		vd->vdev_asize = asize;
 		vd->vdev_max_asize = max_asize;
-		vd->vdev_ashift = MAX(ashift, vd->vdev_ashift);
 	} else {
 		/*
-		 * Detect if the alignment requirement has increased.
-		 * We don't want to make the pool unavailable, just
-		 * issue a warning instead.
+		 * Make sure the alignment requirement hasn't increased.
 		 */
-		if (ashift > vd->vdev_top->vdev_ashift &&
+		if (vd->vdev_ashift > vd->vdev_top->vdev_ashift &&
 		    vd->vdev_ops->vdev_op_leaf) {
-			cmn_err(CE_WARN,
-			    "Disk, '%s', has a block alignment that is "
-			    "larger than the pool's alignment\n",
-			    vd->vdev_path);
+			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+			    VDEV_AUX_BAD_LABEL);
+			return (EINVAL);
 		}
 		vd->vdev_max_asize = max_asize;
 	}
@@ -1577,6 +1635,23 @@ vdev_metaslab_set_size(vdev_t *vd)
 	vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT);
 }
 
+/*
+ * Maximize performance by inflating the configured ashift for
+ * top level vdevs to be as close to the physical ashift as
+ * possible without exceeding the administrator specified
+ * limit.
+ */
+void
+vdev_ashift_optimize(vdev_t *vd)
+{
+	if (vd == vd->vdev_top &&
+	    (vd->vdev_ashift < vd->vdev_physical_ashift) &&
+	    (vd->vdev_ashift < zfs_max_auto_ashift)) {
+		vd->vdev_ashift = MIN(zfs_max_auto_ashift,
+		    vd->vdev_physical_ashift);
+	}
+}
+
 void
 vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg)
 {
@@ -2503,6 +2578,10 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *
 	if (vd->vdev_ops->vdev_op_leaf)
 		vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
 	vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
+	vs->vs_configured_ashift = vd->vdev_top != NULL
+	    ? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
+	vs->vs_logical_ashift = vd->vdev_logical_ashift;
+	vs->vs_physical_ashift = vd->vdev_physical_ashift;
 	mutex_exit(&vd->vdev_stat_lock);
 
 	/*

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -49,7 +49,7 @@ vdev_file_rele(vdev_t *vd)
 
 static int
 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	vdev_file_t *vf;
 	vnode_t *vp;
@@ -132,7 +132,8 @@ skip_open:
 	}
 
 	*max_psize = *psize = vattr.va_size;
-	*ashift = SPA_MINBLOCKSHIFT;
+	*logical_ashift = SPA_MINBLOCKSHIFT;
+	*physical_ashift = SPA_MINBLOCKSHIFT;
 
 	return (0);
 }

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -576,7 +576,7 @@ vdev_geom_open_by_path(vdev_t *vd, int c
 
 static int
 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	struct g_provider *pp;
 	struct g_consumer *cp;
@@ -662,9 +662,13 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
 	*max_psize = *psize = pp->mediasize;
 
 	/*
-	 * Determine the device's minimum transfer size.
+	 * Determine the device's minimum transfer size and preferred
+	 * transfer size.
 	 */
-	*ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
+	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
+	*physical_ashift = 0;
+	if (pp->stripesize)
+		*physical_ashift = highbit(pp->stripesize) - 1;
 
 	/*
 	 * Clear the nowritecache settings, so that on a vdev_reopen()

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -132,7 +132,7 @@ vdev_mirror_map_alloc(zio_t *zio)
 
 static int
 vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	int numerrors = 0;
 	int lasterror = 0;
@@ -155,7 +155,9 @@ vdev_mirror_open(vdev_t *vd, uint64_t *a
 
 		*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
 		*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
-		*ashift = MAX(*ashift, cvd->vdev_ashift);
+		*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
+		*physical_ashift = MAX(*physical_ashift,
+		    cvd->vdev_physical_ashift);
 	}
 
 	if (numerrors == vd->vdev_children) {

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -45,7 +45,7 @@
 /* ARGSUSED */
 static int
 vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	/*
 	 * Really this should just fail.  But then the root vdev will be in the
@@ -55,7 +55,8 @@ vdev_missing_open(vdev_t *vd, uint64_t *
 	 */
 	*psize = 0;
 	*max_psize = 0;
-	*ashift = 0;
+	*logical_ashift = 0;
+	*physical_ashift = 0;
 	return (0);
 }
 

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -1478,7 +1478,7 @@ vdev_raidz_reconstruct(raidz_map_t *rm, 
 
 static int
 vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	vdev_t *cvd;
 	uint64_t nparity = vd->vdev_nparity;
@@ -1507,7 +1507,9 @@ vdev_raidz_open(vdev_t *vd, uint64_t *as
 
 		*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
 		*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
-		*ashift = MAX(*ashift, cvd->vdev_ashift);
+		*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
+		*physical_ashift = MAX(*physical_ashift,
+		    cvd->vdev_physical_ashift);
 	}
 
 	*asize *= vd->vdev_children;

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -55,7 +55,7 @@ too_many_errors(vdev_t *vd, int numerror
 
 static int
 vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
-    uint64_t *ashift)
+    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	int lasterror = 0;
 	int numerrors = 0;
@@ -83,7 +83,8 @@ vdev_root_open(vdev_t *vd, uint64_t *asi
 
 	*asize = 0;
 	*max_asize = 0;
-	*ashift = 0;
+	*logical_ashift = 0;
+	*physical_ashift = 0;
 
 	return (0);
 }

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -1091,8 +1091,10 @@ zio_write_bp_init(zio_t *zio)
 	}
 
 	if (compress != ZIO_COMPRESS_OFF) {
+		metaslab_class_t *mc = spa_normal_class(spa);
 		void *cbuf = zio_buf_alloc(lsize);
-		psize = zio_compress_data(compress, zio->io_data, cbuf, lsize);
+		psize = zio_compress_data(compress, zio->io_data, cbuf, lsize,
+		    (size_t)metaslab_class_get_minblocksize(mc));
 		if (psize == 0 || psize == lsize) {
 			compress = ZIO_COMPRESS_OFF;
 			zio_buf_free(cbuf, lsize);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c	Mon Feb 17 16:30:01 2014	(r262080)
@@ -77,7 +77,8 @@ zio_compress_select(enum zio_compress ch
 }
 
 size_t
-zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len)
+zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len,
+    size_t minblocksize)
 {
 	uint64_t *word, *word_end;
 	size_t c_len, d_len, r_len;
@@ -102,7 +103,7 @@ zio_compress_data(enum zio_compress c, v
 		return (s_len);
 
 	/* Compress at least 12.5% */
-	d_len = P2ALIGN(s_len - (s_len >> 3), (size_t)SPA_MINBLOCKSIZE);
+	d_len = P2ALIGN(s_len - (s_len >> 3), minblocksize);
 	if (d_len == 0)
 		return (s_len);
 
@@ -115,14 +116,14 @@ zio_compress_data(enum zio_compress c, v
 	 * Cool.  We compressed at least as much as we were hoping to.
 	 * For both security and repeatability, pad out the last sector.
 	 */
-	r_len = P2ROUNDUP(c_len, (size_t)SPA_MINBLOCKSIZE);
+	r_len = P2ROUNDUP(c_len, minblocksize);
 	if (r_len > c_len) {
 		bzero((char *)dst + c_len, r_len - c_len);
 		c_len = r_len;
 	}
 
 	ASSERT3U(c_len, <=, d_len);
-	ASSERT(P2PHASE(c_len, (size_t)SPA_MINBLOCKSIZE) == 0);
+	ASSERT(P2PHASE(c_len, minblocksize) == 0);
 
 	return (c_len);
 }

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	Mon Feb 17 16:18:13 2014	(r262079)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	Mon Feb 17 16:30:01 2014	(r262080)
@@ -620,7 +620,8 @@ typedef enum vdev_aux {
 	VDEV_AUX_IO_FAILURE,	/* experienced I/O failure		*/
 	VDEV_AUX_BAD_LOG,	/* cannot read log chain(s)		*/
 	VDEV_AUX_EXTERNAL,	/* external diagnosis			*/
-	VDEV_AUX_SPLIT_POOL	/* vdev was split off into another pool	*/
+	VDEV_AUX_SPLIT_POOL,	/* vdev was split off into another pool	*/
+	VDEV_AUX_ASHIFT_TOO_BIG /* vdev's min block size is too large   */
 } vdev_aux_t;
 
 /*
@@ -714,7 +715,13 @@ typedef struct vdev_stat {
 	uint64_t	vs_self_healed;		/* self-healed bytes	*/
 	uint64_t	vs_scan_removing;	/* removing?	*/
 	uint64_t	vs_scan_processed;	/* scan processed bytes	*/
+ 	uint64_t	vs_configured_ashift;	/* TLV vdev_ashift      */
+ 	uint64_t	vs_logical_ashift;	/* vdev_logical_ashift  */
+ 	uint64_t	vs_physical_ashift;	/* vdev_physical_ashift */
 } vdev_stat_t;
+#define VDEV_STAT_VALID(field, uint64_t_field_count) \
+    ((uint64_t_field_count * sizeof(uint64_t)) >= \
+     (offsetof(vdev_stat_t, field) + sizeof(((vdev_stat_t *)NULL)->field)))
 
 /*
  * DDT statistics.  Note: all fields should be 64-bit because this



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201402171630.s1HGU1nN035561>