From owner-svn-src-all@FreeBSD.ORG Mon Feb 17 16:30:23 2014 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 2E3895B6; Mon, 17 Feb 2014 16:30:23 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id 1549614C5; Mon, 17 Feb 2014 16:30:23 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.8/8.14.8) with ESMTP id s1HGUN3f035981; Mon, 17 Feb 2014 16:30:23 GMT (envelope-from avg@svn.freebsd.org) Received: (from avg@localhost) by svn.freebsd.org (8.14.8/8.14.8/Submit) id s1HGUINE035956; Mon, 17 Feb 2014 16:30:18 GMT (envelope-from avg@svn.freebsd.org) Message-Id: <201402171630.s1HGUINE035956@svn.freebsd.org> From: Andriy Gapon Date: Mon, 17 Feb 2014 16:30:18 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r262081 - in stable/9/sys/cddl/contrib/opensolaris/uts/common: fs/zfs fs/zfs/sys sys/fs X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.17 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 17 Feb 2014 16:30:23 -0000 Author: avg Date: Mon Feb 17 16:30:18 2014 New Revision: 262081 URL: http://svnweb.freebsd.org/changeset/base/262081 Log: MFC r254591,255753: Enhance the ZFS vdev layer to maintain both a logical and a physical minimum allocation size for devices Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c stable/9/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h Directory Properties: stable/9/sys/ (props changed) stable/9/sys/cddl/contrib/opensolaris/ (props changed) Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Mon Feb 17 16:30:18 2014 (r262081) @@ -5197,7 +5197,7 @@ l2arc_compress_buf(l2arc_buf_hdr_t *l2hd len = l2hdr->b_asize; cdata = zio_data_buf_alloc(len); csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata, - cdata, l2hdr->b_asize); + cdata, l2hdr->b_asize, (size_t)SPA_MINBLOCKSIZE); if (csize == 0) { /* zero block, indicate that there's nothing to write */ @@ -5437,6 +5437,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd) ASSERT(!l2arc_vdev_present(vd)); + vdev_ashift_optimize(vd); + /* * Create a new l2arc device entry. */ Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c Mon Feb 17 16:30:18 2014 (r262081) @@ -226,6 +226,27 @@ metaslab_class_space_update(metaslab_cla atomic_add_64(&mc->mc_dspace, dspace_delta); } +void +metaslab_class_minblocksize_update(metaslab_class_t *mc) +{ + metaslab_group_t *mg; + vdev_t *vd; + uint64_t minashift = UINT64_MAX; + + if ((mg = mc->mc_rotor) == NULL) { + mc->mc_minblocksize = SPA_MINBLOCKSIZE; + return; + } + + do { + vd = mg->mg_vd; + if (vd->vdev_ashift < minashift) + minashift = vd->vdev_ashift; + } while ((mg = mg->mg_next) != mc->mc_rotor); + + mc->mc_minblocksize = 1ULL << minashift; +} + uint64_t metaslab_class_get_alloc(metaslab_class_t *mc) { @@ -250,6 +271,12 @@ metaslab_class_get_dspace(metaslab_class return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space); } +uint64_t +metaslab_class_get_minblocksize(metaslab_class_t *mc) +{ + return (mc->mc_minblocksize); +} + /* * ========================================================================== * Metaslab groups @@ -389,6 +416,7 @@ metaslab_group_activate(metaslab_group_t mgnext->mg_prev = mg; } mc->mc_rotor = mg; + metaslab_class_minblocksize_update(mc); } void @@ -420,6 +448,7 @@ metaslab_group_passivate(metaslab_group_ mg->mg_prev = NULL; mg->mg_next = NULL; + metaslab_class_minblocksize_update(mc); } static void Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Mon Feb 17 16:30:18 2014 (r262081) @@ -3505,6 +3505,7 @@ spa_create(const char *pool, nvlist_t *n (error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) == 0) { for (int c = 0; c < rvd->vdev_children; c++) { + vdev_ashift_optimize(rvd->vdev_child[c]); vdev_metaslab_set_size(rvd->vdev_child[c]); vdev_expand(rvd->vdev_child[c], txg); } Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c Mon Feb 17 16:30:18 2014 (r262081) @@ -514,8 +514,10 @@ spa_config_update(spa_t *spa, int what) */ for (c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; - if (tvd->vdev_ms_array == 0) + if (tvd->vdev_ms_array == 0) { + vdev_ashift_optimize(tvd); vdev_metaslab_set_size(tvd); + } vdev_expand(tvd, txg); } } Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab.h Mon Feb 17 16:30:18 2014 (r262081) @@ -70,6 +70,7 @@ extern uint64_t metaslab_class_get_alloc extern uint64_t metaslab_class_get_space(metaslab_class_t *mc); extern uint64_t metaslab_class_get_dspace(metaslab_class_t *mc); extern uint64_t metaslab_class_get_deferred(metaslab_class_t *mc); +extern uint64_t metaslab_class_get_minblocksize(metaslab_class_t *mc); extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc, vdev_t *vd); Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/metaslab_impl.h Mon Feb 17 16:30:18 2014 (r262081) @@ -50,6 +50,7 @@ struct metaslab_class { uint64_t mc_deferred; /* total deferred frees */ uint64_t mc_space; /* total space (alloc + free) */ uint64_t mc_dspace; /* total deflated space */ + uint64_t mc_minblocksize; }; struct metaslab_group { Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h Mon Feb 17 16:30:18 2014 (r262081) @@ -93,6 +93,17 @@ struct dsl_dataset; #define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1) /* + * Maximum supported logical ashift. + * + * The current 8k allocation block size limit is due to the 8k + * aligned/sized operations performed by vdev_probe() on + * vdev_label->vl_pad2. Using another "safe region" for these tests + * would allow the limit to be raised to 16k, at the expense of + * only having 8 available uberblocks in the label area. + */ +#define SPA_MAXASHIFT 13 + +/* * Size of block to hold the configuration data (a packed nvlist) */ #define SPA_CONFIG_BLOCKSIZE (1ULL << 14) Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h Mon Feb 17 16:30:18 2014 (r262081) @@ -78,6 +78,7 @@ extern void vdev_rele(vdev_t *); extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg); extern void vdev_metaslab_fini(vdev_t *vd); extern void vdev_metaslab_set_size(vdev_t *); +extern void vdev_ashift_optimize(vdev_t *); extern void vdev_expand(vdev_t *vd, uint64_t txg); extern void vdev_split(vdev_t *vd); extern void vdev_deadman(vdev_t *vd); Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Mon Feb 17 16:30:18 2014 (r262081) @@ -57,7 +57,7 @@ typedef struct vdev_cache_entry vdev_cac * Virtual device operations */ typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, - uint64_t *ashift); + uint64_t *logical_ashift, uint64_t *physical_ashift); typedef void vdev_close_func_t(vdev_t *vd); typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize); typedef int vdev_io_start_func_t(zio_t *zio); @@ -133,6 +133,24 @@ struct vdev { uint64_t vdev_min_asize; /* min acceptable asize */ uint64_t vdev_max_asize; /* max acceptable asize */ uint64_t vdev_ashift; /* block alignment shift */ + /* + * Logical block alignment shift + * + * The smallest sized/aligned I/O supported by the device. + */ + uint64_t vdev_logical_ashift; + /* + * Physical block alignment shift + * + * The device supports logical I/Os with vdev_logical_ashift + * size/alignment, but optimum performance will be achieved by + * aligning/sizing requests to vdev_physical_ashift. Smaller + * requests may be inflated or incur device level read-modify-write + * operations. + * + * May be 0 to indicate no preference (i.e. use vdev_logical_ashift). + */ + uint64_t vdev_physical_ashift; uint64_t vdev_state; /* see VDEV_STATE_* #defines */ uint64_t vdev_prevstate; /* used when reopening a vdev */ vdev_ops_t *vdev_ops; /* vdev operations */ Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h Mon Feb 17 16:30:18 2014 (r262081) @@ -79,7 +79,7 @@ extern int lz4_decompress(void *src, voi * Compress and decompress data if necessary. */ extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst, - size_t s_len); + size_t s_len, size_t minblocksize); extern int zio_decompress_data(enum zio_compress c, void *src, void *dst, size_t s_len, size_t d_len); Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c Mon Feb 17 16:30:18 2014 (r262081) @@ -52,6 +52,51 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CT * Virtual device management. */ +/** + * The limit for ZFS to automatically increase a top-level vdev's ashift + * from logical ashift to physical ashift. + * + * Example: one or more 512B emulation child vdevs + * child->vdev_ashift = 9 (512 bytes) + * child->vdev_physical_ashift = 12 (4096 bytes) + * zfs_max_auto_ashift = 11 (2048 bytes) + * + * On pool creation or the addition of a new top-leve vdev, ZFS will + * bump the ashift of the top-level vdev to 2048. + * + * Example: one or more 512B emulation child vdevs + * child->vdev_ashift = 9 (512 bytes) + * child->vdev_physical_ashift = 12 (4096 bytes) + * zfs_max_auto_ashift = 13 (8192 bytes) + * + * On pool creation or the addition of a new top-leve vdev, ZFS will + * bump the ashift of the top-level vdev to 4096. + */ +static uint64_t zfs_max_auto_ashift = SPA_MAXASHIFT; + +static int +sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HANDLER_ARGS) +{ + uint64_t val; + int err; + + val = zfs_max_auto_ashift; + err = sysctl_handle_64(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + if (val > SPA_MAXASHIFT) + val = SPA_MAXASHIFT; + + zfs_max_auto_ashift = val; + + return (0); +} +SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift, + CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t), + sysctl_vfs_zfs_max_auto_ashift, "QU", + "Cap on logical -> physical ashift adjustment on new top-level vdevs."); + static vdev_ops_t *vdev_ops_table[] = { &vdev_root_ops, &vdev_raidz_ops, @@ -746,6 +791,8 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t mvd->vdev_min_asize = cvd->vdev_min_asize; mvd->vdev_max_asize = cvd->vdev_max_asize; mvd->vdev_ashift = cvd->vdev_ashift; + mvd->vdev_logical_ashift = cvd->vdev_logical_ashift; + mvd->vdev_physical_ashift = cvd->vdev_physical_ashift; mvd->vdev_state = cvd->vdev_state; mvd->vdev_crtxg = cvd->vdev_crtxg; @@ -777,6 +824,8 @@ vdev_remove_parent(vdev_t *cvd) mvd->vdev_ops == &vdev_replacing_ops || mvd->vdev_ops == &vdev_spare_ops); cvd->vdev_ashift = mvd->vdev_ashift; + cvd->vdev_logical_ashift = mvd->vdev_logical_ashift; + cvd->vdev_physical_ashift = mvd->vdev_physical_ashift; vdev_remove_child(mvd, cvd); vdev_remove_child(pvd, mvd); @@ -1120,7 +1169,8 @@ vdev_open(vdev_t *vd) uint64_t osize = 0; uint64_t max_osize = 0; uint64_t asize, max_asize, psize; - uint64_t ashift = 0; + uint64_t logical_ashift = 0; + uint64_t physical_ashift = 0; ASSERT(vd->vdev_open_thread == curthread || spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); @@ -1150,7 +1200,8 @@ vdev_open(vdev_t *vd) return (SET_ERROR(ENXIO)); } - error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift); + error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, + &logical_ashift, &physical_ashift); /* * Reset the vdev_reopening flag so that we actually close @@ -1248,6 +1299,17 @@ vdev_open(vdev_t *vd) return (SET_ERROR(EINVAL)); } + vd->vdev_physical_ashift = + MAX(physical_ashift, vd->vdev_physical_ashift); + vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift); + vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift); + + if (vd->vdev_logical_ashift > SPA_MAXASHIFT) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_ASHIFT_TOO_BIG); + return (EINVAL); + } + if (vd->vdev_asize == 0) { /* * This is the first-ever open, so use the computed values. @@ -1255,19 +1317,15 @@ vdev_open(vdev_t *vd) */ vd->vdev_asize = asize; vd->vdev_max_asize = max_asize; - vd->vdev_ashift = MAX(ashift, vd->vdev_ashift); } else { /* - * Detect if the alignment requirement has increased. - * We don't want to make the pool unavailable, just - * issue a warning instead. + * Make sure the alignment requirement hasn't increased. */ - if (ashift > vd->vdev_top->vdev_ashift && + if (vd->vdev_ashift > vd->vdev_top->vdev_ashift && vd->vdev_ops->vdev_op_leaf) { - cmn_err(CE_WARN, - "Disk, '%s', has a block alignment that is " - "larger than the pool's alignment\n", - vd->vdev_path); + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_BAD_LABEL); + return (EINVAL); } vd->vdev_max_asize = max_asize; } @@ -1577,6 +1635,23 @@ vdev_metaslab_set_size(vdev_t *vd) vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); } +/* + * Maximize performance by inflating the configured ashift for + * top level vdevs to be as close to the physical ashift as + * possible without exceeding the administrator specified + * limit. + */ +void +vdev_ashift_optimize(vdev_t *vd) +{ + if (vd == vd->vdev_top && + (vd->vdev_ashift < vd->vdev_physical_ashift) && + (vd->vdev_ashift < zfs_max_auto_ashift)) { + vd->vdev_ashift = MIN(zfs_max_auto_ashift, + vd->vdev_physical_ashift); + } +} + void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) { @@ -2511,6 +2586,10 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t * if (vd->vdev_ops->vdev_op_leaf) vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize; + vs->vs_configured_ashift = vd->vdev_top != NULL + ? vd->vdev_top->vdev_ashift : vd->vdev_ashift; + vs->vs_logical_ashift = vd->vdev_logical_ashift; + vs->vs_physical_ashift = vd->vdev_physical_ashift; mutex_exit(&vd->vdev_stat_lock); /* Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c Mon Feb 17 16:30:18 2014 (r262081) @@ -49,7 +49,7 @@ vdev_file_rele(vdev_t *vd) static int vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, - uint64_t *ashift) + uint64_t *logical_ashift, uint64_t *physical_ashift) { vdev_file_t *vf; vnode_t *vp; @@ -132,7 +132,8 @@ skip_open: } *max_psize = *psize = vattr.va_size; - *ashift = SPA_MINBLOCKSHIFT; + *logical_ashift = SPA_MINBLOCKSHIFT; + *physical_ashift = SPA_MINBLOCKSHIFT; return (0); } Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c Mon Feb 17 16:30:18 2014 (r262081) @@ -576,7 +576,7 @@ vdev_geom_open_by_path(vdev_t *vd, int c static int vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, - uint64_t *ashift) + uint64_t *logical_ashift, uint64_t *physical_ashift) { struct g_provider *pp; struct g_consumer *cp; @@ -662,9 +662,13 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi *max_psize = *psize = pp->mediasize; /* - * Determine the device's minimum transfer size. + * Determine the device's minimum transfer size and preferred + * transfer size. */ - *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; + *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; + *physical_ashift = 0; + if (pp->stripesize) + *physical_ashift = highbit(pp->stripesize) - 1; /* * Clear the nowritecache settings, so that on a vdev_reopen() Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c Mon Feb 17 16:30:18 2014 (r262081) @@ -132,7 +132,7 @@ vdev_mirror_map_alloc(zio_t *zio) static int vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, - uint64_t *ashift) + uint64_t *logical_ashift, uint64_t *physical_ashift) { int numerrors = 0; int lasterror = 0; @@ -155,7 +155,9 @@ vdev_mirror_open(vdev_t *vd, uint64_t *a *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; - *ashift = MAX(*ashift, cvd->vdev_ashift); + *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift); + *physical_ashift = MAX(*physical_ashift, + cvd->vdev_physical_ashift); } if (numerrors == vd->vdev_children) { Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c Mon Feb 17 16:30:18 2014 (r262081) @@ -45,7 +45,7 @@ /* ARGSUSED */ static int vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, - uint64_t *ashift) + uint64_t *logical_ashift, uint64_t *physical_ashift) { /* * Really this should just fail. But then the root vdev will be in the @@ -55,7 +55,8 @@ vdev_missing_open(vdev_t *vd, uint64_t * */ *psize = 0; *max_psize = 0; - *ashift = 0; + *logical_ashift = 0; + *physical_ashift = 0; return (0); } Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c Mon Feb 17 16:30:18 2014 (r262081) @@ -1478,7 +1478,7 @@ vdev_raidz_reconstruct(raidz_map_t *rm, static int vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, - uint64_t *ashift) + uint64_t *logical_ashift, uint64_t *physical_ashift) { vdev_t *cvd; uint64_t nparity = vd->vdev_nparity; @@ -1507,7 +1507,9 @@ vdev_raidz_open(vdev_t *vd, uint64_t *as *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; - *ashift = MAX(*ashift, cvd->vdev_ashift); + *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift); + *physical_ashift = MAX(*physical_ashift, + cvd->vdev_physical_ashift); } *asize *= vd->vdev_children; Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c Mon Feb 17 16:30:18 2014 (r262081) @@ -55,7 +55,7 @@ too_many_errors(vdev_t *vd, int numerror static int vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, - uint64_t *ashift) + uint64_t *logical_ashift, uint64_t *physical_ashift) { int lasterror = 0; int numerrors = 0; @@ -83,7 +83,8 @@ vdev_root_open(vdev_t *vd, uint64_t *asi *asize = 0; *max_asize = 0; - *ashift = 0; + *logical_ashift = 0; + *physical_ashift = 0; return (0); } Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Mon Feb 17 16:30:18 2014 (r262081) @@ -1092,8 +1092,10 @@ zio_write_bp_init(zio_t *zio) } if (compress != ZIO_COMPRESS_OFF) { + metaslab_class_t *mc = spa_normal_class(spa); void *cbuf = zio_buf_alloc(lsize); - psize = zio_compress_data(compress, zio->io_data, cbuf, lsize); + psize = zio_compress_data(compress, zio->io_data, cbuf, lsize, + (size_t)metaslab_class_get_minblocksize(mc)); if (psize == 0 || psize == lsize) { compress = ZIO_COMPRESS_OFF; zio_buf_free(cbuf, lsize); Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c Mon Feb 17 16:30:18 2014 (r262081) @@ -77,7 +77,8 @@ zio_compress_select(enum zio_compress ch } size_t -zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len) +zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len, + size_t minblocksize) { uint64_t *word, *word_end; size_t c_len, d_len, r_len; @@ -102,7 +103,7 @@ zio_compress_data(enum zio_compress c, v return (s_len); /* Compress at least 12.5% */ - d_len = P2ALIGN(s_len - (s_len >> 3), (size_t)SPA_MINBLOCKSIZE); + d_len = P2ALIGN(s_len - (s_len >> 3), minblocksize); if (d_len == 0) return (s_len); @@ -115,14 +116,14 @@ zio_compress_data(enum zio_compress c, v * Cool. We compressed at least as much as we were hoping to. * For both security and repeatability, pad out the last sector. */ - r_len = P2ROUNDUP(c_len, (size_t)SPA_MINBLOCKSIZE); + r_len = P2ROUNDUP(c_len, minblocksize); if (r_len > c_len) { bzero((char *)dst + c_len, r_len - c_len); c_len = r_len; } ASSERT3U(c_len, <=, d_len); - ASSERT(P2PHASE(c_len, (size_t)SPA_MINBLOCKSIZE) == 0); + ASSERT(P2PHASE(c_len, minblocksize) == 0); return (c_len); } Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h ============================================================================== --- stable/9/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h Mon Feb 17 16:30:01 2014 (r262080) +++ stable/9/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h Mon Feb 17 16:30:18 2014 (r262081) @@ -620,7 +620,8 @@ typedef enum vdev_aux { VDEV_AUX_IO_FAILURE, /* experienced I/O failure */ VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */ VDEV_AUX_EXTERNAL, /* external diagnosis */ - VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */ + VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */ + VDEV_AUX_ASHIFT_TOO_BIG /* vdev's min block size is too large */ } vdev_aux_t; /* @@ -714,7 +715,13 @@ typedef struct vdev_stat { uint64_t vs_self_healed; /* self-healed bytes */ uint64_t vs_scan_removing; /* removing? */ uint64_t vs_scan_processed; /* scan processed bytes */ + uint64_t vs_configured_ashift; /* TLV vdev_ashift */ + uint64_t vs_logical_ashift; /* vdev_logical_ashift */ + uint64_t vs_physical_ashift; /* vdev_physical_ashift */ } vdev_stat_t; +#define VDEV_STAT_VALID(field, uint64_t_field_count) \ + ((uint64_t_field_count * sizeof(uint64_t)) >= \ + (offsetof(vdev_stat_t, field) + sizeof(((vdev_stat_t *)NULL)->field))) /* * DDT statistics. Note: all fields should be 64-bit because this