Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 24 Jun 2013 15:35:43 +0000 (UTC)
From:      Steven Hartland <smh@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r252162 - in stable/8: . cddl/lib/libzpool sys/cddl/compat/opensolaris/kern sys/cddl/compat/opensolaris/sys sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/u...
Message-ID:  <201306241535.r5OFZho3039275@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: smh
Date: Mon Jun 24 15:35:42 2013
New Revision: 252162
URL: http://svnweb.freebsd.org/changeset/base/252162

Log:
  Added ZFS TRIM support which is enabled by default. To disable
  ZFS TRIM support set vfs.zfs.trim.enabled=0 in loader.conf.
  
  Creating new ZFS pools and adding new devices to existing pools
  first performs a full device-level TRIM, which can take a significant
  amount of time. The sysctl vfs.zfs.vdev.trim_on_init can be set to 0
  to disable this behaviour.
  
  ZFS TRIM requires that the underlying device support BIO_DELETE, which
  is currently provided by methods such as ATA TRIM and SCSI UNMAP
  via CAM; these are typically supported by SSDs.
  
  Stats for ZFS TRIM can be monitored by looking at the sysctls
  under kstat.zfs.misc.zio_trim.
  
  MFC r240868: Add TRIM support
  MFC r244155: Renamed zfs trim stats
  MFC r244187: Upgrade TRIM free request sizes optimisation
  MFC r244188: Added vfs.zfs.vdev.trim_on_init sysctl
  MFC r248572: Add TRIM support for L2ARC
  MFC r248573: Don't register repair writes in the trim map.
  MFC r248574: Improve TXG handling in the TRIM module
  MFC r248575: TRIM cache devices based on time instead of TXGs
  MFC r248576: Names the ZFS TRIM thread
  MFC r248577: Optimisation of TRIM processing
  MFC r248602: Fix for building libzpool under i386
  MFC r249921: Enabled ZFS TRIM by default

Added:
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/trim_map.h
     - copied, changed from r240868, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/trim_map.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
     - copied, changed from r240868, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
Modified:
  stable/8/UPDATING
  stable/8/cddl/lib/libzpool/Makefile
  stable/8/sys/cddl/compat/opensolaris/kern/opensolaris_kstat.c
  stable/8/sys/cddl/compat/opensolaris/sys/dkio.h
  stable/8/sys/cddl/compat/opensolaris/sys/kstat.h
  stable/8/sys/cddl/compat/opensolaris/sys/time.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  stable/8/sys/modules/zfs/Makefile
Directory Properties:
  stable/8/cddl/lib/   (props changed)
  stable/8/cddl/lib/libzpool/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/cddl/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/modules/   (props changed)

Modified: stable/8/UPDATING
==============================================================================
--- stable/8/UPDATING	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/UPDATING	Mon Jun 24 15:35:42 2013	(r252162)
@@ -15,6 +15,22 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.
 	debugging tools present in HEAD were left in place because
 	sun4v support still needs work to become production ready.
 
+20130624:
+	Added ZFS TRIM support which is enabled by default. To disable
+	ZFS TRIM support set vfs.zfs.trim.enabled=0 in loader.conf.
+
+	Creating new ZFS pools and adding new devices to existing pools
+	first performs a full device level TRIM, which can take a significant
+	amount of time. Set the sysctl vfs.zfs.vdev.trim_on_init to 0 to
+	disable this behaviour.
+
+	ZFS TRIM requires the underlying device support BIO_DELETE which
+	is currently provided by methods such as ATA TRIM and SCSI UNMAP
+	via CAM, which are typically supported by SSD's.
+
+	Stats for ZFS TRIM can be monitored by looking at the sysctl's
+	under kstat.zfs.misc.zio_trim.
+
 20130607:
 	8.4-RELEASE.
 

Modified: stable/8/cddl/lib/libzpool/Makefile
==============================================================================
--- stable/8/cddl/lib/libzpool/Makefile	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/cddl/lib/libzpool/Makefile	Mon Jun 24 15:35:42 2013	(r252162)
@@ -26,7 +26,7 @@ ATOMIC_SRCS=	opensolaris_atomic.c
 
 LIB=		zpool
 
-ZFS_COMMON_SRCS= ${ZFS_COMMON_OBJS:C/.o$/.c/} vdev_file.c
+ZFS_COMMON_SRCS= ${ZFS_COMMON_OBJS:C/.o$/.c/} vdev_file.c trim_map.c
 ZFS_SHARED_SRCS= ${ZFS_SHARED_OBJS:C/.o$/.c/}
 KERNEL_SRCS=	kernel.c taskq.c util.c
 LIST_SRCS=	list.c

Modified: stable/8/sys/cddl/compat/opensolaris/kern/opensolaris_kstat.c
==============================================================================
--- stable/8/sys/cddl/compat/opensolaris/kern/opensolaris_kstat.c	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/compat/opensolaris/kern/opensolaris_kstat.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -118,7 +118,7 @@ kstat_install(kstat_t *ksp)
 		SYSCTL_ADD_PROC(&ksp->ks_sysctl_ctx,
 		    SYSCTL_CHILDREN(ksp->ks_sysctl_root), OID_AUTO, ksent->name,
 		    CTLTYPE_QUAD | CTLFLAG_RD, ksent, sizeof(*ksent),
-		    kstat_sysctl, "QU", "");
+		    kstat_sysctl, "QU", ksent->desc);
 	}
 }
 

Modified: stable/8/sys/cddl/compat/opensolaris/sys/dkio.h
==============================================================================
--- stable/8/sys/cddl/compat/opensolaris/sys/dkio.h	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/compat/opensolaris/sys/dkio.h	Mon Jun 24 15:35:42 2013	(r252162)
@@ -75,6 +75,8 @@ extern "C" {
  */
 #define	DKIOCFLUSHWRITECACHE	(DKIOC|34)	/* flush cache to phys medium */
 
+#define	DKIOCTRIM		(DKIOC|35)	/* TRIM a block */
+
 struct dk_callback {
 	void (*dkc_callback)(void *dkc_cookie, int error);
 	void *dkc_cookie;

Modified: stable/8/sys/cddl/compat/opensolaris/sys/kstat.h
==============================================================================
--- stable/8/sys/cddl/compat/opensolaris/sys/kstat.h	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/compat/opensolaris/sys/kstat.h	Mon Jun 24 15:35:42 2013	(r252162)
@@ -53,6 +53,8 @@ typedef struct kstat_named {
 #define	KSTAT_DATA_INT64	3
 #define	KSTAT_DATA_UINT64	4
 	uchar_t	data_type;
+#define	KSTAT_DESCLEN		128
+	char	desc[KSTAT_DESCLEN];
 	union {
 		uint64_t	ui64;
 	} value;

Modified: stable/8/sys/cddl/compat/opensolaris/sys/time.h
==============================================================================
--- stable/8/sys/cddl/compat/opensolaris/sys/time.h	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/compat/opensolaris/sys/time.h	Mon Jun 24 15:35:42 2013	(r252162)
@@ -35,6 +35,7 @@
 #define MILLISEC	1000
 #define MICROSEC	1000000
 #define NANOSEC		1000000000
+#define TIME_MAX	LLONG_MAX
 
 typedef longlong_t	hrtime_t;
 

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -130,6 +130,7 @@
 #endif
 #include <sys/callb.h>
 #include <sys/kstat.h>
+#include <sys/trim_map.h>
 #include <zfs_fletcher.h>
 #include <sys/sdt.h>
 
@@ -1691,6 +1692,8 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
 		}
 
 		if (l2hdr != NULL) {
+			trim_map_free(l2hdr->b_dev->l2ad_vdev, l2hdr->b_daddr,
+			    hdr->b_size, 0);
 			list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
 			ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
 			kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
@@ -3528,6 +3531,8 @@ arc_release(arc_buf_t *buf, void *tag)
 	buf->b_private = NULL;
 
 	if (l2hdr) {
+		trim_map_free(l2hdr->b_dev->l2ad_vdev, l2hdr->b_daddr,
+		    hdr->b_size, 0);
 		list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
 		kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
 		ARCSTAT_INCR(arcstat_l2_size, -buf_size);
@@ -4442,6 +4447,8 @@ l2arc_write_done(zio_t *zio)
 			list_remove(buflist, ab);
 			abl2 = ab->b_l2hdr;
 			ab->b_l2hdr = NULL;
+			trim_map_free(abl2->b_dev->l2ad_vdev, abl2->b_daddr,
+			    ab->b_size, 0);
 			kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
 			ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
 		}

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -397,7 +397,8 @@ void
 dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
 {
 	ASSERT(dsl_pool_sync_context(dp));
-	zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags));
+	zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, BP_GET_PSIZE(bpp),
+	    pio->io_flags));
 }
 
 static uint64_t
@@ -1364,7 +1365,7 @@ dsl_scan_free_block_cb(void *arg, const 
 	}
 
 	zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
-	    dmu_tx_get_txg(tx), bp, 0));
+	    dmu_tx_get_txg(tx), bp, BP_GET_PSIZE(bp), 0));
 	dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD,
 	    -bp_get_dsize_sync(scn->scn_dp->dp_spa, bp),
 	    -BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -67,6 +67,7 @@
 #include <sys/dsl_userhold.h>
 #include <sys/zfeature.h>
 #include <sys/zvol.h>
+#include <sys/trim_map.h>
 
 #ifdef	_KERNEL
 #include <sys/callb.h>
@@ -1001,6 +1002,11 @@ spa_activate(spa_t *spa, int mode)
 		spa_create_zio_taskqs(spa);
 	}
 
+	/*
+	 * Start TRIM thread.
+	 */
+	trim_thread_create(spa);
+
 	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
 	    offsetof(vdev_t, vdev_config_dirty_node));
 	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
@@ -1029,6 +1035,12 @@ spa_deactivate(spa_t *spa)
 	ASSERT(spa->spa_async_zio_root == NULL);
 	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
 
+	/*
+	 * Stop TRIM thread in case spa_unload() wasn't called directly
+	 * before spa_deactivate().
+	 */
+	trim_thread_destroy(spa);
+
 	txg_list_destroy(&spa->spa_vdev_txg_list);
 
 	list_destroy(&spa->spa_config_dirty_list);
@@ -1145,6 +1157,11 @@ spa_unload(spa_t *spa)
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	/*
+	 * Stop TRIM thread.
+	 */
+	trim_thread_destroy(spa);
+
+	/*
 	 * Stop async tasks.
 	 */
 	spa_async_suspend(spa);
@@ -5875,7 +5892,7 @@ spa_free_sync_cb(void *arg, const blkptr
 	zio_t *zio = arg;
 
 	zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
-	    zio->io_flags));
+	    BP_GET_PSIZE(bp), zio->io_flags));
 	return (0);
 }
 

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h	Mon Jun 24 15:35:42 2013	(r252162)
@@ -221,6 +221,9 @@ struct spa {
 	spa_proc_state_t spa_proc_state;	/* see definition */
 	struct proc	*spa_proc;		/* "zpool-poolname" process */
 	uint64_t	spa_did;		/* if procp != p0, did of t1 */
+	kthread_t	*spa_trim_thread;	/* thread sending TRIM I/Os */
+	kmutex_t	spa_trim_lock;		/* protects spa_trim_cv */
+	kcondvar_t	spa_trim_cv;		/* used to notify TRIM thread */
 	boolean_t	spa_autoreplace;	/* autoreplace set in open */
 	int		spa_vdev_locks;		/* locks grabbed */
 	uint64_t	spa_creation_version;	/* version at pool creation */

Copied and modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/trim_map.h (from r240868, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/trim_map.h)
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/trim_map.h	Sun Sep 23 19:40:58 2012	(r240868, copy source)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/trim_map.h	Mon Jun 24 15:35:42 2013	(r252162)
@@ -36,7 +36,7 @@ extern "C" {
 
 extern void trim_map_create(vdev_t *vd);
 extern void trim_map_destroy(vdev_t *vd);
-extern void trim_map_free(zio_t *zio);
+extern void trim_map_free(vdev_t *vd, uint64_t offset, uint64_t size, uint64_t txg);
 extern boolean_t trim_map_write_start(zio_t *zio);
 extern void trim_map_write_done(zio_t *zio);
 

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev.h	Mon Jun 24 15:35:42 2013	(r252162)
@@ -46,6 +46,7 @@ typedef enum vdev_dtl_type {
 } vdev_dtl_type_t;
 
 extern boolean_t zfs_nocacheflush;
+extern boolean_t zfs_trim_enabled;
 
 extern int vdev_open(vdev_t *);
 extern void vdev_open_children(vdev_t *);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Mon Jun 24 15:35:42 2013	(r252162)
@@ -183,6 +183,7 @@ struct vdev {
 	uint64_t	vdev_unspare;	/* unspare when resilvering done */
 	hrtime_t	vdev_last_try;	/* last reopen time		*/
 	boolean_t	vdev_nowritecache; /* true if flushwritecache failed */
+	boolean_t	vdev_notrim;	/* true if trim failed */
 	boolean_t	vdev_checkremove; /* temporary online test	*/
 	boolean_t	vdev_forcefault; /* force online fault		*/
 	boolean_t	vdev_splitting;	/* split or repair in progress  */
@@ -198,6 +199,7 @@ struct vdev {
 	spa_aux_vdev_t	*vdev_aux;	/* for l2cache vdevs		*/
 	zio_t		*vdev_probe_zio; /* root of current probe	*/
 	vdev_aux_t	vdev_label_aux;	/* on-disk aux state		*/
+	struct trim_map	*vdev_trimmap;
 
 	/*
 	 * For DTrace to work in userland (libzpool) context, these fields must

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Mon Jun 24 15:35:42 2013	(r252162)
@@ -32,6 +32,7 @@
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/avl.h>
+#include <sys/kstat.h>
 #include <sys/fs/zfs.h>
 #include <sys/zio_impl.h>
 
@@ -137,7 +138,8 @@ enum zio_compress {
 #define	ZIO_PRIORITY_RESILVER		(zio_priority_table[9])
 #define	ZIO_PRIORITY_SCRUB		(zio_priority_table[10])
 #define	ZIO_PRIORITY_DDT_PREFETCH	(zio_priority_table[11])
-#define	ZIO_PRIORITY_TABLE_SIZE		12
+#define	ZIO_PRIORITY_TRIM		(zio_priority_table[12])
+#define	ZIO_PRIORITY_TABLE_SIZE		13
 
 #define	ZIO_PIPELINE_CONTINUE		0x100
 #define	ZIO_PIPELINE_STOP		0x101
@@ -367,6 +369,39 @@ typedef struct zio_link {
 	list_node_t	zl_child_node;
 } zio_link_t;
 
+/*
+ * Used for TRIM kstat.
+ */
+typedef struct zio_trim_stats {
+	/*
+	 * Number of bytes successfully TRIMmed.
+	 */
+	kstat_named_t bytes;
+
+	/*
+	 * Number of successful TRIM requests.
+	 */
+	kstat_named_t success;
+
+	/*
+	 * Number of TRIM requests that failed because TRIM is not
+	 * supported.
+	 */
+	kstat_named_t unsupported;
+
+	/*
+	 * Number of TRIM requests that failed for other reasons.
+	 */
+	kstat_named_t failed;
+} zio_trim_stats_t;
+
+extern zio_trim_stats_t zio_trim_stats;
+
+#define ZIO_TRIM_STAT_INCR(stat, val) \
+	atomic_add_64(&zio_trim_stats.stat.value.ui64, (val));
+#define ZIO_TRIM_STAT_BUMP(stat) \
+	ZIO_TRIM_STAT_INCR(stat, 1);
+
 struct zio {
 	/* Core information about this I/O */
 	zbookmark_t	io_bookmark;
@@ -441,6 +476,8 @@ struct zio {
 	/* FreeBSD only. */
 	struct ostask	io_task;
 #endif
+	avl_node_t	io_trim_node;
+	list_node_t	io_trim_link;
 };
 
 extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
@@ -472,8 +509,8 @@ extern zio_t *zio_claim(zio_t *pio, spa_
     zio_done_func_t *done, void *priv, enum zio_flag flags);
 
 extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
-    zio_done_func_t *done, void *priv, int priority,
-    enum zio_flag flags);
+    uint64_t offset, uint64_t size, zio_done_func_t *done, void *priv,
+    int priority, enum zio_flag flags);
 
 extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
     uint64_t size, void *data, int checksum,
@@ -486,12 +523,14 @@ extern zio_t *zio_write_phys(zio_t *pio,
     boolean_t labels);
 
 extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
-    const blkptr_t *bp, enum zio_flag flags);
+    const blkptr_t *bp, uint64_t size, enum zio_flag flags);
 
 extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
     blkptr_t *old_bp, uint64_t size, boolean_t use_slog);
 extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
 extern void zio_flush(zio_t *zio, vdev_t *vd);
+extern zio_t *zio_trim(zio_t *zio, spa_t *spa, vdev_t *vd, uint64_t offset,
+    uint64_t size);
 extern void zio_shrink(zio_t *zio, uint64_t size);
 
 extern int zio_wait(zio_t *zio);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h	Mon Jun 24 15:35:42 2013	(r252162)
@@ -130,9 +130,9 @@ enum zio_stage {
 
 	ZIO_STAGE_READY			= 1 << 16,	/* RWFCI */
 
-	ZIO_STAGE_VDEV_IO_START		= 1 << 17,	/* RW--I */
-	ZIO_STAGE_VDEV_IO_DONE		= 1 << 18,	/* RW--- */
-	ZIO_STAGE_VDEV_IO_ASSESS	= 1 << 19,	/* RW--I */
+	ZIO_STAGE_VDEV_IO_START		= 1 << 17,	/* RWF-I */
+	ZIO_STAGE_VDEV_IO_DONE		= 1 << 18,	/* RWF-- */
+	ZIO_STAGE_VDEV_IO_ASSESS	= 1 << 19,	/* RWF-I */
 
 	ZIO_STAGE_CHECKSUM_VERIFY	= 1 << 20,	/* R---- */
 
@@ -214,7 +214,9 @@ enum zio_stage {
 	(ZIO_INTERLOCK_STAGES |			\
 	ZIO_STAGE_FREE_BP_INIT |		\
 	ZIO_STAGE_ISSUE_ASYNC |			\
-	ZIO_STAGE_DVA_FREE)
+	ZIO_STAGE_DVA_FREE |			\
+	ZIO_STAGE_VDEV_IO_START |		\
+	ZIO_STAGE_VDEV_IO_ASSESS)
 
 #define	ZIO_DDT_FREE_PIPELINE			\
 	(ZIO_INTERLOCK_STAGES |			\

Copied and modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c (from r240868, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c)
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c	Sun Sep 23 19:40:58 2012	(r240868, copy source)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -27,6 +27,30 @@
 #include <sys/spa_impl.h>
 #include <sys/vdev_impl.h>
 #include <sys/trim_map.h>
+#include <sys/time.h>
+
+/*
+ * Calculate the zio end, upgrading based on ashift which would be
+ * done by zio_vdev_io_start.
+ *
+ * This makes free range consolidation much more effective
+ * than it would otherwise be as well as ensuring that entire
+ * blocks are invalidated by writes.
+ */
+#define	TRIM_ZIO_END(vd, offset, size)	(offset +		\
+ 	P2ROUNDUP(size, 1ULL << vd->vdev_top->vdev_ashift))
+
+#define TRIM_MAP_SINC(tm, size)					\
+	atomic_add_64(&(tm)->tm_bytes, (size))
+
+#define TRIM_MAP_SDEC(tm, size)					\
+	atomic_add_64(&(tm)->tm_bytes, -(size))
+
+#define TRIM_MAP_QINC(tm)					\
+	atomic_inc_64(&(tm)->tm_pending);			\
+
+#define TRIM_MAP_QDEC(tm)					\
+	atomic_dec_64(&(tm)->tm_pending);
 
 typedef struct trim_map {
 	list_t		tm_head;		/* List of segments sorted by txg. */
@@ -35,6 +59,8 @@ typedef struct trim_map {
 	avl_tree_t	tm_inflight_writes;	/* AVL tree of in-flight writes. */
 	list_t		tm_pending_writes;	/* Writes blocked on in-flight frees. */
 	kmutex_t	tm_lock;
+	uint64_t	tm_pending;		/* Count of pending TRIMs. */
+	uint64_t	tm_bytes;		/* Total size in bytes of queued TRIMs. */
 } trim_map_t;
 
 typedef struct trim_seg {
@@ -43,16 +69,46 @@ typedef struct trim_seg {
 	uint64_t	ts_start;	/* Starting offset of this segment. */
 	uint64_t	ts_end;		/* Ending offset (non-inclusive). */
 	uint64_t	ts_txg;		/* Segment creation txg. */
+	hrtime_t	ts_time;	/* Segment creation time. */
 } trim_seg_t;
 
-extern boolean_t zfs_notrim;
+extern boolean_t zfs_trim_enabled;
+
+static u_int trim_txg_delay = 32;
+static u_int trim_timeout = 30;
+static u_int trim_max_interval = 1;
+/* Limit outstanding TRIMs to 2G (max size for a single TRIM request) */
+static uint64_t trim_vdev_max_bytes = 2147483648;
+/* Limit outstanding TRIMs to 64 (max ranges for a single TRIM request) */	
+static u_int trim_vdev_max_pending = 64;
 
 SYSCTL_DECL(_vfs_zfs);
-/* Delay TRIMs by that many TXGs. */
-static int trim_txg_limit = 64;
-TUNABLE_INT("vfs.zfs.trim_txg_limit", &trim_txg_limit);
-SYSCTL_INT(_vfs_zfs, OID_AUTO, trim_txg_limit, CTLFLAG_RW, &trim_txg_limit, 0,
-    "Delay TRIMs by that many TXGs.");
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RD, 0, "ZFS TRIM");
+
+TUNABLE_INT("vfs.zfs.trim.txg_delay", &trim_txg_delay);
+SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, txg_delay, CTLFLAG_RWTUN, &trim_txg_delay,
+    0, "Delay TRIMs by up to this many TXGs");
+
+TUNABLE_INT("vfs.zfs.trim.timeout", &trim_timeout);
+SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, timeout, CTLFLAG_RWTUN, &trim_timeout, 0,
+    "Delay TRIMs by up to this many seconds");
+
+TUNABLE_INT("vfs.zfs.trim.max_interval", &trim_max_interval);
+SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, max_interval, CTLFLAG_RWTUN,
+    &trim_max_interval, 0,
+    "Maximum interval between TRIM queue processing (seconds)");
+
+SYSCTL_DECL(_vfs_zfs_vdev);
+TUNABLE_QUAD("vfs.zfs.vdev.trim_max_bytes", &trim_vdev_max_bytes);
+SYSCTL_QUAD(_vfs_zfs_vdev, OID_AUTO, trim_max_bytes, CTLFLAG_RWTUN,
+    &trim_vdev_max_bytes, 0,
+    "Maximum pending TRIM bytes for a vdev");
+
+TUNABLE_INT("vfs.zfs.vdev.trim_max_pending", &trim_vdev_max_pending);
+SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, trim_max_pending, CTLFLAG_RWTUN,
+    &trim_vdev_max_pending, 0,
+    "Maximum pending TRIM segments for a vdev");
+
 
 static void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd);
 
@@ -101,7 +157,7 @@ trim_map_create(vdev_t *vd)
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
-	if (zfs_notrim)
+	if (!zfs_trim_enabled)
 		return;
 
 	tm = kmem_zalloc(sizeof (*tm), KM_SLEEP);
@@ -127,7 +183,7 @@ trim_map_destroy(vdev_t *vd)
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
-	if (zfs_notrim)
+	if (!zfs_trim_enabled)
 		return;
 
 	tm = vd->vdev_trimmap;
@@ -146,6 +202,8 @@ trim_map_destroy(vdev_t *vd)
 		avl_remove(&tm->tm_queued_frees, ts);
 		list_remove(&tm->tm_head, ts);
 		kmem_free(ts, sizeof (*ts));
+		TRIM_MAP_SDEC(tm, ts->ts_end - ts->ts_start);
+		TRIM_MAP_QDEC(tm);
 	}
 	mutex_exit(&tm->tm_lock);
 
@@ -165,10 +223,12 @@ trim_map_segment_add(trim_map_t *tm, uin
 	avl_index_t where;
 	trim_seg_t tsearch, *ts_before, *ts_after, *ts;
 	boolean_t merge_before, merge_after;
+	hrtime_t time;
 
 	ASSERT(MUTEX_HELD(&tm->tm_lock));
 	VERIFY(start < end);
 
+	time = gethrtime();
 	tsearch.ts_start = start;
 	tsearch.ts_end = end;
 
@@ -184,25 +244,36 @@ trim_map_segment_add(trim_map_t *tm, uin
 	ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE);
 	ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER);
 
-	merge_before = (ts_before != NULL && ts_before->ts_end == start &&
-	    ts_before->ts_txg == txg);
-	merge_after = (ts_after != NULL && ts_after->ts_start == end &&
-	    ts_after->ts_txg == txg);
+	merge_before = (ts_before != NULL && ts_before->ts_end == start);
+	merge_after = (ts_after != NULL && ts_after->ts_start == end);
 
 	if (merge_before && merge_after) {
+		TRIM_MAP_SINC(tm, ts_after->ts_start - ts_before->ts_end);
+		TRIM_MAP_QDEC(tm);
 		avl_remove(&tm->tm_queued_frees, ts_before);
 		list_remove(&tm->tm_head, ts_before);
 		ts_after->ts_start = ts_before->ts_start;
+		ts_after->ts_txg = txg;
+		ts_after->ts_time = time;
 		kmem_free(ts_before, sizeof (*ts_before));
 	} else if (merge_before) {
+		TRIM_MAP_SINC(tm, end - ts_before->ts_end);
 		ts_before->ts_end = end;
+		ts_before->ts_txg = txg;
+		ts_before->ts_time = time;
 	} else if (merge_after) {
+		TRIM_MAP_SINC(tm, ts_after->ts_start - start);
 		ts_after->ts_start = start;
+		ts_after->ts_txg = txg;
+		ts_after->ts_time = time;
 	} else {
+		TRIM_MAP_SINC(tm, end - start);
+		TRIM_MAP_QINC(tm);
 		ts = kmem_alloc(sizeof (*ts), KM_SLEEP);
 		ts->ts_start = start;
 		ts->ts_end = end;
 		ts->ts_txg = txg;
+		ts->ts_time = time;
 		avl_insert(&tm->tm_queued_frees, ts, where);
 		list_insert_tail(&tm->tm_head, ts);
 	}
@@ -220,14 +291,17 @@ trim_map_segment_remove(trim_map_t *tm, 
 	left_over = (ts->ts_start < start);
 	right_over = (ts->ts_end > end);
 
+	TRIM_MAP_SDEC(tm, end - start);
 	if (left_over && right_over) {
 		nts = kmem_alloc(sizeof (*nts), KM_SLEEP);
 		nts->ts_start = end;
 		nts->ts_end = ts->ts_end;
 		nts->ts_txg = ts->ts_txg;
+		nts->ts_time = ts->ts_time;
 		ts->ts_end = start;
 		avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER);
 		list_insert_after(&tm->tm_head, ts, nts);
+		TRIM_MAP_QINC(tm);
 	} else if (left_over) {
 		ts->ts_end = start;
 	} else if (right_over) {
@@ -235,6 +309,7 @@ trim_map_segment_remove(trim_map_t *tm, 
 	} else {
 		avl_remove(&tm->tm_queued_frees, ts);
 		list_remove(&tm->tm_head, ts);
+		TRIM_MAP_QDEC(tm);
 		kmem_free(ts, sizeof (*ts));
 	}
 }
@@ -261,17 +336,15 @@ trim_map_free_locked(trim_map_t *tm, uin
 }
 
 void
-trim_map_free(zio_t *zio)
+trim_map_free(vdev_t *vd, uint64_t offset, uint64_t size, uint64_t txg)
 {
-	vdev_t *vd = zio->io_vd;
 	trim_map_t *tm = vd->vdev_trimmap;
 
-	if (zfs_notrim || vd->vdev_notrim || tm == NULL)
+	if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL)
 		return;
 
 	mutex_enter(&tm->tm_lock);
-	trim_map_free_locked(tm, zio->io_offset, zio->io_offset + zio->io_size,
-	    vd->vdev_spa->spa_syncing_txg);
+	trim_map_free_locked(tm, offset, TRIM_ZIO_END(vd, offset, size), txg);
 	mutex_exit(&tm->tm_lock);
 }
 
@@ -284,11 +357,11 @@ trim_map_write_start(zio_t *zio)
 	boolean_t left_over, right_over;
 	uint64_t start, end;
 
-	if (zfs_notrim || vd->vdev_notrim || tm == NULL)
+	if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL)
 		return (B_TRUE);
 
 	start = zio->io_offset;
-	end = start + zio->io_size;
+	end = TRIM_ZIO_END(zio->io_vd, start, zio->io_size);
 	tsearch.ts_start = start;
 	tsearch.ts_end = end;
 
@@ -331,7 +404,7 @@ trim_map_write_done(zio_t *zio)
 	 * Don't check for vdev_notrim, since the write could have
 	 * started before vdev_notrim was set.
 	 */
-	if (zfs_notrim || tm == NULL)
+	if (!zfs_trim_enabled || tm == NULL)
 		return;
 
 	mutex_enter(&tm->tm_lock);
@@ -348,19 +421,25 @@ trim_map_write_done(zio_t *zio)
 }
 
 /*
- * Return the oldest segment (the one with the lowest txg) or false if
- * the list is empty or the first element's txg is greater than txg given
- * as function argument.
+ * Return the oldest segment (the one with the lowest txg / time) or NULL if:
+ * 1. The list is empty
+ * 2. The first element's txg is greater than txgsafe
+ * 3. The first element's txg is not greater than the txg argument and the
+ *    the first element's time is not greater than time argument
  */
 static trim_seg_t *
-trim_map_first(trim_map_t *tm, uint64_t txg)
+trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time)
 {
 	trim_seg_t *ts;
 
 	ASSERT(MUTEX_HELD(&tm->tm_lock));
+	VERIFY(txgsafe >= txg);
 
 	ts = list_head(&tm->tm_head);
-	if (ts != NULL && ts->ts_txg <= txg)
+	if (ts != NULL && ts->ts_txg <= txgsafe &&
+	    (ts->ts_txg <= txg || ts->ts_time <= time ||
+	    tm->tm_bytes > trim_vdev_max_bytes ||
+	    tm->tm_pending > trim_vdev_max_pending))
 		return (ts);
 	return (NULL);
 }
@@ -370,26 +449,37 @@ trim_map_vdev_commit(spa_t *spa, zio_t *
 {
 	trim_map_t *tm = vd->vdev_trimmap;
 	trim_seg_t *ts;
-	uint64_t start, size, txglimit;
+	uint64_t size, txgtarget, txgsafe;
+	hrtime_t timelimit;
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
 	if (tm == NULL)
 		return;
 
-	txglimit = MIN(spa->spa_syncing_txg, spa_freeze_txg(spa)) -
-	    trim_txg_limit;
+	timelimit = gethrtime() - trim_timeout * NANOSEC;
+	if (vd->vdev_isl2cache) {
+		txgsafe = UINT64_MAX;
+		txgtarget = UINT64_MAX;
+	} else {
+		txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa));
+		if (txgsafe > trim_txg_delay)
+			txgtarget = txgsafe - trim_txg_delay;
+		else
+			txgtarget = 0;
+	}
 
 	mutex_enter(&tm->tm_lock);
-	/*
-	 * Loop until we send all frees up to the txglimit.
-	 */
-	while ((ts = trim_map_first(tm, txglimit)) != NULL) {
+	/* Loop until we have sent all outstanding free's */
+	while ((ts = trim_map_first(tm, txgtarget, txgsafe, timelimit))
+	    != NULL) {
 		list_remove(&tm->tm_head, ts);
 		avl_remove(&tm->tm_queued_frees, ts);
 		avl_add(&tm->tm_inflight_frees, ts);
-		zio_nowait(zio_trim(zio, spa, vd, ts->ts_start,
-		    ts->ts_end - ts->ts_start));
+		size = ts->ts_end - ts->ts_start;
+		zio_nowait(zio_trim(zio, spa, vd, ts->ts_start, size));
+		TRIM_MAP_SDEC(tm, size);
+		TRIM_MAP_QDEC(tm);
 	}
 	mutex_exit(&tm->tm_lock);
 }
@@ -434,7 +524,7 @@ trim_map_commit(spa_t *spa, zio_t *zio, 
 {
 	int c;
 
-	if (vd == NULL || spa->spa_syncing_txg <= trim_txg_limit)
+	if (vd == NULL)
 		return;
 
 	if (vd->vdev_ops->vdev_op_leaf) {
@@ -467,6 +557,11 @@ trim_thread(void *arg)
 	spa_t *spa = arg;
 	zio_t *zio;
 
+#ifdef _KERNEL
+	(void) snprintf(curthread->td_name, sizeof(curthread->td_name),
+	    "trim %s", spa_name(spa));
+#endif
+
 	for (;;) {
 		mutex_enter(&spa->spa_trim_lock);
 		if (spa->spa_trim_thread == NULL) {
@@ -475,7 +570,9 @@ trim_thread(void *arg)
 			mutex_exit(&spa->spa_trim_lock);
 			thread_exit();
 		}
-		cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock);
+
+		(void) cv_timedwait(&spa->spa_trim_cv, &spa->spa_trim_lock,
+		    hz * trim_max_interval);
 		mutex_exit(&spa->spa_trim_lock);
 
 		zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
@@ -492,7 +589,7 @@ void
 trim_thread_create(spa_t *spa)
 {
 
-	if (zfs_notrim)
+	if (!zfs_trim_enabled)
 		return;
 
 	mutex_init(&spa->spa_trim_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -507,7 +604,7 @@ void
 trim_thread_destroy(spa_t *spa)
 {
 
-	if (zfs_notrim)
+	if (!zfs_trim_enabled)
 		return;
 	if (spa->spa_trim_thread == NULL)
 		return;
@@ -530,7 +627,7 @@ void
 trim_thread_wakeup(spa_t *spa)
 {
 
-	if (zfs_notrim)
+	if (!zfs_trim_enabled)
 		return;
 	if (spa->spa_trim_thread == NULL)
 		return;

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -43,6 +43,7 @@
 #include <sys/arc.h>
 #include <sys/zil.h>
 #include <sys/dsl_scan.h>
+#include <sys/trim_map.h>
 
 SYSCTL_DECL(_vfs_zfs);
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
@@ -1196,6 +1197,11 @@ vdev_open(vdev_t *vd)
 	if (vd->vdev_ishole || vd->vdev_ops == &vdev_missing_ops)
 		return (0);
 
+	if (vd->vdev_ops->vdev_op_leaf) {
+		vd->vdev_notrim = B_FALSE;
+		trim_map_create(vd);
+	}
+
 	for (int c = 0; c < vd->vdev_children; c++) {
 		if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) {
 			vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
@@ -1441,6 +1447,9 @@ vdev_close(vdev_t *vd)
 
 	vdev_cache_purge(vd);
 
+	if (vd->vdev_ops->vdev_op_leaf)
+		trim_map_destroy(vd);
+
 	/*
 	 * We record the previous state before we close it, so that if we are
 	 * doing a reopen(), we don't generate FMA ereports if we notice that

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -49,14 +49,17 @@ struct g_class zfs_vdev_class = {
 
 DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
 
-/*
- * Don't send BIO_FLUSH.
- */
+SYSCTL_DECL(_vfs_zfs_vdev);
+/* Don't send BIO_FLUSH. */
 static int vdev_geom_bio_flush_disable = 0;
 TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
-SYSCTL_DECL(_vfs_zfs_vdev);
 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
     &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
+/* Don't send BIO_DELETE. */
+static int vdev_geom_bio_delete_disable = 0;
+TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
+    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
 
 static void
 vdev_geom_orphan(struct g_consumer *cp)
@@ -663,8 +666,8 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
 	*ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
 
 	/*
-	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
-	 * try again.
+	 * Clear the nowritecache settings, so that on a vdev_reopen()
+	 * we will try again.
 	 */
 	vd->vdev_nowritecache = B_FALSE;
 
@@ -710,6 +713,15 @@ vdev_geom_io_intr(struct bio *bp)
 		 */
 		vd->vdev_nowritecache = B_TRUE;
 	}
+	if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) {
+		/*
+		 * If we get ENOTSUP, we know that no future
+		 * attempts will ever succeed.  In this case we
+		 * set a persistent bit so that we don't bother
+		 * with the ioctl in the future.
+		 */
+		vd->vdev_notrim = B_TRUE;
+	}
 	if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
 		/*
 		 * If provider's error is set we assume it is being
@@ -752,17 +764,21 @@ vdev_geom_io_start(zio_t *zio)
 		}
 
 		switch (zio->io_cmd) {
-
 		case DKIOCFLUSHWRITECACHE:
-
 			if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
 				break;
-
 			if (vd->vdev_nowritecache) {
 				zio->io_error = ENOTSUP;
 				break;
 			}
-
+			goto sendreq;
+		case DKIOCTRIM:
+			if (vdev_geom_bio_delete_disable)
+				break;
+			if (vd->vdev_notrim) {
+				zio->io_error = ENOTSUP;
+				break;
+			}
 			goto sendreq;
 		default:
 			zio->io_error = ENOTSUP;
@@ -787,11 +803,21 @@ sendreq:
 		bp->bio_length = zio->io_size;
 		break;
 	case ZIO_TYPE_IOCTL:
-		bp->bio_cmd = BIO_FLUSH;
-		bp->bio_flags |= BIO_ORDERED;
-		bp->bio_data = NULL;
-		bp->bio_offset = cp->provider->mediasize;
-		bp->bio_length = 0;
+		switch (zio->io_cmd) {
+		case DKIOCFLUSHWRITECACHE:
+			bp->bio_cmd = BIO_FLUSH;
+			bp->bio_flags |= BIO_ORDERED;
+			bp->bio_data = NULL;
+			bp->bio_offset = cp->provider->mediasize;
+			bp->bio_length = 0;
+			break;
+		case DKIOCTRIM:
+			bp->bio_cmd = BIO_DELETE;
+			bp->bio_data = NULL;
+			bp->bio_offset = zio->io_offset;
+			bp->bio_length = zio->io_size;
+			break;
+		}
 		break;
 	}
 	bp->bio_done = vdev_geom_io_intr;

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -145,8 +145,14 @@
 #include <sys/metaslab.h>
 #include <sys/zio.h>
 #include <sys/dsl_scan.h>
+#include <sys/trim_map.h>
 #include <sys/fs/zfs.h>
 
+static boolean_t vdev_trim_on_init = B_TRUE;
+SYSCTL_DECL(_vfs_zfs_vdev);
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, trim_on_init, CTLFLAG_RW,
+    &vdev_trim_on_init, 0, "Enable/disable full vdev trim on initialisation");
+
 /*
  * Basic routines to read and write from a vdev label.
  * Used throughout the rest of this file.
@@ -718,6 +724,16 @@ vdev_label_init(vdev_t *vd, uint64_t crt
 	}
 
 	/*
+	 * TRIM the whole thing so that we start with a clean slate.
+	 * It's just an optimization, so we don't care if it fails.
+	 * Don't TRIM if removing so that we don't interfere with zpool
+	 * disaster recovery.
+	 */
+	if (zfs_trim_enabled && vdev_trim_on_init && (reason == VDEV_LABEL_CREATE ||
+	    reason == VDEV_LABEL_SPARE || reason == VDEV_LABEL_L2CACHE))
+		zio_wait(zio_trim(NULL, spa, vd, 0, vd->vdev_psize));
+
+	/*
 	 * Initialize its label.
 	 */
 	vp = zio_buf_alloc(sizeof (vdev_phys_t));
@@ -1282,5 +1298,10 @@ vdev_config_sync(vdev_t **svd, int svdco
 	 * to disk to ensure that all odd-label updates are committed to
 	 * stable storage before the next transaction group begins.
 	 */
-	return (vdev_label_sync_list(spa, 1, txg, flags));
+	if ((error = vdev_label_sync_list(spa, 1, txg, flags)) != 0)
+		return (error);
+
+	trim_thread_wakeup(spa);
+
+	return (0);
 }

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -293,10 +293,11 @@ vdev_mirror_io_start(zio_t *zio)
 		c = vdev_mirror_child_select(zio);
 		children = (c >= 0);
 	} else {
-		ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+		ASSERT(zio->io_type == ZIO_TYPE_WRITE ||
+		    zio->io_type == ZIO_TYPE_FREE);
 
 		/*
-		 * Writes go to all children.
+		 * Writes and frees go to all children.
 		 */
 		c = 0;
 		children = mm->mm_children;
@@ -377,6 +378,8 @@ vdev_mirror_io_done(zio_t *zio)
 				zio->io_error = vdev_mirror_worst_error(mm);
 		}
 		return;
+	} else if (zio->io_type == ZIO_TYPE_FREE) {
+		return;
 	}
 
 	ASSERT(zio->io_type == ZIO_TYPE_READ);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c	Mon Jun 24 13:36:16 2013	(r252161)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c	Mon Jun 24 15:35:42 2013	(r252162)
@@ -259,7 +259,9 @@ vdev_raidz_map_free(raidz_map_t *rm)
 	size_t size;
 
 	for (c = 0; c < rm->rm_firstdatacol; c++) {
-		zio_buf_free(rm->rm_col[c].rc_data, rm->rm_col[c].rc_size);
+		if (rm->rm_col[c].rc_data != NULL)
+			zio_buf_free(rm->rm_col[c].rc_data,
+			    rm->rm_col[c].rc_size);
 
 		if (rm->rm_col[c].rc_gdata != NULL)
 			zio_buf_free(rm->rm_col[c].rc_gdata,
@@ -504,14 +506,20 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_
 	ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << unit_shift);
 	ASSERT3U(rm->rm_nskip, <=, nparity);
 
-	for (c = 0; c < rm->rm_firstdatacol; c++)
-		rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size);
-
-	rm->rm_col[c].rc_data = zio->io_data;
+	if (zio->io_type != ZIO_TYPE_FREE) {
+		for (c = 0; c < rm->rm_firstdatacol; c++) {
+			rm->rm_col[c].rc_data =
+			    zio_buf_alloc(rm->rm_col[c].rc_size);
+		}
 
-	for (c = c + 1; c < acols; c++)
-		rm->rm_col[c].rc_data = (char *)rm->rm_col[c - 1].rc_data +
-		    rm->rm_col[c - 1].rc_size;
+		rm->rm_col[c].rc_data = zio->io_data;
+
+		for (c = c + 1; c < acols; c++) {
+			rm->rm_col[c].rc_data =
+			    (char *)rm->rm_col[c - 1].rc_data +
+			    rm->rm_col[c - 1].rc_size;
+		}
+	}
 
 	/*
 	 * If all data stored spans all columns, there's a danger that parity
@@ -1536,6 +1544,18 @@ vdev_raidz_io_start(zio_t *zio)
 
 	ASSERT3U(rm->rm_asize, ==, vdev_psize_to_asize(vd, zio->io_size));

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201306241535.r5OFZho3039275>