Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 27 May 2014 19:46:11 +0000 (UTC)
From:      Xin LI <delphij@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r266771 - in head: cddl/contrib/opensolaris/cmd/zfs sys/cddl/contrib/opensolaris/common/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs...
Message-ID:  <201405271946.s4RJkBGX087468@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: delphij
Date: Tue May 27 19:46:11 2014
New Revision: 266771
URL: http://svnweb.freebsd.org/changeset/base/266771

Log:
  MFV r266766:
  
  Add a new zfs property, "redundant_metadata" which can have values "all" or
  "most".  The default will be "all", which is the current behavior.  When set
  to all, ZFS stores an extra copy of all metadata.  If a single on-disk block
  is corrupt, at worst a single block of user data (which is recordsize bytes
  long) can be lost.
  
  Setting to "most" will cause us to only store 1 copy of level-1 indirect
  blocks of user data files.  This can improve performance of random writes,
  because less metadata has to be written.  In practice, at worst about
  100 blocks (of recordsize bytes each) of user data can be lost if a single
  on-disk block is corrupt.
  
  The exact behavior of which metadata blocks are stored redundantly may change
  in future releases.
  
  Illumos issue: 3835 zfs need not store 2 copies of all metadata
  
  MFC after:	2 weeks

Modified:
  head/cddl/contrib/opensolaris/cmd/zfs/zfs.8
  head/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
  head/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
Directory Properties:
  head/cddl/contrib/opensolaris/   (props changed)
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/cddl/contrib/opensolaris/cmd/zfs/zfs.8
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zfs/zfs.8	Tue May 27 19:39:57 2014	(r266770)
+++ head/cddl/contrib/opensolaris/cmd/zfs/zfs.8	Tue May 27 19:46:11 2014	(r266771)
@@ -18,7 +18,7 @@
 .\" information: Portions Copyright [yyyy] [name of copyright owner]
 .\"
 .\" Copyright (c) 2010, Sun Microsystems, Inc. All Rights Reserved.
-.\" Copyright (c) 2013 by Delphix. All rights reserved.
+.\" Copyright (c) 2014 by Delphix. All rights reserved.
 .\" Copyright (c) 2011, Pawel Jakub Dawidek <pjd@FreeBSD.org>
 .\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org>
 .\" Copyright (c) 2012, Bryan Drewery <bdrewery@FreeBSD.org>
@@ -30,7 +30,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 23, 2014
+.Dd May 27, 2014
 .Dt ZFS 8
 .Os
 .Sh NAME
@@ -1193,6 +1193,53 @@ affects only files created afterward; ex
 .Pp
 This property can also be referred to by its shortened column name,
 .Sy recsize .
+.It Sy redundant_metadata Ns = Ns Cm all | most
+Controls what types of metadata are stored redundantly.
+ZFS stores an extra copy of metadata, so that if a single block is corrupted,
+the amount of user data lost is limited.
+This extra copy is in addition to any redundancy provided at the pool level
+.Pq e.g. by mirroring or RAID-Z ,
+and is in addition to an extra copy specified by the
+.Sy copies
+property
+.Pq up to a total of 3 copies .
+For example, if the pool is mirrored,
+.Cm copies Ns = Ns Ar 2 ,
+and
+.Cm redundant_metadata Ns = Ns Ar most ,
+then ZFS
+stores 6 copies of most metadata, and 4 copies of data and some
+metadata.
+.Pp
+When set to
+.Cm all ,
+ZFS stores an extra copy of all metadata.
+If a
+single on-disk block is corrupt, at worst a single block of user data
+.Po which is
+.Cm recordsize
+bytes long
+.Pc
+can be lost.
+.Pp
+When set to
+.Cm most ,
+ZFS stores an extra copy of most types of
+metadata.
+This can improve performance of random writes, because less
+metadata must be written.
+In practice, at worst about 100 blocks
+.Po of
+.Cm recordsize
+bytes each
+.Pc
+of user data can be lost if a single
+on-disk block is corrupt.
+The exact behavior of which metadata blocks
+are stored redundantly may change in future releases.
+.Pp
+The default value is
+.Cm all .
 .It Sy refquota Ns = Ns Ar size | Cm none
 Limits the amount of space a dataset can consume. This property enforces a hard
 limit on the amount of space used. This hard limit does not include space used

Modified: head/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c	Tue May 27 19:39:57 2014	(r266770)
+++ head/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c	Tue May 27 19:46:11 2014	(r266771)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  */
@@ -206,7 +206,18 @@ zfs_prop_init(void)
 		{ NULL }
 	};
 
+	static zprop_index_t redundant_metadata_table[] = {
+		{ "all",	ZFS_REDUNDANT_METADATA_ALL },
+		{ "most",	ZFS_REDUNDANT_METADATA_MOST },
+		{ NULL }
+	};
+
 	/* inherit index properties */
+	zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata",
+	    ZFS_REDUNDANT_METADATA_ALL,
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "all | most", "REDUND_MD",
+	    redundant_metadata_table);
 	zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
 	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
 	    "standard | always | disabled", "SYNC",

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Tue May 27 19:39:57 2014	(r266770)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Tue May 27 19:46:11 2014	(r266771)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 /* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */
 /* Copyright (c) 2013, Joyent, Inc. All rights reserved. */
@@ -1630,6 +1630,12 @@ TUNABLE_INT("vfs.zfs.mdcomp_disable", &z
 SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RW,
     &zfs_mdcomp_disable, 0, "Disable metadata compression");
 
+/*
+ * When the "redundant_metadata" property is set to "most", only indirect
+ * blocks of this level and higher will have an additional ditto block.
+ */
+int zfs_redundant_metadata_most_ditto_level = 2;
+
 void
 dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
 {
@@ -1669,6 +1675,13 @@ dmu_write_policy(objset_t *os, dnode_t *
 		if (zio_checksum_table[checksum].ci_correctable < 1 ||
 		    zio_checksum_table[checksum].ci_eck)
 			checksum = ZIO_CHECKSUM_FLETCHER_4;
+
+		if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
+		    (os->os_redundant_metadata ==
+		    ZFS_REDUNDANT_METADATA_MOST &&
+		    (level >= zfs_redundant_metadata_most_ditto_level ||
+		    DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))))
+			copies++;
 	} else if (wp & WP_NOFILL) {
 		ASSERT(level == 0);
 
@@ -1716,7 +1729,7 @@ dmu_write_policy(objset_t *os, dnode_t *
 	zp->zp_compress = compress;
 	zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
 	zp->zp_level = level;
-	zp->zp_copies = MIN(copies + ismd, spa_max_replication(os->os_spa));
+	zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
 	zp->zp_dedup = dedup;
 	zp->zp_dedup_verify = dedup && dedup_verify;
 	zp->zp_nopwrite = nopwrite;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	Tue May 27 19:39:57 2014	(r266770)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	Tue May 27 19:46:11 2014	(r266771)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  */
@@ -115,13 +115,13 @@ dmu_objset_id(objset_t *os)
 	return (ds ? ds->ds_object : 0);
 }
 
-uint64_t
+zfs_sync_type_t
 dmu_objset_syncprop(objset_t *os)
 {
 	return (os->os_sync);
 }
 
-uint64_t
+zfs_logbias_op_t
 dmu_objset_logbias(objset_t *os)
 {
 	return (os->os_logbias);
@@ -230,6 +230,20 @@ sync_changed_cb(void *arg, uint64_t newv
 }
 
 static void
+redundant_metadata_changed_cb(void *arg, uint64_t newval)
+{
+	objset_t *os = arg;
+
+	/*
+	 * Inheritance and range checking should have been done by now.
+	 */
+	ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
+	    newval == ZFS_REDUNDANT_METADATA_MOST);
+
+	os->os_redundant_metadata = newval;
+}
+
+static void
 logbias_changed_cb(void *arg, uint64_t newval)
 {
 	objset_t *os = arg;
@@ -364,6 +378,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dat
 				    zfs_prop_to_name(ZFS_PROP_SYNC),
 				    sync_changed_cb, os);
 			}
+			if (err == 0) {
+				err = dsl_prop_register(ds,
+				    zfs_prop_to_name(
+				    ZFS_PROP_REDUNDANT_METADATA),
+				    redundant_metadata_changed_cb, os);
+			}
 		}
 		if (err != 0) {
 			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
@@ -377,9 +397,9 @@ dmu_objset_open_impl(spa_t *spa, dsl_dat
 		os->os_compress = ZIO_COMPRESS_LZJB;
 		os->os_copies = spa_max_replication(spa);
 		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
-		os->os_dedup_verify = 0;
-		os->os_logbias = 0;
-		os->os_sync = 0;
+		os->os_dedup_verify = B_FALSE;
+		os->os_logbias = ZFS_LOGBIAS_LATENCY;
+		os->os_sync = ZFS_SYNC_STANDARD;
 		os->os_primary_cache = ZFS_CACHE_ALL;
 		os->os_secondary_cache = ZFS_CACHE_ALL;
 	}
@@ -622,6 +642,9 @@ dmu_objset_evict(objset_t *os)
 			VERIFY0(dsl_prop_unregister(ds,
 			    zfs_prop_to_name(ZFS_PROP_SYNC),
 			    sync_changed_cb, os));
+			VERIFY0(dsl_prop_unregister(ds,
+			    zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
+			    redundant_metadata_changed_cb, os));
 		}
 		VERIFY0(dsl_prop_unregister(ds,
 		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	Tue May 27 19:39:57 2014	(r266770)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	Tue May 27 19:46:11 2014	(r266771)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright 2013 DEY Storage Systems, Inc.
@@ -746,8 +746,8 @@ extern struct dsl_dataset *dmu_objset_ds
 extern void dmu_objset_name(objset_t *os, char *buf);
 extern dmu_objset_type_t dmu_objset_type(objset_t *os);
 extern uint64_t dmu_objset_id(objset_t *os);
-extern uint64_t dmu_objset_syncprop(objset_t *os);
-extern uint64_t dmu_objset_logbias(objset_t *os);
+extern zfs_sync_type_t dmu_objset_syncprop(objset_t *os);
+extern zfs_logbias_op_t dmu_objset_logbias(objset_t *os);
 extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
     uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
 extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h	Tue May 27 19:39:57 2014	(r266770)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h	Tue May 27 19:46:11 2014	(r266771)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  */
 
@@ -85,15 +85,16 @@ struct objset {
 	zilog_t *os_zil;
 
 	/* can change, under dsl_dir's locks: */
-	uint8_t os_checksum;
-	uint8_t os_compress;
+	enum zio_checksum os_checksum;
+	enum zio_compress os_compress;
 	uint8_t os_copies;
-	uint8_t os_dedup_checksum;
-	uint8_t os_dedup_verify;
-	uint8_t os_logbias;
-	uint8_t os_primary_cache;
-	uint8_t os_secondary_cache;
-	uint8_t os_sync;
+	enum zio_checksum os_dedup_checksum;
+	boolean_t os_dedup_verify;
+	zfs_logbias_op_t os_logbias;
+	zfs_cache_type_t os_primary_cache;
+	zfs_cache_type_t os_secondary_cache;
+	zfs_sync_type_t os_sync;
+	zfs_redundant_metadata_type_t os_redundant_metadata;
 
 	/* no lock needed: */
 	struct dmu_tx *os_synctx; /* XXX sketchy */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	Tue May 27 19:39:57 2014	(r266770)
+++ head/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	Tue May 27 19:46:11 2014	(r266771)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved.
@@ -150,6 +150,7 @@ typedef enum {
 	ZFS_PROP_SNAPSHOT_LIMIT,
 	ZFS_PROP_FILESYSTEM_COUNT,
 	ZFS_PROP_SNAPSHOT_COUNT,
+	ZFS_PROP_REDUNDANT_METADATA,
 	ZFS_NUM_PROPS
 } zfs_prop_t;
 
@@ -349,6 +350,11 @@ typedef enum {
 	ZFS_VOLMODE_NONE = 3
 } zfs_volmode_t;
 
+typedef enum {
+	ZFS_REDUNDANT_METADATA_ALL,
+	ZFS_REDUNDANT_METADATA_MOST
+} zfs_redundant_metadata_type_t;
+
 /*
  * On-disk version number.
  */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201405271946.s4RJkBGX087468>