Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 2 Jan 2014 08:10:35 +0000 (UTC)
From:      Xin LI <delphij@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r260185 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID:  <201401020810.s028AZBp086769@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: delphij
Date: Thu Jan  2 08:10:35 2014
New Revision: 260185
URL: http://svnweb.freebsd.org/changeset/base/260185

Log:
  MFV r260155:
  
  When we encounter an I/O error on a piece of metadata while deleting
  a file system or zvol, we don't update the bptree_entry_phys_t's
  bookmark.  This would lead to double free of bp's which will lead to
  space map corruption.
  
  Instead of tolerating and allowing the corruption, panic immediately.
  
  See Illumos #4390 for more details.
  
  4391 panic system rather than corrupting pool if we hit bug 4390
  
  Illumos/illumos-gate@8b36997aa24d9817807faa4efa301ac9c07a2b78
  
  MFC after:	2 weeks

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c	Thu Jan  2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c	Thu Jan  2 08:10:35 2014	(r260185)
@@ -180,6 +180,7 @@ bptree_iterate(objset_t *os, uint64_t ob
 	err = 0;
 	for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
 		bptree_entry_phys_t bte;
+		int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST;
 
 		ASSERT(!free || i == ba.ba_phys->bt_begin);
 
@@ -188,13 +189,13 @@ bptree_iterate(objset_t *os, uint64_t ob
 		if (err != 0)
 			break;
 
+		if (zfs_recover)
+			flags |= TRAVERSE_HARD;
 		err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
-		    bte.be_birth_txg, &bte.be_zb,
-		    TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST,
+		    bte.be_birth_txg, &bte.be_zb, flags,
 		    bptree_visit_cb, &ba);
 		if (free) {
-			ASSERT(err == 0 || err == ERESTART);
-			if (err != 0) {
+			if (err == ERESTART) {
 				/* save bookmark for future resume */
 				ASSERT3U(bte.be_zb.zb_objset, ==,
 				    ZB_DESTROYED_OBJSET);
@@ -202,11 +203,21 @@ bptree_iterate(objset_t *os, uint64_t ob
 				dmu_write(os, obj, i * sizeof (bte),
 				    sizeof (bte), &bte, tx);
 				break;
-			} else {
-				ba.ba_phys->bt_begin++;
-				(void) dmu_free_range(os, obj,
-				    i * sizeof (bte), sizeof (bte), tx);
 			}
+			if (err != 0) {
+				/*
+				 * We can not properly handle an i/o
+				 * error, because the traversal code
+				 * does not know how to resume from an
+				 * arbitrary bookmark.
+				 */
+				zfs_panic_recover("error %u from "
+				    "traverse_dataset_destroyed()", err);
+			}
+
+			ba.ba_phys->bt_begin++;
+			(void) dmu_free_range(os, obj,
+			    i * sizeof (bte), sizeof (bte), tx);
 		}
 	}
 

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	Thu Jan  2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	Thu Jan  2 08:10:35 2014	(r260185)
@@ -383,7 +383,7 @@ traverse_visitbp(traverse_data_t *td, co
 		(void) arc_buf_remove_ref(buf, &buf);
 
 post:
-	if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
+	if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
 		err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
 		if (err == ERESTART)
 			pause = B_TRUE;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Thu Jan  2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Thu Jan  2 08:10:35 2014	(r260185)
@@ -1348,6 +1348,9 @@ dsl_scan_free_should_pause(dsl_scan_t *s
 {
 	uint64_t elapsed_nanosecs;
 
+	if (zfs_recover)
+		return (B_FALSE);
+
 	elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
 	return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
 	    (NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms &&

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Thu Jan  2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Thu Jan  2 08:10:35 2014	(r260185)
@@ -252,6 +252,8 @@ SYSCTL_INT(_debug, OID_AUTO, zfs_flags, 
  * zfs_recover can be set to nonzero to attempt to recover from
  * otherwise-fatal errors, typically caused by on-disk corruption.  When
  * set, calls to zfs_panic_recover() will turn into warning messages.
+ * This should only be used as a last resort, as it typically results
+ * in leaked space, or worse.
  */
 int zfs_recover = 0;
 SYSCTL_DECL(_vfs_zfs);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h	Thu Jan  2 08:02:57 2014	(r260184)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_debug.h	Thu Jan  2 08:10:35 2014	(r260185)
@@ -49,6 +49,7 @@ extern "C" {
 #endif
 
 extern int zfs_flags;
+extern int zfs_recover;
 
 #define	ZFS_DEBUG_DPRINTF	(1<<0)
 #define	ZFS_DEBUG_DBUF_VERIFY	(1<<1)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201401020810.s028AZBp086769>