Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 10 Mar 2016 23:45:23 +0000 (UTC)
From:      Steven Hartland <smh@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-releng@freebsd.org
Subject:   svn commit: r296631 - releng/10.3/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Message-ID:  <201603102345.u2ANjNxX055978@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: smh
Date: Thu Mar 10 23:45:23 2016
New Revision: 296631
URL: https://svnweb.freebsd.org/changeset/base/296631

Log:
  MFS r296629:
  
  ZFS send fails to transmit some holes
  
  PR:		207714
  Approved by:	re (gjb)
  Sponsored by:	Multiplay

Modified:
  releng/10.3/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
  releng/10.3/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
Directory Properties:
  releng/10.3/   (props changed)

Modified: releng/10.3/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
==============================================================================
--- releng/10.3/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Thu Mar 10 23:37:35 2016	(r296630)
+++ releng/10.3/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	Thu Mar 10 23:45:23 2016	(r296631)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
  * Copyright 2014 HybridCluster. All rights reserved.
  */
 
@@ -50,6 +50,12 @@ dmu_object_alloc(objset_t *os, dmu_objec
 		 * reasonably sparse (at most 1/4 full).  Look from the
 		 * beginning once, but after that keep looking from here.
 		 * If we can't find one, just keep going from here.
+		 *
+		 * Note that dmu_traverse depends on the behavior that we use
+		 * multiple blocks of the dnode object before going back to
+		 * reuse objects.  Any change to this algorithm should preserve
+		 * that property or find another solution to the issues
+		 * described in traverse_visitbp.
 		 */
 		if (P2PHASE(object, L2_dnode_count) == 0) {
 			uint64_t offset = restarted ? object << DNODE_SHIFT : 0;

Modified: releng/10.3/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
==============================================================================
--- releng/10.3/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	Thu Mar 10 23:37:35 2016	(r296630)
+++ releng/10.3/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c	Thu Mar 10 23:45:23 2016	(r296631)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2015 Chunwei Chen. All rights reserved.
  */
 
@@ -63,6 +63,7 @@ typedef struct traverse_data {
 	uint64_t td_hole_birth_enabled_txg;
 	blkptr_cb_t *td_func;
 	void *td_arg;
+	boolean_t td_realloc_possible;
 } traverse_data_t;
 
 static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
@@ -232,18 +233,30 @@ traverse_visitbp(traverse_data_t *td, co
 
 	if (bp->blk_birth == 0) {
 		/*
-		 * Since this block has a birth time of 0 it must be a
-		 * hole created before the SPA_FEATURE_HOLE_BIRTH
-		 * feature was enabled.  If SPA_FEATURE_HOLE_BIRTH
-		 * was enabled before the min_txg for this traveral we
-		 * know the hole must have been created before the
-		 * min_txg for this traveral, so we can skip it. If
-		 * SPA_FEATURE_HOLE_BIRTH was enabled after the min_txg
-		 * for this traveral we cannot tell if the hole was
-		 * created before or after the min_txg for this
-		 * traversal, so we cannot skip it.
+		 * Since this block has a birth time of 0 it must be one of
+		 * two things: a hole created before the
+		 * SPA_FEATURE_HOLE_BIRTH feature was enabled, or a hole
+		 * which has always been a hole in an object.
+		 *
+		 * If a file is written sparsely, then the unwritten parts of
+		 * the file were "always holes" -- that is, they have been
+		 * holes since this object was allocated.  However, we (and
+		 * our callers) can not necessarily tell when an object was
+		 * allocated.  Therefore, if it's possible that this object
+		 * was freed and then its object number reused, we need to
+		 * visit all the holes with birth==0.
+		 *
+		 * If it isn't possible that the object number was reused,
+		 * then if SPA_FEATURE_HOLE_BIRTH was enabled before we wrote
+		 * all the blocks we will visit as part of this traversal,
+		 * then this hole must have always existed, so we can skip
+		 * it.  We visit blocks born after (exclusive) td_min_txg.
+		 *
+		 * Note that the meta-dnode cannot be reallocated.
 		 */
-		if (td->td_hole_birth_enabled_txg < td->td_min_txg)
+		if ((!td->td_realloc_possible ||
+		    zb->zb_object == DMU_META_DNODE_OBJECT) &&
+		    td->td_hole_birth_enabled_txg <= td->td_min_txg)
 			return (0);
 	} else if (bp->blk_birth <= td->td_min_txg) {
 		return (0);
@@ -338,6 +351,15 @@ traverse_visitbp(traverse_data_t *td, co
 		objset_phys_t *osp = buf->b_data;
 		prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset,
 		    DMU_META_DNODE_OBJECT);
+		/*
+		 * See the block comment above for the goal of this variable.
+		 * If the maxblkid of the meta-dnode is 0, then we know that
+		 * we've never had more than DNODES_PER_BLOCK objects in the
+		 * dataset, which means we can't have reused any object ids.
+		 */
+		if (osp->os_meta_dnode.dn_maxblkid == 0)
+			td->td_realloc_possible = B_FALSE;
+
 		if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
 			prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
 			    zb->zb_objset, DMU_GROUPUSED_OBJECT);
@@ -544,12 +566,13 @@ traverse_impl(spa_t *spa, dsl_dataset_t 
 	td.td_pfd = &pd;
 	td.td_flags = flags;
 	td.td_paused = B_FALSE;
+	td.td_realloc_possible = (txg_start == 0 ? B_FALSE : B_TRUE);
 
 	if (spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
 		VERIFY(spa_feature_enabled_txg(spa,
 		    SPA_FEATURE_HOLE_BIRTH, &td.td_hole_birth_enabled_txg));
 	} else {
-		td.td_hole_birth_enabled_txg = 0;
+		td.td_hole_birth_enabled_txg = UINT64_MAX;
 	}
 
 	pd.pd_flags = flags;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201603102345.u2ANjNxX055978>