FreeBSD Mail Archives

Date:      Fri, 14 Apr 2017 18:15:47 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-vendor@freebsd.org
Subject:   svn commit: r316905 - in vendor-sys/illumos/dist/uts/common/fs/zfs: . sys
Message-ID:  <201704141815.v3EIFlA4066394@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help

Author: avg
Date: Fri Apr 14 18:15:47 2017
New Revision: 316905
URL: https://svnweb.freebsd.org/changeset/base/316905

Log:
  7740 fix for 6513 only works in hole punching case, not truncation
  
  illumos/illumos-gate@7de35a3ed0c2e6d4256bd2fb05b48b3798aaf553
  https://github.com/illumos/illumos-gate/commit/7de35a3ed0c2e6d4256bd2fb05b48b3798aaf553
  
  https://www.illumos.org/issues/7740
    The problem is that dbuf_findbp will return ENOENT if the block it's
    trying to find is beyond the end of the file. If that happens, we assume
    there is no birth time, and so we lose that information when we write
    out new blkptrs. We should teach dbuf_findbp to look for things that are
    beyond the current end, but not beyond the absolute end of the file.
    To verify, create a large file, truncate it to a short length, and then
    write beyond the end. Check with zdb to make sure that there are no
    holes with birth time zero (will appear as gaps).
  
  Reviewed by: Steve Gonczi <steve.gonczi@delphix.com>
  Reviewed by: Matthew Ahrens <mahrens@delphix.com>
  Approved by: Dan McDonald <danmcd@omniti.com>
  Author: Paul Dagnelie <pcd@delphix.com>

Modified:
  vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c
  vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dnode.h

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c	Fri Apr 14 18:15:13 2017	(r316904)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c	Fri Apr 14 18:15:47 2017	(r316905)
@@ -2161,8 +2161,6 @@ static int
 dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
     dmu_buf_impl_t **parentp, blkptr_t **bpp)
 {
-	int nlevels, epbs;
-
 	*parentp = NULL;
 	*bpp = NULL;
 
@@ -2181,17 +2179,35 @@ dbuf_findbp(dnode_t *dn, int level, uint
 		return (0);
 	}
 
-	if (dn->dn_phys->dn_nlevels == 0)
-		nlevels = 1;
-	else
-		nlevels = dn->dn_phys->dn_nlevels;
-
-	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+	int nlevels =
+	    (dn->dn_phys->dn_nlevels == 0) ? 1 : dn->dn_phys->dn_nlevels;
+	int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 
 	ASSERT3U(level * epbs, <, 64);
 	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+	/*
+	 * This assertion shouldn't trip as long as the max indirect block size
+	 * is less than 1M.  The reason for this is that up to that point,
+	 * the number of levels required to address an entire object with blocks
+	 * of size SPA_MINBLOCKSIZE satisfies nlevels * epbs + 1 <= 64.  In
+	 * other words, if N * epbs + 1 > 64, then if (N-1) * epbs + 1 > 55
+	 * (i.e. we can address the entire object), objects will all use at most
+	 * N-1 levels and the assertion won't overflow.  However, once epbs is
+	 * 13, 4 * 13 + 1 = 53, but 5 * 13 + 1 = 66.  Then, 4 levels will not be
+	 * enough to address an entire object, so objects will have 5 levels,
+	 * but then this assertion will overflow.
+	 *
+	 * All this is to say that if we ever increase DN_MAX_INDBLKSHIFT, we
+	 * need to redo this logic to handle overflows.
+	 */
+	ASSERT(level >= nlevels ||
+	    ((nlevels - level - 1) * epbs) +
+	    highbit64(dn->dn_phys->dn_nblkptr) <= 64);
 	if (level >= nlevels ||
-	    (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
+	    blkid >= ((uint64_t)dn->dn_phys->dn_nblkptr <<
+	    ((nlevels - level - 1) * epbs)) ||
+	    (fail_sparse &&
+	    blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
 		/* the buffer has no parent yet */
 		return (SET_ERROR(ENOENT));
 	} else if (level < nlevels-1) {
@@ -2209,6 +2225,8 @@ dbuf_findbp(dnode_t *dn, int level, uint
 		}
 		*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
 		    (blkid & ((1ULL << epbs) - 1));
+		if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))
+			ASSERT(BP_IS_HOLE(*bpp));
 		return (0);
 	} else {
 		/* the block is referenced from the dnode */

Modified: vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dnode.h
==============================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dnode.h	Fri Apr 14 18:15:13 2017	(r316904)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dnode.h	Fri Apr 14 18:15:47 2017	(r316905)
@@ -58,6 +58,12 @@ extern "C" {
  */
 #define	DNODE_SHIFT		9	/* 512 bytes */
 #define	DN_MIN_INDBLKSHIFT	12	/* 4k */
+/*
+ * If we ever increase this value beyond 20, we need to revisit all logic that
+ * does x << level * ebps to handle overflow.  With a 1M indirect block size,
+ * 4 levels of indirect blocks would not be able to guarantee addressing an
+ * entire object, so 5 levels will be used, but 5 * (20 - 7) = 65.
+ */
 #define	DN_MAX_INDBLKSHIFT	17	/* 128k */
 #define	DNODE_BLOCK_SHIFT	14	/* 16k */
 #define	DNODE_CORE_SIZE		64	/* 64 bytes for dnode sans blkptrs */

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201704141815.v3EIFlA4066394>

Header And Logo

Peripheral Links

Site Navigation

Header And Logo

Peripheral Links

Search

Site Navigation