Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 15 Oct 2019 14:29:18 +0000 (UTC)
From:      Andriy Gapon <avg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r353559 - in head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Message-ID:  <201910151429.x9FETIni007235@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: avg
Date: Tue Oct 15 14:29:18 2019
New Revision: 353559
URL: https://svnweb.freebsd.org/changeset/base/353559

Log:
  MFV r353558: 10572 10579 Fix race in dnode_check_slots_free()
  
  illumos/illumos-gate@aa02ea01948372a32cbf08bfc31c72c32e3fc81e
  https://github.com/illumos/illumos-gate/commit/aa02ea01948372a32cbf08bfc31c72c32e3fc81e
  
  10572 Fix race in dnode_check_slots_free()
  https://www.illumos.org/issues/10572
    The Fix from ZoL:
    Currently, dnode_check_slots_free() works by checking dn->dn_type
    in the dnode to determine if the dnode is reclaimable. However,
    there is a small window of time between dnode_free_sync() in the
    first call to dsl_dataset_sync() and when the useraccounting code
    is run when the type is set DMU_OT_NONE, but the dnode is not yet
    evictable, leading to crashes. This patch adds the ability for
    dnodes to track which txg they were last dirtied in and adds a
    check for this before performing the reclaim.
  
    This patch also corrects several instances when dn_dirty_link was
    treated as a list_node_t when it is technically a multilist_node_t.
  
  10579 Don't allow dnode allocation if dn_holds != 0
  https://www.illumos.org/issues/10579
    The fix from ZoL:
    This patch simply fixes a small bug where dnode_hold_impl() could
    attempt to allocate a dnode that was in the process of being freed,
    but which still had active references. This patch simply adds the
    required check.
  
  Author: Tom Caputi <tcaputi@datto.com>
  Reported by:	delphij
  MFC after:	2 weeks
  X-MFC with:	r353176

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Tue Oct 15 14:24:32 2019	(r353558)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Tue Oct 15 14:29:18 2019	(r353559)
@@ -1812,6 +1812,9 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 			    FTAG);
 		}
 	}
+
+	if (tx->tx_txg > dn->dn_dirty_txg)
+		dn->dn_dirty_txg = tx->tx_txg;
 	mutex_exit(&dn->dn_mtx);
 
 	if (db->db_blkid == DMU_SPILL_BLKID)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	Tue Oct 15 14:24:32 2019	(r353558)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	Tue Oct 15 14:29:18 2019	(r353559)
@@ -1249,10 +1249,23 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_
 		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
 		multilist_sublist_remove(list, dn);
 
+		/*
+		 * If we are not doing useraccounting (os_synced_dnodes == NULL)
+		 * we are done with this dnode for this txg. Unset dn_dirty_txg
+		 * if later txgs aren't dirtying it so that future holders do
+		 * not get a stale value. Otherwise, we will do this in
+		 * userquota_updates_task() when processing has completely
+		 * finished for this txg.
+		 */
 		multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
 		if (newlist != NULL) {
 			(void) dnode_add_ref(dn, newlist);
 			multilist_insert(newlist, dn);
+		} else {
+			mutex_enter(&dn->dn_mtx);
+			if (dn->dn_dirty_txg == tx->tx_txg)
+				dn->dn_dirty_txg = 0;
+			mutex_exit(&dn->dn_mtx);
 		}
 
 		dnode_sync(dn, tx);
@@ -1617,6 +1630,8 @@ userquota_updates_task(void *arg)
 				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
 		}
 		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
+		if (dn->dn_dirty_txg == spa_syncing_txg(os->os_spa))
+			dn->dn_dirty_txg = 0;
 		mutex_exit(&dn->dn_mtx);
 
 		multilist_sublist_remove(list, dn);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	Tue Oct 15 14:24:32 2019	(r353558)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c	Tue Oct 15 14:29:18 2019	(r353559)
@@ -146,7 +146,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
 	bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz));
 
 	for (i = 0; i < TXG_SIZE; i++) {
-		list_link_init(&dn->dn_dirty_link[i]);
+		multilist_link_init(&dn->dn_dirty_link[i]);
 		dn->dn_free_ranges[i] = NULL;
 		list_create(&dn->dn_dirty_records[i],
 		    sizeof (dbuf_dirty_record_t),
@@ -156,6 +156,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
 	dn->dn_allocated_txg = 0;
 	dn->dn_free_txg = 0;
 	dn->dn_assigned_txg = 0;
+	dn->dn_dirty_txg = 0;
 	dn->dn_dirtyctx = 0;
 	dn->dn_dirtyctx_firstset = NULL;
 	dn->dn_bonus = NULL;
@@ -194,7 +195,7 @@ dnode_dest(void *arg, void *unused)
 	ASSERT(!list_link_active(&dn->dn_link));
 
 	for (i = 0; i < TXG_SIZE; i++) {
-		ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
+		ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
 		ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
 		list_destroy(&dn->dn_dirty_records[i]);
 		ASSERT0(dn->dn_next_nblkptr[i]);
@@ -209,6 +210,7 @@ dnode_dest(void *arg, void *unused)
 	ASSERT0(dn->dn_allocated_txg);
 	ASSERT0(dn->dn_free_txg);
 	ASSERT0(dn->dn_assigned_txg);
+	ASSERT0(dn->dn_dirty_txg);
 	ASSERT0(dn->dn_dirtyctx);
 	ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL);
 	ASSERT3P(dn->dn_bonus, ==, NULL);
@@ -540,6 +542,7 @@ dnode_destroy(dnode_t *dn)
 	dn->dn_allocated_txg = 0;
 	dn->dn_free_txg = 0;
 	dn->dn_assigned_txg = 0;
+	dn->dn_dirty_txg = 0;
 
 	dn->dn_dirtyctx = 0;
 	if (dn->dn_dirtyctx_firstset != NULL) {
@@ -609,6 +612,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int 
 	ASSERT(dn->dn_type == DMU_OT_NONE);
 	ASSERT0(dn->dn_maxblkid);
 	ASSERT0(dn->dn_allocated_txg);
+	ASSERT0(dn->dn_dirty_txg);
 	ASSERT0(dn->dn_assigned_txg);
 	ASSERT(refcount_is_zero(&dn->dn_tx_holds));
 	ASSERT3U(refcount_count(&dn->dn_holds), <=, 1);
@@ -622,7 +626,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int 
 		ASSERT0(dn->dn_next_bonustype[i]);
 		ASSERT0(dn->dn_rm_spillblk[i]);
 		ASSERT0(dn->dn_next_blksz[i]);
-		ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
+		ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
 		ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
 		ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
 	}
@@ -799,6 +803,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
 	ndn->dn_allocated_txg = odn->dn_allocated_txg;
 	ndn->dn_free_txg = odn->dn_free_txg;
 	ndn->dn_assigned_txg = odn->dn_assigned_txg;
+	ndn->dn_dirty_txg = odn->dn_dirty_txg;
 	ndn->dn_dirtyctx = odn->dn_dirtyctx;
 	ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
 	ASSERT(refcount_count(&odn->dn_tx_holds) == 0);
@@ -865,6 +870,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
 	odn->dn_allocated_txg = 0;
 	odn->dn_free_txg = 0;
 	odn->dn_assigned_txg = 0;
+	odn->dn_dirty_txg = 0;
 	odn->dn_dirtyctx = 0;
 	odn->dn_dirtyctx_firstset = NULL;
 	odn->dn_have_spill = B_FALSE;
@@ -1091,6 +1097,10 @@ dnode_check_slots_free(dnode_children_t *children, int
 {
 	ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
 
+	/*
+	 * If all dnode slots are either already free or
+	 * evictable return B_TRUE.
+	 */
 	for (int i = idx; i < idx + slots; i++) {
 		dnode_handle_t *dnh = &children->dnc_children[i];
 		dnode_t *dn = dnh->dnh_dnode;
@@ -1099,18 +1109,18 @@ dnode_check_slots_free(dnode_children_t *children, int
 			continue;
 		} else if (DN_SLOT_IS_PTR(dn)) {
 			mutex_enter(&dn->dn_mtx);
-			dmu_object_type_t type = dn->dn_type;
+			boolean_t can_free = (dn->dn_type == DMU_OT_NONE &&
+			    refcount_is_zero(&dn->dn_holds) &&
+			    !DNODE_IS_DIRTY(dn));
 			mutex_exit(&dn->dn_mtx);
 
-			if (type != DMU_OT_NONE)
+			if (!can_free)
 				return (B_FALSE);
-
-			continue;
+			else
+				continue;
 		} else {
 			return (B_FALSE);
 		}
-
-		return (B_FALSE);
 	}
 
 	return (B_TRUE);
@@ -1634,7 +1644,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
 	/*
 	 * If we are already marked dirty, we're done.
 	 */
-	if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
+	if (multilist_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
 		multilist_sublist_unlock(mls);
 		return;
 	}

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h	Tue Oct 15 14:24:32 2019	(r353558)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h	Tue Oct 15 14:29:18 2019	(r353559)
@@ -163,7 +163,7 @@ extern "C" {
  * 	dn_allocated_txg
  * 	dn_free_txg
  * 	dn_assigned_txg
- * 	dd_assigned_tx
+ * 	dn_dirty_txg
  * 	dn_notxholds
  * 	dn_dirtyctx
  * 	dn_dirtyctx_firstset

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Tue Oct 15 14:24:32 2019	(r353558)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Tue Oct 15 14:29:18 2019	(r353559)
@@ -303,6 +303,7 @@ struct dnode {
 	uint64_t dn_allocated_txg;
 	uint64_t dn_free_txg;
 	uint64_t dn_assigned_txg;
+	uint64_t dn_dirty_txg;			/* txg dnode was last dirtied */
 	kcondvar_t dn_notxholds;
 	enum dnode_dirtycontext dn_dirtyctx;
 	uint8_t *dn_dirtyctx_firstset;		/* dbg: contents meaningless */
@@ -405,6 +406,9 @@ void dnode_evict_dbufs(dnode_t *dn);
 void dnode_evict_bonus(dnode_t *dn);
 void dnode_free_interior_slots(dnode_t *dn);
 boolean_t dnode_needs_remap(const dnode_t *dn);
+
+#define	DNODE_IS_DIRTY(_dn)						\
+	((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa))
 
 #define	DNODE_IS_CACHEABLE(_dn)						\
 	((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL ||		\



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201910151429.x9FETIni007235>