Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 4 Mar 2020 04:36:50 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r358598 - stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Message-ID:  <202003040436.0244aoo1024354@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Wed Mar  4 04:36:50 2020
New Revision: 358598
URL: https://svnweb.freebsd.org/changeset/base/358598

Log:
  MFC r358336: MFZoL: Fix txg_sync_thread hang in scan_exec_io()
  
  When scn->scn_maxinflight_bytes has not been initialized, it is
  possible to hang on the condition variable in scan_exec_io().
  This issue was uncovered by ztest and is only possible when
  deduplication is enabled; it is reached through the following call path.
  
    txg_sync_thread()
      spa_sync()
        ddt_sync_table()
          ddt_sync_entry()
            dsl_scan_ddt_entry()
              dsl_scan_scrub_cb()
                dsl_scan_enqueue()
                  scan_exec_io()
                    cv_wait()
  
  Resolve the issue by always initializing scn_maxinflight_bytes
  to a reasonable minimum value.  This value will be recalculated
  in dsl_scan_sync() to pick up changes to zfs_scan_vdev_limit
  and the addition/removal of vdevs.
  
  Reviewed-by: Tom Caputi <tcaputi@datto.com>
  Reviewed-by: George Melikov <mail@gmelikov.ru>
  Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
  Closes #7098
  zfsonlinux/zfs@f90a30ad1b32a971f62a540f8944e42f99b254ce

Modified:
  stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
==============================================================================
--- stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Wed Mar  4 00:22:50 2020	(r358597)
+++ stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c	Wed Mar  4 04:36:50 2020	(r358598)
@@ -125,6 +125,7 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *sc
 static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg);
 static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj);
 static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
+static uint64_t dsl_scan_count_leaves(vdev_t *vd);
 
 extern int zfs_vdev_async_write_active_min_dirty_percent;
 
@@ -439,6 +440,14 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
 	scn->scn_async_destroying = spa_feature_is_active(dp->dp_spa,
 	    SPA_FEATURE_ASYNC_DESTROY);
 
+	/*
+	 * Calculate the max number of in-flight bytes for pool-wide
+	 * scanning operations (minimum 1MB). Limits for the issuing
+	 * phase are done per top-level vdev and are handled separately.
+	 */
+	scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
+	    dsl_scan_count_leaves(spa->spa_root_vdev), 1ULL << 20);
+
 	bcopy(&scn->scn_phys, &scn->scn_phys_cached, sizeof (scn->scn_phys));
 	avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
 	    offsetof(scan_ds_t, sds_node));
@@ -2350,7 +2359,7 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum 
 	zbookmark_phys_t zb = { 0 };
 	int p;
 
-	if (scn->scn_phys.scn_state != DSS_SCANNING)
+	if (!dsl_scan_is_running(scn))
 		return;
 
 	for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
@@ -3333,7 +3342,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 		uint64_t nr_leaves = dsl_scan_count_leaves(spa->spa_root_vdev);
 
 		/*
-		 * Calculate the max number of in-flight bytes for pool-wide
+		 * Recalculate the max number of in-flight bytes for pool-wide
 		 * scanning operations (minimum 1MB). Limits for the issuing
 		 * phase are done per top-level vdev and are handled separately.
 		 */
@@ -3652,6 +3661,8 @@ dsl_scan_scrub_done(zio_t *zio)
 	dsl_scan_io_queue_t *queue = zio->io_private;
 
 	abd_free(zio->io_abd);
+
+	ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
 
 	if (queue == NULL) {
 		mutex_enter(&spa->spa_scrub_lock);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202003040436.0244aoo1024354>