Date:      Wed, 21 Feb 2018 23:38:30 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r329761 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Message-ID:  <201802212338.w1LNcURV081456@repo.freebsd.org>

Author: mav
Date: Wed Feb 21 23:38:30 2018
New Revision: 329761
URL: https://svnweb.freebsd.org/changeset/base/329761

Log:
  MFV r329760: 7638 Refactor spa_load_impl into several functions
  
  illumos/illumos-gate@1fd3785ff6601d3e391378c2dcbf4c5f27e1fe32
  
  spa_load_impl has grown out of proportion.  It is currently over 700
  lines long, which makes the import process very hard to follow or debug
  even for experienced ZFS developers.  The objective is to split it up
  into a series of well-commented functions.
  
  Reviewed by: Paul Dagnelie <pcd@delphix.com>
  Reviewed by: George Wilson <george.wilson@delphix.com>
  Reviewed by: Matthew Ahrens <mahrens@delphix.com>
  Reviewed by: Andrew Stormont <andyjstormont@gmail.com>
  Approved by: Dan McDonald <danmcd@joyent.com>
  Author: Pavel Zakharov <pavel.zakharov@delphix.com>

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
Directory Properties:
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Wed Feb 21 23:25:11 2018	(r329760)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Wed Feb 21 23:38:30 2018	(r329761)
@@ -2218,7 +2218,7 @@ spa_try_repair(spa_t *spa, nvlist_t *config)
 
 static int
 spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
-    boolean_t mosconfig)
+    boolean_t trust_config)
 {
 	nvlist_t *config = spa->spa_config;
 	char *ereport = FM_EREPORT_ZFS_POOL;
@@ -2262,7 +2262,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_impor
 
 		gethrestime(&spa->spa_loaded_ts);
 		error = spa_load_impl(spa, pool_guid, config, state, type,
-		    mosconfig, &ereport);
+		    trust_config, &ereport);
 	}
 
 	/*
@@ -2314,38 +2314,20 @@ vdev_count_verify_zaps(vdev_t *vd)
 	return (total);
 }
 
-/*
- * Load an existing storage pool, using the pool's builtin spa_config as a
- * source of configuration information.
- */
 static int
-spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
-    spa_load_state_t state, spa_import_type_t type, boolean_t trust_config,
-    char **ereport)
+spa_ld_parse_config(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
+    spa_load_state_t state, spa_import_type_t type)
 {
 	int error = 0;
-	nvlist_t *nvroot = NULL;
-	nvlist_t *label;
-	vdev_t *rvd;
-	uberblock_t *ub = &spa->spa_uberblock;
-	uint64_t children, config_cache_txg = spa->spa_config_txg;
-	int orig_mode = spa->spa_mode;
+	nvlist_t *nvtree = NULL;
 	int parse;
-	uint64_t obj;
-	boolean_t missing_feat_write = B_FALSE;
+	vdev_t *rvd;
 
-	/*
-	 * If this is an untrusted config, access the pool in read-only mode.
-	 * This prevents things like resilvering recently removed devices.
-	 */
-	if (!trust_config)
-		spa->spa_mode = FREAD;
-
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	spa->spa_load_state = state;
 
-	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot))
+	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtree))
 		return (SET_ERROR(EINVAL));
 
 	parse = (type == SPA_IMPORT_EXISTING ?
@@ -2368,7 +2350,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 	 * configuration requires knowing the version number.
 	 */
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
-	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse);
+	error = spa_config_parse(spa, &rvd, nvtree, NULL, 0, parse);
 	spa_config_exit(spa, SCL_ALL, FTAG);
 
 	if (error != 0)
@@ -2382,22 +2364,35 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 		ASSERT(spa_guid(spa) == pool_guid);
 	}
 
-	/*
-	 * Try to open all vdevs, loading each label in the process.
-	 */
+	return (0);
+}
+
+static int
+spa_ld_open_vdevs(spa_t *spa)
+{
+	int error = 0;
+
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
-	error = vdev_open(rvd);
+	error = vdev_open(spa->spa_root_vdev);
 	spa_config_exit(spa, SCL_ALL, FTAG);
-	if (error != 0)
-		return (error);
 
+	return (error);
+}
+
+static int
+spa_ld_validate_vdevs(spa_t *spa, spa_import_type_t type,
+    boolean_t trust_config)
+{
+	int error = 0;
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	/*
 	 * We need to validate the vdev labels against the configuration that
-	 * we have in hand, which is dependent on the setting of mosconfig. If
-	 * mosconfig is true then we're validating the vdev labels based on
-	 * that config.  Otherwise, we're validating against the cached config
-	 * (zpool.cache) that was read when we loaded the zfs module, and then
-	 * later we will recursively call spa_load() and validate against
+	 * we have in hand, which is dependent on the setting of trust_config.
+	 * If trust_config is true then we're validating the vdev labels based
+	 * on that config.  Otherwise, we're validating against the cached
+	 * config (zpool.cache) that was read when we loaded the zfs module, and
+	 * then later we will recursively call spa_load() and validate against
 	 * the vdev config.
 	 *
 	 * If we're assembling a new pool that's been split off from an
@@ -2416,6 +2411,18 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 			return (SET_ERROR(ENXIO));
 	}
 
+	return (0);
+}
+
+static int
+spa_ld_select_uberblock(spa_t *spa, nvlist_t *config, spa_import_type_t type,
+    boolean_t trust_config)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	nvlist_t *label;
+	uberblock_t *ub = &spa->spa_uberblock;
+	uint64_t children;
+
 	/*
 	 * Find the best uberblock.
 	 */
@@ -2524,18 +2531,28 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 	spa->spa_claim_max_txg = spa->spa_first_txg;
 	spa->spa_prev_software_version = ub->ub_software_version;
 
-	/*
-	 * Everything that we read before we do spa_remove_init() must
-	 * have been rewritten after the last device removal was initiated.
-	 * Otherwise we could be reading from indirect vdevs before
-	 * we have loaded their mappings.
-	 */
+	return (0);
+}
 
+static int
+spa_ld_open_rootbp(spa_t *spa)
+{
+	int error = 0;
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
 	if (error)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
 
+	return (0);
+}
+
+static int
+spa_ld_validate_config(spa_t *spa, spa_import_type_t type)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
@@ -2567,6 +2584,15 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 			return (SET_ERROR(ENXIO));
 	}
 
+	return (0);
+}
+
+static int
+spa_ld_open_indirect_vdev_metadata(spa_t *spa)
+{
+	int error = 0;
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	/*
 	 * Everything that we read before spa_remove_init() must be stored
 	 * on concreted vdevs.  Therefore we do this as early as possible.
@@ -2574,6 +2600,24 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 	if (spa_remove_init(spa) != 0)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
+	/*
+	 * Retrieve information needed to condense indirect vdev mappings.
+	 */
+	error = spa_condense_init(spa);
+	if (error != 0) {
+		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
+	}
+
+	return (0);
+}
+
+static int
+spa_ld_check_features(spa_t *spa, spa_load_state_t state,
+    boolean_t *missing_feat_writep)
+{
+	int error = 0;
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	if (spa_version(spa) >= SPA_VERSION_FEATURES) {
 		boolean_t missing_feat_read = B_FALSE;
 		nvlist_t *unsup_feat, *enabled_feat;
@@ -2603,7 +2647,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 		if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) {
 			if (!spa_features_check(spa, B_TRUE,
 			    unsup_feat, enabled_feat)) {
-				missing_feat_write = B_TRUE;
+				*missing_feat_writep = B_TRUE;
 			}
 		}
 
@@ -2642,7 +2686,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 		 * userland in order to know whether to display the
 		 * abovementioned note.
 		 */
-		if (missing_feat_read || (missing_feat_write &&
+		if (missing_feat_read || (*missing_feat_writep &&
 		    spa_writeable(spa))) {
 			return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
 			    ENOTSUP));
@@ -2675,62 +2719,85 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 	}
 
+	return (0);
+}
+
+static int
+spa_ld_load_special_directories(spa_t *spa)
+{
+	int error = 0;
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	spa->spa_is_initializing = B_TRUE;
 	error = dsl_pool_open(spa->spa_dsl_pool);
 	spa->spa_is_initializing = B_FALSE;
 	if (error != 0)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
-	if (!trust_config) {
-		uint64_t hostid;
-		nvlist_t *policy = NULL;
-		nvlist_t *mos_config;
+	return (0);
+}
 
-		if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0)
-			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+static int
+spa_ld_prepare_for_reload(spa_t *spa, int orig_mode)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
 
-		if (!spa_is_root(spa) && nvlist_lookup_uint64(mos_config,
-		    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
-			char *hostname;
-			unsigned long myhostid = 0;
+	uint64_t hostid;
+	nvlist_t *policy = NULL;
+	nvlist_t *mos_config;
 
-			VERIFY(nvlist_lookup_string(mos_config,
-			    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
+	if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0)
+		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
+	if (!spa_is_root(spa) && nvlist_lookup_uint64(mos_config,
+	    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
+		char *hostname;
+		unsigned long myhostid = 0;
+
+		VERIFY(nvlist_lookup_string(mos_config,
+		    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
+
 #ifdef	_KERNEL
-			myhostid = zone_get_hostid(NULL);
+		myhostid = zone_get_hostid(NULL);
 #else	/* _KERNEL */
-			/*
-			 * We're emulating the system's hostid in userland, so
-			 * we can't use zone_get_hostid().
-			 */
-			(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
+		/*
+		 * We're emulating the system's hostid in userland, so
+		 * we can't use zone_get_hostid().
+		 */
+		(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
 #endif	/* _KERNEL */
-			if (check_hostid && hostid != 0 && myhostid != 0 &&
-			    hostid != myhostid) {
-				nvlist_free(mos_config);
-				cmn_err(CE_WARN, "pool '%s' could not be "
-				    "loaded as it was last accessed by "
-				    "another system (host: %s hostid: 0x%lx). "
-				    "See: http://illumos.org/msg/ZFS-8000-EY",
-				    spa_name(spa), hostname,
-				    (unsigned long)hostid);
-				return (SET_ERROR(EBADF));
-			}
+		if (check_hostid && hostid != 0 && myhostid != 0 &&
+		    hostid != myhostid) {
+			nvlist_free(mos_config);
+			cmn_err(CE_WARN, "pool '%s' could not be "
+			    "loaded as it was last accessed by "
+			    "another system (host: %s hostid: 0x%lx). "
+			    "See: http://illumos.org/msg/ZFS-8000-EY",
+			    spa_name(spa), hostname,
+			    (unsigned long)hostid);
+			return (SET_ERROR(EBADF));
 		}
-		if (nvlist_lookup_nvlist(spa->spa_config,
-		    ZPOOL_REWIND_POLICY, &policy) == 0)
-			VERIFY(nvlist_add_nvlist(mos_config,
-			    ZPOOL_REWIND_POLICY, policy) == 0);
+	}
+	if (nvlist_lookup_nvlist(spa->spa_config,
+	    ZPOOL_REWIND_POLICY, &policy) == 0)
+		VERIFY(nvlist_add_nvlist(mos_config,
+		    ZPOOL_REWIND_POLICY, policy) == 0);
 
-		spa_config_set(spa, mos_config);
-		spa_unload(spa);
-		spa_deactivate(spa);
-		spa_activate(spa, orig_mode);
+	spa_config_set(spa, mos_config);
+	spa_unload(spa);
+	spa_deactivate(spa);
+	spa_activate(spa, orig_mode);
 
-		return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE));
-	}
+	return (0);
+}
 
+static int
+spa_ld_get_props(spa_t *spa)
+{
+	int error = 0;
+	uint64_t obj;
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	/* Grab the secret checksum salt from the MOS. */
 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_CHECKSUM_SALT, 1,
@@ -2822,6 +2889,35 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 	}
 	nvlist_free(mos_config);
 
+	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
+
+	error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object);
+	if (error && error != ENOENT)
+		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+	if (error == 0) {
+		uint64_t autoreplace;
+
+		spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs);
+		spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace);
+		spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
+		spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
+		spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
+		spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
+		    &spa->spa_dedup_ditto);
+
+		spa->spa_autoreplace = (autoreplace != 0);
+	}
+
+	return (0);
+}
+
+static int
+spa_ld_open_aux_vdevs(spa_t *spa, spa_import_type_t type)
+{
+	int error = 0;
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	/*
 	 * If we're assembling the pool from the split-off vdevs of
 	 * an existing pool, we don't want to attach the spares & cache
@@ -2867,27 +2963,15 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 		spa->spa_l2cache.sav_sync = B_TRUE;
 	}
 
-	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
+	return (0);
+}
 
-	error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object);
-	if (error && error != ENOENT)
-		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+static int
+spa_ld_load_vdev_metadata(spa_t *spa, spa_load_state_t state)
+{
+	int error = 0;
+	vdev_t *rvd = spa->spa_root_vdev;
 
-	if (error == 0) {
-		uint64_t autoreplace;
-
-		spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs);
-		spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace);
-		spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
-		spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
-		spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
-		spa_prop_find(spa, ZPOOL_PROP_BOOTSIZE, &spa->spa_bootsize);
-		spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
-		    &spa->spa_dedup_ditto);
-
-		spa->spa_autoreplace = (autoreplace != 0);
-	}
-
 	/*
 	 * If the 'autoreplace' property is set, then post a resource notifying
 	 * the ZFS DE that it should not issue any faults for unopenable
@@ -2909,40 +2993,281 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 	}
 
 	/*
-	 * Load the vdev state for all toplevel vdevs.
+	 * Load the vdev metadata such as metaslabs, DTLs, spacemap object, etc.
 	 */
 	error = vdev_load(rvd);
 	if (error != 0) {
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
 	}
 
-	error = spa_condense_init(spa);
-	if (error != 0) {
-		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
-	}
-
 	/*
-	 * Propagate the leaf DTLs we just loaded all the way up the tree.
+	 * Propagate the leaf DTLs we just loaded all the way up the vdev tree.
 	 */
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
 	spa_config_exit(spa, SCL_ALL, FTAG);
 
-	/*
-	 * Load the DDTs (dedup tables).
-	 */
+	return (0);
+}
+
+static int
+spa_ld_load_dedup_tables(spa_t *spa)
+{
+	int error = 0;
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	error = ddt_load(spa);
 	if (error != 0)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
-	spa_update_dspace(spa);
+	return (0);
+}
 
+static int
+spa_ld_verify_logs(spa_t *spa, spa_import_type_t type, char **ereport)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+
 	if (type != SPA_IMPORT_ASSEMBLE && spa_writeable(spa) &&
 	    spa_check_logs(spa)) {
 		*ereport = FM_EREPORT_ZFS_LOG_REPLAY;
 		return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO));
 	}
 
+	return (0);
+}
+
+static int
+spa_ld_verify_pool_data(spa_t *spa, spa_load_state_t state)
+{
+	int error = 0;
+	vdev_t *rvd = spa->spa_root_vdev;
+
+	/*
+	 * We've successfully opened the pool, verify that we're ready
+	 * to start pushing transactions.
+	 */
+	if (state != SPA_LOAD_TRYIMPORT) {
+		error = spa_load_verify(spa);
+		if (error != 0) {
+			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
+			    error));
+		}
+	}
+
+	return (0);
+}
+
+static void
+spa_ld_claim_log_blocks(spa_t *spa)
+{
+	dmu_tx_t *tx;
+	dsl_pool_t *dp = spa_get_dsl(spa);
+
+	/*
+	 * Claim log blocks that haven't been committed yet.
+	 * This must all happen in a single txg.
+	 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
+	 * invoked from zil_claim_log_block()'s i/o done callback.
+	 * Price of rollback is that we abandon the log.
+	 */
+	spa->spa_claiming = B_TRUE;
+
+	tx = dmu_tx_create_assigned(dp, spa_first_txg(spa));
+	(void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+	    zil_claim, tx, DS_FIND_CHILDREN);
+	dmu_tx_commit(tx);
+
+	spa->spa_claiming = B_FALSE;
+
+	spa_set_log_state(spa, SPA_LOG_GOOD);
+}
+
+static void
+spa_ld_check_for_config_update(spa_t *spa, spa_load_state_t state,
+    int64_t config_cache_txg)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	int need_update = B_FALSE;
+
+	/*
+	 * If the config cache is stale, or we have uninitialized
+	 * metaslabs (see spa_vdev_add()), then update the config.
+	 *
+	 * If this is a verbatim import, trust the current
+	 * in-core spa_config and update the disk labels.
+	 */
+	if (config_cache_txg != spa->spa_config_txg ||
+	    state == SPA_LOAD_IMPORT ||
+	    state == SPA_LOAD_RECOVER ||
+	    (spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
+		need_update = B_TRUE;
+
+	for (int c = 0; c < rvd->vdev_children; c++)
+		if (rvd->vdev_child[c]->vdev_ms_array == 0)
+			need_update = B_TRUE;
+
+	/*
+	 * Update the config cache asychronously in case we're the
+	 * root pool, in which case the config cache isn't writable yet.
+	 */
+	if (need_update)
+		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
+}
+
+/*
+ * Load an existing storage pool, using the config provided. This config
+ * describes which vdevs are part of the pool and is later validated against
+ * partial configs present in each vdev's label and an entire copy of the
+ * config stored in the MOS.
+ */
+static int
+spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
+    spa_load_state_t state, spa_import_type_t type, boolean_t trust_config,
+    char **ereport)
+{
+	int error = 0;
+	uint64_t config_cache_txg = spa->spa_config_txg;
+	int orig_mode = spa->spa_mode;
+	boolean_t missing_feat_write = B_FALSE;
+
+	/*
+	 * If this is an untrusted config, first access the pool in read-only
+	 * mode. We will then retrieve a trusted copy of the config from the MOS
+	 * and use it to reopen the pool in read-write mode.
+	 */
+	if (!trust_config)
+		spa->spa_mode = FREAD;
+
+	/*
+	 * Parse the config provided to create a vdev tree.
+	 */
+	error = spa_ld_parse_config(spa, pool_guid, config, state, type);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Now that we have the vdev tree, try to open each vdev. This involves
+	 * opening the underlying physical device, retrieving its geometry and
+	 * probing the vdev with a dummy I/O. The state of each vdev will be set
+	 * based on the success of those operations. After this we'll be ready
+	 * to read from the vdevs.
+	 */
+	error = spa_ld_open_vdevs(spa);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Read the label of each vdev and make sure that the GUIDs stored
+	 * there match the GUIDs in the config provided.
+	 */
+	error = spa_ld_validate_vdevs(spa, type, trust_config);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Read vdev labels to find the best uberblock (i.e. latest, unless
+	 * spa_load_max_txg is set) and store it in spa_uberblock. We get the
+	 * list of features required to read blkptrs in the MOS from the vdev
+	 * label with the best uberblock and verify that our version of zfs
+	 * supports them all.
+	 */
+	error = spa_ld_select_uberblock(spa, config, type, trust_config);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Pass that uberblock to the dsl_pool layer which will open the root
+	 * blkptr. This blkptr points to the latest version of the MOS and will
+	 * allow us to read its contents.
+	 */
+	error = spa_ld_open_rootbp(spa);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Retrieve the config stored in the MOS and use it to validate the
+	 * config provided. Also extract some information from the MOS config
+	 * to update our vdev tree.
+	 */
+	error = spa_ld_validate_config(spa, type);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Retrieve the mapping of indirect vdevs. Those vdevs were removed
+	 * from the pool and their contents were re-mapped to other vdevs. Note
+	 * that everything that we read before this step must have been
+	 * rewritten on concrete vdevs after the last device removal was
+	 * initiated. Otherwise we could be reading from indirect vdevs before
+	 * we have loaded their mappings.
+	 */
+	error = spa_ld_open_indirect_vdev_metadata(spa);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Retrieve the full list of active features from the MOS and check if
+	 * they are all supported.
+	 */
+	error = spa_ld_check_features(spa, state, &missing_feat_write);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Load several special directories from the MOS needed by the dsl_pool
+	 * layer.
+	 */
+	error = spa_ld_load_special_directories(spa);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * If the config provided is not trusted, discard it and use the config
+	 * from the MOS to reload the pool.
+	 */
+	if (!trust_config) {
+		error = spa_ld_prepare_for_reload(spa, orig_mode);
+		if (error != 0)
+			return (error);
+		return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE));
+	}
+
+	/*
+	 * Retrieve pool properties from the MOS.
+	 */
+	error = spa_ld_get_props(spa);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Retrieve the list of auxiliary devices - cache devices and spares -
+	 * and open them.
+	 */
+	error = spa_ld_open_aux_vdevs(spa, type);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Load the metadata for all vdevs. Also check if unopenable devices
+	 * should be autoreplaced.
+	 */
+	error = spa_ld_load_vdev_metadata(spa, state);
+	if (error != 0)
+		return (error);
+
+	error = spa_ld_load_dedup_tables(spa);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Verify the logs now to make sure we don't have any unexpected errors
+	 * when we claim log blocks later.
+	 */
+	error = spa_ld_verify_logs(spa, type, ereport);
+	if (error != 0)
+		return (error);
+
 	if (missing_feat_write) {
 		ASSERT(state == SPA_LOAD_TRYIMPORT);
 
@@ -2951,24 +3276,34 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 		 * read-only mode but not read-write mode. We now have enough
 		 * information and can return to userland.
 		 */
-		return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
+		return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
+		    ENOTSUP));
 	}
 
 	/*
-	 * We've successfully opened the pool, verify that we're ready
-	 * to start pushing transactions.
+	 * Traverse the last txgs to make sure the pool was left off in a safe
+	 * state. When performing an extreme rewind, we verify the whole pool,
+	 * which can take a very long time.
 	 */
-	if (state != SPA_LOAD_TRYIMPORT) {
-		if (error = spa_load_verify(spa))
-			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
-			    error));
-	}
+	error = spa_ld_verify_pool_data(spa, state);
+	if (error != 0)
+		return (error);
 
+	/*
+	 * Calculate the deflated space for the pool. This must be done before
+	 * we write anything to the pool because we'd need to update the space
+	 * accounting using the deflated sizes.
+	 */
+	spa_update_dspace(spa);
+
+	/*
+	 * We have now retrieved all the information we needed to open the
+	 * pool. If we are importing the pool in read-write mode, a few
+	 * additional steps must be performed to finish the import.
+	 */
 	if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
 	    spa->spa_load_max_txg == UINT64_MAX)) {
-		dmu_tx_t *tx;
-		int need_update = B_FALSE;
-		dsl_pool_t *dp = spa_get_dsl(spa);
+		ASSERT(state != SPA_LOAD_TRYIMPORT);
 
 		/*
 		 * We must check this before we start the sync thread, because
@@ -2982,25 +3317,14 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 		boolean_t condense_in_progress =
 		    (spa->spa_condensing_indirect != NULL);
 
-		ASSERT(state != SPA_LOAD_TRYIMPORT);
-
 		/*
-		 * Claim log blocks that haven't been committed yet.
-		 * This must all happen in a single txg.
-		 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
-		 * invoked from zil_claim_log_block()'s i/o done callback.
-		 * Price of rollback is that we abandon the log.
+		 * Traverse the ZIL and claim all blocks.
 		 */
-		spa->spa_claiming = B_TRUE;
+		spa_ld_claim_log_blocks(spa);
 
-		tx = dmu_tx_create_assigned(dp, spa_first_txg(spa));
-		(void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
-		    zil_claim, tx, DS_FIND_CHILDREN);
-		dmu_tx_commit(tx);
-
-		spa->spa_claiming = B_FALSE;
-
-		spa_set_log_state(spa, SPA_LOG_GOOD);
+		/*
+		 * Kick-off the syncing thread.
+		 */
 		spa->spa_sync_on = B_TRUE;
 		txg_sync_start(spa->spa_dsl_pool);
 
@@ -3008,40 +3332,23 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 		 * Wait for all claims to sync.  We sync up to the highest
 		 * claimed log block birth time so that claimed log blocks
 		 * don't appear to be from the future.  spa_claim_max_txg
-		 * will have been set for us by either zil_check_log_chain()
-		 * (invoked from spa_check_logs()) or zil_claim() above.
+		 * will have been set for us by ZIL traversal operations
+		 * performed above.
 		 */
 		txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);
 
 		/*
-		 * If the config cache is stale, or we have uninitialized
-		 * metaslabs (see spa_vdev_add()), then update the config.
-		 *
-		 * If this is a verbatim import, trust the current
-		 * in-core spa_config and update the disk labels.
+		 * Check if we need to request an update of the config. On the
+		 * next sync, we would update the config stored in vdev labels
+		 * and the cachefile (by default /etc/zfs/zpool.cache).
 		 */
-		if (config_cache_txg != spa->spa_config_txg ||
-		    state == SPA_LOAD_IMPORT ||
-		    state == SPA_LOAD_RECOVER ||
-		    (spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
-			need_update = B_TRUE;
+		spa_ld_check_for_config_update(spa, state, config_cache_txg);
 
-		for (int c = 0; c < rvd->vdev_children; c++)
-			if (rvd->vdev_child[c]->vdev_ms_array == 0)
-				need_update = B_TRUE;
-
 		/*
-		 * Update the config cache asychronously in case we're the
-		 * root pool, in which case the config cache isn't writable yet.
-		 */
-		if (need_update)
-			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
-
-		/*
 		 * Check all DTLs to see if anything needs resilvering.
 		 */
 		if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
-		    vdev_resilver_needed(rvd, NULL, NULL))
+		    vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
 			spa_async_request(spa, SPA_ASYNC_RESILVER);
 
 		/*
@@ -3076,7 +3383,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 }
 
 static int
-spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
+spa_load_retry(spa_t *spa, spa_load_state_t state, int trust_config)
 {
 	int mode = spa->spa_mode;
 
@@ -3088,7 +3395,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int
 	spa_activate(spa, mode);
 	spa_async_suspend(spa);
 
-	return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
+	return (spa_load(spa, state, SPA_IMPORT_EXISTING, trust_config));
 }
 
 /*
@@ -3099,7 +3406,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int
  * spa_load().
  */
 static int
-spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
+spa_load_best(spa_t *spa, spa_load_state_t state, int trust_config,
     uint64_t max_request, int rewind_flags)
 {
 	nvlist_t *loadinfo = NULL;
@@ -3118,7 +3425,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int 
 	}
 
 	load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
-	    mosconfig);
+	    trust_config);
 	if (load_error == 0)
 		return (0);
 
@@ -3159,7 +3466,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int 
 	    spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) {
 		if (spa->spa_load_max_txg < safe_rewind_txg)
 			spa->spa_extreme_rewind = B_TRUE;
-		rewind_error = spa_load_retry(spa, state, mosconfig);
+		rewind_error = spa_load_retry(spa, state, trust_config);
 	}
 
 	spa->spa_extreme_rewind = B_FALSE;
@@ -4481,7 +4788,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_
 		state = SPA_LOAD_RECOVER;
 
 	/*
-	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
+	 * Pass off the heavy lifting to spa_load().  Pass TRUE for trust_config
 	 * because the user-supplied config is actually the one to trust when
 	 * doing an import.
 	 */
@@ -4632,7 +4939,7 @@ spa_tryimport(nvlist_t *tryconfig)
 
 	/*
 	 * Pass off the heavy lifting to spa_load().
-	 * Pass TRUE for mosconfig because the user-supplied config
+	 * Pass TRUE for trust_config because the user-supplied config
 	 * is actually the one to trust when doing an import.
 	 */
 	error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING, B_TRUE);
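
In short, after this change spa_load_impl() becomes a thin driver that calls
each spa_ld_*() phase in order and bails out on the first error, with the bulk
of the old body moved into those helpers.  A minimal compile-clean sketch of
that shape (simplified; pool_t and the step_*() names below are made up for
illustration and are not code from the commit):

	typedef struct pool pool_t;	/* stand-in for spa_t */

	/* Each phase is a small, focused function returning 0 or an errno. */
	static int step_parse_config(pool_t *p)     { (void)p; return (0); }
	static int step_open_vdevs(pool_t *p)       { (void)p; return (0); }
	static int step_select_uberblock(pool_t *p) { (void)p; return (0); }

	int
	pool_load_impl(pool_t *p)
	{
		int error;

		/* Run the phases in order; stop at the first failure. */
		error = step_parse_config(p);
		if (error != 0)
			return (error);

		error = step_open_vdevs(p);
		if (error != 0)
			return (error);

		error = step_select_uberblock(p);
		if (error != 0)
			return (error);

		return (0);
	}

Keeping the driver free of per-phase logic makes each step of the import easy
to read, comment, and debug on its own, which is the stated goal of the
refactor.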


