Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 20 Jul 2011 22:48:48 +0000 (UTC)
From:      "Justin T. Gibbs" <gibbs@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r224237 - in projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common: fs/zfs fs/zfs/sys sys/fm/fs
Message-ID:  <201107202248.p6KMmmpI035254@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gibbs
Date: Wed Jul 20 22:48:48 2011
New Revision: 224237
URL: http://svn.freebsd.org/changeset/base/224237

Log:
  Allow ZFS asynchronous event handling to proceed even if the
  root file system is mounted read-only.  This restriction appears
  to have been put in place to avoid errors with updating the
  configuration cache file.  However:
  
    o The majority of asynchronous event handling does not involve
      configuration cache file updates.
    o The configuration cache file need not be on the root file system,
      so the check was not complete.
    o Other classes of errors (e.g. file system full) can also prevent
      a successful update yet do not prevent asynchronous event
      processing.
    o Configurations such as NanoBSD never have a read-write root,
      so ZFS event processing is permanently disabled in these
      systems.
    o Failure to handle asynchronous events promptly can extend the
      window of time that a pool is in a critical state.
  
  At worst, a missed configuration cache update will force the
  operator to perform a manual "zpool import" (note -f is not required)
  to inform the system about a newly created pool.  To minimize the
  likelihood of this rare occurrence, configuration cache write failures
  now emit FMA events so the operator can take corrective
  action, and the write is retried every 5 minutes.  The retry interval,
  in seconds, is tunable via the sysctl "vfs.zfs.ccw_retry_interval".
  
  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c:
      o Add the sysctl "vfs.zfs.ccw_retry_interval".  The value
        defaults to 5 minutes and is used to rate limit, on a
        per-pool basis, configuration cache file write attempts.
      o Modify spa_async_dispatch to honor configuration cache
        write limiting.  If other events are pending, a configuration
        cache write will be attempted at the same time, so the
        rate limiting only applies when the asynchronous dispatch
        system is otherwise idle.  Async events should be rare
        (e.g. device arrival/departure) and configuration cache
        writes rarer, so a more complicated system to strictly
        honor the retry limit seems unwarranted.
      o Remove check in spa_async_dispatch() for the root file
        system being read-write.
  
  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c:
      Instead of silently ignoring configuration cache write
      failures, report them via a new FMA event as well as
      to the console.  The current zfs_ereport_post() doesn't
      allow arbitrary name=value pairs to be appended to the
      report, so the configuration cache file name is only
      available on the console output.  This limitation should
      be addressed in a future update.
  
  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h:
      Add an int64_t to the spa data structure to track the
      time (via LBOLT) of the last configuration cache file
      write failure.  This is referenced in spa_async_dispatch()
      to effect the rate limiting.
  
  sys/cddl/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h:
      Add FM_EREPORT_ZFS_CONFIG_CACHE_WRITE as an ereport class.
  
  Sponsored by:	 Spectra Logic Corporation

Modified:
  projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
  projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
  projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h

Modified: projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Wed Jul 20 21:18:05 2011	(r224236)
+++ projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Wed Jul 20 22:48:48 2011	(r224237)
@@ -73,10 +73,20 @@
 /* Check hostid on import? */
 static int check_hostid = 1;
 
+/*
+ * The interval at which failed configuration cache file writes
+ * should be retried.
+ */
+static int zfs_ccw_retry_interval = 300;
+
 SYSCTL_DECL(_vfs_zfs);
 TUNABLE_INT("vfs.zfs.check_hostid", &check_hostid);
 SYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFLAG_RW, &check_hostid, 0,
     "Check hostid on import?");
+TUNABLE_INT("vfs.zfs.ccw_retry_interval", &zfs_ccw_retry_interval);
+SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RW,
+    &zfs_ccw_retry_interval, 0,
+    "Configuration cache file write, retry after failure, interval (seconds)");
 
 typedef enum zti_modes {
 	zti_mode_fixed,			/* value is # of threads (min 1) */
@@ -5178,13 +5188,34 @@ spa_async_resume(spa_t *spa)
 	mutex_exit(&spa->spa_async_lock);
 }
 
+static int
+spa_async_tasks_pending(spa_t *spa)
+{
+	u_int non_config_tasks;
+	u_int config_task;
+	boolean_t config_task_suspended;
+
+	non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE;
+	config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE;
+	if (spa->spa_ccw_fail_time == 0) {
+		config_task_suspended = B_FALSE;
+	} else {
+		config_task_suspended =
+		    (ddi_get_lbolt64() - spa->spa_ccw_fail_time)
+		  < (zfs_ccw_retry_interval * hz);
+	}
+
+	return (non_config_tasks || (config_task && !config_task_suspended));
+}
+
 static void
 spa_async_dispatch(spa_t *spa)
 {
 	mutex_enter(&spa->spa_async_lock);
-	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
+	if (spa_async_tasks_pending(spa) &&
+	    !spa->spa_async_suspended &&
 	    spa->spa_async_thread == NULL &&
-	    rootdir != NULL && !vn_is_readonly(rootdir))
+	    rootdir != NULL)
 		spa->spa_async_thread = thread_create(NULL, 0,
 		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
 	mutex_exit(&spa->spa_async_lock);

Modified: projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c
==============================================================================
--- projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c	Wed Jul 20 21:18:05 2011	(r224236)
+++ projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c	Wed Jul 20 22:48:48 2011	(r224237)
@@ -24,6 +24,7 @@
  */
 
 #include <sys/zfs_context.h>
+#include <sys/fm/fs/zfs.h>
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/nvpair.h>
@@ -136,7 +137,7 @@ out:
 	kobj_close_file(file);
 }
 
-static void
+static int
 spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 {
 	size_t buflen;
@@ -144,13 +145,14 @@ spa_config_write(spa_config_dirent_t *dp
 	vnode_t *vp;
 	int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX;
 	char *temp;
+	int err;
 
 	/*
 	 * If the nvlist is empty (NULL), then remove the old cachefile.
 	 */
 	if (nvl == NULL) {
-		(void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
-		return;
+		err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
+		return (err);
 	}
 
 	/*
@@ -171,11 +173,12 @@ spa_config_write(spa_config_dirent_t *dp
 	 */
 	(void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path);
 
-	if (vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) == 0) {
-		if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
-		    0, RLIM64_INFINITY, kcred, NULL) == 0 &&
-		    VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) {
-			(void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
+	err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0);
+	if (err == 0) {
+		if ((err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
+		    0, RLIM64_INFINITY, kcred, NULL)) == 0 &&
+		    (err = VOP_FSYNC(vp, FSYNC, kcred, NULL)) == 0) {
+			err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
 		}
 		(void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
 	}
@@ -184,6 +187,7 @@ spa_config_write(spa_config_dirent_t *dp
 
 	kmem_free(buf, buflen);
 	kmem_free(temp, MAXPATHLEN);
+	return (err);
 }
 
 /*
@@ -195,6 +199,8 @@ spa_config_sync(spa_t *target, boolean_t
 {
 	spa_config_dirent_t *dp, *tdp;
 	nvlist_t *nvl;
+	boolean_t ccw_failure;
+	int error;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
@@ -206,6 +212,7 @@ spa_config_sync(spa_t *target, boolean_t
 	 * cachefile is changed, the new one is pushed onto this list, allowing
 	 * us to update previous cachefiles that no longer contain this pool.
 	 */
+	ccw_failure = B_FALSE;
 	for (dp = list_head(&target->spa_config_list); dp != NULL;
 	    dp = list_next(&target->spa_config_list, dp)) {
 		spa_t *spa = NULL;
@@ -238,10 +245,35 @@ spa_config_sync(spa_t *target, boolean_t
 			mutex_exit(&spa->spa_props_lock);
 		}
 
-		spa_config_write(dp, nvl);
+		error = spa_config_write(dp, nvl);
+		if (error != 0) {
+	
+			printf("ZFS ERROR: Update of cache file %s failed: "
+			    "Errno %d\n", dp->scd_path, error);
+			ccw_failure = B_TRUE;
+		}
+
 		nvlist_free(nvl);
 	}
 
+	if (ccw_failure) {
+		/*
+		 * Keep trying so that configuration data is 
+		 * written if/when any temporary filesystem
+		 * resource issues are resolved.
+		 */
+		target->spa_ccw_fail_time = ddi_get_lbolt64();
+		spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
+		zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
+		    target, NULL, NULL, 0, 0);
+	} else {
+		/*
+		 * Do not rate limit future attempts to update
+		 * the config cache.
+		 */
+		target->spa_ccw_fail_time = 0;
+	}
+
 	/*
 	 * Remove any config entries older than the current one.
 	 */

Modified: projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
==============================================================================
--- projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h	Wed Jul 20 21:18:05 2011	(r224236)
+++ projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h	Wed Jul 20 22:48:48 2011	(r224237)
@@ -216,6 +216,7 @@ struct spa {
 	int		spa_vdev_locks;		/* locks grabbed */
 	uint64_t	spa_creation_version;	/* version at pool creation */
 	uint64_t	spa_prev_software_version;
+	int64_t		spa_ccw_fail_time;	/* Conf cache write fail time */
 	/*
 	 * spa_refcnt & spa_config_lock must be the last elements
 	 * because refcount_t changes size based on compilation options.

Modified: projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h
==============================================================================
--- projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h	Wed Jul 20 21:18:05 2011	(r224236)
+++ projects/zfsd/head/sys/cddl/contrib/opensolaris/uts/common/sys/fm/fs/zfs.h	Wed Jul 20 22:48:48 2011	(r224237)
@@ -46,6 +46,7 @@ extern "C" {
 #define	FM_EREPORT_ZFS_IO_FAILURE		"io_failure"
 #define	FM_EREPORT_ZFS_PROBE_FAILURE		"probe_failure"
 #define	FM_EREPORT_ZFS_LOG_REPLAY		"log_replay"
+#define	FM_EREPORT_ZFS_CONFIG_CACHE_WRITE	"config_cache_write"
 
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL		"pool"
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE	"pool_failmode"



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201107202248.p6KMmmpI035254>