Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 15 May 2010 07:07:38 +0000 (UTC)
From:      Martin Matuska <mm@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r208109 - stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Message-ID:  <201005150707.o4F77cZx003645@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mm
Date: Sat May 15 07:07:38 2010
New Revision: 208109
URL: http://svn.freebsd.org/changeset/base/208109

Log:
  MFC r207481, r207956:
  
  MFC r207481 [1]:
  Add sysctl and loader tunable vfs.zfs.txg.write_limit_override.
  This tunable improves fine-tuning of ZFS write throttling.
  
  MFC r207956 [2]:
  Fix possible hang when replaying large truncations.
  OpenSolaris onnv revision:	7904:6a124a4ca9c5
  
  PR:		kern/146108 [1]
  Suggested by:	Nikolay Denev <ndenev at gmail.com> [1]
  Obtained from:	OpenSolaris (Bug ID 6761624) [2]
  Approved by:	pjd, delphij (mentor)

Modified:
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)
  stable/8/sys/geom/sched/   (props changed)

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c	Sat May 15 07:01:41 2010	(r208108)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c	Sat May 15 07:07:38 2010	(r208109)
@@ -38,6 +38,7 @@ static void txg_quiesce_thread(void *arg
 
 int zfs_txg_timeout = 30;	/* max seconds worth of delta per txg */
 extern int zfs_txg_synctime;
+extern uint64_t zfs_write_limit_override;
 
 SYSCTL_DECL(_vfs_zfs);
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0,
@@ -48,6 +49,11 @@ SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, timeo
 TUNABLE_INT("vfs.zfs.txg.synctime", &zfs_txg_synctime);
 SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, synctime, CTLFLAG_RDTUN, &zfs_txg_synctime,
     0, "Target seconds to sync a txg");
+TUNABLE_QUAD("vfs.zfs.txg.write_limit_override", &zfs_write_limit_override);
+SYSCTL_QUAD(_vfs_zfs_txg, OID_AUTO, write_limit_override, CTLFLAG_RW,
+    &zfs_write_limit_override, 0,
+    "Override maximum size of a txg to this size in bytes, "
+    "value of 0 means don't override");
 
 /*
  * Prepare the txg subsystem.

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	Sat May 15 07:01:41 2010	(r208108)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	Sat May 15 07:07:38 2010	(r208109)
@@ -1567,6 +1567,29 @@ zil_replay_log_record(zilog_t *zilog, lr
 	}
 
 	/*
+	 * Replay of large truncates can end up needing additional txs
+	 * and a different txg. If they are nested within the replay tx
+	 * as below then a hang is possible. So we do the truncate here
+	 * and redo the truncate later (a no-op) and update the sequence
+	 * number whilst in the replay tx. Fortunately, it's safe to repeat
+	 * a truncate if we crash and the truncate commits. A create over
+	 * an existing file will also come in as a TX_TRUNCATE record.
+	 *
+	 * Note, remove of large files and renames over large files is
+	 * handled by putting the deleted object on a stable list
+	 * and if necessary force deleting the object outside of the replay
+	 * transaction using the zr_replay_cleaner.
+	 */
+	if (txtype == TX_TRUNCATE) {
+		*zr->zr_txgp = TXG_NOWAIT;
+		error = zr->zr_replay[TX_TRUNCATE](zr->zr_arg, zr->zr_lrbuf,
+		    zr->zr_byteswap);
+		if (error)
+			goto bad;
+		zr->zr_byteswap = 0; /* only byteswap once */
+	}
+
+	/*
 	 * We must now do two things atomically: replay this log record,
 	 * and update the log header to reflect the fact that we did so.
 	 * We use the DMU's ability to assign into a specific txg to do this.
@@ -1636,6 +1659,7 @@ zil_replay_log_record(zilog_t *zilog, lr
 		dprintf("pass %d, retrying\n", pass);
 	}
 
+bad:
 	ASSERT(error && error != ERESTART);
 	name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 	dmu_objset_name(zr->zr_os, name);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201005150707.o4F77cZx003645>