Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 27 Mar 2015 13:55:56 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r280760 - head/sys/ufs/ffs
Message-ID:  <201503271355.t2RDtuLt071068@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Fri Mar 27 13:55:56 2015
New Revision: 280760
URL: https://svnweb.freebsd.org/changeset/base/280760

Log:
  Fix the hand after the immediate reboot when the following command
  sequence is performed on UFS SU+J rootfs:
  cp -Rp /sbin/init /sbin/init.old
  mv -f /sbin/init.old /sbin/init
  
  Hang occurs on the rootfs unmount.  There are two issues:
  
  1. Removed init binary, which is still mapped, creates a reference to
  the removed vnode. The inodeblock for such vnode must have active
  inodedep, which is (eventually) linked through the unlinked list. This
  means that ffs_sync(MNT_SUSPEND) cannot succeed, because number of
  softdep workitems for the mp is always > 0.  FFS is suspended during
  unmount, so unmount just hangs.
  
  2. As noted above, the inodedep is linked eventually.  It is not
  linked until the superblock is written.  But at the vfs_unmountall()
  time, when the rootfs is unmounted, the call is made to
  ffs_unmount()->ffs_sync() before vflush(), and ffs_sync() only calls
  ffs_sbupdate() after all workitems are flushed.  It is masked for
  normal system operations, because syncer works in parallel and
  eventually flushes superblock.  Syncer is stopped when rootfs
  unmounted, so ffs_sync() must do sb update on its own.
  
  Correct the issues listed above. For MNT_SUSPEND, count the number of
  linked unlinked inodedeps (this is not a typo) and substract the count
  of such workitems from the total. For the second issue, the
  ffs_sbupdate() is called right after device sync in ffs_sync() loop.
  
  There is third problem, occuring with both SU and SU+J. The
  softdep_waitidle() loop, which waits for softdep_flush() thread to
  clear the worklist, only waits 20ms max. It seems that the 1 tick,
  specified for msleep(9), was a typo.
  
  Add fsync(devvp, MNT_WAIT) call to softdep_waitidle(), which seems to
  significantly help the softdep thread, and change the MNT_LAZY update
  at the reboot time to MNT_WAIT for similar reasons.  Note that
  userspace cannot create more work while devvp is flushed, since the
  mount point is always suspended before the call to softdep_waitidle()
  in unmount or remount path.
  
  PR:	195458
  In collaboration with:	gjb, pho
  Reviewed by:	mckusick
  Sponsored by:	The FreeBSD Foundation
  MFC after:	2 weeks

Modified:
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ffs/ffs_vfsops.c

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c	Fri Mar 27 13:26:59 2015	(r280759)
+++ head/sys/ufs/ffs/ffs_softdep.c	Fri Mar 27 13:55:56 2015	(r280760)
@@ -1885,8 +1885,8 @@ softdep_flushworklist(oldmnt, countp, td
 	struct thread *td;
 {
 	struct vnode *devvp;
-	int count, error = 0;
 	struct ufsmount *ump;
+	int count, error;
 
 	/*
 	 * Alternately flush the block device associated with the mount
@@ -1895,6 +1895,7 @@ softdep_flushworklist(oldmnt, countp, td
 	 * are found.
 	 */
 	*countp = 0;
+	error = 0;
 	ump = VFSTOUFS(oldmnt);
 	devvp = ump->um_devvp;
 	while ((count = softdep_process_worklist(oldmnt, 1)) > 0) {
@@ -1902,37 +1903,47 @@ softdep_flushworklist(oldmnt, countp, td
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 		error = VOP_FSYNC(devvp, MNT_WAIT, td);
 		VOP_UNLOCK(devvp, 0);
-		if (error)
+		if (error != 0)
 			break;
 	}
 	return (error);
 }
 
+#define	SU_WAITIDLE_RETRIES	20
 static int
 softdep_waitidle(struct mount *mp, int flags __unused)
 {
 	struct ufsmount *ump;
-	int error;
-	int i;
+	struct vnode *devvp;
+	struct thread *td;
+	int error, i;
 
 	ump = VFSTOUFS(mp);
+	devvp = ump->um_devvp;
+	td = curthread;
+	error = 0;
 	ACQUIRE_LOCK(ump);
-	for (i = 0; i < 10 && ump->softdep_deps; i++) {
+	for (i = 0; i < SU_WAITIDLE_RETRIES && ump->softdep_deps != 0; i++) {
 		ump->softdep_req = 1;
 		KASSERT((flags & FORCECLOSE) == 0 ||
 		    ump->softdep_on_worklist == 0,
 		    ("softdep_waitidle: work added after flush"));
-		msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM, "softdeps", 1);
+		msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM | PDROP,
+		    "softdeps", 10 * hz);
+		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+		error = VOP_FSYNC(devvp, MNT_WAIT, td);
+		VOP_UNLOCK(devvp, 0);
+		if (error != 0)
+			break;
+		ACQUIRE_LOCK(ump);
 	}
 	ump->softdep_req = 0;
-	FREE_LOCK(ump);
-	error = 0;
-	if (i == 10) {
+	if (i == SU_WAITIDLE_RETRIES && error == 0 && ump->softdep_deps != 0) {
 		error = EBUSY;
 		printf("softdep_waitidle: Failed to flush worklist for %p\n",
 		    mp);
 	}
-
+	FREE_LOCK(ump);
 	return (error);
 }
 
@@ -7637,17 +7648,13 @@ check_inode_unwritten(inodedep)
 	return (1);
 }
 
-/*
- * Try to free an inodedep structure. Return 1 if it could be freed.
- */
 static int
-free_inodedep(inodedep)
+check_inodedep_free(inodedep)
 	struct inodedep *inodedep;
 {
 
 	LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
-	if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
-	    (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
+	if ((inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
 	    !LIST_EMPTY(&inodedep->id_dirremhd) ||
 	    !LIST_EMPTY(&inodedep->id_pendinghd) ||
 	    !LIST_EMPTY(&inodedep->id_bufwait) ||
@@ -7662,6 +7669,21 @@ free_inodedep(inodedep)
 	    inodedep->id_nlinkdelta != 0 ||
 	    inodedep->id_savedino1 != NULL)
 		return (0);
+	return (1);
+}
+
+/*
+ * Try to free an inodedep structure. Return 1 if it could be freed.
+ */
+static int
+free_inodedep(inodedep)
+	struct inodedep *inodedep;
+{
+
+	LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
+	if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
+	    !check_inodedep_free(inodedep))
+		return (0);
 	if (inodedep->id_state & ONDEPLIST)
 		LIST_REMOVE(inodedep, id_deps);
 	LIST_REMOVE(inodedep, id_hash);
@@ -13846,7 +13868,8 @@ softdep_check_suspend(struct mount *mp,
 {
 	struct bufobj *bo;
 	struct ufsmount *ump;
-	int error;
+	struct inodedep *inodedep;
+	int error, unlinked;
 
 	bo = &devvp->v_bufobj;
 	ASSERT_BO_WLOCKED(bo);
@@ -13907,6 +13930,20 @@ softdep_check_suspend(struct mount *mp,
 		break;
 	}
 
+	unlinked = 0;
+	if (MOUNTEDSUJ(mp)) {
+		for (inodedep = TAILQ_FIRST(&ump->softdep_unlinked);
+		    inodedep != NULL;
+		    inodedep = TAILQ_NEXT(inodedep, id_unlinked)) {
+			if ((inodedep->id_state & (UNLINKED | UNLINKLINKS |
+			    UNLINKONLIST)) != (UNLINKED | UNLINKLINKS |
+			    UNLINKONLIST) ||
+			    !check_inodedep_free(inodedep))
+				continue;
+			unlinked++;
+		}
+	}
+
 	/*
 	 * Reasons for needing more work before suspend:
 	 * - Dirty buffers on devvp.
@@ -13916,8 +13953,8 @@ softdep_check_suspend(struct mount *mp,
 	error = 0;
 	if (bo->bo_numoutput > 0 ||
 	    bo->bo_dirty.bv_cnt > 0 ||
-	    softdep_depcnt != 0 ||
-	    ump->softdep_deps != 0 ||
+	    softdep_depcnt != unlinked ||
+	    ump->softdep_deps != unlinked ||
 	    softdep_accdepcnt != ump->softdep_accdeps ||
 	    secondary_writes != 0 ||
 	    mp->mnt_secondary_writes != 0 ||

Modified: head/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- head/sys/ufs/ffs/ffs_vfsops.c	Fri Mar 27 13:26:59 2015	(r280759)
+++ head/sys/ufs/ffs/ffs_vfsops.c	Fri Mar 27 13:55:56 2015	(r280760)
@@ -1502,8 +1502,11 @@ ffs_sync(mp, waitfor)
 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
 		panic("%s: ffs_sync: modification on read-only filesystem",
 		    fs->fs_fsmnt);
-	if (waitfor == MNT_LAZY)
-		return (ffs_sync_lazy(mp));
+	if (waitfor == MNT_LAZY) {
+		if (!rebooting)
+			return (ffs_sync_lazy(mp));
+		waitfor = MNT_NOWAIT;
+	}
 
 	/*
 	 * Write back each (modified) inode.
@@ -1560,7 +1563,7 @@ loop:
 	/*
 	 * Force stale filesystem control information to be flushed.
 	 */
-	if (waitfor == MNT_WAIT) {
+	if (waitfor == MNT_WAIT || rebooting) {
 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
 			allerror = error;
 		/* Flushed work items may create new vnodes to clean */
@@ -1577,9 +1580,12 @@ loop:
 	if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
 		BO_UNLOCK(bo);
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
-		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
-			allerror = error;
+		error = VOP_FSYNC(devvp, waitfor, td);
 		VOP_UNLOCK(devvp, 0);
+		if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
+			error = ffs_sbupdate(ump, waitfor, 0);
+		if (error != 0)
+			allerror = error;
 		if (allerror == 0 && waitfor == MNT_WAIT)
 			goto loop;
 	} else if (suspend != 0) {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201503271355.t2RDtuLt071068>