Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 17 Feb 2010 03:11:56 +0000 (UTC)
From:      Jeff Roberson <jeff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r203983 - projects/suj/head/sys/ufs/ffs
Message-ID:  <201002170311.o1H3BufV016689@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jeff
Date: Wed Feb 17 03:11:56 2010
New Revision: 203983
URL: http://svn.freebsd.org/changeset/base/203983

Log:
   - Don't delay freeing indirect blocks when we're not using SUJ.  This
     adds unacceptable latency to non-journaling softdep.  SUJ must wait
     to free an indirect until all of its descendents have been freed.
   - Account for some missing dependencies in softdep_count_dependencies().
   - Remove the 5 second delay for writing new journal entries.  The latency
     is more imoprtant than optimizing the journal writes.
   - Add some more performance counters.
   - After discussing with Kirk, add myself to the copyright list.  I have
     now written half of the code in this file.

Modified:
  projects/suj/head/sys/ufs/ffs/ffs_softdep.c

Modified: projects/suj/head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- projects/suj/head/sys/ufs/ffs/ffs_softdep.c	Wed Feb 17 03:07:08 2010	(r203982)
+++ projects/suj/head/sys/ufs/ffs/ffs_softdep.c	Wed Feb 17 03:11:56 2010	(r203983)
@@ -1,5 +1,7 @@
 /*-
- * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
+ * Copyright 1998, 2000 Marshall Kirk McKusick.
+ * Copyright 2009, 2010 Jeffrey W. Roberson <jeff@FreeBSD.org>
+ * All rights reserved.
  *
  * The soft updates code is derived from the appendix of a University
  * of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
@@ -23,17 +25,16 @@
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
- * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED.  IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+ * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	from: @(#)ffs_softdep.c	9.59 (McKusick) 6/21/00
  */
@@ -950,6 +951,11 @@ static int stat_jaddref;	/* bufs redirti
 static int stat_jnewblk;	/* bufs redirtied as blk bitmap can not write */
 static int stat_journal_min;	/* Times hit journal min threshold */
 static int stat_journal_low;	/* Times hit journal low threshold */
+static int stat_journal_wait;	/* Times blocked in jwait(). */
+static int stat_jwait_filepage;	/* Times blocked in jwait() for filepage. */
+static int stat_jwait_freeblks;	/* Times blocked in jwait() for freeblks. */
+static int stat_jwait_inode;	/* Times blocked in jwait() for inodes. */
+static int stat_jwait_newblk;	/* Times blocked in jwait() for newblks. */
 
 SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW,
     &max_softdeps, 0, "");
@@ -985,6 +991,16 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, jou
     &stat_journal_low, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW,
     &stat_journal_min, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, journal_wait, CTLFLAG_RW,
+    &stat_journal_wait, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_filepage, CTLFLAG_RW,
+    &stat_jwait_filepage, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_freeblks, CTLFLAG_RW,
+    &stat_jwait_freeblks, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_inode, CTLFLAG_RW,
+    &stat_jwait_inode, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW,
+    &stat_jwait_newblk, 0, "");
 
 SYSCTL_DECL(_vfs_ffs);
 
@@ -2511,16 +2527,11 @@ softdep_process_journal(mp, flags)
 		 * entries and add them to the segment.  Notice cnt is
 		 * off by one to account for the space required by the
 		 * jsegrec.  If we don't have a full block to log skip it
-		 * unless we haven't written anything in 5 seconds.
+		 * unless we haven't written anything.
 		 */
 		cnt++;
-		if (cnt < jrecmax) {
-			if (segwritten)
-				break;
-			if (flags == MNT_NOWAIT &&
-			   (ticks - jblocks->jb_age) < hz*5)
-				break;
-		}
+		if (cnt < jrecmax && segwritten)
+			break;
 		/*
 		 * Verify some free journal space.  softdep_prealloc() should
 	 	 * guarantee that we don't run out so this is indicative of
@@ -2644,23 +2655,16 @@ softdep_process_journal(mp, flags)
 		/*
 		 * Write this one buffer and continue.
 		 */
-#if 1
 		WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
 		FREE_LOCK(&lk);
 		BO_LOCK(bp->b_bufobj);
 		bgetvp(ump->um_devvp, bp);
 		BO_UNLOCK(bp->b_bufobj);
-		/* XXX Could bawrite here. */
-		bwrite(bp);
-		ACQUIRE_LOCK(&lk);
-#else
-		/* This case simulates the write but does not log anything. */
-		handle_written_jseg(jseg, bp);
-		FREE_LOCK(&lk);
-		brelse(bp);
+		if (flags == MNT_NOWAIT)
+			bawrite(bp);
+		else
+			bwrite(bp);
 		ACQUIRE_LOCK(&lk);
-#endif
-		segwritten++;
 	}
 	/*
 	 * If we've suspended the filesystem because we ran out of journal
@@ -3499,6 +3503,7 @@ jwait(wk)
 	struct worklist *wk;
 {
 
+	stat_journal_wait++;
 	/*
 	 * If IO has not started we process the journal.  We can't mark the
 	 * worklist item as IOWAITING because we drop the lock while
@@ -3567,8 +3572,10 @@ softdep_setup_trunc(vp, length, flags)
 		jtrunc->jt_size = DIP(ip, i_size);
 	ACQUIRE_LOCK(&lk);
 	add_to_journal(&jtrunc->jt_list);
-	while (jsegdep->jd_seg == NULL)
+	while (jsegdep->jd_seg == NULL) {
+		stat_jwait_freeblks++;
 		jwait(&jtrunc->jt_list);
+	}
 	FREE_LOCK(&lk);
 
 	return (jsegdep);
@@ -4973,7 +4980,7 @@ softdep_setup_freeblocks(ip, length, fla
 	 * for the allocations will suffice.
 	 */
 	inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
-	if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED ||
+	if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED ||
 	    (fs->fs_flags & FS_SUJ) == 0)
 		needj = 0;
 	else
@@ -5225,6 +5232,7 @@ deallocate_dependencies(bp, inodedep, fr
 				while ((jremref =
 				    LIST_FIRST(&dirrem->dm_jremrefhd))
 				    != NULL) {
+					stat_jwait_filepage++;
 					jwait(&jremref->jr_list);
 					return (0);
 				}
@@ -5246,6 +5254,7 @@ deallocate_dependencies(bp, inodedep, fr
 			}
 			while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd))
 			    != NULL) {
+				stat_jwait_filepage++;
 				jwait(&jmvref->jm_list);
 				return (0);
 			}
@@ -5521,12 +5530,17 @@ softdep_freefile(pvp, ino, mode)
 	 * will never be written.
 	 */
 	if (inodedep && inodedep->id_state & UNLINKED) {
+		/*
+		 * Save the journal work to be freed with the bitmap
+		 * before we clear UNLINKED.  Otherwise it can be lost
+		 * if the inode block is written.
+		 */
+		handle_bufwait(inodedep, &freefile->fx_jwork);
 		clear_unlinked_inodedep(inodedep);
+		/* Re-acquire inodedep as we've dropped lk. */
 		inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
-		if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) {
+		if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0)
 			inodedep->id_state |= GOINGAWAY;
-			handle_bufwait(inodedep, &freefile->fx_jwork);
-		}
 	}
 	if (inodedep == NULL || check_inode_unwritten(inodedep)) {
 		FREE_LOCK(&lk);
@@ -5646,21 +5660,24 @@ freework_freeblock(freework)
 	int complete;
 	int pending;
 	int bsize;
+	int needj;
 
 	freeblks = freework->fw_freeblks;
 	ump = VFSTOUFS(freeblks->fb_list.wk_mp);
 	fs = ump->um_fs;
+	needj = freeblks->fb_list.wk_mp->mnt_kern_flag & MNTK_SUJ;
 	complete = 0;
 	LIST_INIT(&wkhd);
 	/*
 	 * If we are canceling an existing jnewblk pass it to the free
 	 * routine, otherwise pass the freeblk which will ultimately
-	 * release the freeblks
+	 * release the freeblks.  If we're not journaling, we can just
+	 * free the freeblks immediately.
 	 */
 	if (!LIST_EMPTY(&freework->fw_jwork)) {
 		LIST_SWAP(&wkhd, &freework->fw_jwork, worklist, wk_list);
 		complete = 1;
-	} else
+	} else if (needj)
 		WORKLIST_INSERT_UNLOCKED(&wkhd, &freework->fw_list);
 	bsize = lfragtosize(fs, freework->fw_frags);
 	pending = btodb(bsize);
@@ -5677,7 +5694,7 @@ freework_freeblock(freework)
 	}
 	ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno,
 	    bsize, freeblks->fb_previousinum, &wkhd);
-	if (complete == 0)
+	if (complete == 0 && needj)
 		return;
 	/*
 	 * The jnewblk will be discarded and the bits in the map never
@@ -5848,6 +5865,7 @@ indir_trunc(freework, dbn, lbn)
 	ufs2_daddr_t dbn;
 	ufs_lbn_t lbn;
 {
+	struct freework *nfreework;
 	struct workhead wkhd;
 	struct jnewblk *jnewblk;
 	struct freeblks *freeblks;
@@ -5863,6 +5881,7 @@ indir_trunc(freework, dbn, lbn)
 	int i, nblocks, ufs1fmt;
 	int fs_pendingblocks;
 	int freedeps;
+	int needj;
 	int level;
 	int cnt;
 
@@ -5875,6 +5894,7 @@ indir_trunc(freework, dbn, lbn)
 	fs = ump->um_fs;
 	fs_pendingblocks = 0;
 	freedeps = 0;
+	needj = UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ;
 	lbnadd = 1;
 	for (i = level; i > 0; i--)
 		lbnadd *= NINDIR(fs);
@@ -5966,7 +5986,8 @@ indir_trunc(freework, dbn, lbn)
 		cnt++;
 	}
 	ACQUIRE_LOCK(&lk);
-	freework->fw_ref += NINDIR(fs) + 1;
+	if (needj)
+		freework->fw_ref += NINDIR(fs) + 1;
 	/* Any remaining journal work can be completed with freeblks. */
 	jwork_move(&freeblks->fb_jwork, &wkhd);
 	FREE_LOCK(&lk);
@@ -5975,6 +5996,7 @@ indir_trunc(freework, dbn, lbn)
 		nb = bap1[0];
 	else
 		nb = bap2[0];
+	nfreework = freework;
 	/*
 	 * Reclaim on disk blocks.
 	 */
@@ -5990,13 +6012,14 @@ indir_trunc(freework, dbn, lbn)
 			continue;
 		cnt++;
 		if (level != 0) {
-			struct freework *nfreework;
 			ufs_lbn_t nlbn;
 
 			nlbn = (lbn + 1) - (i * lbnadd);
-			nfreework = newfreework(freeblks, freework, nlbn, nb,
-			    fs->fs_frag, 0);
-			freedeps++;
+			if (needj != 0) {
+				nfreework = newfreework(freeblks, freework,
+				    nlbn, nb, fs->fs_frag, 0);
+				freedeps++;
+			}
 			indir_trunc(nfreework, fsbtodb(fs, nb), nlbn);
 		} else {
 			struct freedep *freedep;
@@ -6006,7 +6029,8 @@ indir_trunc(freework, dbn, lbn)
 			 * all blocks being released to the same CG.
 			 */
 			LIST_INIT(&wkhd);
-			if (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb))) {
+			if (needj != 0 &&
+			    (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb)))) {
 				freedep = newfreedep(freework);
 				WORKLIST_INSERT_UNLOCKED(&wkhd,
 				    &freedep->fd_list);
@@ -6014,22 +6038,37 @@ indir_trunc(freework, dbn, lbn)
 			}
 			ffs_blkfree(ump, fs, freeblks->fb_devvp, nb,
 			    fs->fs_bsize, freeblks->fb_previousinum, &wkhd);
-			fs_pendingblocks += nblocks;
 		}
 	}
-	ACQUIRE_LOCK(&lk);
-	freework->fw_off = i;
 	if (level == 0)
 		fs_pendingblocks = (nblocks * cnt);
-	freework->fw_ref += freedeps;
-	freework->fw_ref -= NINDIR(fs) + 1;
-	if (freework->fw_ref != 0)
+	/*
+	 * If we're not journaling we can free the indirect now.  Otherwise
+	 * setup the ref counts and offset so this indirect can be completed
+	 * when its children are free.
+	 */
+	if (needj == 0) {
+		fs_pendingblocks += nblocks;
+		dbn = dbtofsb(fs, dbn);
+		ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize,
+		    freeblks->fb_previousinum, NULL);
+		ACQUIRE_LOCK(&lk);
+		freeblks->fb_chkcnt -= fs_pendingblocks;
+		if (freework->fw_blkno == dbn)
+			handle_written_freework(freework);
+		FREE_LOCK(&lk);
 		freework = NULL;
-	FREE_LOCK(&lk);
-	if (fs_pendingblocks) {
+	} else {
 		ACQUIRE_LOCK(&lk);
+		freework->fw_off = i;
+		freework->fw_ref += freedeps;
+		freework->fw_ref -= NINDIR(fs) + 1;
+		if (freework->fw_ref != 0)
+			freework = NULL;
 		freeblks->fb_chkcnt -= fs_pendingblocks;
 		FREE_LOCK(&lk);
+	}
+	if (fs_pendingblocks) {
 		UFS_LOCK(ump);
 		fs->fs_pendingblocks -= fs_pendingblocks;
 		UFS_UNLOCK(ump);
@@ -7895,10 +7934,14 @@ initiate_write_filepage(pagedep, bp)
 	 * locked so the dependency can not go away.
 	 */
 	LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next)
-		while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL)
+		while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) {
+			stat_jwait_filepage++;
 			jwait(&jremref->jr_list);
-	while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL)
+		}
+	while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) {
+		stat_jwait_filepage++;
 		jwait(&jmvref->jm_list);
+	}
 	for (i = 0; i < DAHASHSZ; i++) {
 		LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
 			ep = (struct direct *)
@@ -9754,6 +9797,7 @@ again:
 		TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
 			if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
 			    == DEPCOMPLETE) {
+				stat_jwait_inode++;
 				jwait(&inoref->if_list);
 				goto again;
 			}
@@ -9892,6 +9936,7 @@ restart:
 	TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
 		if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
 		    == DEPCOMPLETE) {
+			stat_jwait_inode++;
 			jwait(&inoref->if_list);
 			goto restart;
 		}
@@ -10161,6 +10206,7 @@ loop:
 		case D_ALLOCINDIR:
 			newblk = WK_NEWBLK(wk);
 			if (newblk->nb_jnewblk != NULL) {
+				stat_jwait_newblk++;
 				jwait(&newblk->nb_jnewblk->jn_list);
 				goto restart;
 			}
@@ -10186,6 +10232,7 @@ loop:
 			    &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
 				newblk = (struct newblk *)aip;
 				if (newblk->nb_jnewblk != NULL) {
+					stat_jwait_newblk++;
 					jwait(&newblk->nb_jnewblk->jn_list);
 					goto restart;
 				}
@@ -10313,6 +10360,7 @@ restart:
 		TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
 			if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
 			    == DEPCOMPLETE) {
+				stat_jwait_inode++;
 				jwait(&inoref->if_list);
 				goto restart;
 			}
@@ -10355,6 +10403,7 @@ flush_deplist(listhead, waitfor, errorp)
 	TAILQ_FOREACH(adp, listhead, ad_next) {
 		newblk = (struct newblk *)adp;
 		if (newblk->nb_jnewblk != NULL) {
+			stat_jwait_newblk++;
 			jwait(&newblk->nb_jnewblk->jn_list);
 			return (1);
 		}
@@ -10419,6 +10468,7 @@ flush_newblk_dep(vp, mp, lbn)
 		 * Flush the journal.
 		 */
 		if (newblk->nb_jnewblk != NULL) {
+			stat_jwait_newblk++;
 			jwait(&newblk->nb_jnewblk->jn_list);
 			continue;
 		}
@@ -10528,6 +10578,7 @@ restart:
 		TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
 			if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
 			    == DEPCOMPLETE) {
+				stat_jwait_inode++;
 				jwait(&inoref->if_list);
 				goto restart;
 			}
@@ -10688,12 +10739,12 @@ softdep_request_cleanup(fs, vp)
 		if (error != 0)
 			return (0);
 	}
-	process_removes(vp);
 	while (fs->fs_pendingblocks > 0 && fs->fs_cstotal.cs_nbfree <= needed) {
 		if (time_second > starttime)
 			return (0);
 		UFS_UNLOCK(ump);
 		ACQUIRE_LOCK(&lk);
+		process_removes(vp);
 		if (ump->softdep_on_worklist > 0 &&
 		    process_worklist_item(UFSTOVFS(ump), LK_NOWAIT) != -1) {
 			stat_worklist_push += 1;
@@ -10973,6 +11024,8 @@ softdep_count_dependencies(bp, wantcount
 	struct allocindir *aip;
 	struct pagedep *pagedep;
 	struct dirrem *dirrem;
+	struct newblk *newblk;
+	struct mkdir *mkdir;
 	struct diradd *dap;
 	int i, retval;
 
@@ -11067,12 +11120,30 @@ softdep_count_dependencies(bp, wantcount
 			}
 			continue;
 
-		case D_FREEWORK:
-		case D_FREEDEP:
-		case D_JSEGDEP:
 		case D_ALLOCDIRECT:
 		case D_ALLOCINDIR:
+			newblk = WK_NEWBLK(wk);
+			if (newblk->nb_jnewblk) {
+				/* Journal allocate dependency. */
+				retval += 1;
+				if (!wantcount)
+					goto out;
+			}
+			continue;
+
 		case D_MKDIR:
+			mkdir = WK_MKDIR(wk);
+			if (mkdir->md_jaddref) {
+				/* Journal reference dependency. */
+				retval += 1;
+				if (!wantcount)
+					goto out;
+			}
+			continue;
+
+		case D_FREEWORK:
+		case D_FREEDEP:
+		case D_JSEGDEP:
 		case D_JSEG:
 		case D_SBDEP:
 			/* never a dependency on these blocks */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201002170311.o1H3BufV016689>