Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 14 Sep 2010 18:04:05 +0000 (UTC)
From:      Kirk McKusick <mckusick@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r212617 - head/sys/ufs/ffs
Message-ID:  <201009141804.o8EI45kp088466@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mckusick
Date: Tue Sep 14 18:04:05 2010
New Revision: 212617
URL: http://svn.freebsd.org/changeset/base/212617

Log:
  Update comments in soft updates code to more fully describe
  the addition of journalling. Only functional change is to
  tighten a KASSERT.
  
  Reviewed by:	Jeff Roberson

Modified:
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ffs/fs.h
  head/sys/ufs/ffs/softdep.h

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c	Tue Sep 14 17:22:06 2010	(r212616)
+++ head/sys/ufs/ffs/ffs_softdep.c	Tue Sep 14 18:04:05 2010	(r212617)
@@ -2378,7 +2378,8 @@ remove_from_journal(wk)
 	/*
 	 * We emulate a TAILQ to save space in most structures which do not
 	 * require TAILQ semantics.  Here we must update the tail position
-	 * when removing the tail which is not the final entry.
+	 * when removing the tail which is not the final entry. This works
+	 * only if the worklist linkage is at the beginning of the structure.
 	 */
 	if (ump->softdep_journal_tail == wk)
 		ump->softdep_journal_tail =
@@ -2605,7 +2606,7 @@ jremref_write(jremref, jseg, data)
 	inoref_write(&jremref->jr_ref, jseg, rec);
 }
 
-static	void
+static void
 jmvref_write(jmvref, jseg, data)
 	struct jmvref *jmvref;
 	struct jseg *jseg;
@@ -2906,9 +2907,9 @@ complete_jseg(jseg)
 		waiting = wk->wk_state & IOWAITING;
 		wk->wk_state &= ~(IOSTARTED | IOWAITING);
 		wk->wk_state |= COMPLETE;
-		KASSERT(i < jseg->js_cnt,
+		KASSERT(i++ < jseg->js_cnt,
 		    ("handle_written_jseg: overflow %d >= %d",
-		    i, jseg->js_cnt));
+		    i - 1, jseg->js_cnt));
 		switch (wk->wk_type) {
 		case D_JADDREF:
 			handle_written_jaddref(WK_JADDREF(wk));
@@ -7492,7 +7493,7 @@ handle_written_sbdep(sbdep, bp)
 	if (inodedep_lookup(mp, fs->fs_sujfree, 0, &inodedep) == 0)
 		panic("handle_written_sbdep: lost inodedep");
 	/*
-	 * Now that we have a record of this indode in stable store allow it
+	 * Now that we have a record of this inode in stable store allow it
 	 * to be written to free up pending work.  Inodes may see a lot of
 	 * write activity after they are unlinked which we must not hold up.
 	 */
@@ -7509,8 +7510,7 @@ handle_written_sbdep(sbdep, bp)
 }
 
 /*
- * Mark an inodedep has unlinked and insert it into the in-memory unlinked
- * list.
+ * Mark an inodedep as unlinked and insert it into the in-memory unlinked list.
  */
 static void
 unlinked_inodedep(mp, inodedep)
@@ -7576,7 +7576,7 @@ clear_unlinked_inodedep(inodedep)
 		 * link before us, whether it be the superblock or an inode.
 		 * Unfortunately the list may change while we're waiting
 		 * on the buf lock for either resource so we must loop until
-		 * we lock. the right one.  If both the superblock and an
+		 * we lock the right one.  If both the superblock and an
 		 * inode point to this inode we must clear the inode first
 		 * followed by the superblock.
 		 */
@@ -9094,7 +9094,7 @@ handle_jwork(wkhd)
 /*
  * Handle the bufwait list on an inode when it is safe to release items
  * held there.  This normally happens after an inode block is written but
- * may be delayed and handle later if there are pending journal items that
+ * may be delayed and handled later if there are pending journal items that
  * are not yet safe to be released.
  */
 static struct freefile *

Modified: head/sys/ufs/ffs/fs.h
==============================================================================
--- head/sys/ufs/ffs/fs.h	Tue Sep 14 17:22:06 2010	(r212616)
+++ head/sys/ufs/ffs/fs.h	Tue Sep 14 18:04:05 2010	(r212617)
@@ -661,7 +661,7 @@ lbn_level(ufs_lbn_t lbn)
 
 /*
  * Size of the segment record header.  There is at most one for each disk
- * block n the journal.  The segment header is followed by an array of
+ * block in the journal.  The segment header is followed by an array of
  * records.  fsck depends on the first element in each record being 'op'
  * and the second being 'ino'.  Segments may span multiple disk blocks but
  * the header is present on each.

Modified: head/sys/ufs/ffs/softdep.h
==============================================================================
--- head/sys/ufs/ffs/softdep.h	Tue Sep 14 17:22:06 2010	(r212616)
+++ head/sys/ufs/ffs/softdep.h	Tue Sep 14 18:04:05 2010	(r212617)
@@ -107,6 +107,15 @@
  *
  * The ONWORKLIST flag shows whether the structure is currently linked
  * onto a worklist.
+ *
+ * The UNLINK* flags track the progress of updating the on-disk linked
+ * list of active but unlinked inodes. When an inode is first unlinked
+ * it is marked as UNLINKED. When its on-disk di_freelink has been
+ * written its UNLINKNEXT flag is set. When its predecessor in the
+ * list has its di_freelink pointing at us its UNLINKPREV is set.
+ * When the on-disk list can reach it from the superblock, its
+ * UNLINKONLIST flag is set. Once all of these flags are set, it
+ * is safe to let its last name be removed.
  */
 #define	ATTACHED	0x000001
 #define	UNDONE		0x000002
@@ -353,20 +362,22 @@ struct bmsafemap {
  * or fragment is allocated from a cylinder group. Its state is set to
  * DEPCOMPLETE when its cylinder group map is written. It is converted to
  * an allocdirect or allocindir allocation once the allocator calls the
- * appropriate setup function.
+ * appropriate setup function. It will initially be linked onto a bmsafemap
+ * list. Once converted it can be linked onto the lists described for
+ * allocdirect or allocindir as described below.
  */ 
 struct newblk {
-	struct	worklist nb_list;
+	struct	worklist nb_list;	/* See comment above. */
 #	define	nb_state nb_list.wk_state
-	LIST_ENTRY(newblk) nb_hash;	/* hashed lookup */
-	LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblks */
+	LIST_ENTRY(newblk) nb_hash;	/* Hashed lookup. */
+	LIST_ENTRY(newblk) nb_deps;	/* Bmsafemap's list of newblks. */
 	struct	jnewblk *nb_jnewblk;	/* New block journal entry. */
-	struct	bmsafemap *nb_bmsafemap;/* cylgrp dep (if pending) */
-	struct	freefrag *nb_freefrag;	/* fragment to be freed (if any) */
+	struct	bmsafemap *nb_bmsafemap;/* Cylgrp dep (if pending). */
+	struct	freefrag *nb_freefrag;	/* Fragment to be freed (if any). */
 	struct	indirdephd nb_indirdeps; /* Children indirect blocks. */
-	struct	workhead nb_newdirblk;	/* dir block to notify when written */
+	struct	workhead nb_newdirblk;	/* Dir block to notify when written. */
 	struct	workhead nb_jwork;	/* Journal work pending. */
-	ufs2_daddr_t	nb_newblkno;	/* new value of block pointer */
+	ufs2_daddr_t	nb_newblkno;	/* New value of block pointer. */
 };
 
 /*
@@ -517,16 +528,16 @@ struct freeblks {
 /*
  * A "freework" structure handles the release of a tree of blocks or a single
  * block.  Each indirect block in a tree is allocated its own freework
- * structure so that the indrect block may be freed only when all of its
+ * structure so that the indirect block may be freed only when all of its
  * children are freed.  In this way we enforce the rule that an allocated
  * block must have a valid path to a root that is journaled.  Each child
  * block acquires a reference and when the ref hits zero the parent ref
  * is decremented.  If there is no parent the freeblks ref is decremented.
  */
 struct freework {
-	struct	worklist fw_list;
+	struct	worklist fw_list;		/* Delayed worklist. */
 #	define	fw_state fw_list.wk_state
-	LIST_ENTRY(freework) fw_next;		/* Queue for freeblksk. */
+	LIST_ENTRY(freework) fw_next;		/* Queue for freeblk list. */
 	struct	freeblks *fw_freeblks;		/* Root of operation. */
 	struct	freework *fw_parent;		/* Parent indirect. */
 	ufs2_daddr_t	 fw_blkno;		/* Our block #. */
@@ -545,7 +556,7 @@ struct freework {
  * to be freed as well.
  */
 struct freedep {
-	struct	worklist fd_list;
+	struct	worklist fd_list;	/* Delayed worklist. */
 	struct	freework *fd_freework;	/* Parent freework. */
 };
 
@@ -705,10 +716,10 @@ struct newdirblk {
  * so they may easily be queued in-order on the inodedep.
  */
 struct inoref {
-	struct	worklist if_list;
+	struct	worklist if_list;	/* Journal pending or jseg entries. */
 #	define	if_state if_list.wk_state
 	TAILQ_ENTRY(inoref) if_deps;	/* Links for inodedep. */
-	struct	jsegdep	*if_jsegdep;
+	struct	jsegdep	*if_jsegdep;	/* Will track our journal record. */
 	off_t		if_diroff;	/* Directory offset. */
 	ino_t		if_ino;		/* Inode number. */
 	ino_t		if_parent;	/* Parent inode number. */
@@ -731,8 +742,8 @@ struct inoref {
  * ultimately released when the file is freed or the link is dropped again.
  */
 struct jaddref {
-	struct	inoref	ja_ref;
-#	define	ja_list	ja_ref.if_list	/* Journal pending or jseg entries. */
+	struct	inoref	ja_ref;		/* see inoref above. */
+#	define	ja_list	ja_ref.if_list	/* Jrnl pending, id_inowait, dm_jwork.*/
 #	define	ja_state ja_ref.if_list.wk_state
 	LIST_ENTRY(jaddref) ja_bmdeps;	/* Links for bmsafemap. */
 	union {
@@ -754,21 +765,28 @@ struct jaddref {
  * may proceed as normal. 
  */
 struct jremref {
-	struct	inoref	jr_ref;
-#	define	jr_list	jr_ref.if_list	/* Journal pending or jseg entries. */
+	struct	inoref	jr_ref;		/* see inoref above. */
+#	define	jr_list	jr_ref.if_list	/* Linked to softdep_journal_pending. */
 #	define	jr_state jr_ref.if_list.wk_state
-	LIST_ENTRY(jremref) jr_deps;	/* Links for pagdep. */
+	LIST_ENTRY(jremref) jr_deps;	/* Links for dirrem. */
 	struct	dirrem	*jr_dirrem;	/* Back pointer to dirrem. */
 };
 
+/*
+ * A "jmvref" structure tracks name relocations within the same
+ * directory block that occur as a result of directory compaction.
+ * It prevents the updated directory entry from being written to disk
+ * until the journal entry is written. Once the journal has been
+ * written the compacted directory may be written to disk.
+ */
 struct jmvref {
-	struct	worklist jm_list;
-	LIST_ENTRY(jmvref) jm_deps;
-	struct pagedep	*jm_pagedep;
-	ino_t		jm_parent;
-	ino_t		jm_ino;
-	off_t		jm_oldoff;
-	off_t		jm_newoff;
+	struct	worklist jm_list;	/* Linked to softdep_journal_pending. */
+	LIST_ENTRY(jmvref) jm_deps;	/* Jmvref on pagedep. */
+	struct pagedep	*jm_pagedep;	/* Back pointer to pagedep. */
+	ino_t		jm_parent;	/* Containing directory inode number. */
+	ino_t		jm_ino;		/* Inode number of our entry. */
+	off_t		jm_oldoff;	/* Our old offset in directory. */
+	off_t		jm_newoff;	/* Our new offset in directory. */
 };
 
 /*
@@ -780,36 +798,37 @@ struct jmvref {
  * write the jnewblk structure is maintained to prevent the bitmaps from
  * reaching the disk.  Ultimately the jnewblk structure will be passed
  * to the free routine as the in memory cg is modified back to the free
- * state at which time it can be released.
+ * state at which time it can be released. It may be held on any of the
+ * fx_jwork, fw_jwork, fb_jwork, ff_jwork, nb_jwork, or ir_jwork lists.
  */
 struct jnewblk {
-	struct	worklist jn_list;
+	struct	worklist jn_list;	/* See lists above. */
 #	define	jn_state jn_list.wk_state
-	struct	jsegdep	*jn_jsegdep;
-	LIST_ENTRY(jnewblk) jn_deps;		/* All jnewblks on bmsafemap */
-	struct	newblk	*jn_newblk;
-	ino_t		jn_ino;
-	ufs_lbn_t	jn_lbn;
-	ufs2_daddr_t	jn_blkno;
-	int		jn_oldfrags;
-	int		jn_frags;
+	struct	jsegdep	*jn_jsegdep;	/* Will track our journal record. */
+	LIST_ENTRY(jnewblk) jn_deps;	/* Jnewblks on sm_jnewblkhd. */
+	struct	newblk	*jn_newblk;	/* Back pointer to newblk. */
+	ino_t		jn_ino;		/* Ino to which allocated. */
+	ufs_lbn_t	jn_lbn;		/* Lbn to which allocated. */
+	ufs2_daddr_t	jn_blkno;	/* Blkno allocated */
+	int		jn_oldfrags;	/* Previous fragments when extended. */
+	int		jn_frags;	/* Number of fragments. */
 };
 
 /*
  * A "jfreeblk" structure tracks the journal write for freeing a block
  * or tree of blocks.  The block pointer must not be cleared in the inode
- * or indirect prior to the jfreeblk being written.
+ * or indirect prior to the jfreeblk being written to the journal.
  */
 struct jfreeblk {
-	struct	worklist jf_list;
+	struct	worklist jf_list;	/* Linked to softdep_journal_pending. */
 #	define	jf_state jf_list.wk_state
-	struct	jsegdep	*jf_jsegdep;
-	struct freeblks	*jf_freeblks;
-	LIST_ENTRY(jfreeblk) jf_deps;
-	ino_t		jf_ino;
-	ufs_lbn_t	jf_lbn;
-	ufs2_daddr_t	jf_blkno;
-	int		jf_frags;
+	struct	jsegdep	*jf_jsegdep;	/* Will track our journal record. */
+	struct freeblks	*jf_freeblks;	/* Back pointer to freeblks. */
+	LIST_ENTRY(jfreeblk) jf_deps;	/* Jfreeblk on fb_jfreeblkhd. */
+	ino_t		jf_ino;		/* Ino from which blocks freed. */
+	ufs_lbn_t	jf_lbn;		/* Lbn from which blocks freed. */
+	ufs2_daddr_t	jf_blkno;	/* Blkno being freed. */
+	int		jf_frags;	/* Number of frags being freed. */
 };
 
 /*
@@ -818,14 +837,14 @@ struct jfreeblk {
  * freeblks operation.
  */
 struct jfreefrag {
-	struct	worklist fr_list;
+	struct	worklist fr_list;	/* Linked to softdep_journal_pending. */
 #	define	fr_state fr_list.wk_state
-	struct	jsegdep	*fr_jsegdep;
-	struct freefrag	*fr_freefrag;
-	ino_t		fr_ino;
-	ufs_lbn_t	fr_lbn;
-	ufs2_daddr_t	fr_blkno;
-	int		fr_frags;
+	struct	jsegdep	*fr_jsegdep;	/* Will track our journal record. */
+	struct freefrag	*fr_freefrag;	/* Back pointer to freefrag. */
+	ino_t		fr_ino;		/* Ino from which frag freed. */
+	ufs_lbn_t	fr_lbn;		/* Lbn from which frag freed. */
+	ufs2_daddr_t	fr_blkno;	/* Blkno being freed. */
+	int		fr_frags;	/* Size of frag being freed. */
 };
 
 /*
@@ -835,42 +854,45 @@ struct jfreefrag {
  * is complete and the truncated inode is fsync'd.
  */
 struct jtrunc {
-	struct	worklist jt_list;
-	struct	jsegdep	*jt_jsegdep;
-	ino_t		 jt_ino;
-	off_t		 jt_size;
-	int		 jt_extsize;
+	struct	worklist jt_list;	/* Linked to softdep_journal_pending. */
+	struct	jsegdep	*jt_jsegdep;	/* Will track our journal record. */
+	ino_t		 jt_ino;	/* Ino being truncated. */
+	off_t		 jt_size;	/* Final file size. */
+	int		 jt_extsize;	/* Final extent size. */
 };
 
 /*
  * A "jsegdep" structure tracks a single reference to a written journal
  * segment so the journal space can be reclaimed when all dependencies
- * have been written.
+ * have been written. It can hang off of id_inowait, dm_jwork, da_jwork,
+ * nb_jwork, ff_jwork, or fb_jwork lists.
  */
 struct jsegdep {
-	struct	worklist jd_list;
+	struct	worklist jd_list;	/* See above for lists. */
 #	define	jd_state jd_list.wk_state
-	struct	jseg	*jd_seg;
+	struct	jseg	*jd_seg;	/* Our journal record. */
 };
 
 /*
  * A "jseg" structure contains all of the journal records written in a
- * single disk write.  jaddref and jremref structures are linked into
+ * single disk write.  The jaddref and jremref structures are linked into
 * js_entries so they may be completed when the write completes.  The
- * js_deps array contains as many entries as there are ref counts to
- * reduce the number of allocations required per journal write to one.
+ * js_entries also include the write dependency structures: jmvref,
+ * jnewblk, jfreeblk, jfreefrag, and jtrunc.  The js_refs field counts
+ * the number of entries on the js_entries list. Thus there is a single
+ * jseg entry to describe each journal write.
  */
 struct jseg {
 	struct	worklist js_list;	/* b_deps link for journal */
 #	define	js_state js_list.wk_state
 	struct	workhead js_entries;	/* Entries awaiting write */
-	TAILQ_ENTRY(jseg) js_next;
+	TAILQ_ENTRY(jseg) js_next;	/* List of all unfinished segments. */
 	struct	jblocks *js_jblocks;	/* Back pointer to block/seg list */
 	struct	buf *js_buf;		/* Buffer while unwritten */
-	uint64_t js_seq;
-	int	js_size;		/* Allocated size in bytes */
-	int	js_cnt;			/* Total items allocated */
-	int	js_refs;		/* Count of items pending completion */
+	uint64_t js_seq;		/* Journal record sequence number. */
+	int	js_size;		/* Size of journal record in bytes. */
+	int	js_cnt;			/* Total items allocated. */
+	int	js_refs;		/* Count of js_entries items. */
 };
 
 /*
@@ -878,10 +900,11 @@ struct jseg {
  * superblock writes.  This makes sure the superblock is always pointing at
  * the first possible unlinked inode for the suj recovery process.  If a
  * block write completes and we discover a new head is available the buf
- * is dirtied and the dep is kept.
+ * is dirtied and the dep is kept. See the description of the UNLINK*
+ * flags above for more details.
  */
 struct sbdep {
 	struct	worklist sb_list;	/* b_dep linkage */
 	struct	fs	*sb_fs;		/* Filesystem pointer within buf. */
-	struct	ufsmount *sb_ump;
+	struct	ufsmount *sb_ump;	/* Our mount structure */
 };



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201009141804.o8EI45kp088466>