Date: Thu, 11 Mar 2010 01:02:27 +0000 (UTC)
From: Jeff Roberson <jeff@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r204991 - in projects/suj: 6/sbin/fsck_ffs 6/sbin/tunefs 6/sys/ufs/ffs 7/sbin/fsck_ffs 7/sbin/tunefs 7/sys/ufs/ffs 8/sbin/fsck_ffs 8/sbin/tunefs 8/sys/ufs/ffs
Message-ID: <201003110102.o2B12RJb014891@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: jeff Date: Thu Mar 11 01:02:27 2010 New Revision: 204991 URL: http://svn.freebsd.org/changeset/base/204991 Log: - Catch up to changes made on the suj/head branch. Modified: projects/suj/6/sbin/fsck_ffs/suj.c projects/suj/6/sbin/tunefs/tunefs.c projects/suj/6/sys/ufs/ffs/ffs_softdep.c projects/suj/7/sbin/fsck_ffs/suj.c projects/suj/7/sbin/tunefs/tunefs.c projects/suj/7/sys/ufs/ffs/ffs_softdep.c projects/suj/8/sbin/fsck_ffs/suj.c projects/suj/8/sbin/tunefs/tunefs.c projects/suj/8/sys/ufs/ffs/ffs_softdep.c Modified: projects/suj/6/sbin/fsck_ffs/suj.c ============================================================================== --- projects/suj/6/sbin/fsck_ffs/suj.c Wed Mar 10 23:02:06 2010 (r204990) +++ projects/suj/6/sbin/fsck_ffs/suj.c Thu Mar 11 01:02:27 2010 (r204991) @@ -142,10 +142,11 @@ uint64_t jbytes; uint64_t jrecs; typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); -static void ino_trunc(ino_t ino, off_t size); +static void ino_trunc(ino_t, off_t); static void ino_decr(ino_t); static void ino_adjust(struct suj_ino *); -static void ino_build(struct suj_ino *sino); +static void ino_build(struct suj_ino *); +static int blk_isfree(ufs2_daddr_t); static void * errmalloc(size_t n) @@ -496,7 +497,7 @@ blk_setmask(struct jblkrec *brec, int *m * to be freed. The mask value can be used to free partial blocks. */ static int -blk_isfree(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) +blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) { struct suj_blk *sblk; struct suj_rec *srec; @@ -532,7 +533,7 @@ blk_isfree(ufs2_daddr_t blk, ino_t ino, blk_setmask(brec, &mask); } if (debug) - printf("blk_isfree: blk %jd sblk %jd off %d mask 0x%X\n", + printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n", blk, sblk->sb_blk, off, mask); return (mask >> off); } @@ -542,6 +543,9 @@ blk_isfree(ufs2_daddr_t blk, ino_t ino, * if any part of the indirect has been reallocated or the last journal * entry was an allocation. 
Just allocated indirects may not have valid * pointers yet and all of their children will have their own records. + * It is also not safe to follow an indirect if the cg bitmap has been + * cleared as a new allocation may write to the block prior to the journal + * being written. * * Returns 1 if it's safe to follow the indirect and 0 otherwise. */ @@ -559,7 +563,7 @@ blk_isindir(ufs2_daddr_t blk, ino_t ino, brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) if (brec->jb_op == JOP_FREEBLK) - return (1); + return (!blk_isfree(blk)); return (0); } @@ -644,6 +648,19 @@ blk_free(ufs2_daddr_t bno, int mask, int } /* + * Returns 1 if the whole block starting at 'bno' is marked free and 0 + * otherwise. + */ +static int +blk_isfree(ufs2_daddr_t bno) +{ + struct suj_cg *sc; + + sc = cg_lookup(dtog(fs, bno)); + return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno)); +} + +/* * Fetch an indirect block to find the block at a given lbn. The lbn * may be negative to fetch a specific indirect block pointer or positive * to fetch a specific block. 
@@ -1059,7 +1076,7 @@ blk_free_visit(ino_t ino, ufs_lbn_t lbn, { int mask; - mask = blk_isfree(blk, ino, lbn, frags); + mask = blk_freemask(blk, ino, lbn, frags); if (debug) printf("blk %jd freemask 0x%X\n", blk, mask); blk_free(blk, mask, frags); @@ -1076,7 +1093,7 @@ blk_free_lbn(ufs2_daddr_t blk, ino_t ino uint64_t resid; int mask; - mask = blk_isfree(blk, ino, lbn, frags); + mask = blk_freemask(blk, ino, lbn, frags); if (debug) printf("blk %jd freemask 0x%X\n", blk, mask); resid = 0; @@ -1615,7 +1632,7 @@ blk_check(struct suj_blk *sblk) if (isat == 1) { if (frags == brec->jb_frags) continue; - mask = blk_isfree(blk, brec->jb_ino, brec->jb_lbn, + mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags); mask >>= frags; blk += frags; @@ -2259,7 +2276,8 @@ suj_verifyino(union dinode *ip) return (-1); } - if (DIP(ip, di_flags) != (SF_IMMUTABLE | SF_NOUNLINK)) { + if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) != + (SF_IMMUTABLE | SF_NOUNLINK)) { printf("Invalid flags 0x%X for journal inode %d\n", DIP(ip, di_flags), sujino); return (-1); @@ -2595,19 +2613,19 @@ suj_check(const char *filesys) cg_apply(cg_check_blk); cg_apply(cg_check_ino); } + if (preen == 0 && reply("WRITE CHANGES") == 0) + return (0); /* * To remain idempotent with partial truncations the free bitmaps * must be written followed by indirect blocks and lastly inode * blocks. This preserves access to the modified pointers until * they are freed. */ - if (preen || reply("WRITE CHANGES")) { - cg_apply(cg_write); - dblk_write(); - cg_apply(cg_write_inos); - /* Write back superblock. */ - closedisk(filesys); - } + cg_apply(cg_write); + dblk_write(); + cg_apply(cg_write_inos); + /* Write back superblock. 
*/ + closedisk(filesys); printf("** %jd journal records in %jd bytes for %.2f%% utilization\n", jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100); printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n", Modified: projects/suj/6/sbin/tunefs/tunefs.c ============================================================================== --- projects/suj/6/sbin/tunefs/tunefs.c Wed Mar 10 23:02:06 2010 (r204990) +++ projects/suj/6/sbin/tunefs/tunefs.c Thu Mar 11 01:02:27 2010 (r204991) @@ -529,6 +529,7 @@ journal_findfile(void) { struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; + ino_t ino; int mode; void *ip; int i; @@ -547,9 +548,9 @@ journal_findfile(void) for (i = 0; i < NDADDR; i++) { if (dp1->di_db[i] == 0) break; - if (dir_search(dp1->di_db[i], - sblksize(&sblock, (off_t)dp1->di_size, i)) != 0) - return (-1); + if ((ino = dir_search(dp1->di_db[i], + sblksize(&sblock, (off_t)dp1->di_size, i))) != 0) + return (ino); } } else { if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) { @@ -559,9 +560,9 @@ journal_findfile(void) for (i = 0; i < NDADDR; i++) { if (dp2->di_db[i] == 0) break; - if (dir_search(dp2->di_db[i], - sblksize(&sblock, (off_t)dp2->di_size, i)) != 0) - return (-1); + if ((ino = dir_search(dp2->di_db[i], + sblksize(&sblock, (off_t)dp2->di_size, i))) != 0) + return (ino); } } @@ -760,10 +761,11 @@ journal_clear(void) void *ip; ino = journal_findfile(); - if (ino <= 0) { + if (ino == (ino_t)-1 || ino == 0) { warnx("Journal file does not exist"); return; } + printf("Clearing journal flags from inode %d\n", ino); if (getino(&disk, &ip, ino, &mode) != 0) { warn("Failed to get journal inode"); return; @@ -801,11 +803,13 @@ journal_alloc(int64_t size) * If the journal file exists we can't allocate it. 
*/ ino = journal_findfile(); - if (ino > 0) + if (ino == (ino_t)-1) + return (-1); + if (ino > 0) { warnx("Journal file %s already exists, please remove.", SUJ_FILE); - if (ino != 0) return (-1); + } /* * If the user didn't supply a size pick one based on the filesystem * size constrained with hardcoded MIN and MAX values. We opt for @@ -859,13 +863,13 @@ journal_alloc(int64_t size) dp1->di_size = size; dp1->di_mode = IFREG | IREAD; dp1->di_nlink = 1; - dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK; + dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; } else { bzero(dp2, sizeof(*dp2)); dp2->di_size = size; dp2->di_mode = IFREG | IREAD; dp2->di_nlink = 1; - dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK; + dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; } for (i = 0; i < NDADDR && resid; i++, resid--) { blk = journal_balloc(); Modified: projects/suj/6/sys/ufs/ffs/ffs_softdep.c ============================================================================== --- projects/suj/6/sys/ufs/ffs/ffs_softdep.c Wed Mar 10 23:02:06 2010 (r204990) +++ projects/suj/6/sys/ufs/ffs/ffs_softdep.c Thu Mar 11 01:02:27 2010 (r204991) @@ -1,5 +1,7 @@ /*- - * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. + * Copyright 1998, 2000 Marshall Kirk McKusick. + * Copyright 2009, 2010 Jeffrey W. Roberson <jeff@FreeBSD.org> + * All rights reserved. * * The soft updates code is derived from the appendix of a University * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, @@ -23,17 +25,16 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00 */ @@ -955,6 +956,11 @@ static int stat_jaddref; /* bufs redirti static int stat_jnewblk; /* bufs redirtied as blk bitmap can not write */ static int stat_journal_min; /* Times hit journal min threshold */ static int stat_journal_low; /* Times hit journal low threshold */ +static int stat_journal_wait; /* Times blocked in jwait(). */ +static int stat_jwait_filepage; /* Times blocked in jwait() for filepage. */ +static int stat_jwait_freeblks; /* Times blocked in jwait() for freeblks. */ +static int stat_jwait_inode; /* Times blocked in jwait() for inodes. */ +static int stat_jwait_newblk; /* Times blocked in jwait() for newblks. 
*/ SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); @@ -990,6 +996,16 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, jou &stat_journal_low, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW, &stat_journal_min, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, journal_wait, CTLFLAG_RW, + &stat_journal_wait, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_filepage, CTLFLAG_RW, + &stat_jwait_filepage, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_freeblks, CTLFLAG_RW, + &stat_jwait_freeblks, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_inode, CTLFLAG_RW, + &stat_jwait_inode, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW, + &stat_jwait_newblk, 0, ""); SYSCTL_DECL(_vfs_ffs); @@ -2488,16 +2504,11 @@ softdep_process_journal(mp, flags) * entries and add them to the segment. Notice cnt is * off by one to account for the space required by the * jsegrec. If we don't have a full block to log skip it - * unless we haven't written anything in 5 seconds. + * unless we haven't written anything. */ cnt++; - if (cnt < jrecmax) { - if (segwritten) - break; - if (flags == MNT_NOWAIT && - (ticks - jblocks->jb_age) < hz*5) - break; - } + if (cnt < jrecmax && segwritten) + break; /* * Verify some free journal space. softdep_prealloc() should * guarantee that we don't run out so this is indicative of @@ -2621,23 +2632,16 @@ softdep_process_journal(mp, flags) /* * Write this one buffer and continue. */ -#if 1 WORKLIST_INSERT(&bp->b_dep, &jseg->js_list); FREE_LOCK(&lk); BO_LOCK(bp->b_bufobj); bgetvp(ump->um_devvp, bp); BO_UNLOCK(bp->b_bufobj); - /* XXX Could bawrite here. */ - bwrite(bp); - ACQUIRE_LOCK(&lk); -#else - /* This case simulates the write but does not log anything. 
*/ - handle_written_jseg(jseg, bp); - FREE_LOCK(&lk); - brelse(bp); + if (flags == MNT_NOWAIT) + bawrite(bp); + else + bwrite(bp); ACQUIRE_LOCK(&lk); -#endif - segwritten++; } /* * If we've suspended the filesystem because we ran out of journal @@ -3476,6 +3480,7 @@ jwait(wk) struct worklist *wk; { + stat_journal_wait++; /* * If IO has not started we process the journal. We can't mark the * worklist item as IOWAITING because we drop the lock while @@ -3544,8 +3549,10 @@ softdep_setup_trunc(vp, length, flags) jtrunc->jt_size = DIP(ip, i_size); ACQUIRE_LOCK(&lk); add_to_journal(&jtrunc->jt_list); - while (jsegdep->jd_seg == NULL) + while (jsegdep->jd_seg == NULL) { + stat_jwait_freeblks++; jwait(&jtrunc->jt_list); + } FREE_LOCK(&lk); return (jsegdep); @@ -4949,7 +4956,7 @@ softdep_setup_freeblocks(ip, length, fla * for the allocations will suffice. */ inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); - if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED || + if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED || (fs->fs_flags & FS_SUJ) == 0) needj = 0; else @@ -5200,6 +5207,7 @@ deallocate_dependencies(bp, inodedep, fr while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) { + stat_jwait_filepage++; jwait(&jremref->jr_list); return (0); } @@ -5221,6 +5229,7 @@ deallocate_dependencies(bp, inodedep, fr } while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) { + stat_jwait_filepage++; jwait(&jmvref->jm_list); return (0); } @@ -5496,12 +5505,17 @@ softdep_freefile(pvp, ino, mode) * will never be written. */ if (inodedep && inodedep->id_state & UNLINKED) { + /* + * Save the journal work to be freed with the bitmap + * before we clear UNLINKED. Otherwise it can be lost + * if the inode block is written. + */ + handle_bufwait(inodedep, &freefile->fx_jwork); clear_unlinked_inodedep(inodedep); + /* Re-acquire inodedep as we've dropped lk. 
*/ inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); - if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) { + if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) inodedep->id_state |= GOINGAWAY; - handle_bufwait(inodedep, &freefile->fx_jwork); - } } if (inodedep == NULL || check_inode_unwritten(inodedep)) { FREE_LOCK(&lk); @@ -5621,21 +5635,24 @@ freework_freeblock(freework) int complete; int pending; int bsize; + int needj; freeblks = freework->fw_freeblks; ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; + needj = freeblks->fb_list.wk_mp->mnt_kern_flag & MNTK_SUJ; complete = 0; LIST_INIT(&wkhd); /* * If we are canceling an existing jnewblk pass it to the free * routine, otherwise pass the freeblk which will ultimately - * release the freeblks + * release the freeblks. If we're not journaling, we can just + * free the freeblks immediately. */ if (!LIST_EMPTY(&freework->fw_jwork)) { LIST_SWAP(&wkhd, &freework->fw_jwork, worklist, wk_list); complete = 1; - } else + } else if (needj) WORKLIST_INSERT_UNLOCKED(&wkhd, &freework->fw_list); bsize = lfragtosize(fs, freework->fw_frags); pending = btodb(bsize); @@ -5652,7 +5669,7 @@ freework_freeblock(freework) } ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize, freeblks->fb_previousinum, &wkhd); - if (complete == 0) + if (complete == 0 && needj) return; /* * The jnewblk will be discarded and the bits in the map never @@ -5823,6 +5840,7 @@ indir_trunc(freework, dbn, lbn) ufs2_daddr_t dbn; ufs_lbn_t lbn; { + struct freework *nfreework; struct workhead wkhd; struct jnewblk *jnewblk; struct freeblks *freeblks; @@ -5838,6 +5856,7 @@ indir_trunc(freework, dbn, lbn) int i, nblocks, ufs1fmt; int fs_pendingblocks; int freedeps; + int needj; int level; int cnt; @@ -5850,6 +5869,7 @@ indir_trunc(freework, dbn, lbn) fs = ump->um_fs; fs_pendingblocks = 0; freedeps = 0; + needj = UFSTOVFS(ump)->mnt_kern_flag & MNTK_SUJ; lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); @@ -5941,7 +5961,8 
@@ indir_trunc(freework, dbn, lbn) cnt++; } ACQUIRE_LOCK(&lk); - freework->fw_ref += NINDIR(fs) + 1; + if (needj) + freework->fw_ref += NINDIR(fs) + 1; /* Any remaining journal work can be completed with freeblks. */ jwork_move(&freeblks->fb_jwork, &wkhd); FREE_LOCK(&lk); @@ -5950,6 +5971,7 @@ indir_trunc(freework, dbn, lbn) nb = bap1[0]; else nb = bap2[0]; + nfreework = freework; /* * Reclaim on disk blocks. */ @@ -5965,13 +5987,14 @@ indir_trunc(freework, dbn, lbn) continue; cnt++; if (level != 0) { - struct freework *nfreework; ufs_lbn_t nlbn; nlbn = (lbn + 1) - (i * lbnadd); - nfreework = newfreework(freeblks, freework, nlbn, nb, - fs->fs_frag, 0); - freedeps++; + if (needj != 0) { + nfreework = newfreework(freeblks, freework, + nlbn, nb, fs->fs_frag, 0); + freedeps++; + } indir_trunc(nfreework, fsbtodb(fs, nb), nlbn); } else { struct freedep *freedep; @@ -5981,7 +6004,8 @@ indir_trunc(freework, dbn, lbn) * all blocks being released to the same CG. */ LIST_INIT(&wkhd); - if (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb))) { + if (needj != 0 && + (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb)))) { freedep = newfreedep(freework); WORKLIST_INSERT_UNLOCKED(&wkhd, &freedep->fd_list); @@ -5989,22 +6013,37 @@ indir_trunc(freework, dbn, lbn) } ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, fs->fs_bsize, freeblks->fb_previousinum, &wkhd); - fs_pendingblocks += nblocks; } } - ACQUIRE_LOCK(&lk); - freework->fw_off = i; if (level == 0) fs_pendingblocks = (nblocks * cnt); - freework->fw_ref += freedeps; - freework->fw_ref -= NINDIR(fs) + 1; - if (freework->fw_ref != 0) + /* + * If we're not journaling we can free the indirect now. Otherwise + * setup the ref counts and offset so this indirect can be completed + * when its children are free. 
+ */ + if (needj == 0) { + fs_pendingblocks += nblocks; + dbn = dbtofsb(fs, dbn); + ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, + freeblks->fb_previousinum, NULL); + ACQUIRE_LOCK(&lk); + freeblks->fb_chkcnt -= fs_pendingblocks; + if (freework->fw_blkno == dbn) + handle_written_freework(freework); + FREE_LOCK(&lk); freework = NULL; - FREE_LOCK(&lk); - if (fs_pendingblocks) { + } else { ACQUIRE_LOCK(&lk); + freework->fw_off = i; + freework->fw_ref += freedeps; + freework->fw_ref -= NINDIR(fs) + 1; + if (freework->fw_ref != 0) + freework = NULL; freeblks->fb_chkcnt -= fs_pendingblocks; FREE_LOCK(&lk); + } + if (fs_pendingblocks) { UFS_LOCK(ump); fs->fs_pendingblocks -= fs_pendingblocks; UFS_UNLOCK(ump); @@ -7870,10 +7909,14 @@ initiate_write_filepage(pagedep, bp) * locked so the dependency can not go away. */ LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) - while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) + while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) { + stat_jwait_filepage++; jwait(&jremref->jr_list); - while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) + } + while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) { + stat_jwait_filepage++; jwait(&jmvref->jm_list); + } for (i = 0; i < DAHASHSZ; i++) { LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) { ep = (struct direct *) @@ -9729,6 +9772,7 @@ again: TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { + stat_jwait_inode++; jwait(&inoref->if_list); goto again; } @@ -9867,6 +9911,7 @@ restart: TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { + stat_jwait_inode++; jwait(&inoref->if_list); goto restart; } @@ -10110,6 +10155,7 @@ loop: case D_ALLOCINDIR: newblk = WK_NEWBLK(wk); if (newblk->nb_jnewblk != NULL) { + stat_jwait_newblk++; jwait(&newblk->nb_jnewblk->jn_list); goto restart; } @@ -10135,6 
+10181,7 @@ loop: &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) { newblk = (struct newblk *)aip; if (newblk->nb_jnewblk != NULL) { + stat_jwait_newblk++; jwait(&newblk->nb_jnewblk->jn_list); goto restart; } @@ -10262,6 +10309,7 @@ restart: TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { + stat_jwait_inode++; jwait(&inoref->if_list); goto restart; } @@ -10304,6 +10352,7 @@ flush_deplist(listhead, waitfor, errorp) TAILQ_FOREACH(adp, listhead, ad_next) { newblk = (struct newblk *)adp; if (newblk->nb_jnewblk != NULL) { + stat_jwait_newblk++; jwait(&newblk->nb_jnewblk->jn_list); return (1); } @@ -10368,6 +10417,7 @@ flush_newblk_dep(vp, mp, lbn) * Flush the journal. */ if (newblk->nb_jnewblk != NULL) { + stat_jwait_newblk++; jwait(&newblk->nb_jnewblk->jn_list); continue; } @@ -10477,6 +10527,7 @@ restart: TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) == DEPCOMPLETE) { + stat_jwait_inode++; jwait(&inoref->if_list); goto restart; } @@ -10636,12 +10687,12 @@ softdep_request_cleanup(fs, vp) if (error != 0) return (0); } - process_removes(vp); while (fs->fs_pendingblocks > 0 && fs->fs_cstotal.cs_nbfree <= needed) { if (time_second > starttime) return (0); UFS_UNLOCK(ump); ACQUIRE_LOCK(&lk); + process_removes(vp); if (ump->softdep_on_worklist > 0 && process_worklist_item(UFSTOVFS(ump), LK_NOWAIT) != -1) { stat_worklist_push += 1; @@ -10898,6 +10949,8 @@ softdep_count_dependencies(bp, wantcount struct allocindir *aip; struct pagedep *pagedep; struct dirrem *dirrem; + struct newblk *newblk; + struct mkdir *mkdir; struct diradd *dap; int i, retval; @@ -10992,12 +11045,30 @@ softdep_count_dependencies(bp, wantcount } continue; - case D_FREEWORK: - case D_FREEDEP: - case D_JSEGDEP: case D_ALLOCDIRECT: case D_ALLOCINDIR: + newblk = WK_NEWBLK(wk); + if (newblk->nb_jnewblk) { + /* Journal allocate dependency. 
*/ + retval += 1; + if (!wantcount) + goto out; + } + continue; + case D_MKDIR: + mkdir = WK_MKDIR(wk); + if (mkdir->md_jaddref) { + /* Journal reference dependency. */ + retval += 1; + if (!wantcount) + goto out; + } + continue; + + case D_FREEWORK: + case D_FREEDEP: + case D_JSEGDEP: case D_JSEG: case D_SBDEP: /* never a dependency on these blocks */ Modified: projects/suj/7/sbin/fsck_ffs/suj.c ============================================================================== --- projects/suj/7/sbin/fsck_ffs/suj.c Wed Mar 10 23:02:06 2010 (r204990) +++ projects/suj/7/sbin/fsck_ffs/suj.c Thu Mar 11 01:02:27 2010 (r204991) @@ -142,10 +142,11 @@ uint64_t jbytes; uint64_t jrecs; typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); -static void ino_trunc(ino_t ino, off_t size); +static void ino_trunc(ino_t, off_t); static void ino_decr(ino_t); static void ino_adjust(struct suj_ino *); -static void ino_build(struct suj_ino *sino); +static void ino_build(struct suj_ino *); +static int blk_isfree(ufs2_daddr_t); static void * errmalloc(size_t n) @@ -496,7 +497,7 @@ blk_setmask(struct jblkrec *brec, int *m * to be freed. The mask value can be used to free partial blocks. */ static int -blk_isfree(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) +blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) { struct suj_blk *sblk; struct suj_rec *srec; @@ -532,7 +533,7 @@ blk_isfree(ufs2_daddr_t blk, ino_t ino, blk_setmask(brec, &mask); } if (debug) - printf("blk_isfree: blk %jd sblk %jd off %d mask 0x%X\n", + printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n", blk, sblk->sb_blk, off, mask); return (mask >> off); } @@ -542,6 +543,9 @@ blk_isfree(ufs2_daddr_t blk, ino_t ino, * if any part of the indirect has been reallocated or the last journal * entry was an allocation. Just allocated indirects may not have valid * pointers yet and all of their children will have their own records. 
+ * It is also not safe to follow an indirect if the cg bitmap has been + * cleared as a new allocation may write to the block prior to the journal + * being written. * * Returns 1 if it's safe to follow the indirect and 0 otherwise. */ @@ -559,7 +563,7 @@ blk_isindir(ufs2_daddr_t blk, ino_t ino, brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) if (brec->jb_op == JOP_FREEBLK) - return (1); + return (!blk_isfree(blk)); return (0); } @@ -644,6 +648,19 @@ blk_free(ufs2_daddr_t bno, int mask, int } /* + * Returns 1 if the whole block starting at 'bno' is marked free and 0 + * otherwise. + */ +static int +blk_isfree(ufs2_daddr_t bno) +{ + struct suj_cg *sc; + + sc = cg_lookup(dtog(fs, bno)); + return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno)); +} + +/* * Fetch an indirect block to find the block at a given lbn. The lbn * may be negative to fetch a specific indirect block pointer or positive * to fetch a specific block. 
@@ -1059,7 +1076,7 @@ blk_free_visit(ino_t ino, ufs_lbn_t lbn, { int mask; - mask = blk_isfree(blk, ino, lbn, frags); + mask = blk_freemask(blk, ino, lbn, frags); if (debug) printf("blk %jd freemask 0x%X\n", blk, mask); blk_free(blk, mask, frags); @@ -1076,7 +1093,7 @@ blk_free_lbn(ufs2_daddr_t blk, ino_t ino uint64_t resid; int mask; - mask = blk_isfree(blk, ino, lbn, frags); + mask = blk_freemask(blk, ino, lbn, frags); if (debug) printf("blk %jd freemask 0x%X\n", blk, mask); resid = 0; @@ -1615,7 +1632,7 @@ blk_check(struct suj_blk *sblk) if (isat == 1) { if (frags == brec->jb_frags) continue; - mask = blk_isfree(blk, brec->jb_ino, brec->jb_lbn, + mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags); mask >>= frags; blk += frags; @@ -2259,7 +2276,8 @@ suj_verifyino(union dinode *ip) return (-1); } - if (DIP(ip, di_flags) != (SF_IMMUTABLE | SF_NOUNLINK)) { + if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) != + (SF_IMMUTABLE | SF_NOUNLINK)) { printf("Invalid flags 0x%X for journal inode %d\n", DIP(ip, di_flags), sujino); return (-1); @@ -2595,19 +2613,19 @@ suj_check(const char *filesys) cg_apply(cg_check_blk); cg_apply(cg_check_ino); } + if (preen == 0 && reply("WRITE CHANGES") == 0) + return (0); /* * To remain idempotent with partial truncations the free bitmaps * must be written followed by indirect blocks and lastly inode * blocks. This preserves access to the modified pointers until * they are freed. */ - if (preen || reply("WRITE CHANGES")) { - cg_apply(cg_write); - dblk_write(); - cg_apply(cg_write_inos); - /* Write back superblock. */ - closedisk(filesys); - } + cg_apply(cg_write); + dblk_write(); + cg_apply(cg_write_inos); + /* Write back superblock. 
*/ + closedisk(filesys); printf("** %jd journal records in %jd bytes for %.2f%% utilization\n", jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100); printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n", Modified: projects/suj/7/sbin/tunefs/tunefs.c ============================================================================== --- projects/suj/7/sbin/tunefs/tunefs.c Wed Mar 10 23:02:06 2010 (r204990) +++ projects/suj/7/sbin/tunefs/tunefs.c Thu Mar 11 01:02:27 2010 (r204991) @@ -562,6 +562,7 @@ journal_findfile(void) { struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; + ino_t ino; int mode; void *ip; int i; @@ -580,9 +581,9 @@ journal_findfile(void) for (i = 0; i < NDADDR; i++) { if (dp1->di_db[i] == 0) break; - if (dir_search(dp1->di_db[i], - sblksize(&sblock, (off_t)dp1->di_size, i)) != 0) - return (-1); + if ((ino = dir_search(dp1->di_db[i], + sblksize(&sblock, (off_t)dp1->di_size, i))) != 0) + return (ino); } } else { if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) { @@ -592,9 +593,9 @@ journal_findfile(void) for (i = 0; i < NDADDR; i++) { if (dp2->di_db[i] == 0) break; - if (dir_search(dp2->di_db[i], - sblksize(&sblock, (off_t)dp2->di_size, i)) != 0) - return (-1); + if ((ino = dir_search(dp2->di_db[i], + sblksize(&sblock, (off_t)dp2->di_size, i))) != 0) + return (ino); } } @@ -793,10 +794,11 @@ journal_clear(void) void *ip; ino = journal_findfile(); - if (ino <= 0) { + if (ino == (ino_t)-1 || ino == 0) { warnx("Journal file does not exist"); return; } + printf("Clearing journal flags from inode %d\n", ino); if (getino(&disk, &ip, ino, &mode) != 0) { warn("Failed to get journal inode"); return; @@ -834,11 +836,13 @@ journal_alloc(int64_t size) * If the journal file exists we can't allocate it. 
*/ ino = journal_findfile(); - if (ino > 0) + if (ino == (ino_t)-1) + return (-1); + if (ino > 0) { warnx("Journal file %s already exists, please remove.", SUJ_FILE); - if (ino != 0) return (-1); + } /* * If the user didn't supply a size pick one based on the filesystem * size constrained with hardcoded MIN and MAX values. We opt for @@ -892,13 +896,13 @@ journal_alloc(int64_t size) dp1->di_size = size; dp1->di_mode = IFREG | IREAD; dp1->di_nlink = 1; - dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK; + dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; } else { bzero(dp2, sizeof(*dp2)); dp2->di_size = size; dp2->di_mode = IFREG | IREAD; dp2->di_nlink = 1; - dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK; + dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; } for (i = 0; i < NDADDR && resid; i++, resid--) { blk = journal_balloc(); Modified: projects/suj/7/sys/ufs/ffs/ffs_softdep.c ============================================================================== --- projects/suj/7/sys/ufs/ffs/ffs_softdep.c Wed Mar 10 23:02:06 2010 (r204990) +++ projects/suj/7/sys/ufs/ffs/ffs_softdep.c Thu Mar 11 01:02:27 2010 (r204991) @@ -1,5 +1,7 @@ /*- - * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. + * Copyright 1998, 2000 Marshall Kirk McKusick. + * Copyright 2009, 2010 Jeffrey W. Roberson <jeff@FreeBSD.org> + * All rights reserved. * * The soft updates code is derived from the appendix of a University * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, @@ -23,17 +25,16 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: @(#)ffs_softdep.c 9.59 (McKusick) 6/21/00 */ @@ -958,6 +959,11 @@ static int stat_jaddref; /* bufs redirti static int stat_jnewblk; /* bufs redirtied as blk bitmap can not write */ static int stat_journal_min; /* Times hit journal min threshold */ static int stat_journal_low; /* Times hit journal low threshold */ +static int stat_journal_wait; /* Times blocked in jwait(). */ +static int stat_jwait_filepage; /* Times blocked in jwait() for filepage. */ +static int stat_jwait_freeblks; /* Times blocked in jwait() for freeblks. */ +static int stat_jwait_inode; /* Times blocked in jwait() for inodes. */ +static int stat_jwait_newblk; /* Times blocked in jwait() for newblks. 
*/ SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); @@ -993,6 +999,16 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, jou &stat_journal_low, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW, &stat_journal_min, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, journal_wait, CTLFLAG_RW, + &stat_journal_wait, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_filepage, CTLFLAG_RW, + &stat_jwait_filepage, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_freeblks, CTLFLAG_RW, + &stat_jwait_freeblks, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_inode, CTLFLAG_RW, + &stat_jwait_inode, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW, + &stat_jwait_newblk, 0, ""); SYSCTL_DECL(_vfs_ffs); @@ -2519,16 +2535,11 @@ softdep_process_journal(mp, flags) * entries and add them to the segment. Notice cnt is * off by one to account for the space required by the * jsegrec. If we don't have a full block to log skip it - * unless we haven't written anything in 5 seconds. + * unless we haven't written anything. */ cnt++; - if (cnt < jrecmax) { - if (segwritten) - break; - if (flags == MNT_NOWAIT && - (ticks - jblocks->jb_age) < hz*5) - break; - } + if (cnt < jrecmax && segwritten) + break; /* * Verify some free journal space. softdep_prealloc() should * guarantee that we don't run out so this is indicative of @@ -2652,23 +2663,16 @@ softdep_process_journal(mp, flags) /* * Write this one buffer and continue. *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201003110102.o2B12RJb014891>