From owner-svn-src-stable-8@FreeBSD.ORG Thu May 20 06:51:48 2010 Return-Path: Delivered-To: svn-src-stable-8@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 77BCF1065673; Thu, 20 May 2010 06:51:48 +0000 (UTC) (envelope-from mm@FreeBSD.org) Received: from svn.freebsd.org (unknown [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 63B108FC1D; Thu, 20 May 2010 06:51:48 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o4K6pmgu018357; Thu, 20 May 2010 06:51:48 GMT (envelope-from mm@svn.freebsd.org) Received: (from mm@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o4K6pmax018346; Thu, 20 May 2010 06:51:48 GMT (envelope-from mm@svn.freebsd.org) Message-Id: <201005200651.o4K6pmax018346@svn.freebsd.org> From: Martin Matuska Date: Thu, 20 May 2010 06:51:48 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org X-SVN-Group: stable-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r208334 - in stable/8: cddl/contrib/opensolaris/cmd/zdb cddl/contrib/opensolaris/cmd/ztest cddl/contrib/opensolaris/lib/libzpool/common cddl/contrib/opensolaris/lib/libzpool/common/sys ... X-BeenThere: svn-src-stable-8@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 8-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 20 May 2010 06:51:48 -0000 Author: mm Date: Thu May 20 06:51:48 2010 New Revision: 208334 URL: http://svn.freebsd.org/changeset/base/208334 Log: MFC r208047: Import OpenSolaris revision 7837:001de5627df3 It includes the following changes: - parallel reads in traversal code (Bug ID 6333409) - faster traversal for zfs send (Bug ID 6418042) - traversal code cleanup (Bug ID 6725675) - fix for two scrub related bugs (Bug ID 6729696, 6730101) - fix assertion in dbuf_verify (Bug ID 6752226) - fix panic during zfs send with i/o errors (Bug ID 6577985) - replace P2CROSS with P2BOUNDARY (Bug ID 6725680) List of OpenSolaris Bug IDs: 6333409, 6418042, 6757112, 6725668, 6725675, 6725680, 6725698, 6729696, 6730101, 6752226, 6577985, 6755042 Approved by: pjd, delphij (mentor) Obtained from: OpenSolaris (multiple Bug IDs) Modified: stable/8/cddl/contrib/opensolaris/cmd/zdb/zdb.c stable/8/cddl/contrib/opensolaris/cmd/ztest/ztest.c stable/8/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c stable/8/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h stable/8/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c stable/8/sys/cddl/boot/zfs/zfsimpl.h stable/8/sys/cddl/compat/opensolaris/sys/sysmacros.h stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scrub.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_traverse.h stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c Directory Properties: stable/8/cddl/contrib/opensolaris/ (props changed) stable/8/cddl/contrib/opensolaris/cmd/zdb/ (props changed) stable/8/cddl/contrib/opensolaris/cmd/zfs/ (props changed) stable/8/cddl/contrib/opensolaris/lib/libzfs/ (props changed) stable/8/sys/ (props changed) stable/8/sys/amd64/include/xen/ (props changed) stable/8/sys/cddl/contrib/opensolaris/ (props changed) stable/8/sys/contrib/dev/acpica/ (props changed) stable/8/sys/contrib/pf/ (props changed) stable/8/sys/dev/xen/xenpci/ (props changed) stable/8/sys/geom/sched/ (props changed) Modified: stable/8/cddl/contrib/opensolaris/cmd/zdb/zdb.c ============================================================================== --- stable/8/cddl/contrib/opensolaris/cmd/zdb/zdb.c Thu May 20 06:51:01 2010 (r208333) +++ stable/8/cddl/contrib/opensolaris/cmd/zdb/zdb.c Thu May 20 06:51:48 2010 (r208334) @@ -50,6 +50,7 @@ #include #include #include +#include #undef ZFS_MAXNAMELEN #undef verify #include @@ -62,8 +63,6 @@ typedef void object_viewer_t(objset_t *, extern void dump_intent_log(zilog_t *); uint64_t *zopt_object = NULL; int zopt_objects = 0; -int zdb_advance = ADVANCE_PRE; -zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 }; libzfs_handle_t *g_zfs; boolean_t zdb_sig_user_data = B_TRUE; int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256; @@ -88,8 +87,8 @@ static void usage(void) { (void) fprintf(stderr, - "Usage: %s [-udibcsvL] [-U cachefile_path] [-O order] " - "[-B os:obj:level:blkid] [-S user:cksumalg] " + "Usage: %s [-udibcsv] [-U cachefile_path] " + "[-S user:cksumalg] " "dataset [object...]\n" " %s -C [pool]\n" " %s -l dev\n" @@ -109,13 +108,8 @@ usage(void) "dump blkptr signatures\n"); (void) fprintf(stderr, " -v verbose (applies to all others)\n"); (void) fprintf(stderr, " -l dump label contents\n"); - (void) fprintf(stderr, " -L live pool (allows some errors)\n"); - (void) fprintf(stderr, " -O [!] " - "visitation order\n"); (void) fprintf(stderr, " -U cachefile_path -- use alternate " "cachefile\n"); - (void) fprintf(stderr, " -B objset:object:level:blkid -- " - "simulate bad block\n"); (void) fprintf(stderr, " -R read and display block from a " "device\n"); (void) fprintf(stderr, " -e Pool is exported/destroyed/" @@ -138,7 +132,7 @@ fatal(const char *fmt, ...) va_end(ap); (void) fprintf(stderr, "\n"); - exit(1); + abort(); } static void @@ -571,7 +565,7 @@ dump_dnode(objset_t *os, uint64_t object } static uint64_t -blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid) +blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid) { if (level < 0) return (blkid); @@ -602,115 +596,104 @@ sprintf_blkptr_compact(char *blkbuf, blk (u_longlong_t)bp->blk_birth); } -/* ARGSUSED */ -static int -zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a) +static void +print_indirect(blkptr_t *bp, const zbookmark_t *zb, + const dnode_phys_t *dnp) { - zbookmark_t *zb = &bc->bc_bookmark; - blkptr_t *bp = &bc->bc_blkptr; - void *data = bc->bc_data; - dnode_phys_t *dnp = bc->bc_dnode; - char blkbuf[BP_SPRINTF_LEN + 80]; + char blkbuf[BP_SPRINTF_LEN]; int l; - if (bc->bc_errno) { - (void) sprintf(blkbuf, - "Error %d reading <%llu, %llu, %lld, %llu>: ", - bc->bc_errno, - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, - (u_longlong_t)zb->zb_blkid); - goto out; - } - - if (zb->zb_level == -1) { - ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); - ASSERT3U(BP_GET_LEVEL(bp), ==, 0); - } else { - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); - } - - if (zb->zb_level > 0) { - uint64_t fill = 0; - blkptr_t *bpx, *bpend; - - for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx); - bpx < bpend; bpx++) { - if (bpx->blk_birth != 0) { - fill += bpx->blk_fill; - } else { - ASSERT(bpx->blk_fill == 0); - } - } - ASSERT3U(fill, ==, bp->blk_fill); - } + ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); + ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); - if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) { - uint64_t fill = 0; - dnode_phys_t *dnx, *dnend; - - for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT); - dnx < dnend; dnx++) { - if (dnx->dn_type != DMU_OT_NONE) - fill++; - } - ASSERT3U(fill, ==, bp->blk_fill); - } - - (void) sprintf(blkbuf, "%16llx ", + (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid)); ASSERT(zb->zb_level >= 0); for (l = dnp->dn_nlevels - 1; l >= -1; l--) { if (l == zb->zb_level) { - (void) sprintf(blkbuf + strlen(blkbuf), "L%llx", - (u_longlong_t)zb->zb_level); + (void) printf("L%llx", (u_longlong_t)zb->zb_level); } else { - (void) sprintf(blkbuf + strlen(blkbuf), " "); + (void) printf(" "); } } -out: - if (bp->blk_birth == 0) { - (void) sprintf(blkbuf + strlen(blkbuf), ""); - (void) printf("%s\n", blkbuf); - } else { - sprintf_blkptr_compact(blkbuf + strlen(blkbuf), bp, - dump_opt['d'] > 5 ? 1 : 0); - (void) printf("%s\n", blkbuf); + sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0); + (void) printf("%s\n", blkbuf); +} + +#define SET_BOOKMARK(zb, objset, object, level, blkid) \ +{ \ + (zb)->zb_objset = objset; \ + (zb)->zb_object = object; \ + (zb)->zb_level = level; \ + (zb)->zb_blkid = blkid; \ +} + +static int +visit_indirect(spa_t *spa, const dnode_phys_t *dnp, + blkptr_t *bp, const zbookmark_t *zb) +{ + int err; + + if (bp->blk_birth == 0) + return (0); + + print_indirect(bp, zb, dnp); + + if (BP_GET_LEVEL(bp) > 0) { + uint32_t flags = ARC_WAIT; + int i; + blkptr_t *cbp; + int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; + arc_buf_t *buf; + uint64_t fill = 0; + + err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf, + ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + if (err) + return (err); + + /* recursively visit blocks below this */ + cbp = buf->b_data; + for (i = 0; i < epb; i++, cbp++) { + zbookmark_t czb; + + SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, + zb->zb_level - 1, + zb->zb_blkid * epb + i); + err = visit_indirect(spa, dnp, cbp, &czb); + if (err) + break; + fill += cbp->blk_fill; + } + ASSERT3U(fill, ==, bp->blk_fill); + (void) arc_buf_remove_ref(buf, &buf); } - return (bc->bc_errno ? ERESTART : 0); + return (err); } /*ARGSUSED*/ static void -dump_indirect(objset_t *os, uint64_t object, void *data, size_t size) +dump_indirect(dnode_t *dn) { - traverse_handle_t *th; - uint64_t objset = dmu_objset_id(os); - int advance = zdb_advance; + dnode_phys_t *dnp = dn->dn_phys; + int j; + zbookmark_t czb; (void) printf("Indirect blocks:\n"); - if (object == 0) - advance |= ADVANCE_DATA; - - th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance, - ZIO_FLAG_CANFAIL); - th->th_noread = zdb_noread; - - traverse_add_dnode(th, 0, -1ULL, objset, object); - - while (traverse_more(th) == EAGAIN) - continue; + SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os), + dn->dn_object, dnp->dn_nlevels - 1, 0); + for (j = 0; j < dnp->dn_nblkptr; j++) { + czb.zb_blkid = j; + (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp, + &dnp->dn_blkptr[j], &czb); + } (void) printf("\n"); - - traverse_fini(th); } /*ARGSUSED*/ @@ -1093,7 +1076,7 @@ dump_object(objset_t *os, uint64_t objec } if (verbosity >= 5) - dump_indirect(os, object, NULL, 0); + dump_indirect(dn); if (verbosity >= 5) { /* @@ -1458,18 +1441,17 @@ typedef struct zdb_blkstats { #define DMU_OT_DEFERRED DMU_OT_NONE #define DMU_OT_TOTAL DMU_OT_NUMTYPES -#define ZB_TOTAL ZB_MAXLEVEL +#define ZB_TOTAL DN_MAX_LEVELS typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1]; uint64_t zcb_errors[256]; - traverse_blk_cache_t *zcb_cache; int zcb_readfails; int zcb_haderrors; } zdb_cb_t; static void -zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type) +zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type) { for (int i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; @@ -1485,7 +1467,7 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zc if (dump_opt['S']) { boolean_t print_sig; - print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 && + print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS); if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg) @@ -1507,56 +1489,55 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zc } } - if (!dump_opt['L']) - VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp, - NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0); + VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp, + NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0); } static int -zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) +zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb, + const dnode_phys_t *dnp, void *arg) { - zbookmark_t *zb = &bc->bc_bookmark; zdb_cb_t *zcb = arg; - blkptr_t *bp = &bc->bc_blkptr; - dmu_object_type_t type = BP_GET_TYPE(bp); char blkbuf[BP_SPRINTF_LEN]; - int error = 0; - ASSERT(!BP_IS_HOLE(bp)); + if (bp == NULL) + return (0); - zdb_count_block(spa, zcb, bp, type); + zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp)); - if (bc->bc_errno) { - if (zcb->zcb_readfails++ < 10 && dump_opt['L']) { - uberblock_t ub; - vdev_uberblock_load(NULL, spa->spa_root_vdev, &ub); - if (ub.ub_txg != 0) - spa->spa_ubsync = ub; - error = EAGAIN; - } else { + if (dump_opt['c'] || dump_opt['S']) { + int ioerr, size; + void *data; + + size = BP_GET_LSIZE(bp); + data = malloc(size); + ioerr = zio_wait(zio_read(NULL, spa, bp, data, size, + NULL, NULL, ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb)); + free(data); + + /* We expect io errors on intent log */ + if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) { zcb->zcb_haderrors = 1; - zcb->zcb_errors[bc->bc_errno]++; - error = ERESTART; - } + zcb->zcb_errors[ioerr]++; - if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno)) - sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); - else - blkbuf[0] = '\0'; + if (dump_opt['b'] >= 2) + sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp); + else + blkbuf[0] = '\0'; - if (!dump_opt['S']) { - (void) printf("zdb_blkptr_cb: Got error %d reading " - "<%llu, %llu, %lld, %llx> %s -- %s\n", - bc->bc_errno, - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, - (u_longlong_t)zb->zb_blkid, - blkbuf, - error == EAGAIN ? "retrying" : "skipping"); + if (!dump_opt['S']) { + (void) printf("zdb_blkptr_cb: " + "Got error %d reading " + "<%llu, %llu, %lld, %llx> %s -- skipping\n", + ioerr, + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (u_longlong_t)zb->zb_level, + (u_longlong_t)zb->zb_blkid, + blkbuf); + } } - - return (error); } zcb->zcb_readfails = 0; @@ -1566,8 +1547,8 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, (void) printf("objset %llu object %llu offset 0x%llx %s\n", (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object, - (u_longlong_t)blkid2offset(bc->bc_dnode, - zb->zb_level, zb->zb_blkid), blkbuf); + (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid), + blkbuf); } return (0); @@ -1576,22 +1557,12 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, static int dump_block_stats(spa_t *spa) { - traverse_handle_t *th; zdb_cb_t zcb = { 0 }; - traverse_blk_cache_t dummy_cache = { 0 }; zdb_blkstats_t *zb, *tzb; uint64_t alloc, space, logalloc; vdev_t *rvd = spa->spa_root_vdev; int leaks = 0; - int advance = zdb_advance; - int c, e, flags; - - zcb.zcb_cache = &dummy_cache; - - if (dump_opt['c'] || dump_opt['S']) - advance |= ADVANCE_DATA; - - advance |= ADVANCE_PRUNE | ADVANCE_ZIL; + int c, e; if (!dump_opt['S']) { (void) printf("\nTraversing all blocks to %sverify" @@ -1607,8 +1578,7 @@ dump_block_stats(spa_t *spa) * it's not part of any space map) is a double allocation, * reference to a freed block, or an unclaimed log block. */ - if (!dump_opt['L']) - zdb_leak_init(spa); + zdb_leak_init(spa); /* * If there's a deferred-free bplist, process that first. @@ -1634,22 +1604,7 @@ dump_block_stats(spa_t *spa) bplist_close(bpl); } - /* - * Now traverse the pool. If we're reading all data to verify - * checksums, do a scrubbing read so that we validate all copies. - */ - flags = ZIO_FLAG_CANFAIL; - if (advance & ADVANCE_DATA) - flags |= ZIO_FLAG_SCRUB; - th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags); - th->th_noread = zdb_noread; - - traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES); - - while (traverse_more(th) == EAGAIN) - continue; - - traverse_fini(th); + zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb); if (zcb.zcb_haderrors && !dump_opt['S']) { (void) printf("\nError counts:\n\n"); @@ -1665,8 +1620,7 @@ dump_block_stats(spa_t *spa) /* * Report any leaked segments. */ - if (!dump_opt['L']) - zdb_leak_fini(spa); + zdb_leak_fini(spa); /* * If we're interested in printing out the blkptr signatures, @@ -1676,10 +1630,6 @@ dump_block_stats(spa_t *spa) if (dump_opt['S']) return (zcb.zcb_haderrors ? 3 : 0); - if (dump_opt['L']) - (void) printf("\n\n *** Live pool traversal; " - "block counts are only approximate ***\n\n"); - alloc = spa_get_alloc(spa); space = spa_get_space(spa); @@ -2285,7 +2235,6 @@ main(int argc, char **argv) int dump_all = 1; int verbose = 0; int error; - int flag, set; int exported = 0; char *vdev_dir = NULL; @@ -2294,7 +2243,7 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); - while ((c = getopt(argc, argv, "udibcsvCLO:B:S:U:lRep:")) != -1) { + while ((c = getopt(argc, argv, "udibcsvCS:U:lRep:")) != -1) { switch (c) { case 'u': case 'd': @@ -2308,49 +2257,6 @@ main(int argc, char **argv) dump_opt[c]++; dump_all = 0; break; - case 'L': - dump_opt[c]++; - break; - case 'O': - endstr = optarg; - if (endstr[0] == '!') { - endstr++; - set = 0; - } else { - set = 1; - } - if (strcmp(endstr, "post") == 0) { - flag = ADVANCE_PRE; - set = !set; - } else if (strcmp(endstr, "pre") == 0) { - flag = ADVANCE_PRE; - } else if (strcmp(endstr, "prune") == 0) { - flag = ADVANCE_PRUNE; - } else if (strcmp(endstr, "data") == 0) { - flag = ADVANCE_DATA; - } else if (strcmp(endstr, "holes") == 0) { - flag = ADVANCE_HOLES; - } else { - usage(); - } - if (set) - zdb_advance |= flag; - else - zdb_advance &= ~flag; - break; - case 'B': - endstr = optarg - 1; - zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0); - zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0); - zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0); - zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16); - (void) printf("simulating bad block " - "<%llu, %llu, %lld, %llx>\n", - (u_longlong_t)zdb_noread.zb_objset, - (u_longlong_t)zdb_noread.zb_object, - (u_longlong_t)zdb_noread.zb_level, - (u_longlong_t)zdb_noread.zb_blkid); - break; case 'v': verbose++; break; @@ -2387,21 +2293,17 @@ main(int argc, char **argv) } } - if (vdev_dir != NULL && exported == 0) - (void) fatal("-p option requires use of -e\n"); + if (vdev_dir != NULL && exported == 0) { + (void) fprintf(stderr, "-p option requires use of -e\n"); + usage(); + } kernel_init(FREAD); g_zfs = libzfs_init(); ASSERT(g_zfs != NULL); - /* - * Disable vdev caching. If we don't do this, live pool traversal - * won't make progress because it will never see disk updates. - */ - zfs_vdev_cache_size = 0; - for (c = 0; c < 256; c++) { - if (dump_all && c != 'L' && c != 'l' && c != 'R') + if (dump_all && c != 'l' && c != 'R') dump_opt[c] = 1; if (dump_opt[c]) dump_opt[c] += verbose; Modified: stable/8/cddl/contrib/opensolaris/cmd/ztest/ztest.c ============================================================================== --- stable/8/cddl/contrib/opensolaris/cmd/ztest/ztest.c Thu May 20 06:51:01 2010 (r208333) +++ stable/8/cddl/contrib/opensolaris/cmd/ztest/ztest.c Thu May 20 06:51:48 2010 (r208334) @@ -77,7 +77,6 @@ #include #include #include -#include #include #include #include @@ -151,7 +150,6 @@ typedef struct ztest_args { hrtime_t za_start; hrtime_t za_stop; hrtime_t za_kill; - traverse_handle_t *za_th; /* * Thread-local variables can go here to aid debugging. */ @@ -206,7 +204,6 @@ ztest_info_t ztest_info[] = { { ztest_dmu_object_alloc_free, 1, &zopt_always }, { ztest_zap, 30, &zopt_always }, { ztest_zap_parallel, 100, &zopt_always }, - { ztest_traverse, 1, &zopt_often }, { ztest_dsl_prop_get_set, 1, &zopt_sometimes }, { ztest_dmu_objset_create_destroy, 1, &zopt_sometimes }, { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, @@ -1447,152 +1444,6 @@ ztest_dmu_snapshot_create_destroy(ztest_ (void) rw_unlock(&ztest_shared->zs_name_lock); } -#define ZTEST_TRAVERSE_BLOCKS 1000 - -static int -ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) -{ - ztest_args_t *za = arg; - zbookmark_t *zb = &bc->bc_bookmark; - blkptr_t *bp = &bc->bc_blkptr; - dnode_phys_t *dnp = bc->bc_dnode; - traverse_handle_t *th = za->za_th; - uint64_t size = BP_GET_LSIZE(bp); - - /* - * Level -1 indicates the objset_phys_t or something in its intent log. - */ - if (zb->zb_level == -1) { - if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { - ASSERT3U(zb->zb_object, ==, 0); - ASSERT3U(zb->zb_blkid, ==, 0); - ASSERT3U(size, ==, sizeof (objset_phys_t)); - za->za_zil_seq = 0; - } else if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) { - ASSERT3U(zb->zb_object, ==, 0); - ASSERT3U(zb->zb_blkid, >, za->za_zil_seq); - za->za_zil_seq = zb->zb_blkid; - } else { - ASSERT3U(zb->zb_object, !=, 0); /* lr_write_t */ - } - - return (0); - } - - ASSERT(dnp != NULL); - - if (bc->bc_errno) - return (ERESTART); - - /* - * Once in a while, abort the traverse. We only do this to odd - * instance numbers to ensure that even ones can run to completion. - */ - if ((za->za_instance & 1) && ztest_random(10000) == 0) - return (EINTR); - - if (bp->blk_birth == 0) { - ASSERT(th->th_advance & ADVANCE_HOLES); - return (0); - } - - if (zb->zb_level == 0 && !(th->th_advance & ADVANCE_DATA) && - bc == &th->th_cache[ZB_DN_CACHE][0]) { - ASSERT(bc->bc_data == NULL); - return (0); - } - - ASSERT(bc->bc_data != NULL); - - /* - * This is an expensive question, so don't ask it too often. - */ - if (((za->za_random ^ th->th_callbacks) & 0xff) == 0) { - void *xbuf = umem_alloc(size, UMEM_NOFAIL); - if (arc_tryread(spa, bp, xbuf) == 0) { - ASSERT(bcmp(bc->bc_data, xbuf, size) == 0); - } - umem_free(xbuf, size); - } - - if (zb->zb_level > 0) { - ASSERT3U(size, ==, 1ULL << dnp->dn_indblkshift); - return (0); - } - - ASSERT(zb->zb_level == 0); - ASSERT3U(size, ==, dnp->dn_datablkszsec << DEV_BSHIFT); - - return (0); -} - -/* - * Verify that live pool traversal works. - */ -void -ztest_traverse(ztest_args_t *za) -{ - spa_t *spa = za->za_spa; - traverse_handle_t *th = za->za_th; - int rc, advance; - uint64_t cbstart, cblimit; - - if (th == NULL) { - advance = 0; - - if (ztest_random(2) == 0) - advance |= ADVANCE_PRE; - - if (ztest_random(2) == 0) - advance |= ADVANCE_PRUNE; - - if (ztest_random(2) == 0) - advance |= ADVANCE_DATA; - - if (ztest_random(2) == 0) - advance |= ADVANCE_HOLES; - - if (ztest_random(2) == 0) - advance |= ADVANCE_ZIL; - - th = za->za_th = traverse_init(spa, ztest_blk_cb, za, advance, - ZIO_FLAG_CANFAIL); - - traverse_add_pool(th, 0, -1ULL); - } - - advance = th->th_advance; - cbstart = th->th_callbacks; - cblimit = cbstart + ((advance & ADVANCE_DATA) ? 100 : 1000); - - while ((rc = traverse_more(th)) == EAGAIN && th->th_callbacks < cblimit) - continue; - - if (zopt_verbose >= 5) - (void) printf("traverse %s%s%s%s %llu blocks to " - "<%llu, %llu, %lld, %llx>%s\n", - (advance & ADVANCE_PRE) ? "pre" : "post", - (advance & ADVANCE_PRUNE) ? "|prune" : "", - (advance & ADVANCE_DATA) ? "|data" : "", - (advance & ADVANCE_HOLES) ? "|holes" : "", - (u_longlong_t)(th->th_callbacks - cbstart), - (u_longlong_t)th->th_lastcb.zb_objset, - (u_longlong_t)th->th_lastcb.zb_object, - (u_longlong_t)th->th_lastcb.zb_level, - (u_longlong_t)th->th_lastcb.zb_blkid, - rc == 0 ? " [done]" : - rc == EINTR ? " [aborted]" : - rc == EAGAIN ? "" : - strerror(rc)); - - if (rc != EAGAIN) { - if (rc != 0 && rc != EINTR) - fatal(0, "traverse_more(%p) = %d", th, rc); - traverse_fini(th); - za->za_th = NULL; - } -} - /* * Verify dsl_dataset_promote handles EBUSY */ @@ -3067,12 +2918,12 @@ ztest_verify_blocks(char *pool) isa = strdup(isa); /* LINTED */ (void) sprintf(bin, - "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache -O %s %s", + "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache %s", isalen, isa, zopt_verbose >= 3 ? "s" : "", zopt_verbose >= 4 ? "v" : "", - ztest_random(2) == 0 ? "pre" : "post", pool); + pool); free(isa); if (zopt_verbose >= 5) @@ -3438,8 +3289,6 @@ ztest_run(char *pool) while (--t >= 0) { VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0); - if (za[t].za_th) - traverse_fini(za[t].za_th); if (t < zopt_datasets) { zil_close(za[t].za_zilog); dmu_objset_close(za[t].za_os); Modified: stable/8/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c ============================================================================== --- stable/8/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c Thu May 20 06:51:01 2010 (r208333) +++ stable/8/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c Thu May 20 06:51:48 2010 (r208334) @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -842,6 +840,8 @@ kernel_init(int mode) VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); + system_taskq_init(); + spa_init(mode); } Modified: stable/8/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h ============================================================================== --- stable/8/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Thu May 20 06:51:01 2010 (r208333) +++ stable/8/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Thu May 20 06:51:48 2010 (r208334) @@ -334,11 +334,14 @@ typedef void (task_func_t)(void *); #define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */ #define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ +extern taskq_t *system_taskq; + extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); extern void taskq_destroy(taskq_t *); extern void taskq_wait(taskq_t *); extern int taskq_member(taskq_t *, void *); +extern void system_taskq_init(void); #define XVA_MAPSIZE 3 #define XVA_MAGIC 0x78766174 Modified: stable/8/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c ============================================================================== --- stable/8/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c Thu May 20 06:51:01 2010 (r208333) +++ stable/8/cddl/contrib/opensolaris/lib/libzpool/common/taskq.c Thu May 20 06:51:48 2010 (r208334) @@ -19,15 +19,14 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include int taskq_now; +taskq_t *system_taskq; typedef struct task { struct task *task_next; @@ -253,3 +252,10 @@ taskq_member(taskq_t *tq, void *t) return (0); } + +void +system_taskq_init(void) +{ + system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512, + TASKQ_DYNAMIC | TASKQ_PREPOPULATE); +} Modified: stable/8/sys/cddl/boot/zfs/zfsimpl.h ============================================================================== --- stable/8/sys/cddl/boot/zfs/zfsimpl.h Thu May 20 06:51:01 2010 (r208333) +++ stable/8/sys/cddl/boot/zfs/zfsimpl.h Thu May 20 06:51:48 2010 (r208334) @@ -66,7 +66,7 @@ #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #define P2END(x, align) (-(~(x) & -(align))) #define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align))) -#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) +#define P2BOUNDARY(off, len, align) (((off) ^ ((off) + (len) - 1)) > (align) - 1) /* * General-purpose 32-bit and 64-bit bitfield encodings. Modified: stable/8/sys/cddl/compat/opensolaris/sys/sysmacros.h ============================================================================== --- stable/8/sys/cddl/compat/opensolaris/sys/sysmacros.h Thu May 20 06:51:01 2010 (r208333) +++ stable/8/sys/cddl/compat/opensolaris/sys/sysmacros.h Thu May 20 06:51:48 2010 (r208334) @@ -43,6 +43,10 @@ extern "C" { #define ABS(a) ((a) < 0 ? -(a) : (a)) #endif +#ifndef SIGNOF +#define SIGNOF(a) ((a) < 0 ? -1 : (a) > 0) +#endif + /* * Macro for checking power of 2 address alignment. */ @@ -63,7 +67,7 @@ extern "C" { #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #define P2END(x, align) (-(~(x) & -(align))) #define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align))) -#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1) +#define P2BOUNDARY(off, len, align) (((off) ^ ((off) + (len) - 1)) > (align) - 1) /* * Determine whether two numbers have the same high-order bit. */ Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c ============================================================================== --- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Thu May 20 06:51:01 2010 (r208333) +++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Thu May 20 06:51:48 2010 (r208334) @@ -308,20 +308,18 @@ dbuf_verify(dmu_buf_impl_t *db) ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size); } - if (db->db_level == 0) { - /* we can be momentarily larger in dnode_set_blksz() */ - if (db->db_blkid != DB_BONUS_BLKID && dn) { - ASSERT3U(db->db.db_size, >=, dn->dn_datablksz); - } - if (db->db.db_object == DMU_META_DNODE_OBJECT) { - dbuf_dirty_record_t *dr = db->db_data_pending; - /* - * it should only be modified in syncing - * context, so make sure we only have - * one copy of the data. - */ - ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf); - } + /* + * We can't assert that db_size matches dn_datablksz because it + * can be momentarily different when another thread is doing + * dnode_set_blksz(). + */ + if (db->db_level == 0 && db->db.db_object == DMU_META_DNODE_OBJECT) { + dbuf_dirty_record_t *dr = db->db_data_pending; + /* + * It should only be modified in syncing context, so + * make sure we only have one copy of the data. + */ + ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf); } /* verify db->db_blkptr */ Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c ============================================================================== --- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Thu May 20 06:51:01 2010 (r208333) +++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Thu May 20 06:51:48 2010 (r208334) @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -172,66 +170,59 @@ dump_dnode(struct backuparg *ba, uint64_ (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) static int -backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) +backup_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb, + const dnode_phys_t *dnp, void *arg) { struct backuparg *ba = arg; - uint64_t object = bc->bc_bookmark.zb_object; - int level = bc->bc_bookmark.zb_level; - uint64_t blkid = bc->bc_bookmark.zb_blkid; - blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL; dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; - void *data = bc->bc_data; int err = 0; if (issig(JUSTLOOKING) && issig(FORREAL)) return (EINTR); - ASSERT(data || bp == NULL); - - if (bp == NULL && object == 0) { - uint64_t span = BP_SPAN(bc->bc_dnode, level); - uint64_t dnobj = (blkid * span) >> DNODE_SHIFT; + if (bp == NULL && zb->zb_object == 0) { + uint64_t span = BP_SPAN(dnp, zb->zb_level); + uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); } else if (bp == NULL) { - uint64_t span = BP_SPAN(bc->bc_dnode, level); - err = dump_free(ba, object, blkid * span, span); - } else if (data && level == 0 && type == DMU_OT_DNODE) { - dnode_phys_t *blk = data; + uint64_t span = BP_SPAN(dnp, zb->zb_level); + err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span); + } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { + return (0); + } else if (type == DMU_OT_DNODE) { + dnode_phys_t *blk; int i; int blksz = BP_GET_LSIZE(bp); + uint32_t aflags = ARC_WAIT; + arc_buf_t *abuf; + if (arc_read_nolock(NULL, spa, bp, + arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL, &aflags, zb) != 0) + return (EIO); + + blk = abuf->b_data; for (i = 0; i < blksz >> DNODE_SHIFT; i++) { - uint64_t dnobj = - (blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; + uint64_t dnobj = (zb->zb_blkid << + (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; err = dump_dnode(ba, dnobj, blk+i); if (err) break; } - } else if (level == 0 && - type != DMU_OT_DNODE && type != DMU_OT_OBJSET) { + (void) arc_buf_remove_ref(abuf, &abuf); + } else { /* it's a level-0 block of a regular object */ + uint32_t aflags = ARC_WAIT; + arc_buf_t *abuf; int blksz = BP_GET_LSIZE(bp); - if (data == NULL) { - uint32_t aflags = ARC_WAIT; - arc_buf_t *abuf; - zbookmark_t zb; - - zb.zb_objset = ba->os->os->os_dsl_dataset->ds_object; - zb.zb_object = object; - zb.zb_level = level; - zb.zb_blkid = blkid; - (void) arc_read_nolock(NULL, spa, bp, - arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_MUSTSUCCEED, &aflags, &zb); - - if (abuf) { - err = dump_data(ba, type, object, blkid * blksz, - blksz, abuf->b_data); - (void) arc_buf_remove_ref(abuf, &abuf); *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***