Date: Thu, 23 Aug 2012 06:39:16 GMT From: Marcelo Araujo <araujo@FreeBSD.org> To: freebsd-gnats-submit@FreeBSD.org Subject: kern/170912: [zfs] unnecessarily setting DS_FLAG_INCONSISTENT on async destroyed datasets Message-ID: <201208230639.q7N6dGJ4028372@red.freebsd.org> Resent-Message-ID: <201208230640.q7N6e2b4043872@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 170912 >Category: kern >Synopsis: [zfs] unnecessarily setting DS_FLAG_INCONSISTENT on async destroyed datasets >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: sw-bug >Submitter-Id: current-users >Arrival-Date: Thu Aug 23 06:40:01 UTC 2012 >Closed-Date: >Last-Modified: >Originator: Marcelo Araujo >Release: 9-1-BETA1 >Organization: FreeBSD >Environment: FreeBSD QnapAraujo 9.1-BETA1 FreeBSD 9.1-BETA1 #15: Wed Jul 11 08:36:49 PDT 2012 root@build9x64.pcbsd.org:/usr/obj/builds/amd64/pcbsd-build90/fbsd-source/9.0/sys/GENERIC amd64 >Description: Import the source to solve the issue: https://www.illumos.org/issues/3086 Code obtained on: Commit cd512c80fd75 >How-To-Repeat: >Fix: Patch attached with submission follows: Index: cddl/contrib/opensolaris/cmd/ztest/ztest.c =================================================================== --- cddl/contrib/opensolaris/cmd/ztest/ztest.c (revision 239602) +++ cddl/contrib/opensolaris/cmd/ztest/ztest.c (working copy) @@ -2225,6 +2225,7 @@ { objset_t *os = zd->zd_os; + VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0); (void) rw_wrlock(&zd->zd_zilog_lock); /* zfsvfs_teardown() */ @@ -2235,6 +2236,7 @@ zil_replay(os, zd, ztest_replay_vector); (void) rw_unlock(&zd->zd_zilog_lock); + VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0); } /* Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c (working copy) @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ @@ -462,6 +462,39 @@ } /* + * Called when we create in-memory log transactions so that we know + * to cleanup the itxs at the end of spa_sync(). + */ + +void +zilog_dirty(zilog_t *zilog, uint64_t txg) +{ + dsl_pool_t *dp = zilog->zl_dmu_pool; + dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); + + if (dsl_dataset_is_snapshot(ds)) + panic("dirtying snapshot!"); + + if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) { + /* up the hold count until we can be written out */ + dmu_buf_add_ref(ds->ds_dbuf, zilog); + } +} + + +boolean_t +zilog_is_dirty(zilog_t *zilog) +{ + dsl_pool_t *dp = zilog->zl_dmu_pool; + + for (int t = 0; t < TXG_SIZE; t++) { + if (txg_list_member(&dp->dp_dirty_zilogs, zilog, t)) + return (B_TRUE); + } + return (B_FALSE); +} + +/* * Create an on-disk intent log. */ static lwb_t * @@ -577,14 +610,21 @@ kmem_cache_free(zil_lwb_cache, lwb); } } else if (!keep_first) { - (void) zil_parse(zilog, zil_free_log_block, - zil_free_log_record, tx, zh->zh_claim_txg); + zil_destroy_sync(zilog, tx); } mutex_exit(&zilog->zl_lock); dmu_tx_commit(tx); } +void +zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) +{ + ASSERT(list_is_empty(&zilog->zl_lwb_list)); + (void) zil_parse(zilog, zil_free_log_block, + zil_free_log_record, tx, zilog->zl_header->zh_claim_txg); +} + int zil_claim(const char *osname, void *txarg) { @@ -998,6 +1038,8 @@ return (NULL); ASSERT(lwb->lwb_buf != NULL); + ASSERT(zilog_is_dirty(zilog) || + spa_freeze_txg(zilog->zl_spa) != UINT64_MAX); if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) dlen = P2ROUNDUP_TYPED( @@ -1218,7 +1260,7 @@ if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME) zil_async_to_sync(zilog, itx->itx_oid); - if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) + if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) txg = ZILTEST_TXG; else txg = dmu_tx_get_txg(tx); @@ -1269,6 +1311,7 @@ } itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx); + zilog_dirty(zilog, txg); 
mutex_exit(&itxg->itxg_lock); /* Release the old itxs now we've dropped the lock */ @@ -1278,7 +1321,10 @@ /* * If there are any in-memory intent log transactions which have now been - * synced then start up a taskq to free them. + * synced then start up a taskq to free them. We should only do this after we + * have written out the uberblocks (i.e. txg has been committed) so that + * we don't inadvertently clean out in-memory log records that would be required + * by zil_commit(). */ void zil_clean(zilog_t *zilog, uint64_t synced_txg) @@ -1746,6 +1792,7 @@ mutex_exit(&zilog->zl_lock); if (txg) txg_wait_synced(zilog->zl_dmu_pool, txg); + ASSERT(!zilog_is_dirty(zilog)); taskq_destroy(zilog->zl_clean_taskq); zilog->zl_clean_taskq = NULL; Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (working copy) @@ -42,6 +42,7 @@ #include <sys/dsl_deadlist.h> #include <sys/bptree.h> #include <sys/zfeature.h> +#include <sys/zil_impl.h> int zfs_no_write_throttle = 0; int zfs_write_limit_shift = 3; /* 1/8th of physical memory */ @@ -111,12 +112,12 @@ txg_list_create(&dp->dp_dirty_datasets, offsetof(dsl_dataset_t, ds_dirty_link)); + txg_list_create(&dp->dp_dirty_zilogs, + offsetof(zilog_t, zl_dirty_link)); txg_list_create(&dp->dp_dirty_dirs, offsetof(dsl_dir_t, dd_dirty_link)); txg_list_create(&dp->dp_sync_tasks, offsetof(dsl_sync_task_group_t, dstg_node)); - list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t), - offsetof(dsl_dataset_t, ds_synced_link)); mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); @@ -249,9 +250,9 @@ dmu_objset_evict(dp->dp_meta_objset); txg_list_destroy(&dp->dp_dirty_datasets); + txg_list_destroy(&dp->dp_dirty_zilogs); txg_list_destroy(&dp->dp_sync_tasks); txg_list_destroy(&dp->dp_dirty_dirs); - 
list_destroy(&dp->dp_synced_datasets); arc_flush(dp->dp_spa); txg_fini(dp); @@ -331,6 +332,21 @@ return (dp); } +/* + * Account for the meta-objset space in its placeholder dsl_dir. + */ +void +dsl_pool_mos_diduse_space(dsl_pool_t *dp, + int64_t used, int64_t comp, int64_t uncomp) +{ + ASSERT3U(comp, ==, uncomp); /* It's all metadata */ + mutex_enter(&dp->dp_lock); + dp->dp_mos_used_delta += used; + dp->dp_mos_compressed_delta += comp; + dp->dp_mos_uncompressed_delta += uncomp; + mutex_exit(&dp->dp_lock); +} + static int deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { @@ -349,12 +365,15 @@ dmu_tx_t *tx; dsl_dir_t *dd; dsl_dataset_t *ds; - dsl_sync_task_group_t *dstg; objset_t *mos = dp->dp_meta_objset; hrtime_t start, write_time; uint64_t data_written; int err; + list_t synced_datasets; + list_create(&synced_datasets, sizeof(dsl_dataset_t), + offsetof(dsl_dataset_t, ds_synced_link)); + /* * We need to copy dp_space_towrite() before doing * dsl_sync_task_group_sync(), because @@ -376,7 +395,7 @@ * may sync newly-created datasets on pass 2. */ ASSERT(!list_link_active(&ds->ds_synced_link)); - list_insert_tail(&dp->dp_synced_datasets, ds); + list_insert_tail(&synced_datasets, ds); dsl_dataset_sync(ds, zio, tx); } DTRACE_PROBE(pool_sync__1setup); @@ -386,15 +405,20 @@ ASSERT(err == 0); DTRACE_PROBE(pool_sync__2rootzio); - for (ds = list_head(&dp->dp_synced_datasets); ds; - ds = list_next(&dp->dp_synced_datasets, ds)) + /* + * After the data blocks have been written (ensured by the zio_wait() + * above), update the user/group space accounting. + */ + for (ds = list_head(&synced_datasets); ds; + ds = list_next(&synced_datasets, ds)) dmu_objset_do_userquota_updates(ds->ds_objset, tx); /* * Sync the datasets again to push out the changes due to * userspace updates. This must be done before we process the - * sync tasks, because that could cause a snapshot of a dataset - * whose ds_bp will be rewritten when we do this 2nd sync. 
+ * sync tasks, so that any snapshots will have the correct + * user accounting information (and we won't get confused + * about which blocks are part of the snapshot). */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { @@ -405,30 +429,42 @@ err = zio_wait(zio); /* - * Move dead blocks from the pending deadlist to the on-disk - * deadlist. + * Now that the datasets have been completely synced, we can + * clean up our in-memory structures accumulated while syncing: + * + * - move dead blocks from the pending deadlist to the on-disk deadlist + * - clean up zil records + * - release hold from dsl_dataset_dirty() */ - for (ds = list_head(&dp->dp_synced_datasets); ds; - ds = list_next(&dp->dp_synced_datasets, ds)) { + while (ds = list_remove_head(&synced_datasets)) { + objset_t *os = ds->ds_objset; bplist_iterate(&ds->ds_pending_deadlist, deadlist_enqueue_cb, &ds->ds_deadlist, tx); + ASSERT(!dmu_objset_is_dirty(os, txg)); + dmu_buf_rele(ds->ds_dbuf, ds); } - while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) { - /* - * No more sync tasks should have been added while we - * were syncing. - */ - ASSERT(spa_sync_pass(dp->dp_spa) == 1); - dsl_sync_task_group_sync(dstg, tx); - } - DTRACE_PROBE(pool_sync__3task); - start = gethrtime(); while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) dsl_dir_sync(dd, tx); write_time += gethrtime() - start; + /* + * The MOS's space is accounted for in the pool/$MOS + * (dp_mos_dir). We can't modify the mos while we're syncing + * it, so we remember the deltas and apply them here. 
+ */ + if (dp->dp_mos_used_delta != 0 || dp->dp_mos_compressed_delta != 0 || + dp->dp_mos_uncompressed_delta != 0) { + dsl_dir_diduse_space(dp->dp_mos_dir, DD_USED_HEAD, + dp->dp_mos_used_delta, + dp->dp_mos_compressed_delta, + dp->dp_mos_uncompressed_delta, tx); + dp->dp_mos_used_delta = 0; + dp->dp_mos_compressed_delta = 0; + dp->dp_mos_uncompressed_delta = 0; + } + start = gethrtime(); if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { @@ -444,6 +480,29 @@ hrtime_t, dp->dp_read_overhead); write_time -= dp->dp_read_overhead; + /* + * If we modify a dataset in the same txg that we want to destroy it, + * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it. + * dsl_dir_destroy_check() will fail if there are unexpected holds. + * Therefore, we want to sync the MOS (thus syncing the dd_dbuf + * and clearing the hold on it) before we process the sync_tasks. + * The MOS data dirtied by the sync_tasks will be synced on the next + * pass. + */ + + DTRACE_PROBE(pool_sync__3task); + if (!txg_list_empty(&dp->dp_sync_tasks, txg)) { + dsl_sync_task_group_t *dstg; + /* + * No more sync tasks should have been added while we + * were syncing. 
+ */ + ASSERT(spa_sync_pass(dp->dp_spa) == 1); + while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) + dsl_sync_task_group_sync(dstg, tx); + + } + dmu_tx_commit(tx); dp->dp_space_towrite[txg & TXG_MASK] = 0; @@ -492,15 +551,14 @@ void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) { + zilog_t *zilog; dsl_dataset_t *ds; - objset_t *os; - while (ds = list_head(&dp->dp_synced_datasets)) { - list_remove(&dp->dp_synced_datasets, ds); - os = ds->ds_objset; - zil_clean(os->os_zil, txg); - ASSERT(!dmu_objset_is_dirty(os, txg)); - dmu_buf_rele(ds->ds_dbuf, ds); + while (zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg)) { + ds = dmu_objset_ds(zilog->zl_os); + zil_clean(zilog, txg); + ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg)); + dmu_buf_rele(ds->ds_dbuf, zilog); } ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); } Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c (working copy) @@ -229,7 +229,7 @@ } } -/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */ +/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */ int dsl_dir_namelen(dsl_dir_t *dd) { @@ -593,8 +593,6 @@ { ASSERT(dmu_tx_is_syncing(tx)); - dmu_buf_will_dirty(dd->dd_dbuf, tx); - mutex_enter(&dd->dd_lock); ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, @@ -951,8 +949,6 @@ ASSERT(dmu_tx_is_syncing(tx)); ASSERT(type < DD_USED_NUM); - dsl_dir_dirty(dd, tx); - if (needlock) mutex_enter(&dd->dd_lock); accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used); @@ -961,6 +957,7 @@ dd->dd_phys->dd_compressed_bytes >= -compressed); ASSERT(uncompressed >= 0 || dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); + dmu_buf_will_dirty(dd->dd_dbuf, tx); 
dd->dd_phys->dd_used_bytes += used; dd->dd_phys->dd_uncompressed_bytes += uncompressed; dd->dd_phys->dd_compressed_bytes += compressed; @@ -1002,7 +999,6 @@ if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN)) return; - dsl_dir_dirty(dd, tx); if (needlock) mutex_enter(&dd->dd_lock); ASSERT(delta > 0 ? Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h (working copy) @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -395,6 +396,7 @@ zil_replay_func_t *replay_func[TX_MAX_TYPE]); extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx); extern void zil_destroy(zilog_t *zilog, boolean_t keep_first); +extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx); extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx); extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h (working copy) @@ -82,7 +82,6 @@ /* No lock needed - sync context only */ blkptr_t dp_meta_rootbp; - list_t dp_synced_datasets; hrtime_t dp_read_overhead; uint64_t dp_throughput; /* bytes per millisec */ uint64_t dp_write_limit; @@ -96,10 +95,14 @@ kmutex_t dp_lock; uint64_t dp_space_towrite[TXG_SIZE]; uint64_t dp_tempreserved[TXG_SIZE]; + uint64_t dp_mos_used_delta; + uint64_t dp_mos_compressed_delta; + uint64_t dp_mos_uncompressed_delta; /* Has its own locking */ tx_state_t dp_tx; txg_list_t dp_dirty_datasets; + 
txg_list_t dp_dirty_zilogs; txg_list_t dp_dirty_dirs; txg_list_t dp_sync_tasks; @@ -139,6 +142,8 @@ void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx); void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx); void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx); +void dsl_pool_mos_diduse_space(dsl_pool_t *dp, + int64_t used, int64_t comp, int64_t uncomp); taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h (working copy) @@ -22,6 +22,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ #ifndef _SYS_TXG_H #define _SYS_TXG_H @@ -115,7 +118,7 @@ extern void txg_list_create(txg_list_t *tl, size_t offset); extern void txg_list_destroy(txg_list_t *tl); -extern int txg_list_empty(txg_list_t *tl, uint64_t txg); +extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg); extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg); extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg); extern void *txg_list_remove(txg_list_t *tl, uint64_t txg); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h (working copy) @@ -21,6 +21,9 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright (c) 2012, Delphix. All rights reserved. 
+ */ /* Portions Copyright 2010 Robert Milkowski */ @@ -130,6 +133,7 @@ zil_header_t zl_old_header; /* debugging aid */ uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */ uint_t zl_prev_rotor; /* rotor for zl_prev[] */ + txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */ }; typedef struct zil_bp_node { Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c (working copy) @@ -1649,13 +1649,6 @@ dsl_dataset_t *ds = drc->drc_logical_ds; int err, myerr; - /* - * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean() - * expects it to have a ds_user_ptr (and zil), but clone_swap() - * can close it. - */ - txg_wait_synced(ds->ds_dir->dd_pool, 0); - if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) { err = dsl_dataset_clone_swap(drc->drc_real_ds, ds, drc->drc_force); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c (working copy) @@ -22,6 +22,9 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */ +/* + * Copyright (c) 2012, Delphix. All rights reserved. 
+ */ #include <sys/zfs_context.h> #include <sys/txg_impl.h> @@ -596,7 +599,7 @@ mutex_destroy(&tl->tl_lock); } -int +boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg) { return (tl->tl_head[txg & TXG_MASK] == NULL); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c (working copy) @@ -1769,15 +1769,15 @@ dnode_init(); dbuf_init(); zfetch_init(); + l2arc_init(); arc_init(); - l2arc_init(); } void dmu_fini(void) { + arc_fini(); l2arc_fini(); - arc_fini(); zfetch_fini(); dbuf_fini(); dnode_fini(); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c (revision 239602) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c (working copy) @@ -106,14 +106,8 @@ ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); if (ds == NULL) { - /* - * Account for the meta-objset space in its placeholder - * dsl_dir. 
- */ - ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ - dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, - used, compressed, uncompressed, tx); - dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); + dsl_pool_mos_diduse_space(tx->tx_pool, + used, compressed, uncompressed); return; } dmu_buf_will_dirty(ds->ds_dbuf, tx); @@ -155,9 +149,8 @@ */ dsl_free(tx->tx_pool, tx->tx_txg, bp); - dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, - -used, -compressed, -uncompressed, tx); - dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); + dsl_pool_mos_diduse_space(tx->tx_pool, + -used, -compressed, -uncompressed); return (used); } ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); @@ -1116,26 +1109,26 @@ dummy_ds.ds_dir = dd; dummy_ds.ds_object = ds->ds_object; - /* - * Check for errors and mark this ds as inconsistent, in - * case we crash while freeing the objects. - */ - err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, - dsl_dataset_destroy_begin_sync, ds, NULL, 0); - if (err) - goto out; - - err = dmu_objset_from_ds(ds, &os); - if (err) - goto out; - - /* - * If async destruction is not enabled try to remove all objects - * while in the open context so that there is less work to do in - * the syncing context. - */ if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds), &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { + /* + * If async destruction is not enabled try to remove all objects + * while in the open context so that there is less work to do in + * the syncing context. + */ + err = dsl_sync_task_do(dd->dd_pool, + dsl_dataset_destroy_begin_check, + dsl_dataset_destroy_begin_sync, ds, NULL, 0); + if (err) + goto out; + + err = dmu_objset_from_ds(ds, &os); + if (err) + goto out; + /* + * Remove all objects while in the open context so that + * there is less work to do in the syncing context. 
+ */ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, ds->ds_phys->ds_prev_snap_txg)) { /* @@ -1146,30 +1139,25 @@ } if (err != ESRCH) goto out; - } - /* - * Only the ZIL knows how to free log blocks. - */ - zil_destroy(dmu_objset_zil(os), B_FALSE); + /* + * Sync out all in-flight IO. + */ + txg_wait_synced(dd->dd_pool, 0); - /* - * Sync out all in-flight IO. - */ - txg_wait_synced(dd->dd_pool, 0); + /* + * If we managed to free all the objects in open + * context, the user space accounting should be zero. + */ + if (ds->ds_phys->ds_bp.blk_fill == 0 && + dmu_objset_userused_enabled(os)) { + uint64_t count; - /* - * If we managed to free all the objects in open - * context, the user space accounting should be zero. - */ - if (ds->ds_phys->ds_bp.blk_fill == 0 && - dmu_objset_userused_enabled(os)) { - uint64_t count; - - ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || - count == 0); - ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || - count == 0); + ASSERT(zap_count(os, DMU_USERUSED_OBJECT, + &count) != 0 || count == 0); + ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, + &count) != 0 || count == 0); + } } rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); @@ -1906,6 +1894,7 @@ } else { zfeature_info_t *async_destroy = &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]; + objset_t *os; /* * There's no next snapshot, so this is a head dataset. 
@@ -1917,6 +1906,8 @@ dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); ds->ds_phys->ds_deadlist_obj = 0; + VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); + if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) { err = old_synchronous_dataset_destroy(ds, tx); } else { @@ -1926,12 +1917,12 @@ */ uint64_t used, comp, uncomp; - ASSERT(err == 0 || err == EBUSY); + zil_destroy_sync(dmu_objset_zil(os), tx); + if (!spa_feature_is_active(dp->dp_spa, async_destroy)) { spa_feature_incr(dp->dp_spa, async_destroy, tx); - dp->dp_bptree_obj = bptree_alloc( - dp->dp_meta_objset, tx); - VERIFY(zap_add(dp->dp_meta_objset, + dp->dp_bptree_obj = bptree_alloc(mos, tx); + VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj, tx) == 0); @@ -1944,7 +1935,7 @@ ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == used); - bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj, + bptree_add(mos, dp->dp_bptree_obj, &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, used, comp, uncomp, tx); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, @@ -2233,7 +2224,6 @@ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; - dsl_dir_dirty(ds->ds_dir, tx); dmu_objset_sync(ds->ds_objset, zio, tx); } >Release-Note: >Audit-Trail: >Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201208230639.q7N6dGJ4028372>