From owner-svn-src-user@FreeBSD.ORG Tue May 26 00:23:04 2009 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 106701065675; Tue, 26 May 2009 00:23:04 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id F228A8FC17; Tue, 26 May 2009 00:23:03 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n4Q0N3TM040879; Tue, 26 May 2009 00:23:03 GMT (envelope-from kmacy@svn.freebsd.org) Received: (from kmacy@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n4Q0N3MB040864; Tue, 26 May 2009 00:23:03 GMT (envelope-from kmacy@svn.freebsd.org) Message-Id: <200905260023.n4Q0N3MB040864@svn.freebsd.org> From: Kip Macy Date: Tue, 26 May 2009 00:23:03 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r192779 - in user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . 
sys X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 26 May 2009 00:23:04 -0000 Author: kmacy Date: Tue May 26 00:23:03 2009 New Revision: 192779 URL: http://svn.freebsd.org/changeset/base/192779 Log: - make tx type operation dependent rather than vfs state dependent - simplify ZIL replay handling - add dmu_read_flags to allow explicit disabling of prefetch Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid2.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c ============================================================================== --- 
user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Tue May 26 00:23:03 2009 (r192779) @@ -177,22 +177,22 @@ dmu_bonus_hold(objset_t *os, uint64_t ob * whose dnodes are in the same block. */ static int -dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, - uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) +dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, + int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) { dsl_pool_t *dp = NULL; dmu_buf_t **dbp; uint64_t blkid, nblks, i; - uint32_t flags; + uint32_t dbuf_flags; int err; zio_t *zio; hrtime_t start; ASSERT(length <= DMU_MAX_ACCESS); - flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; - if (length > zfetch_array_rd_sz) - flags |= DB_RF_NOPREFETCH; + dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; + if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz) + dbuf_flags |= DB_RF_NOPREFETCH; rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_datablkshift) { @@ -230,7 +230,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, /* initiate async i/o */ if (read) { rw_exit(&dn->dn_struct_rwlock); - (void) dbuf_read(db, zio, flags); + (void) dbuf_read(db, zio, dbuf_flags); rw_enter(&dn->dn_struct_rwlock, RW_READER); } dbp[i] = &db->db; @@ -282,7 +282,7 @@ dmu_buf_hold_array(objset_t *os, uint64_ return (err); err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, - numbufsp, dbpp); + numbufsp, dbpp, DMU_READ_PREFETCH); dnode_rele(dn, FTAG); @@ -297,7 +297,7 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *d int err; err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, - numbufsp, dbpp); + numbufsp, dbpp, DMU_READ_PREFETCH); return (err); } @@ -536,8 +536,8 @@ dmu_free_range(objset_t *os, uint64_t ob } int -dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - void *buf) 
+dmu_read_flags(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, + void *buf, uint32_t flags) { dnode_t *dn; dmu_buf_t **dbp; @@ -567,7 +567,7 @@ dmu_read(objset_t *os, uint64_t object, * to be reading in parallel. */ err = dmu_buf_hold_array_by_dnode(dn, offset, mylen, - TRUE, FTAG, &numbufs, &dbp); + TRUE, FTAG, &numbufs, &dbp, flags); if (err) break; @@ -593,6 +593,13 @@ dmu_read(objset_t *os, uint64_t object, return (err); } +int +dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, + void *buf) +{ + return dmu_read_flags(os, object, offset, size, buf, 0); +} + void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx) Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Tue May 26 00:23:03 2009 (r192779) @@ -447,8 +447,12 @@ int dmu_free_object(objset_t *os, uint64 * Canfail routines will return 0 on success, or an errno if there is a * nonrecoverable I/O error. 
*/ +#define DMU_READ_PREFETCH 0 /* prefetch */ +#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void *buf); +int dmu_read_flags(objset_t *os, uint64_t object, uint64_t offset, + uint64_t size, void *buf, uint32_t flags); void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx); int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); @@ -456,7 +460,10 @@ int dmu_write_uio(objset_t *os, uint64_t dmu_tx_t *tx); int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, struct page *pp, dmu_tx_t *tx); - +struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); +void dmu_return_arcbuf(struct arc_buf *buf); +void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, + dmu_tx_t *tx); extern int zfs_prefetch_disable; /* Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Tue May 26 00:23:03 2009 (r192779) @@ -47,7 +47,6 @@ struct zfsvfs { uint64_t z_root; /* id of root znode */ uint64_t z_unlinkedobj; /* id of unlinked zapobj */ uint64_t z_max_blksz; /* maximum block size for files */ - uint64_t z_assign; /* TXG_NOWAIT or set by zil_replay() */ uint64_t z_fuid_obj; /* fuid table object number */ uint64_t z_fuid_size; /* fuid table size */ avl_tree_t z_fuid_idx; /* fuid tree keyed by index */ @@ -72,6 +71,7 @@ struct zfsvfs { boolean_t z_issnap; /* true if this is a snapshot */ boolean_t z_vscan; /* virus scan on/off */ boolean_t z_use_fuids; /* version allows fuids */ + boolean_t z_replay; /* set during ZIL replay */ 
kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */ uint64_t z_version; /* ZPL version */ #define ZFS_OBJ_MTX_SZ 64 Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h Tue May 26 00:23:03 2009 (r192779) @@ -335,7 +335,6 @@ typedef void zil_parse_blk_func_t(zilog_ typedef void zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg, uint64_t txg); typedef int zil_replay_func_t(); -typedef void zil_replay_cleaner_t(); typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio); extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, @@ -350,9 +349,8 @@ extern void zil_free(zilog_t *zilog); extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data); extern void zil_close(zilog_t *zilog); -extern void zil_replay(objset_t *os, void *arg, uint64_t *txgp, - zil_replay_func_t *replay_func[TX_MAX_TYPE], - zil_replay_cleaner_t *replay_cleaner); +extern void zil_replay(objset_t *os, void *arg, + zil_replay_func_t *replay_func[TX_MAX_TYPE]); extern void zil_destroy(zilog_t *zilog, boolean_t keep_first); extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx); Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h Tue May 26 00:23:03 2009 (r192779) @@ -74,13 +74,14 @@ struct zilog { uint64_t zl_commit_seq; /* committed upto this number */ uint64_t 
zl_lr_seq; /* log record sequence number */ uint64_t zl_destroy_txg; /* txg of last zil_destroy() */ - uint64_t zl_replay_seq[TXG_SIZE]; /* seq of last replayed rec */ + uint64_t zl_replayed_seq[TXG_SIZE]; /* seq of last replayed rec */ + uint64_t zl_replaying_seq; /* current replay seq number */ uint32_t zl_suspend; /* log suspend count */ kcondvar_t zl_cv_writer; /* log writer thread completion */ kcondvar_t zl_cv_suspend; /* log suspend completion */ uint8_t zl_suspending; /* log is currently suspending */ uint8_t zl_keep_first; /* keep first log block in destroy */ - uint8_t zl_stop_replay; /* don't replay any further */ + uint8_t zl_replay; /* don't replay any further */ uint8_t zl_stop_sync; /* for debugging */ uint8_t zl_writer; /* boolean: write setup in progress */ uint8_t zl_log_error; /* boolean: log write error */ @@ -102,6 +103,9 @@ typedef struct zil_dva_node { avl_node_t zn_node; } zil_dva_node_t; +#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \ + sizeof (lr_write_t)) + #ifdef __cplusplus } #endif Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c Tue May 26 00:23:03 2009 (r192779) @@ -2140,12 +2140,12 @@ top: } } - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_WAIT); if (error) { mutex_exit(&zp->z_acl_lock); mutex_exit(&zp->z_lock); - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -2201,7 +2201,7 @@ zfs_zaccess_common(znode_t *zp, uint32_t *check_privs = B_TRUE; - if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */ + if (zfsvfs->z_replay) { *working_mode = 0; return (0); } Modified: 
user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c Tue May 26 00:23:03 2009 (r192779) @@ -561,27 +561,10 @@ zfs_rmnode(znode_t *zp) vfslocked = VFS_LOCK_GIANT(zfsvfs->z_vfs); + ASSERT(ZTOV(zp)->v_count == 0); ASSERT(zp->z_phys->zp_links == 0); /* - * If this is a ZIL replay then leave the object in the unlinked set. - * Otherwise we can get a deadlock, because the delete can be - * quite large and span multiple tx's and txgs, but each replay - * creates a tx to atomically run the replay function and mark the - * replay record as complete. We deadlock trying to start a tx in - * a new txg to further the deletion but can't because the replay - * tx hasn't finished. - * - * We actually delete the object if we get a failure to create an - * object in zil_replay_log_record(), or after calling zil_replay(). - */ - if (zfsvfs->z_assign >= TXG_INITIAL) { - zfs_znode_dmu_fini(zp); - zfs_znode_free(zp); - return; - } - - /* * If this is an attribute directory, purge its contents. 
*/ if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR && @@ -849,9 +832,9 @@ zfs_make_xattrdir(znode_t *zp, vattr_t * FUID_SIZE_ESTIMATE(zfsvfs)); } } - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) + if (error == ERESTART) dmu_tx_wait(tx); dmu_tx_abort(tx); return (error); @@ -936,7 +919,7 @@ top: error = zfs_make_xattrdir(zp, &va, xvpp, cr); zfs_dirent_unlock(dl); - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { /* NB: we already did dmu_tx_wait() if necessary */ goto top; } @@ -967,7 +950,7 @@ zfs_sticky_remove_access(znode_t *zdp, z uid_t fowner; zfsvfs_t *zfsvfs = zdp->z_zfsvfs; - if (zdp->z_zfsvfs->z_assign >= TXG_INITIAL) /* ZIL replay */ + if (zdp->z_zfsvfs->z_replay) return (0); if ((zdp->z_phys->zp_mode & S_ISVTX) == 0) Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c Tue May 26 00:23:03 2009 (r192779) @@ -525,7 +525,6 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64 uint32_t rid; idmap_stat status; uint64_t idx; - boolean_t is_replay = (zfsvfs->z_assign >= TXG_INITIAL); zfs_fuid_t *zfuid = NULL; zfs_fuid_info_t *fuidp; @@ -540,7 +539,7 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64 if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0) return (id); - if (is_replay) { + if (zfsvfs->z_replay) { fuidp = zfsvfs->z_fuid_replay; /* @@ -594,7 +593,7 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64 idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, tx); - if (!is_replay) + if (!zfsvfs->z_replay) zfs_fuid_node_add(fuidpp, kdomain, rid, idx, id, type); else if (zfuid != NULL) { 
list_remove(&fuidp->z_fuids, zfuid); Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid2.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid2.c Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid2.c Tue May 26 00:23:03 2009 (r192779) @@ -525,7 +525,6 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64 uint32_t rid; idmap_stat status; uint64_t idx; - boolean_t is_replay = (zfsvfs->z_assign >= TXG_INITIAL); zfs_fuid_t *zfuid = NULL; zfs_fuid_info_t *fuidp; @@ -540,7 +539,7 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64 if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0) return (id); - if (is_replay) { + if (zfsvfs->z_replay) { fuidp = zfsvfs->z_fuid_replay; /* @@ -594,7 +593,7 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64 idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, tx); - if (!is_replay) + if (!zfsvfs->z_replay) zfs_fuid_node_add(fuidpp, kdomain, rid, idx, id, type); else if (zfuid != NULL) { list_remove(&fuidp->z_fuids, zfuid); Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c Tue May 26 00:23:03 2009 (r192779) @@ -42,6 +42,17 @@ #include #include #include +#include + +#define ZFS_HANDLE_REPLAY(zilog, tx) \ + if (zilog->zl_replay) { \ + dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); \ + zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = \ + zilog->zl_replaying_seq; \ + return; \ + } + + /* * All the functions in this file are used to construct the log entries @@ -236,6 +247,8 @@ 
zfs_log_create(zilog_t *zilog, dmu_tx_t if (zilog == NULL) return; + ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ + /* * If we have FUIDs present then add in space for * domains and ACE fuid's if any. @@ -339,6 +352,8 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t if (zilog == NULL) return; + ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ + itx = zil_itx_create(txtype, sizeof (*lr) + namesize); lr = (lr_remove_t *)&itx->itx_lr; lr->lr_doid = dzp->z_id; @@ -363,6 +378,8 @@ zfs_log_link(zilog_t *zilog, dmu_tx_t *t if (zilog == NULL) return; + ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ + itx = zil_itx_create(txtype, sizeof (*lr) + namesize); lr = (lr_link_t *)&itx->itx_lr; lr->lr_doid = dzp->z_id; @@ -390,6 +407,8 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t if (zilog == NULL) return; + ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ + itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize); lr = (lr_create_t *)&itx->itx_lr; lr->lr_doid = dzp->z_id; @@ -424,6 +443,8 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t if (zilog == NULL) return; + ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ + itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); lr = (lr_rename_t *)&itx->itx_lr; lr->lr_sdoid = sdzp->z_id; @@ -456,6 +477,8 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t * if (zilog == NULL || zp->z_unlinked) return; + ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ + /* * Writes are handled in three different ways: * @@ -553,6 +576,8 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_ if (zilog == NULL || zp->z_unlinked) return; + ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ + itx = zil_itx_create(txtype, sizeof (*lr)); lr = (lr_truncate_t *)&itx->itx_lr; lr->lr_foid = zp->z_id; @@ -582,6 +607,8 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t if (zilog == NULL || zp->z_unlinked) return; + ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ + /* * If XVATTR set, then log record size needs to allow * for lr_attr_t + xvattr mask, mapsize 
and create time @@ -648,6 +675,8 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx if (zilog == NULL || zp->z_unlinked) return; + ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ + txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ? TX_ACL_V0 : TX_ACL; Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Tue May 26 00:23:03 2009 (r192779) @@ -492,6 +492,13 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t dmu_objset_set_user(zfsvfs->z_os, zfsvfs); mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); + zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); + if (zil_disable) { + zil_destroy(zfsvfs->z_log, 0); + zfsvfs->z_log = NULL; + } + + /* * If we are not mounting (ie: online recv), then we don't * have to worry about replaying the log as we blocked all @@ -505,21 +512,27 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t * allow replays to succeed. */ readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; - zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; - - /* - * Parse and replay the intent log. - */ - zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, - zfs_replay_vector, zfs_unlinked_drain); - - zfs_unlinked_drain(zfsvfs); + if (readonly != 0) + zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; + else + zfs_unlinked_drain(zfsvfs); + + if (zfsvfs->z_log) { + + /* + * Parse and replay the intent log. + * Because of ziltest, this must be done after + * zfs_unlinked_drain(). 
(Further note: ziltest + * doesn't use readonly mounts, where + */ + zfsvfs->z_replay = B_TRUE; + zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); + zfsvfs->z_replay = B_FALSE; + } + zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ } - if (!zil_disable) - zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); - return (0); } @@ -555,7 +568,6 @@ zfs_domount(vfs_t *vfsp, char *osname) zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = vfsp; zfsvfs->z_parent = zfsvfs; - zfsvfs->z_assign = TXG_NOWAIT; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Tue May 26 00:23:03 2009 (r192779) @@ -133,12 +133,12 @@ * rw_enter(...); // grab any other locks you need * tx = dmu_tx_create(...); // get DMU tx * dmu_tx_hold_*(); // hold each object you might modify - * error = dmu_tx_assign(tx, zfsvfs->z_assign); // try to assign + * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign * if (error) { * rw_exit(...); // drop locks * zfs_dirent_unlock(dl); // unlock directory entry * VN_RELE(...); // release held vnodes - * if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + * if (error == ERESTART) { * dmu_tx_wait(tx); * dmu_tx_abort(tx); * goto top; @@ -794,10 +794,9 @@ zfs_write(vnode_t *vp, uio_t *uio, int i tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_bonus(tx, zp->z_id); dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { - if (error == ERESTART && - zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { 
dmu_tx_wait(tx); dmu_tx_abort(tx); continue; @@ -907,7 +906,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int i * If we're in replay mode, or we made no progress, return error. * Otherwise, it's at least a partial write, so it's successful. */ - if (zfsvfs->z_assign >= TXG_INITIAL || uio->uio_resid == start_resid) { + if (zfsvfs->z_replay || uio->uio_resid == start_resid) { ZFS_EXIT(zfsvfs); return (error); } @@ -1396,11 +1395,10 @@ top: dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); } - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); - if (error == ERESTART && - zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -1609,11 +1607,11 @@ top: /* charge as an update -- would be nice not to charge at all */ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); VN_RELE(vp); - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -1811,10 +1809,10 @@ top: if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp) dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE); - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -1941,13 +1939,13 @@ top: dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); dmu_tx_hold_bonus(tx, zp->z_id); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { rw_exit(&zp->z_parent_lock); rw_exit(&zp->z_name_lock); zfs_dirent_unlock(dl); VN_RELE(vp); - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + 
if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -2841,7 +2839,7 @@ top: dmu_tx_hold_bonus(tx, attrzp->z_id); } - err = dmu_tx_assign(tx, zfsvfs->z_assign); + err = dmu_tx_assign(tx, TXG_NOWAIT); if (err) { if (attrzp) VN_RELE(ZTOV(attrzp)); @@ -2851,7 +2849,7 @@ top: aclp = NULL; } - if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (err == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -3296,7 +3294,7 @@ top: if (tzp) dmu_tx_hold_bonus(tx, tzp->z_id); /* parent changes */ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { if (zl != NULL) zfs_rename_unlock(&zl); @@ -3305,7 +3303,7 @@ top: VN_RELE(ZTOV(szp)); if (tzp) VN_RELE(ZTOV(tzp)); - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -3442,10 +3440,10 @@ top: FUID_SIZE_ESTIMATE(zfsvfs)); } } - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -3661,10 +3659,10 @@ top: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_bonus(tx, szp->z_id); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c Tue May 26 00:16:16 2009 (r192778) +++ 
user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c Tue May 26 00:23:03 2009 (r192779) @@ -664,7 +664,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, d ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); - if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */ + if (zfsvfs->z_replay) { obj = vap->va_nodeid; flag |= IS_REPLAY; now = vap->va_ctime; /* see zfs_replay_create() */ @@ -1194,9 +1194,9 @@ top: newblksz = 0; } - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -1316,9 +1316,9 @@ zfs_trunc(znode_t *zp, uint64_t end) top: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_bonus(tx, zp->z_id); - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; @@ -1395,9 +1395,9 @@ zfs_freesp(znode_t *zp, uint64_t off, ui log: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_bonus(tx, zp->z_id); - error = dmu_tx_assign(tx, zfsvfs->z_assign); + error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { - if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { + if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto log; @@ -1503,7 +1503,6 @@ zfs_create_fs(objset_t *os, cred_t *cr, bzero(&zfsvfs, sizeof (zfsvfs_t)); zfsvfs.z_os = os; - zfsvfs.z_assign = TXG_NOWAIT; zfsvfs.z_parent = &zfsvfs; zfsvfs.z_version = version; zfsvfs.z_use_fuids = USE_FUIDS(version, os); Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c Tue May 26 00:16:16 2009 (r192778) +++ 
user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c Tue May 26 00:23:03 2009 (r192779) @@ -357,7 +357,7 @@ zil_create(zilog_t *zilog) txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); ASSERT(zh->zh_claim_txg == 0); - ASSERT(zh->zh_replay_seq == 0); + ASSERT(zh->zh_replayed_seq == 0); blk = zh->zh_log; @@ -1225,7 +1225,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) ASSERT(zilog->zl_stop_sync == 0); - zh->zh_replay_seq = zilog->zl_replay_seq[txg & TXG_MASK]; + zh->zh_replay_seq = zilog->zl_replayed_seq[txg & TXG_MASK]; if (zilog->zl_destroy_txg == txg) { blkptr_t blk = zh->zh_log; @@ -1234,7 +1234,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) ASSERT(spa_sync_pass(spa) == 1); bzero(zh, sizeof (zil_header_t)); - bzero(zilog->zl_replay_seq, sizeof (zilog->zl_replay_seq)); + bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq)); if (zilog->zl_keep_first) { /* @@ -1471,9 +1471,7 @@ zil_resume(zilog_t *zilog) typedef struct zil_replay_arg { objset_t *zr_os; zil_replay_func_t **zr_replay; - zil_replay_cleaner_t *zr_replay_cleaner; void *zr_arg; - uint64_t *zr_txgp; boolean_t zr_byteswap; char *zr_lrbuf; } zil_replay_arg_t; @@ -1488,7 +1486,7 @@ zil_replay_log_record(zilog_t *zilog, lr char *name; int pass, error, sunk; - if (zilog->zl_stop_replay) + if (!zilog->zl_replay) /* giving up */ return; if (lr->lrc_txg < claim_txg) /* already committed */ @@ -1552,44 +1550,14 @@ zil_replay_log_record(zilog_t *zilog, lr /* * We must now do two things atomically: replay this log record, * and update the log header to reflect the fact that we did so. - * We use the DMU's ability to assign into a specific txg to do this. + * At the end of each replay function the sequence number + * is updated if we are in replay mode. 
*/ - for (pass = 1, sunk = B_FALSE; /* CONSTANTCONDITION */; pass++) { - uint64_t replay_txg; - dmu_tx_t *replay_tx; - - replay_tx = dmu_tx_create(zr->zr_os); - error = dmu_tx_assign(replay_tx, TXG_WAIT); - if (error) { - dmu_tx_abort(replay_tx); - break; - } - - replay_txg = dmu_tx_get_txg(replay_tx); - - if (txtype == 0 || txtype >= TX_MAX_TYPE) { - error = EINVAL; - } else { - /* - * On the first pass, arrange for the replay vector - * to fail its dmu_tx_assign(). That's the only way - * to ensure that those code paths remain well tested. - * - * Only byteswap (if needed) on the 1st pass. - */ - *zr->zr_txgp = replay_txg - (pass == 1); - error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lrbuf, - zr->zr_byteswap && pass == 1); - *zr->zr_txgp = TXG_NOWAIT; - } - - if (error == 0) { - dsl_dataset_dirty(dmu_objset_ds(zr->zr_os), replay_tx); - zilog->zl_replay_seq[replay_txg & TXG_MASK] = - lr->lrc_seq; - } - - dmu_tx_commit(replay_tx); + for (pass = 1; pass <= 2; pass++) { + zilog->zl_replaying_seq = lr->lrc_seq; + /* Only byteswap (if needed) on the 1st pass. */ + error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lrbuf, + zr->zr_byteswap && pass == 1); if (!error) return; @@ -1597,36 +1565,22 @@ zil_replay_log_record(zilog_t *zilog, lr /* * The DMU's dnode layer doesn't see removes until the txg * commits, so a subsequent claim can spuriously fail with - * EEXIST. So if we receive any error other than ERESTART - * we try syncing out any removes then retrying the - * transaction. + * EEXIST. So if we receive any error we try syncing out + * any removes then retry the transaction. 
*/ - if (error != ERESTART && !sunk) { - if (zr->zr_replay_cleaner) - zr->zr_replay_cleaner(zr->zr_arg); + if (pass == 1) txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0); - sunk = B_TRUE; - continue; /* retry */ - } - - if (error != ERESTART) - break; - - if (pass != 1) - txg_wait_open(spa_get_dsl(zilog->zl_spa), - replay_txg + 1); - - dprintf("pass %d, retrying\n", pass); } - - ASSERT(error && error != ERESTART); +bad: + + ASSERT(error); name = kmem_alloc(MAXNAMELEN, KM_SLEEP); dmu_objset_name(zr->zr_os, name); cmn_err(CE_WARN, "ZFS replay transaction error %d, " "dataset %s, seq 0x%llx, txtype %llu %s\n", error, name, (u_longlong_t)lr->lrc_seq, (u_longlong_t)txtype, (lr->lrc_txtype & TX_CI) ? "CI" : ""); - zilog->zl_stop_replay = 1; + zilog->zl_replay = B_FALSE; kmem_free(name, MAXNAMELEN); } @@ -1641,9 +1595,8 @@ zil_incr_blks(zilog_t *zilog, blkptr_t * * If this dataset has a non-empty intent log, replay it and destroy it. */ void -zil_replay(objset_t *os, void *arg, uint64_t *txgp, - zil_replay_func_t *replay_func[TX_MAX_TYPE], - zil_replay_cleaner_t *replay_cleaner) +zil_replay(objset_t *os, void *arg, + zil_replay_func_t *replay_func[TX_MAX_TYPE]) { zilog_t *zilog = dmu_objset_zil(os); const zil_header_t *zh = zilog->zl_header; @@ -1657,9 +1610,7 @@ zil_replay(objset_t *os, void *arg, uint zr.zr_os = os; zr.zr_replay = replay_func; - zr.zr_replay_cleaner = replay_cleaner; zr.zr_arg = arg; - zr.zr_txgp = txgp; zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log); zr.zr_lrbuf = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP); @@ -1668,7 +1619,7 @@ zil_replay(objset_t *os, void *arg, uint */ txg_wait_synced(zilog->zl_dmu_pool, 0); - zilog->zl_stop_replay = 0; + zilog->zl_replay = B_TRUE; zilog->zl_replay_time = LBOLT; ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, @@ -1677,6 +1628,7 @@ zil_replay(objset_t *os, void *arg, uint zil_destroy(zilog, B_FALSE); txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); + 
zilog->zl_replay = B_FALSE; //printf("ZFS: Replay of ZIL on %s finished.\n", os->os->os_spa->spa_name); } Modified: user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c ============================================================================== --- user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c Tue May 26 00:16:16 2009 (r192778) +++ user/kmacy/releng_7_2_fcs/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c Tue May 26 00:23:03 2009 (r192779) @@ -73,6 +73,7 @@ #include #include #include +#include #include "zfs_namecheck.h" @@ -138,6 +139,7 @@ typedef struct zvol_state { #define ZVOL_RDONLY 0x1 #define ZVOL_DUMPIFIED 0x2 #define ZVOL_EXCL 0x4 +#define ZVOL_WCE 0x8 /* * zvol maximum transfer in one DMU tx. @@ -278,28 +280,72 @@ zvol_access(struct g_provider *pp, int a ssize_t zvol_immediate_write_sz = 32768; static void -zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len) +zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, + boolean_t sync) { uint32_t blocksize = zv->zv_volblocksize; - lr_write_t *lr; + zilog_t *zilog = zv->zv_zilog; + boolean_t slogging; - while (len) { - ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize)); - itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr)); + if (zil_disable) + return; - itx->itx_wr_state = - len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY; - itx->itx_private = zv; + if (zilog->zl_replay) { + dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); + zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = + zilog->zl_replaying_seq; + return; + } + slogging = spa_has_slogs(zilog->zl_spa); + + while (resid) { + ssize_t len; + itx_t *itx; + lr_write_t *lr; + itx_wr_state_t write_state; + + /* + * Unlike zfs_log_write() we can be called with + * upto DMU_MAX_ACCESS/2 (5MB) writes. 
+ */ + if (blocksize > zvol_immediate_write_sz && !slogging && + resid >= blocksize && off % blocksize == 0) { + write_state = WR_INDIRECT; /* uses dmu_sync */ + len = blocksize; + } else if (sync) { + write_state = WR_COPIED; + len = MIN(ZIL_MAX_LOG_DATA, resid); + } else { + write_state = WR_NEED_COPY; + len = MIN(ZIL_MAX_LOG_DATA, resid); + } + + itx = zil_itx_create(TX_WRITE, sizeof (*lr) + + (write_state == WR_COPIED ? len : 0)); lr = (lr_write_t *)&itx->itx_lr; + if (write_state == WR_COPIED && dmu_read_flags(zv->zv_objset, + ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { + kmem_free(itx, offsetof(itx_t, itx_lr) + + itx->itx_lr.lrc_reclen); + itx = zil_itx_create(TX_WRITE, sizeof (*lr)); + lr = (lr_write_t *)&itx->itx_lr; + write_state = WR_NEED_COPY; + } + + itx->itx_wr_state = write_state; + if (write_state == WR_NEED_COPY) + itx->itx_sod += len; + itx->itx_private = zv; lr->lr_foid = ZVOL_OBJ; lr->lr_offset = off; - lr->lr_length = nbytes; + lr->lr_length = len; lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t); BP_ZERO(&lr->lr_blkptr); (void) zil_itx_assign(zv->zv_zilog, itx, tx); - len -= nbytes; - off += nbytes; + + off += len; + resid -= len; } } @@ -337,6 +383,7 @@ zvol_serve_one(zvol_state_t *zv, struct rl_t *rl; int error = 0; boolean_t reading; + boolean_t sync; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***