Date: Thu, 3 Dec 2009 00:27:16 +0000 (UTC)
From: Kip Macy <kmacy@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r200049 - in user/kmacy/releng_8_fcs_buf_xen: cddl/lib/libzpool sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys
Message-ID: <200912030027.nB30RGoI093428@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kmacy Date: Thu Dec 3 00:27:16 2009 New Revision: 200049 URL: http://svn.freebsd.org/changeset/base/200049 Log: - Minimize ARC churn by moving functions interfacing with the buffer cache to a separate file - consolidate I/O cache synchronization in zio_create Added: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h (contents, props changed) user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c (contents, props changed) Modified: user/kmacy/releng_8_fcs_buf_xen/cddl/lib/libzpool/Makefile user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c Modified: user/kmacy/releng_8_fcs_buf_xen/cddl/lib/libzpool/Makefile ============================================================================== --- user/kmacy/releng_8_fcs_buf_xen/cddl/lib/libzpool/Makefile Wed Dec 2 21:58:34 2009 (r200048) +++ user/kmacy/releng_8_fcs_buf_xen/cddl/lib/libzpool/Makefile Thu Dec 3 00:27:16 2009 (r200049) @@ -23,7 +23,7 @@ ATOMIC_SRCS= opensolaris_atomic.c LIB= zpool -ZFS_COMMON_SRCS= ${ZFS_COMMON_OBJS:C/.o$/.c/} vdev_file.c +ZFS_COMMON_SRCS= ${ZFS_COMMON_OBJS:C/.o$/.c/} vdev_file.c zfs_bio.c ZFS_SHARED_SRCS= ${ZFS_SHARED_OBJS:C/.o$/.c/} KERNEL_SRCS= kernel.c taskq.c util.c LIST_SRCS= list.c Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c ============================================================================== --- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Wed Dec 2 21:58:34 2009 (r200048) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Thu Dec 3 00:27:16 2009 (r200049) @@ -122,12 +122,12 @@ #include <sys/zio_checksum.h> #include <sys/zfs_context.h> #include <sys/arc.h> 
+#include <sys/zfs_bio.h> #include <sys/refcount.h> #include <sys/vdev.h> #ifdef _KERNEL #include <sys/dnlc.h> #endif -#include <sys/ktr.h> #include <sys/callb.h> #include <sys/kstat.h> #include <sys/sdt.h> @@ -187,11 +187,6 @@ SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_min, SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN, &zfs_mdcomp_disable, 0, "Disable metadata compression"); -static int zfs_page_cache_disable = 0; -TUNABLE_INT("vfs.zfs.page_cache_disable", &zfs_page_cache_disable); -SYSCTL_INT(_vfs_zfs, OID_AUTO, page_cache_disable, CTLFLAG_RDTUN, - &zfs_page_cache_disable, 0, "Disable backing ARC with page cache "); - #ifdef ZIO_USE_UMA extern kmem_cache_t *zio_buf_cache[]; extern kmem_cache_t *zio_data_buf_cache[]; @@ -263,8 +258,8 @@ static arc_state_t ARC_mfu_ghost; static arc_state_t ARC_l2c_only; typedef struct arc_stats { - kstat_named_t arcstat_hits; kstat_named_t arcstat_page_cache_hits; + kstat_named_t arcstat_hits; kstat_named_t arcstat_misses; kstat_named_t arcstat_demand_data_hits; kstat_named_t arcstat_demand_data_misses; @@ -453,28 +448,33 @@ struct arc_write_callback { arc_buf_t *awcb_buf; }; +/* + * Keep initial ordering in-sync with zbio_buf_hdr + */ + struct arc_buf_hdr { /* protected by hash lock */ dva_t b_dva; uint64_t b_birth; - uint64_t b_cksum0; + uint32_t b_flags; + uint32_t b_datacnt; + /* immutable */ + arc_buf_contents_t b_type; + uint64_t b_size; + spa_t *b_spa; + + /* protected by hash lock */ kmutex_t b_freeze_lock; zio_cksum_t *b_freeze_cksum; arc_buf_hdr_t *b_hash_next; arc_buf_t *b_buf; - uint32_t b_flags; - uint32_t b_datacnt; + uint64_t b_cksum0; arc_callback_t *b_acb; kcondvar_t b_cv; - /* immutable */ - arc_buf_contents_t b_type; - uint64_t b_size; - spa_t *b_spa; - /* protected by arc state mutex */ arc_state_t *b_state; list_node_t b_arc_node; @@ -520,7 +520,6 @@ static void arc_evict_ghost(arc_state_t #define ARC_L2_EVICTED (1 << 17) /* evicted during I/O */ #define ARC_L2_WRITE_HEAD (1 << 18) /* head of write 
list */ #define ARC_STORED (1 << 19) /* has been store()d to */ -#define ARC_BUF_CLONING (1 << 21) /* is being cloned */ #define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_IN_HASH_TABLE) #define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS) @@ -642,9 +641,8 @@ struct l2arc_buf_hdr { typedef struct l2arc_data_free { /* protected by l2arc_free_on_write_mtx */ arc_buf_t *l2df_buf; - void *l2df_data; size_t l2df_size; - void (*l2df_func)(arc_buf_t *, void *, size_t); + void (*l2df_func)(arc_buf_t *, size_t); list_node_t l2df_list_node; } l2arc_data_free_t; @@ -1260,7 +1258,7 @@ arc_buf_clone(arc_buf_t *from) buf->b_private = NULL; buf->b_next = hdr->b_buf; hdr->b_buf = buf; - hdr->b_flags |= ARC_BUF_CLONING; + hdr->b_flags |= ZBIO_BUF_CLONING; arc_get_data_buf(buf); bcopy(from->b_data, buf->b_data, size); hdr->b_datacnt += 1; @@ -1299,259 +1297,18 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta data, metadata, hits); } -#ifdef _KERNEL -void -arc_binval(spa_t *spa, dva_t *dva, uint64_t size) -{ - uint64_t blkno, blkno_lookup; - struct vnode *vp; - struct bufobj *bo; - struct buf *bp; - vm_pindex_t start, end; - vm_object_t object; - vm_page_t m; - int i; - - if (zfs_page_cache_disable) - return; - - if (dva == NULL || spa == NULL || blkno == 0 || size == 0) - return; - - blkno_lookup = blkno = dva->dva_word[1] & ~(1ULL<<63); - vp = spa_get_vnode(spa); - bo = &vp->v_bufobj; - - BO_LOCK(bo); -retry: - bp = gbincore(bo, blkno_lookup); - if (bp != NULL) { - BUF_LOCK(bp, LK_EXCLUSIVE | LK_INTERLOCK, BO_MTX(bo)); - CTR3(KTR_SPARE2, "arc_binval() bp=%p blkno %ld npages %d", - bp, blkno, bp->b_npages); - bremfree(bp); - KASSERT(bp->b_flags & B_VMIO, ("buf found, VMIO not set")); - bp->b_flags |= B_INVAL; - bp->b_birth = 0; - brelse(bp); - } else if (blkno_lookup & 0x7) { - blkno_lookup &= ~0x7; - goto retry; - } else { - CTR2(KTR_SPARE2, "arc_binval() blkno %ld npages %d", - blkno, OFF_TO_IDX(size)); - BO_UNLOCK(bo); - } - start = OFF_TO_IDX((blkno_lookup << 9)); 
- end = start + OFF_TO_IDX(size + PAGE_MASK); - object = vp->v_object; - - VM_OBJECT_LOCK(object); - vm_page_cache_free(object, start, end); - vm_object_page_remove(object, start, end, FALSE); -#ifdef INVARIANTS - for (i = 0; i < OFF_TO_IDX(size); i++) { - KASSERT(vm_page_lookup(object, start + i) == NULL, - ("found page at %ld blkno %ld blkno_lookup %ld", - start + i, blkno, blkno_lookup)); - } -#endif - VM_OBJECT_UNLOCK(object); -} - -static void -arc_pcache(struct vnode *vp, struct buf *bp, uint64_t blkno) -{ - vm_pindex_t start = OFF_TO_IDX((blkno << 9)); - vm_object_t object = vp->v_object; - struct bufobj *bo = &vp->v_bufobj; - vm_page_t m; - int i; - - BO_LOCK(bo); - bgetvp(vp, bp); - BO_UNLOCK(bo); - - CTR3(KTR_SPARE2, "arc_pcache() bp=%p blkno %ld npages %d", - bp, blkno, bp->b_npages); - VM_OBJECT_LOCK(object); - for (i = 0; i < bp->b_npages; i++) { - m = bp->b_pages[i]; - vm_page_insert(m, object, start + i); - } - VM_OBJECT_UNLOCK(object); - bp->b_flags |= B_VMIO; -} - -static void -arc_bcache(arc_buf_t *buf) -{ - uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1ULL<<63); - struct buf *newbp, *bp = buf->b_bp; - struct vnode *vp = spa_get_vnode(buf->b_hdr->b_spa); - struct bufobj *bo = &vp->v_bufobj; - arc_buf_hdr_t *hdr = buf->b_hdr; - int cachebuf; - - if (zfs_page_cache_disable) - return; - - if (blkno == 0 || hdr->b_birth == 0) - return; - - newbp = buf->b_bp; - newbp->b_birth = hdr->b_birth; - newbp->b_blkno = newbp->b_lblkno = blkno; - newbp->b_offset = (blkno << 9); - cachebuf = ((hdr->b_datacnt == 1) && - !(hdr->b_flags & ARC_IO_ERROR) && - ((newbp->b_flags & (B_INVAL|B_CACHE)) == B_CACHE) && - (blkno & 0x7) == 0); - - arc_binval(hdr->b_spa, &hdr->b_dva, hdr->b_size); - if (cachebuf) - arc_pcache(vp, newbp, blkno); -} -#else -void -arc_binval(spa_t *spa, dva_t *dva, uint64_t size) -{ -} -#endif - - -static void -arc_getblk(arc_buf_t *buf) -{ - uint64_t size = buf->b_hdr->b_size; - arc_buf_contents_t type = buf->b_hdr->b_type; - spa_t *spa = 
buf->b_hdr->b_spa; - uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1ULL<<63); - void *data; - arc_buf_t *tbuf; - struct vnode *vp; - int i, flags = 0; -#ifdef _KERNEL - struct buf *newbp, *bp; - struct bufobj *bo; - vm_pindex_t start, end; - vm_object_t object; -#endif - if (type == ARC_BUFC_METADATA) { - arc_space_consume(size); - } else { - ASSERT(type == ARC_BUFC_DATA); -#ifdef _KERNEL - flags = GB_NODUMP; -#endif - atomic_add_64(&arc_size, size); - } - -#ifdef _KERNEL - vp = spa_get_vnode(spa); - bo = &vp->v_bufobj; - newbp = NULL; -#endif - if (size < PAGE_SIZE) { - data = zio_buf_alloc(size); - } -#ifdef _KERNEL - else if ((buf->b_hdr->b_flags & ARC_BUF_CLONING) || - BUF_EMPTY(buf->b_hdr) || - (blkno == 0)) { - newbp = geteblk(size, flags); - data = newbp->b_data; - buf->b_hdr->b_flags &= ~ARC_BUF_CLONING; - } else { - newbp = getblk(vp, blkno, size, 0, 0, flags | GB_LOCK_NOWAIT); - if (newbp == NULL) - newbp = geteblk(size, flags); - else { - vm_object_t object = vp->v_object; - vm_page_t m; - - /* - * Strip the buffers pages from the object - */ - VM_OBJECT_LOCK(object); - vm_page_lock_queues(); - for (i = 0; i < newbp->b_npages; i++){ - m = newbp->b_pages[i]; - vm_page_remove(m); - } - vm_page_unlock_queues(); - VM_OBJECT_UNLOCK(object); - brelvp(newbp); - newbp->b_flags &= ~B_VMIO; - } - data = newbp->b_data; - } - - if (newbp != NULL) { - BUF_KERNPROC(newbp); - - CTR4(KTR_SPARE2, "arc_getblk() bp=%p flags %X blkno %ld npages %d", - newbp, newbp->b_flags, blkno, newbp->b_npages); -#ifdef INVARIANTS - for (i = 0; i < newbp->b_npages; i++) - KASSERT(newbp->b_pages[i]->object == NULL, - ("newbp page not removed")); -#endif - } - buf->b_bp = newbp; -#endif - buf->b_data = data; -} - -static void -arc_brelse(arc_buf_t *buf, void *data, size_t size) -{ - struct buf *bp = buf->b_bp; - arc_buf_hdr_t *hdr = buf->b_hdr; -#ifdef INVARIANTS - int i; -#endif - - if (bp == NULL) { - zio_buf_free(buf->b_data, size); - return; - } -#ifdef _KERNEL -#ifdef INVARIANTS 
- for (i = 0; i < bp->b_npages; i++) - KASSERT(bp->b_pages[i]->object == NULL, - ("newbp page not removed")); -#endif - arc_bcache(buf); - - - if (bp->b_vp == NULL) - KASSERT((bp->b_flags & B_VMIO) == 0, ("no vp but VMIO set!")); - else { - KASSERT((bp->b_flags & B_VMIO), ("vp but VMIO not set!")); - CTR4(KTR_SPARE2, "arc_brelse() bp=%p flags %X" - " size %ld blkno=%ld", - bp, bp->b_flags, size, bp->b_blkno); - } - - bp->b_flags |= B_ZFS; - brelse(bp); -#endif -} - /* * Free the arc data buffer. If it is an l2arc write in progress, * the buffer is placed on l2arc_free_on_write to be freed later. */ static void -arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(arc_buf_t *, void *, size_t), - arc_buf_t *buf, void *data, size_t size) +arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(arc_buf_t *, size_t), + arc_buf_t *buf, size_t size) { if (HDR_L2_WRITING(hdr)) { l2arc_data_free_t *df; df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP); df->l2df_buf = buf; - df->l2df_data = data; df->l2df_size = size; df->l2df_func = free_func; mutex_enter(&l2arc_free_on_write_mtx); @@ -1559,7 +1316,7 @@ arc_buf_data_free(arc_buf_hdr_t *hdr, vo mutex_exit(&l2arc_free_on_write_mtx); ARCSTAT_BUMP(arcstat_l2_free_on_write); } else { - free_func(buf, data, size); + free_func(buf, size); } } @@ -1577,13 +1334,13 @@ arc_buf_destroy(arc_buf_t *buf, boolean_ arc_cksum_verify(buf); if (!recycle) { if (type == ARC_BUFC_METADATA) { - arc_buf_data_free(buf->b_hdr, arc_brelse, - buf, buf->b_data, size); + arc_buf_data_free(buf->b_hdr, zbio_relse, + buf, size); arc_space_return(size); } else { ASSERT(type == ARC_BUFC_DATA); - arc_buf_data_free(buf->b_hdr, arc_brelse, - buf, buf->b_data, size); + arc_buf_data_free(buf->b_hdr, + zbio_relse, buf, size); atomic_add_64(&arc_size, -size); } } @@ -1802,12 +1559,14 @@ arc_evict(arc_state_t *state, spa_t *spa evicted_state = (state == arc_mru) ? 
arc_mru_ghost : arc_mfu_ghost; +#ifdef _KERNEL /* * don't recycle page cache bufs * */ if (recycle && (bytes >= PAGE_SIZE)) recycle = FALSE; +#endif if (type == ARC_BUFC_METADATA) { offset = 0; list_count = ARC_BUFC_NUMMETADATALISTS; @@ -1822,9 +1581,7 @@ arc_evict(arc_state_t *state, spa_t *spa list_count = ARC_BUFC_NUMDATALISTS; idx = evict_data_offset; } - for (bytes_remaining = 0, i = 0; i < list_count; i++) - bytes_remaining += evicted_state->arcs_lsize[i + offset]; - + bytes_remaining = evicted_state->arcs_lsize[type]; count = 0; evict_start: @@ -2422,7 +2179,7 @@ arc_reclaim_thread(void *dummy __unused) static void arc_adapt(int bytes, arc_state_t *state) { - int mult, divisor; + int mult; if (state == arc_l2c_only) return; @@ -2437,15 +2194,13 @@ arc_adapt(int bytes, arc_state_t *state) * target size of the MRU list. */ if (state == arc_mru_ghost) { - divisor = MAX(arc_mru_ghost->arcs_size, 1); mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ? 1 : (arc_mfu_ghost->arcs_size/arc_mru_ghost->arcs_size)); arc_p = MIN(arc_c, arc_p + bytes * mult); } else if (state == arc_mfu_ghost) { - divisor = MAX(arc_mfu_ghost->arcs_size, 1); mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ? - 1 : (arc_mru_ghost->arcs_size/divisor)); + 1 : (arc_mru_ghost->arcs_size/arc_mfu_ghost->arcs_size)); arc_p = MAX(0, (int64_t)arc_p - bytes * mult); } @@ -2545,7 +2300,14 @@ arc_get_data_buf(arc_buf_t *buf) * just allocate a new buffer. */ if (!arc_evict_needed(type)) { - arc_getblk(buf); + if (type == ARC_BUFC_METADATA) { + zbio_getblk(buf); + arc_space_consume(size); + } else { + ASSERT(type == ARC_BUFC_DATA); + zbio_data_getblk(buf); + atomic_add_64(&arc_size, size); + } goto out; } @@ -2569,10 +2331,18 @@ arc_get_data_buf(arc_buf_t *buf) mfu_space > arc_mfu->arcs_size) ? 
arc_mru : arc_mfu; } if ((buf->b_data = arc_evict(state, NULL, size, TRUE, type)) == NULL) { - arc_getblk(buf); - ASSERT(buf->b_data != NULL); + if (type == ARC_BUFC_METADATA) { + zbio_getblk(buf); + arc_space_consume(size); + } else { + ASSERT(type == ARC_BUFC_DATA); + zbio_data_getblk(buf); + atomic_add_64(&arc_size, size); + } + if (size < PAGE_SIZE) + ARCSTAT_BUMP(arcstat_recycle_miss); } - + ASSERT(buf->b_data != NULL); out: /* * Update the state size. Note that ghost states have a @@ -2818,18 +2588,7 @@ arc_read_done(zio_t *zio) buf_hash_remove(hdr); freeable = refcount_is_zero(&hdr->b_refcnt); } -#ifdef _KERNEL - else if (buf->b_bp != NULL) { -#ifdef INVARIANTS - int i; - for (i = 0; i < buf->b_bp->b_npages; i++) - KASSERT(buf->b_bp->b_pages[i]->object == NULL, - ("bp page not removed")); -#endif - buf->b_bp->b_flags |= B_CACHE; - buf->b_bp->b_flags &= ~B_INVAL; - } -#endif + /* * Broadcast before we drop the hash_lock to avoid the possibility * that the hdr (and hence the cv) might be freed before we get to @@ -3535,12 +3294,6 @@ arc_write_done(zio_t *zio) exists = buf_hash_insert(hdr, &hash_lock); ASSERT3P(exists, ==, NULL); } -#ifdef _KERNEL - else if (buf->b_bp != NULL) { - buf->b_bp->b_flags |= B_CACHE; - buf->b_bp->b_flags &= ~B_INVAL; - } -#endif hdr->b_flags &= ~ARC_IO_IN_PROGRESS; /* if it's not anon, we are doing a scrub */ if (hdr->b_state == arc_anon) @@ -3832,7 +3585,6 @@ arc_tempreserve_space(uint64_t reserve, static kmutex_t arc_lowmem_lock; #ifdef _KERNEL static eventhandler_tag arc_event_lowmem = NULL; -static eventhandler_tag arc_event_shutdown = NULL; static void arc_lowmem(void *arg __unused, int howto __unused) @@ -3846,44 +3598,6 @@ arc_lowmem(void *arg __unused, int howto tsleep(&needfree, 0, "zfs:lowmem", hz / 5); mutex_exit(&arc_lowmem_lock); } -void -arc_shutdown(void *arg __unused, int howto __unused) -{ - struct mount *mp, *tmpmp; - int error; - - /* - * unmount all ZFS file systems - freeing any buffers - * then free all space 
allocator resources - */ - TAILQ_FOREACH_SAFE(mp, &mountlist, mnt_list, tmpmp) { - if (strcmp(mp->mnt_vfc->vfc_name, "zfs") == 0) { - error = dounmount(mp, MNT_FORCE, curthread); - if (error) { - TAILQ_REMOVE(&mountlist, mp, mnt_list); - printf("unmount of %s failed (", - mp->mnt_stat.f_mntonname); - if (error == EBUSY) - printf("BUSY)\n"); - else - printf("%d)\n", error); - } - } - - } - arc_flush(NULL); - -#ifdef NOTYET - /* - * need corresponding includes - */ - zfsdev_fini(); - zvol_fini(); - zfs_fini(); -#endif - spa_fini(); -} - #endif void @@ -4009,8 +3723,6 @@ arc_init(void) #ifdef _KERNEL arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL, EVENTHANDLER_PRI_FIRST); - arc_event_shutdown = EVENTHANDLER_REGISTER(shutdown_pre_sync, - arc_shutdown, NULL, EVENTHANDLER_PRI_FIRST); #endif arc_dead = FALSE; @@ -4105,8 +3817,6 @@ arc_fini(void) #ifdef _KERNEL if (arc_event_lowmem != NULL) EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem); - if (arc_event_shutdown != NULL) - EVENTHANDLER_DEREGISTER(shutdown_pre_sync, arc_event_shutdown); #endif } @@ -4326,9 +4036,8 @@ l2arc_do_free_on_write() for (df = list_tail(buflist); df; df = df_prev) { df_prev = list_prev(buflist, df); - ASSERT(df->l2df_data != NULL); ASSERT(df->l2df_func != NULL); - df->l2df_func(df->l2df_buf, df->l2df_data, df->l2df_size); + df->l2df_func(df->l2df_buf, df->l2df_size); list_remove(buflist, df); kmem_free(df, sizeof (l2arc_data_free_t)); } Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h ============================================================================== --- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h Wed Dec 2 21:58:34 2009 (r200048) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h Thu Dec 3 00:27:16 2009 (r200049) @@ -52,7 +52,9 @@ struct arc_buf { void *b_data; arc_evict_func_t *b_efunc; void *b_private; +#ifdef _KERNEL 
struct buf *b_bp; +#endif }; typedef enum arc_buf_contents { @@ -83,6 +85,7 @@ int arc_released(arc_buf_t *buf); int arc_has_callback(arc_buf_t *buf); void arc_buf_freeze(arc_buf_t *buf); void arc_buf_thaw(arc_buf_t *buf); + #ifdef ZFS_DEBUG int arc_referenced(arc_buf_t *buf); #endif @@ -112,7 +115,6 @@ int arc_tryread(spa_t *spa, blkptr_t *bp void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private); int arc_buf_evict(arc_buf_t *buf); -void arc_binval(spa_t *spa, dva_t *dva, uint64_t size); void arc_flush(spa_t *spa); void arc_tempreserve_clear(uint64_t reserve); int arc_tempreserve_space(uint64_t reserve, uint64_t txg); Added: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h Thu Dec 3 00:27:16 2009 (r200049) @@ -0,0 +1,60 @@ +/************************************************************************** + +Copyright (c) 2009, Kip Macy, BitGravity Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the BitGravity Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +$FreeBSD$ + +***************************************************************************/ + +#ifndef _SYS_ZFS_BIO_H +#define _SYS_ZFS_BIO_H + +#define ZBIO_BUF_CLONING (1 << 30) /* is being cloned */ + +void zbio_sync_cache(spa_t *spa, blkptr_t *bp, uint64_t txg, uint64_t size); +void zbio_getblk(arc_buf_t *buf); +void zbio_data_getblk(arc_buf_t *buf); +void zbio_relse(arc_buf_t *buf, size_t size); + +typedef struct zbio_buf_hdr zbio_buf_hdr_t; +struct zbio_buf_hdr { + /* protected by hash lock */ + dva_t b_dva; + uint64_t b_birth; + uint32_t b_flags; + uint32_t b_datacnt; + + /* immutable */ + arc_buf_contents_t b_type; + uint64_t b_size; + spa_t *b_spa; +}; + +#ifdef _KERNEL +void zbio_init(void); +void zbio_fini(void); +#endif +#endif Added: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c Thu Dec 3 00:27:16 2009 (r200049) @@ -0,0 +1,321 @@ +/************************************************************************** + +Copyright (c) 2009, Kip Macy, BitGravity Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the BitGravity Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +***************************************************************************/ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/spa.h> +#include <sys/zio.h> +#include <sys/zio_checksum.h> +#include <sys/zfs_context.h> +#include <sys/arc.h> +#include <sys/zfs_bio.h> +#include <sys/refcount.h> +#include <sys/vdev.h> +#include <sys/callb.h> +#include <sys/kstat.h> +#include <sys/sdt.h> + +#include <vm/vm_pageout.h> + +#ifdef _KERNEL + +#define BUF_EMPTY(buf) \ + ((buf)->b_dva.dva_word[0] == 0 && \ + (buf)->b_dva.dva_word[1] == 0 && \ + (buf)->b_birth == 0) + +SYSCTL_DECL(_vfs_zfs); +static int zfs_page_cache_disable = 1; +TUNABLE_INT("vfs.zfs.page_cache_disable", &zfs_page_cache_disable); +SYSCTL_INT(_vfs_zfs, OID_AUTO, page_cache_disable, CTLFLAG_RDTUN, + &zfs_page_cache_disable, 0, "Disable backing ARC with page cache "); + +static eventhandler_tag zbio_event_shutdown = NULL; + +void +zbio_data_getblk(arc_buf_t *buf) +{ + + zbio_getblk(buf); +} + +void +zbio_getblk(arc_buf_t *buf) +{ + zbio_buf_hdr_t *hdr = (zbio_buf_hdr_t *)buf->b_hdr; + uint64_t size = hdr->b_size; + arc_buf_contents_t type = hdr->b_type; + spa_t *spa = hdr->b_spa; + uint64_t blkno = hdr->b_dva.dva_word[1] & ~(1ULL<<63); + void *data; + arc_buf_t *tbuf; + struct vnode *vp; + int i, flags = 0; + struct buf *newbp; + struct bufobj *bo; + vm_pindex_t start, end; + vm_object_t object; + + vp = spa_get_vnode(spa); + bo = &vp->v_bufobj; + newbp = NULL; + if ((size < PAGE_SIZE) || (hdr->b_flags & ZBIO_BUF_CLONING) || + zfs_page_cache_disable) { + data = zio_buf_alloc(size); + hdr->b_flags &= ~ZBIO_BUF_CLONING; + } else if (BUF_EMPTY(hdr)) { + newbp = geteblk(size, flags); + data = newbp->b_data; + } else { + newbp = getblk(vp, blkno, size, 0, 0, flags | GB_LOCK_NOWAIT); + if (newbp == NULL) + newbp = geteblk(size, flags); + else + brelvp(newbp); + data = newbp->b_data; + } + + if (newbp != NULL) { + BUF_KERNPROC(newbp); + newbp->b_bufobj = bo; + CTR4(KTR_SPARE2, "arc_getblk() bp=%p 
flags %X " + "blkno %ld npages %d", + newbp, newbp->b_flags, blkno, newbp->b_npages); + } + + buf->b_bp = newbp; + buf->b_data = data; +} + +void +zbio_relse(arc_buf_t *buf, size_t size) +{ + struct buf *bp = buf->b_bp; + void * data = buf->b_data; + + if (bp == NULL) { + zio_buf_free(data, size); + return; + } + + CTR4(KTR_SPARE2, "arc_brelse() bp=%p flags %X" + " size %ld blkno=%ld", + bp, bp->b_flags, size, bp->b_blkno); + + bp->b_flags |= B_ZFS; + brelse(bp); +} + +void +zbio_sync_cache(spa_t *spa, blkptr_t *bp, uint64_t txg, uint64_t size) +{ +#ifdef notyet + uint64_t blkno, blkno_lookup; + struct vnode *vp; + struct bufobj *bo; + struct buf *bp; + vm_pindex_t start, end; + vm_object_t object; + vm_page_t m; + int i; + + if (zfs_page_cache_disable) + return; + blkno_lookup = blkno = dva->dva_word[1] & ~(1ULL<<63); + vp = spa_get_vnode(spa); + bo = &vp->v_bufobj; + + if (dva == NULL || spa == NULL || blkno == 0 || size == 0) + return; + + start = OFF_TO_IDX((blkno_lookup << 9)); + end = start + OFF_TO_IDX(size + PAGE_MASK); + object = vp->v_object; + + VM_OBJECT_LOCK(object); + vm_page_cache_free(object, start, end); + vm_object_page_remove(object, start, end, FALSE); +#ifdef INVARIANTS + for (i = 0; i < OFF_TO_IDX(size); i++) { + KASSERT(vm_page_lookup(object, start + i) == NULL, + ("found page at %ld blkno %ld blkno_lookup %ld", + start + i, blkno, blkno_lookup)); + } +#endif + VM_OBJECT_UNLOCK(object); +#endif +} + +#if 0 +static void +arc_pcache(struct vnode *vp, struct buf *bp, uint64_t blkno) +{ + vm_pindex_t start = OFF_TO_IDX((blkno << 9)); + vm_object_t object = vp->v_object; + struct bufobj *bo = &vp->v_bufobj; + vm_page_t m; + int i; + + CTR3(KTR_SPARE2, "arc_pcache() bp=%p blkno %ld npages %d", + bp, blkno, bp->b_npages); + VM_OBJECT_LOCK(object); + vm_page_lock_queues(); + for (i = 0; i < bp->b_npages; i++) { + m = bp->b_pages[i]; + m->valid = VM_PAGE_BITS_ALL; + vm_page_insert(m, object, start + i); + m->flags &= ~PG_UNMANAGED; + 
vm_page_enqueue(PQ_INACTIVE, m); + vdrop(vp); + } + vm_page_unlock_queues(); + VM_OBJECT_UNLOCK(object); + bp->b_bufobj = bo; + bp->b_flags |= B_VMIO; +} + +static void +arc_bcache(arc_buf_t *buf) +{ + uint64_t blkno = buf->b_hdr->b_dva.dva_word[1] & ~(1ULL<<63); + struct buf *bp; + struct vnode *vp = spa_get_vnode(buf->b_hdr->b_spa); + arc_buf_hdr_t *hdr = buf->b_hdr; + int cachebuf; + + if (zfs_page_cache_disable) + return; + + if (blkno == 0 || hdr->b_birth == 0) + return; + + bp = buf->b_bp; + bp->b_birth = hdr->b_birth; + bp->b_blkno = bp->b_lblkno = blkno; + bp->b_offset = (blkno << 9); + cachebuf = ((hdr->b_datacnt == 1) && + !(hdr->b_flags & ARC_IO_ERROR) && + ((bp->b_flags & (B_INVAL|B_CACHE)) == B_CACHE) && + (blkno & 0x7) == 0); + + arc_binval(hdr->b_spa, &hdr->b_dva, hdr->b_size); + if (cachebuf) + arc_pcache(vp, bp, blkno); +} +#endif + +static void +zbio_shutdown(void *arg __unused, int howto __unused) +{ + struct mount *mp, *tmpmp; + int error; + + /* + * unmount all ZFS file systems - freeing any buffers + * then free all space allocator resources + */ + TAILQ_FOREACH_SAFE(mp, &mountlist, mnt_list, tmpmp) { + if (strcmp(mp->mnt_vfc->vfc_name, "zfs") == 0) { + error = dounmount(mp, MNT_FORCE, curthread); + if (error) { + TAILQ_REMOVE(&mountlist, mp, mnt_list); + printf("unmount of %s failed (", + mp->mnt_stat.f_mntonname); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); + } + } + + } + arc_flush(NULL); + +#ifdef NOTYET + /* + * need corresponding includes + */ + zfsdev_fini(); + zvol_fini(); + zfs_fini(); +#endif + spa_fini(); +} + +void +zbio_init(void) +{ + + zbio_event_shutdown = EVENTHANDLER_REGISTER(shutdown_pre_sync, + zbio_shutdown, NULL, EVENTHANDLER_PRI_FIRST); +} + +void +zbio_fini(void) +{ + if (zbio_event_shutdown != NULL) + EVENTHANDLER_DEREGISTER(shutdown_pre_sync, zbio_event_shutdown); +} +#else + +void +zbio_getblk(arc_buf_t *buf) +{ + zbio_buf_hdr_t *hdr = (zbio_buf_hdr_t *)buf->b_hdr; + uint64_t size = 
hdr->b_size; + *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200912030027.nB30RGoI093428>