From owner-svn-src-user@FreeBSD.ORG Fri Jan 1 03:58:22 2010 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 74C0A1065692; Fri, 1 Jan 2010 03:58:22 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 6170D8FC0A; Fri, 1 Jan 2010 03:58:22 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o013wMm8099410; Fri, 1 Jan 2010 03:58:22 GMT (envelope-from kmacy@svn.freebsd.org) Received: (from kmacy@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o013wM8V099409; Fri, 1 Jan 2010 03:58:22 GMT (envelope-from kmacy@svn.freebsd.org) Message-Id: <201001010358.o013wM8V099409@svn.freebsd.org> From: Kip Macy Date: Fri, 1 Jan 2010 03:58:22 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r201359 - in user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . 
sys X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 01 Jan 2010 03:58:22 -0000 Author: kmacy Date: Fri Jan 1 03:58:21 2010 New Revision: 201359 URL: http://svn.freebsd.org/changeset/base/201359 Log: - fix tailq usage so that each tailq usage uses a different tailq entry - decrement page wire count before freeing - re-map buffer kva when swapping out pages - skip over validating reads into non-VMIO buffers - fix zio_cache_valid usage check Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h ============================================================================== --- user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h Thu Dec 31 23:52:19 2009 (r201358) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_bio.h Fri Jan 1 03:58:21 2010 (r201359) @@ -32,6 +32,7 @@ $FreeBSD$ #ifndef _SYS_ZFS_BIO_H #define _SYS_ZFS_BIO_H #include /* vd->vdev_vnode */ +#include /* spa->spa_root_vdev */ #include extern int zfs_page_cache_disable; @@ -57,8 +58,8 @@ static __inline void zio_cache_valid(void *data, uint64_t size, zio_type_t type, vdev_t *vd) { - if ((vd != NULL) && (type == ZIO_TYPE_READ) && - (size & PAGE_MASK) == 0) + if (((vd == NULL) || (vd->vdev_spa->spa_root_vdev == vd)) && + (type == ZIO_TYPE_READ) && (size & PAGE_MASK) == 0) _zio_cache_valid(data, size); } Modified: user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c ============================================================================== --- 
user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c Thu Dec 31 23:52:19 2009 (r201358) +++ user/kmacy/releng_8_fcs_buf_xen/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_bio.c Fri Jan 1 03:58:21 2010 (r201359) @@ -77,6 +77,14 @@ Logic in sync_cache: No work to do + + b_bobufs -> hash table tailqs + b_freelist -> blkno memq + b_cluster.cluster_entry -> temporary list + + + + **************************************************************************/ #include __FBSDID("$FreeBSD$"); @@ -297,7 +305,7 @@ zio_buf_va_insert(buf_t bp) CTR3(KTR_SPARE3, "va_insert(va=%p size=%ld) idx=%ld", va, size, idx); mtx_lock(lock); - TAILQ_INSERT_HEAD(bh, bp, b_freelist); + TAILQ_INSERT_HEAD(bh, bp, b_bobufs); mtx_unlock(lock); } @@ -323,7 +331,7 @@ zio_buf_va_lookup(caddr_t va, uint64_t s lock = BUF_HASH_LOCK(idx); bh = &buf_hash_table.ht_table[idx]; mtx_lock(lock); - TAILQ_FOREACH(bp, bh, b_freelist) + TAILQ_FOREACH(bp, bh, b_bobufs) if (bp->b_data == va) break; mtx_unlock(lock); @@ -353,9 +361,9 @@ zio_buf_va_remove(caddr_t va, uint64_t s CTR3(KTR_SPARE3, "va_remove(va=%p size=%ld) idx=%ld", va, (long)size, idx); mtx_lock(lock); - TAILQ_FOREACH(bp, bh, b_freelist) + TAILQ_FOREACH(bp, bh, b_bobufs) if (bp->b_data == va) { - TAILQ_REMOVE(bh, bp, b_freelist); + TAILQ_REMOVE(bh, bp, b_bobufs); break; } mtx_unlock(lock); @@ -505,12 +513,14 @@ zio_buf_blkno_lookup(zio_spa_state_t sta * This routine may not block. 
*/ static void -zio_buf_blkno_remove(buf_t bp) +zio_buf_blkno_remove_locked(vm_object_t object, buf_t bp) { zio_spa_state_t state; buf_t root; daddr_t blkno, blkno_end; + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + if ((state = bp->b_state) == NULL) return; @@ -526,6 +536,9 @@ zio_buf_blkno_remove(buf_t bp) root->b_right = bp->b_right; } state->zss_blkno_root = root; + /* + * can't use b_bobufs for both memq and hash table :< + */ TAILQ_REMOVE(&state->zss_blkno_memq, bp, b_freelist); /* @@ -535,6 +548,16 @@ zio_buf_blkno_remove(buf_t bp) state->zss_generation++; } +static void +zio_buf_blkno_remove(buf_t bp) +{ + vm_object_t object = zio_buf_get_vm_object(bp); + + VM_OBJECT_LOCK(object); + zio_buf_blkno_remove_locked(object, bp); + VM_OBJECT_UNLOCK(object); +} + static __inline void zio_buf_vm_object_copy(vm_object_t object, buf_t bp, int direction) { @@ -552,7 +575,6 @@ zio_buf_vm_object_copy(vm_object_t objec start = OFF_TO_IDX(byte_offset); end = OFF_TO_IDX(byte_offset + bp->b_bcount); - VM_OBJECT_LOCK(object); if (vm_pages_valid_locked(object, bp->b_blkno, bp->b_bcount) == 0) goto done; @@ -577,7 +599,6 @@ zio_buf_vm_object_copy(vm_object_t objec done: bp->b_npages = 0; - VM_OBJECT_UNLOCK(object); } static void @@ -606,7 +627,6 @@ zio_buf_vm_object_evict(buf_t bp) m = bp->b_pages[i]; vm_pageq_remove(m); } - vm_page_unlock_queues(); /* * remove pages from backing vm_object */ @@ -616,6 +636,7 @@ zio_buf_vm_object_evict(buf_t bp) m->valid = 0; m->flags |= PG_UNMANAGED; } + vm_page_unlock_queues(); } static void @@ -661,26 +682,27 @@ static void zio_buf_evict_overlap(vm_object_t object, daddr_t blkno, int size, zio_spa_state_t state, uint64_t txg, int evict_op) { - buf_t root, tmpbp; + buf_t root, tmpbp, bp_prev; daddr_t blkno_end, tmpblkno, tmpblkno_end; struct cluster_list_head clh; int i, collisions; uint64_t tmptxg; vm_pindex_t start, end; + TAILQ_INIT(&clh); if ((root = state->zss_blkno_root) == NULL) goto done; collisions = 0; + blkno_end = blkno + 
btos(size); root = zio_buf_blkno_splay(blkno, root); - TAILQ_INIT(&clh); if (blkno < root->b_blkno) tmpbp = TAILQ_PREV(root, cluster_list_head, b_freelist); /* * Find all existing buffers that overlap with this range */ - tmpbp = tmpbp != NULL ? tmpbp : root; + bp_prev = tmpbp = tmpbp != NULL ? tmpbp : root; while (tmpbp != NULL && tmpbp->b_blkno < blkno_end) { tmpblkno = tmpbp->b_blkno; tmpblkno_end = tmpblkno + btos(tmpbp->b_bcount); @@ -689,14 +711,15 @@ zio_buf_evict_overlap(vm_object_t object if (((tmpblkno >= blkno) && (tmpblkno < blkno_end)) || (tmpblkno_end > blkno) && (tmpblkno_end <= blkno_end) && ((txg == NO_TXG) || (tmptxg < txg))) { - TAILQ_INSERT_TAIL(&clh, tmpbp, b_freelist); + TAILQ_INSERT_TAIL(&clh, tmpbp, b_cluster.cluster_entry); collisions++; } + bp_prev = tmpbp; tmpbp = TAILQ_NEXT(tmpbp, b_freelist); } while (!TAILQ_EMPTY(&clh)) { tmpbp = TAILQ_FIRST(&clh); - TAILQ_REMOVE(&clh, tmpbp, b_freelist); + TAILQ_REMOVE(&clh, tmpbp, b_cluster.cluster_entry); zio_buf_vm_object_evict(tmpbp); tmpbp->b_flags &= ~B_VMIO; @@ -704,7 +727,7 @@ zio_buf_evict_overlap(vm_object_t object /* * move buffer to the unmanaged tree */ - zio_buf_blkno_remove(tmpbp); + zio_buf_blkno_remove_locked(object, tmpbp); } done: if (!(collisions == 1 && tmpbp->b_blkno == blkno && @@ -717,7 +740,7 @@ done: #ifdef INVARIANTS for (i = 0; i < OFF_TO_IDX(size); i++) { KASSERT(vm_page_lookup(object, start + i) == NULL, - ("found page at %ld blkno %ld ",start + i, blkno)); + ("found page at %ld blkno %lld ",start + i, blkno)); } #endif } @@ -743,6 +766,8 @@ vm_object_reference_pages(vm_object_t ob bp->b_pages[i] = m; } vm_page_unlock_queues(); + pmap_qenter_prot((vm_offset_t)bp->b_saveaddr, bp->b_pages, + bp->b_npages, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXCLUDE); } /* @@ -804,6 +829,9 @@ _zio_getblk_vmio(uint64_t size, int flag newbp = geteblk(size, flags); BUF_KERNPROC(newbp); + KASSERT((newbp->b_flags & B_MALLOC) == 0, + ("geteblk allocated a malloc'd buffer")); + return (newbp); } @@ 
-812,7 +840,7 @@ zio_getblk(uint64_t size, int flags) { buf_t newbp; - if (size & PAGE_MASK) + if (size != 128*1024) newbp = _zio_getblk_malloc(size, flags); else newbp = _zio_getblk_vmio(size, flags); @@ -864,6 +892,7 @@ _zio_sync_cache(spa_t *spa, blkptr_t *bl bp = zio_buf_va_lookup(data, size); if (bp->b_flags & B_MALLOC) { + VM_OBJECT_LOCK(object); zio_buf_evict_overlap(object, blkno, size, state, txg, ZB_EVICT_BUFFERED); if (zio_op == ZIO_TYPE_READ) { @@ -879,9 +908,14 @@ _zio_sync_cache(spa_t *spa, blkptr_t *bl } else { zio_buf_vm_object_copyout(object, bp); } + VM_OBJECT_UNLOCK(object); } else if (bp->b_flags & B_VMIO) { +#ifdef INVARIANTS + VM_OBJECT_LOCK(object); KASSERT(bp == zio_buf_blkno_lookup(state, blkno), ("VMIO buffer not mapped")); + VM_OBJECT_UNLOCK(object); +#endif if (zio_op == ZIO_TYPE_READ && (bp->b_flags & (B_CACHE|B_INVAL)) == B_CACHE) io_bypass = TRUE; } else if ((zio_op == ZIO_TYPE_WRITE) || !vm_pages_valid(object, blkno, size)) { @@ -891,21 +925,28 @@ _zio_sync_cache(spa_t *spa, blkptr_t *bl bp->b_blkno = bp->b_lblkno = blkno; bp->b_flags |= B_VMIO; bp->b_birth = txg; + bp->b_state = state; zio_buf_blkno_insert(bp, state); zio_buf_vm_object_insert(bp, vp, object, zio_op == ZIO_TYPE_WRITE); VM_OBJECT_UNLOCK(object); } else { KASSERT(zio_op == ZIO_TYPE_READ, ("unexpected op %d", zio_op)); + VM_OBJECT_LOCK(object); zio_buf_evict_overlap(object, blkno, size, state, NO_TXG, ZB_EVICT_BUFFERED); bp->b_blkno = bp->b_lblkno = blkno; bp->b_flags |= B_VMIO; bp->b_birth = txg; + bp->b_state = state; zio_buf_blkno_insert(bp, state); - VM_OBJECT_LOCK(object); if (vm_pages_valid_locked(object, blkno, size)) { - for (i = 0; i < bp->b_npages; i++) - vm_page_free(bp->b_pages[i]); + for (i = 0; i < bp->b_npages; i++) { + m = bp->b_pages[i]; + m->wire_count--; + vm_page_free(m); + } + + vm_object_reference_pages(object, bp); } else zio_buf_vm_object_insert(bp, vp, object, FALSE); @@ -922,8 +963,13 @@ _zio_cache_valid(void *data, uint64_t si int i; bp = 
zio_buf_va_lookup(data, size); - for (i = 0; i < bp->b_npages; i++) + if ((bp->b_flags & B_VMIO) == 0) + return; + for (i = 0; i < bp->b_npages; i++) { + KASSERT((bp->b_pages[i]->flags & PG_UNMANAGED) == 0, + ("validating unmanaged page")); bp->b_pages[i]->valid = VM_PAGE_BITS_ALL; + } bp->b_flags &= ~B_INVAL; bp->b_flags |= B_CACHE; }