From: Thomas Moestl <tmm@freebsd.org>
Date: Wed, 20 Mar 2002 18:07:11 -0800 (PST)
Message-Id: <200203210207.g2L27BE62790@freefall.freebsd.org>
Subject: PERFORCE change 8110 for review
To: Perforce Change Reviews <perforce@freebsd.org>

http://people.freebsd.org/~peter/p4db/chv.cgi?CH=8110

Change 8110 by tmm@tmm_sparc64 on 2002/03/20 18:06:36

	Integ VM optimizations and caching fixes from sparc64-tmm.

Affected files ...

... //depot/projects/sparc64/sys/fs/nwfs/nwfs_io.c#5 integrate
... //depot/projects/sparc64/sys/fs/smbfs/smbfs_io.c#4 integrate
... //depot/projects/sparc64/sys/fs/specfs/spec_vnops.c#10 integrate
... //depot/projects/sparc64/sys/kern/imgact_elf.c#17 integrate
... //depot/projects/sparc64/sys/kern/init_main.c#17 integrate
... //depot/projects/sparc64/sys/kern/kern_exec.c#18 integrate
... //depot/projects/sparc64/sys/kern/kern_resource.c#14 integrate
... //depot/projects/sparc64/sys/kern/sys_pipe.c#13 integrate
... //depot/projects/sparc64/sys/kern/sys_process.c#15 integrate
... //depot/projects/sparc64/sys/kern/vfs_bio.c#20 integrate
... //depot/projects/sparc64/sys/kern/vfs_cluster.c#9 integrate
... //depot/projects/sparc64/sys/nfsclient/nfs_bio.c#8 integrate
... //depot/projects/sparc64/sys/nfsclient/nfs_vnops.c#10 integrate
... //depot/projects/sparc64/sys/sparc64/include/param.h#22 integrate
... //depot/projects/sparc64/sys/sparc64/include/pmap.h#28 integrate
... //depot/projects/sparc64/sys/sparc64/include/vmparam.h#16 integrate
... //depot/projects/sparc64/sys/sparc64/sparc64/pmap.c#78 integrate
... //depot/projects/sparc64/sys/sparc64/sparc64/vm_machdep.c#46 integrate
... //depot/projects/sparc64/sys/sys/buf.h#12 integrate
... //depot/projects/sparc64/sys/sys/pipe.h#3 integrate
... //depot/projects/sparc64/sys/vm/pmap.h#7 integrate
... //depot/projects/sparc64/sys/vm/swap_pager.c#9 integrate
... //depot/projects/sparc64/sys/vm/vm.h#5 integrate
... //depot/projects/sparc64/sys/vm/vm_contig.c#6 integrate
... //depot/projects/sparc64/sys/vm/vm_extern.h#7 integrate
... //depot/projects/sparc64/sys/vm/vm_fault.c#9 integrate
... //depot/projects/sparc64/sys/vm/vm_init.c#5 integrate
... //depot/projects/sparc64/sys/vm/vm_kern.c#6 integrate
... //depot/projects/sparc64/sys/vm/vm_kern.h#2 integrate
... //depot/projects/sparc64/sys/vm/vm_map.c#12 integrate
... //depot/projects/sparc64/sys/vm/vm_mmap.c#13 integrate
... //depot/projects/sparc64/sys/vm/vm_object.c#8 integrate
... //depot/projects/sparc64/sys/vm/vm_page.c#12 integrate
... //depot/projects/sparc64/sys/vm/vm_page.h#7 integrate
... //depot/projects/sparc64/sys/vm/vm_pager.c#10 integrate
... //depot/projects/sparc64/sys/vm/vm_pager.h#4 integrate
... //depot/projects/sparc64/sys/vm/vm_unix.c#5 integrate
... //depot/projects/sparc64/sys/vm/vnode_pager.c#11 integrate
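Review context: the caching fixes below appear aimed at virtual-cache
aliasing on sparc64, whose data caches are virtually indexed, by giving
every kernel mapping of a page a virtual address that agrees with the
page's other mappings modulo the cache way size (its "color").  The diff
uses NVCOLORS, VCOLPAD, VCOLBOUND and vm_roundcolor2() without including
their definitions, so the following sketch of the assumed semantics is
illustrative only, not the actual sparc64 implementation:

	/*
	 * Illustrative sketch only; none of these values come from the
	 * mailed diff.  sparc64 uses 8KB pages (PAGE_SHIFT 13); the
	 * number of colors is an assumption.
	 */
	#define NVCOLORS	4			/* assumed */
	#define VCOLBOUND	(NVCOLORS * PAGE_SIZE)	/* color period */
	#define VCOLPAD		(VCOLBOUND - PAGE_SIZE)	/* alloc slack */

	/* Color of a virtual address: its page index modulo NVCOLORS. */
	static __inline int
	vcolor(vm_offset_t va)
	{

		return ((va >> PAGE_SHIFT) % NVCOLORS);
	}

	/*
	 * Assumed behavior of vm_roundcolor2(base, coloff, bound):
	 * round base up to the next address congruent to coloff modulo
	 * bound.  The result always lies in [base, base + bound), which
	 * is why allocations below are padded by VCOLPAD.
	 */
	static __inline vm_offset_t
	vm_roundcolor2_sketch(vm_offset_t base, vm_offset_t coloff,
	    vm_offset_t bound)
	{
		vm_offset_t va;

		va = (base & ~(bound - 1)) + coloff;
		return (va < base ? va + bound : va);
	}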
Differences ...

==== //depot/projects/sparc64/sys/fs/nwfs/nwfs_io.c#5 (text+ko) ====

@@ -421,8 +421,9 @@
 
 	bp = getpbuf(&nwfs_pbuf_freecnt);
 	npages = btoc(count);
+	mappbuf(bp, vp->v_object, IDX_TO_OFF(pages[0]->pindex), pages,
+	    npages, BIO_READ);
 	kva = (vm_offset_t) bp->b_data;
-	pmap_qenter(kva, pages, npages);
 
 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
@@ -435,7 +436,7 @@
 	uio.uio_td = td;
 
 	error = ncp_read(NWFSTOCONN(nmp), &np->n_fh, &uio,cred);
-	pmap_qremove(kva, npages);
+	unmappbuf(bp, pages, 0, npages, BIO_READ);
 
 	relpbuf(bp, &nwfs_pbuf_freecnt);
 
@@ -458,6 +459,7 @@
 		m->flags &= ~PG_ZERO;
 
 		if (nextoff <= size) {
+			pmap_page_validated(m);
 			m->valid = VM_PAGE_BITS_ALL;
 			m->dirty = 0;
 		} else {
@@ -548,8 +550,9 @@
 	}
 
 	bp = getpbuf(&nwfs_pbuf_freecnt);
+	mappbuf(bp, vp->v_object, IDX_TO_OFF(pages[0]->pindex), pages,
+	    npages, BIO_WRITE);
 	kva = (vm_offset_t) bp->b_data;
-	pmap_qenter(kva, pages, npages);
 
 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
@@ -566,7 +569,7 @@
 /*	VOP_CLOSE(vp, FWRITE, cred, td);*/
 	NCPVNDEBUG("paged write done: %d\n", error);
 
-	pmap_qremove(kva, npages);
+	unmappbuf(bp, pages, 0, npages, BIO_WRITE);
 	relpbuf(bp, &nwfs_pbuf_freecnt);
 
 	if (!error) {

==== //depot/projects/sparc64/sys/fs/smbfs/smbfs_io.c#4 (text+ko) ====

@@ -447,8 +447,9 @@
 	bp = getpbuf();
 #endif
 	npages = btoc(count);
+	mappbuf(bp, vp->v_object, IDX_TO_OFF(pages[0]->pindex), pages,
+	    npages, BIO_READ);
 	kva = (vm_offset_t) bp->b_data;
-	pmap_qenter(kva, pages, npages);
 
 	cnt.v_vnodein++;
 	cnt.v_vnodepgsin += count;
@@ -463,7 +464,7 @@
 	uio.uio_td = td;
 
 	error = smb_read(smp->sm_share, np->n_fid, &uio, &scred);
-	pmap_qremove(kva, npages);
+	unmappbuf(bp, pages, 0, npages, BIO_READ);
 
 #if __FreeBSD_version >= 400000
 	relpbuf(bp, &smbfs_pbuf_freecnt);
@@ -490,6 +491,7 @@
 		m->flags &= ~PG_ZERO;
 
 		if (nextoff <= size) {
+			pmap_page_validated(m);
 			m->valid = VM_PAGE_BITS_ALL;
 			vm_page_undirty(m);
 		} else {
@@ -585,8 +587,9 @@
 #else
 	bp = getpbuf();
 #endif
+	mappbuf(bp, vp->v_object, IDX_TO_OFF(pages[0]->pindex), pages,
+	    npages, BIO_WRITE);
 	kva = (vm_offset_t) bp->b_data;
-	pmap_qenter(kva, pages, npages);
 
 	cnt.v_vnodeout++;
 	cnt.v_vnodepgsout += count;
@@ -606,7 +609,7 @@
 /*	VOP_CLOSE(vp, FWRITE, cred, td);*/
 	SMBVDEBUG("paged write done: %d\n", error);
 
-	pmap_qremove(kva, npages);
+	unmappbuf(bp, pages, 0, npages, BIO_WRITE);
 #if __FreeBSD_version >= 400000
 	relpbuf(bp, &smbfs_pbuf_freecnt);
 #else
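Both pagers above switch from pmap_qenter()/pmap_qremove() to
mappbuf()/unmappbuf().  The definitions are not in the mailed part of the
diff; judging from the call sites (and from vmiomapbuf() in vfs_bio.c
further down), the intended calling pattern is roughly the sketch below,
where the cache-maintenance step is an assumption:

	/*
	 * Sketch of the pager I/O pattern used above; the mappbuf() and
	 * unmappbuf() signatures are inferred from call sites only.
	 */
	bp = getpbuf(&pbuf_freecnt);
	/* Map at a kva matching the pages' preferred virtual color. */
	mappbuf(bp, vp->v_object, IDX_TO_OFF(pages[0]->pindex), pages,
	    npages, BIO_READ);
	kva = (vm_offset_t)bp->b_data;	/* may have moved within the pbuf */

	/* ... transfer count bytes through kva ... */

	/* Unmap, presumably flushing/invalidating lines for BIO_READ. */
	unmappbuf(bp, pages, 0, npages, BIO_READ);
	relpbuf(bp, &pbuf_freecnt);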
==== //depot/projects/sparc64/sys/fs/specfs/spec_vnops.c#10 (text+ko) ====

@@ -668,6 +668,7 @@
 	daddr_t blkno;
 	struct buf *bp;
 	vm_page_t m;
+	vm_object_t obj;
 	vm_ooffset_t offset;
 	int toff, nextoff, nread;
 	struct vnode *vp = ap->a_vp;
@@ -718,12 +719,14 @@
 	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
 
 	bp = getpbuf(NULL);
-	kva = (vm_offset_t)bp->b_data;
 
 	/*
-	 * Map the pages to be read into the kva.
+	 * Map the pages to be read into the kva.  The object may be NULL.
 	 */
-	pmap_qenter(kva, ap->a_m, pcount);
+	VOP_GETVOBJECT(vp, &obj);
+	mappbuf(bp, obj, IDX_TO_OFF(ap->a_m[0]->pindex), ap->a_m,
+	    pcount, BIO_READ);
+	kva = (vm_offset_t)bp->b_data;
 
 	/* Build a minimal buffer header. */
 	bp->b_iocmd = BIO_READ;
@@ -770,7 +773,7 @@
 		bzero((caddr_t)kva + nread, ap->a_count - nread);
 	}
 
-	pmap_qremove(kva, pcount);
+	unmappbuf(bp, ap->a_m, pcount, BIO_READ);
 
 	gotreqpage = 0;
@@ -781,6 +784,7 @@
 		m->flags &= ~PG_ZERO;
 
 		if (nextoff <= nread) {
+			pmap_page_validated(m);
 			m->valid = VM_PAGE_BITS_ALL;
 			vm_page_undirty(m);
 		} else if (toff < nread) {

==== //depot/projects/sparc64/sys/kern/imgact_elf.c#17 (text+ko) ====

@@ -194,6 +194,8 @@
 	size_t copy_len;
 	vm_offset_t file_addr;
 	vm_offset_t data_buf = 0;
+	vm_offset_t color;
+	vm_size_t modulus;
 
 	GIANT_REQUIRED;
 
 	error = 0;
@@ -213,6 +215,20 @@
 		return (ENOEXEC);
 	}
 
+#ifdef VM_EOE
+	/*
+	 * XXX: this is a bit h0h0: if the segment we are mapping is writable
+	 * and executable, use VM_PROT_LAZY_EXECUTE. Since it is writable, it
+	 * is likely to contain non-executable data too, so that is
+	 * advantageous. In practice, this is the case for data/bss segments of
+	 * dynamic executables, which are usually executable because they
+	 * contain PLTs.
+	 */
+	if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
+	    (VM_PROT_WRITE | VM_PROT_EXECUTE))
+		prot = (prot & ~VM_PROT_EXECUTE) | VM_PROT_LAZY_EXECUTE;
+#endif
+
 	map_addr = trunc_page((vm_offset_t)vmaddr);
 	file_addr = trunc_page(offset);
 
@@ -228,6 +244,12 @@
 	map_len = round_page(offset+filsz) - file_addr;
 
 	if (map_len != 0) {
+		/*
+		 * Notify that we are going to map this section. Since the
+		 * address is fixed, the color and modulus that are returned
+		 * must be ignored.
+		 */
+		pmap_addr_color(object, map_addr, file_addr, &modulus);
 		vm_object_reference(object);
 		vm_map_lock(&vmspace->vm_map);
 		rv = vm_map_insert(&vmspace->vm_map,
@@ -266,7 +288,8 @@
 		vm_map_lock(&vmspace->vm_map);
 		rv = vm_map_insert(&vmspace->vm_map, NULL, 0,
 		    map_addr, map_addr + map_len,
-		    VM_PROT_ALL, VM_PROT_ALL, 0);
+		    VM_PROT_READ | VM_PROT_WRITE,
+		    VM_PROT_ALL, 0);
 		vm_map_unlock(&vmspace->vm_map);
 		if (rv != KERN_SUCCESS) {
 			return EINVAL;
@@ -274,16 +297,24 @@
 	}
 
 	if (copy_len != 0) {
+		/*
+		 * The virtual address of this is a bit bogus, but it does not
+		 * really matter in this case, since the correct virtual address
+		 * got passed in above, if not before.
+		 */
+		color = pmap_addr_color(object, trunc_page(offset + filsz),
+		    trunc_page(offset + filsz), &modulus);
 		vm_object_reference(object);
-		rv = vm_map_find(exec_map,
-		    object,
-		    trunc_page(offset + filsz),
-		    &data_buf,
-		    PAGE_SIZE,
-		    TRUE,
-		    VM_PROT_READ,
-		    VM_PROT_ALL,
-		    MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
+		rv = vm_map_find_color(exec_map,
+		    object,
+		    trunc_page(offset + filsz),
+		    &data_buf,
+		    PAGE_SIZE,
+		    TRUE,
+		    VM_PROT_READ,
+		    VM_PROT_ALL,
+		    MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL,
+		    color, modulus);
 		if (rv != KERN_SUCCESS) {
 			vm_object_deallocate(object);
 			return EINVAL;
@@ -355,14 +386,7 @@
 	imgp->uap = NULL;
 	imgp->attr = attr;
 	imgp->firstpage = NULL;
-	imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE);
-	if (imgp->image_header == NULL) {
-		nd->ni_vp = NULL;
-		error = ENOMEM;
-		goto fail;
-	}
-
 	/* XXXKSE */
 	NDINIT(nd, LOOKUP, LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, curthread);
@@ -462,9 +486,6 @@
 fail:
 	if (imgp->firstpage)
 		exec_unmap_first_page(imgp);
-	if (imgp->image_header)
-		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
-		    PAGE_SIZE);
 
 	if (nd->ni_vp)
 		vrele(nd->ni_vp);
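The two pmap_addr_color() calls above suggest a contract of "given an
object and offset, report the virtual color that mappings of this range
should use, and the modulus to apply"; for the fixed-address mapping the
return value is explicitly ignored and the call only serves as
notification.  A hedged sketch of that contract, not the real sparc64
implementation:

	/*
	 * Assumed semantics of pmap_addr_color(); its body is not part
	 * of this mail.
	 */
	vm_offset_t
	pmap_addr_color_sketch(vm_object_t object, vm_offset_t va,
	    vm_ooffset_t offset, vm_size_t *modulus)
	{

		/*
		 * Every mapping of a given object page must agree modulo
		 * VCOLBOUND to avoid illegal aliases, so derive the color
		 * offset from the object offset alone; va would only be
		 * used to record an already-fixed mapping.
		 */
		*modulus = VCOLBOUND;
		return ((vm_offset_t)offset & (VCOLBOUND - 1));
	}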
==== //depot/projects/sparc64/sys/kern/init_main.c#17 (text+ko) ====

@@ -521,7 +521,7 @@
 	 */
 	addr = trunc_page(USRSTACK - PAGE_SIZE);
 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
-	    FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
+	    FALSE, VM_PROT_STACK, VM_PROT_ALL, 0) != 0)
		panic("init: couldn't allocate argument space");
 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
 	p->p_vmspace->vm_ssize = 1;

==== //depot/projects/sparc64/sys/kern/kern_exec.c#18 (text+ko) ====

@@ -171,14 +171,13 @@
 	 * Allocate temporary demand zeroed space for argument and
 	 * environment strings
 	 */
-	imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX + PAGE_SIZE);
+	imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX);
 	if (imgp->stringbase == NULL) {
 		error = ENOMEM;
 		goto exec_fail;
 	}
 	imgp->stringp = imgp->stringbase;
 	imgp->stringspace = ARG_MAX;
-	imgp->image_header = imgp->stringbase + ARG_MAX;
 
 	/*
 	 * Translate the file name. namei() returns a vnode pointer
@@ -193,7 +192,7 @@
 	error = namei(ndp);
 	if (error) {
 		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
-		    ARG_MAX + PAGE_SIZE);
+		    ARG_MAX);
 		goto exec_fail;
 	}
@@ -471,7 +470,7 @@
 
 	if (imgp->stringbase != NULL)
 		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
-		    ARG_MAX + PAGE_SIZE);
+		    ARG_MAX);
 
 	if (imgp->vp) {
 		NDFREE(ndp, NDF_ONLY_PNBUF);
@@ -502,7 +501,7 @@
 exec_map_first_page(imgp)
 	struct image_params *imgp;
 {
-	int rv, i;
+	int rv, i, col;
 	int initial_pagein;
 	vm_page_t ma[VM_INITIAL_PAGEIN];
 	vm_object_t object;
@@ -548,10 +547,20 @@
 		}
 	}
 
+	col = vm_mdpg_pref_vcol(object, 0, ma, 1);
+	imgp->image_header = (char *)VM_MDPG_KMEM_ALLOC(exec_map, PAGE_SIZE,
+	    col);
+	if (imgp->image_header == NULL) {
+		vm_page_wakeup(ma[0]);
+		return ENOMEM;
+	}
+	pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
+	/* Reading from the page has the same semantics as a write operation. */
+	vm_mdpg_start_io((vm_offset_t)imgp->image_header, ma, 1, VM_MDPG_READ);
+
 	vm_page_wire(ma[0]);
 	vm_page_wakeup(ma[0]);
 
-	pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
 	imgp->firstpage = ma[0];
 
 	return 0;
@@ -564,9 +573,13 @@
 	GIANT_REQUIRED;
 
 	if (imgp->firstpage) {
+		vm_mdpg_done_io((vm_offset_t)imgp->image_header,
+		    &imgp->firstpage, 0, 1, VM_MDPG_READ);
 		pmap_qremove((vm_offset_t)imgp->image_header, 1);
 		vm_page_unwire(imgp->firstpage, 1);
 		imgp->firstpage = NULL;
+		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
+		    PAGE_SIZE);
 	}
 }
@@ -613,7 +626,7 @@
 
 	/* Allocate a new stack */
 	error = vm_map_stack(&vmspace->vm_map, stack_addr, (vm_size_t)maxssiz,
-	    VM_PROT_ALL, VM_PROT_ALL, 0);
+	    VM_PROT_STACK, VM_PROT_ALL, 0);
 	if (error)
 		return (error);
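exec_map_first_page()/exec_unmap_first_page() above show the full
bracketing protocol for a colored kernel mapping: pick the pages'
preferred color, allocate matching kva, pmap_qenter(), then
vm_mdpg_start_io() before touching the data and vm_mdpg_done_io() before
tearing the mapping down.  Condensed, with the helpers taken on faith
from the diff and error handling omitted:

	/* Condensed from the hunks above; not a drop-in implementation. */
	col = vm_mdpg_pref_vcol(object, 0, ma, 1);
	header = (char *)VM_MDPG_KMEM_ALLOC(exec_map, PAGE_SIZE, col);
	pmap_qenter((vm_offset_t)header, ma, 1);
	vm_mdpg_start_io((vm_offset_t)header, ma, 1, VM_MDPG_READ);

	/* ... parse the image header through header ... */

	vm_mdpg_done_io((vm_offset_t)header, &ma[0], 0, 1, VM_MDPG_READ);
	pmap_qremove((vm_offset_t)header, 1);
	kmem_free_wakeup(exec_map, (vm_offset_t)header, PAGE_SIZE);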
==== //depot/projects/sparc64/sys/kern/kern_resource.c#14 (text+ko) ====

@@ -564,7 +564,7 @@
 		vm_prot_t prot;
 
 		if (limp->rlim_cur > alimp->rlim_cur) {
-			prot = VM_PROT_ALL;
+			prot = VM_PROT_STACK;
 			size = limp->rlim_cur - alimp->rlim_cur;
 			addr = USRSTACK - limp->rlim_cur;
 		} else {

==== //depot/projects/sparc64/sys/kern/sys_pipe.c#13 (text+ko) ====

@@ -307,7 +307,7 @@
 	 */
 	error = vm_map_find(kernel_map, object, 0,
 	    (vm_offset_t *) &buffer, size, 1,
-	    VM_PROT_ALL, VM_PROT_ALL, 0);
+	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, 0);
 
 	if (error != KERN_SUCCESS) {
 		vm_object_deallocate(object);
@@ -345,7 +345,7 @@
 	/* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */
 	cpipe->pipe_buffer.object = NULL;
 #ifndef PIPE_NODIRECT
-	cpipe->pipe_map.kva = NULL;
+	cpipe->pipe_map.bkva = cpipe->pipe_map.kva = NULL;
 #endif
 	/*
 	 * protect so pipeclose() doesn't follow a junk pointer
@@ -361,7 +361,7 @@
 	 * pipe data structure initializations to support direct pipe I/O
 	 */
 	cpipe->pipe_map.cnt = 0;
-	cpipe->pipe_map.kva = 0;
+	cpipe->pipe_map.bkva = cpipe->pipe_map.kva = 0;
 	cpipe->pipe_map.pos = 0;
 	cpipe->pipe_map.npages = 0;
 	/* cpipe->pipe_map.ms[] = invalid */
@@ -607,8 +607,8 @@
 	vm_page_t m;
 	boolean_t wired;
 	u_int size;
-	int i;
-	vm_offset_t addr, endaddr;
+	int i, col;
+	vm_offset_t addr, endaddr, kva;
 
 	GIANT_REQUIRED;
 	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
@@ -648,18 +648,32 @@
 
 	/*
 	 * and map the buffer
+	 * The range may span multiple objects, so don't bother to find out which object
+	 * to pass to vm_mdpg_pref_vcol(); let it decide based on page colors.
	 */
-	if (wpipe->pipe_map.kva == 0) {
+	if (wpipe->pipe_map.bkva == 0) {
		/*
		 * We need to allocate space for an extra page because the
		 * address range might (will) span pages at times.
		 */
-		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
-			wpipe->pipe_buffer.size + PAGE_SIZE);
+		wpipe->pipe_map.bkva = kmem_alloc_pageable(kernel_map,
+			wpipe->pipe_buffer.size + PAGE_SIZE + VCOLPAD);
		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
	}
-	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
-		wpipe->pipe_map.npages);
+	col = vm_mdpg_pref_vcol(NULL, 0, wpipe->pipe_map.ms,
+	    wpipe->pipe_map.npages);
+	if (col != -1) {
+		kva = vm_roundcolor2(wpipe->pipe_map.bkva, col << PAGE_SHIFT,
+		    VCOLBOUND);
+	} else
+		kva = wpipe->pipe_map.bkva;
+
+	/* XXX */
+	for (i = 0; i < wpipe->pipe_map.npages; i++) {
+		pmap_enter(kernel_pmap, kva + (vm_offset_t)i * PAGE_SIZE,
+		    wpipe->pipe_map.ms[i], VM_PROT_READ, 1);
+	}
+	wpipe->pipe_map.kva = kva;
 
 	/*
	 * and update the uio data
@@ -687,11 +701,13 @@
 	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
 
 	if (wpipe->pipe_map.kva) {
-		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
+		pmap_remove(kernel_pmap, wpipe->pipe_map.kva,
+		    wpipe->pipe_map.kva + (vm_offset_t)wpipe->pipe_map.npages *
+		    PAGE_SIZE);
 
 		if (amountpipekva > MAXPIPEKVA) {
-			vm_offset_t kva = wpipe->pipe_map.kva;
-			wpipe->pipe_map.kva = 0;
+			vm_offset_t kva = wpipe->pipe_map.bkva;
+			wpipe->pipe_map.bkva = wpipe->pipe_map.kva = 0;
			kmem_free(kernel_map, kva,
				wpipe->pipe_buffer.size + PAGE_SIZE);
			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
@@ -912,7 +928,7 @@
 	 */
 	if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
 	    (fp->f_flag & FNONBLOCK) == 0 &&
-	    (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) &&
+	    (wpipe->pipe_map.bkva || (amountpipekva < LIMITPIPEKVA)) &&
 	    (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) {
		error = pipe_direct_write( wpipe, uio);
		if (error)
@@ -1278,13 +1294,13 @@
 		cpipe->pipe_buffer.buffer = NULL;
 	}
 #ifndef PIPE_NODIRECT
-	if (cpipe->pipe_map.kva != NULL) {
+	if (cpipe->pipe_map.bkva != NULL) {
 		amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
 		kmem_free(kernel_map,
-			cpipe->pipe_map.kva,
+			cpipe->pipe_map.bkva,
			cpipe->pipe_buffer.size + PAGE_SIZE);
 		cpipe->pipe_map.cnt = 0;
-		cpipe->pipe_map.kva = 0;
+		cpipe->pipe_map.bkva = cpipe->pipe_map.kva = 0;
 		cpipe->pipe_map.pos = 0;
 		cpipe->pipe_map.npages = 0;
 	}
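pipe_build_write_buffer() above passes a NULL object to
vm_mdpg_pref_vcol() and falls back to the uncolored base address when it
returns -1.  A plausible reading of that helper, under the same
assumptions as the first sketch (the per-page color field is
hypothetical):

	/*
	 * Sketch: report the common preferred color of a run of pages,
	 * or -1 if they disagree and no single kva color can help.
	 */
	int
	vm_mdpg_pref_vcol_sketch(vm_page_t *ma, int count)
	{
		int col, i;

		if (ma == NULL || count == 0)
			return (-1);
		col = ma[0]->md_color;		/* hypothetical field */
		for (i = 1; i < count; i++) {
			/* Consecutive vas step through colors in order. */
			if (ma[i]->md_color != (col + i) % NVCOLORS)
				return (-1);
		}
		return (col);
	}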
==== //depot/projects/sparc64/sys/kern/sys_process.c#15 (text+ko) ====

@@ -151,8 +151,9 @@
 	vm_object_t object = NULL;
 	vm_offset_t pageno = 0;		/* page number */
 	vm_prot_t reqprot;
+	vm_offset_t bkva;
 	vm_offset_t kva;
-	int error, writing;
+	int error, writing, col;
 
 	GIANT_REQUIRED;
 
@@ -176,7 +177,7 @@
 	reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) :
 	    VM_PROT_READ;
 
-	kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
+	bkva = kmem_alloc_pageable(kernel_map, PAGE_SIZE + VCOLPAD);
 
 	/*
 	 * Only map in one page at a time. We don't have to, but it
@@ -278,14 +279,19 @@
 		vm_object_reference(object);
 		vm_map_lookup_done(tmap, out_entry);
 
-		pmap_qenter(kva, &m, 1);
+		col = vm_mdpg_pref_vcol(object, IDX_TO_OFF(m->pindex), &m, 1);
+		if (col != -1)
+			kva = vm_roundcolor2(bkva, col << PAGE_SHIFT, VCOLBOUND);
+		else
+			kva = bkva;
+		pmap_enter(kernel_pmap, kva, m, VM_PROT_READ | VM_PROT_WRITE, 1);
 
 		/*
 		 * Now do the i/o move.
 		 */
		error = uiomove((caddr_t)(kva + page_offset), len, uio);
 
-		pmap_qremove(kva, 1);
+		pmap_remove(kernel_pmap, kva, kva + PAGE_SIZE);
 
 		/*
 		 * release the page and the object
@@ -300,7 +306,7 @@
 	if (object)
 		vm_object_deallocate(object);
 
-	kmem_free(kernel_map, kva, PAGE_SIZE);
+	kmem_free(kernel_map, bkva, PAGE_SIZE + VCOLPAD);
 
 	vmspace_free(vm);
 	return (error);
 }
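Under the assumptions of the first sketch (NVCOLORS = 4 and 8KB pages,
so VCOLBOUND = 32KB and VCOLPAD = 24KB), the PAGE_SIZE + VCOLPAD
allocation in proc_rwmem() above is exactly one color period, so it
always contains a page-aligned address of every color and the rounding
can never overrun it:

	/*
	 * Worked example (assumed NVCOLORS = 4, PAGE_SIZE = 0x2000):
	 * bkva = 0x12000 has color 1.
	 *   color 0 -> vm_roundcolor2(bkva, 0x0000, 0x8000) = 0x18000
	 *   color 1 -> vm_roundcolor2(bkva, 0x2000, 0x8000) = 0x12000
	 *   color 2 -> vm_roundcolor2(bkva, 0x4000, 0x8000) = 0x14000
	 *   color 3 -> vm_roundcolor2(bkva, 0x6000, 0x8000) = 0x16000
	 * The largest result plus PAGE_SIZE is 0x1A000, exactly the end
	 * of the PAGE_SIZE + VCOLPAD allocation, so each mapped page
	 * fits inside the allocation for every possible color.
	 */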
==== //depot/projects/sparc64/sys/kern/vfs_bio.c#20 (text+ko) ====

@@ -232,6 +232,9 @@
 #define VFS_BIO_NEED_FREE	0x04	/* wait for free bufs, hi hysteresis */
 #define VFS_BIO_NEED_BUFSPACE	0x08	/* wait for buf space, lo hysteresis */
 
+/* Last used buffer map index. */
+static int buf_map_idx;
+
 /*
  * Buffer hash table code.  Note that the logical block scans linearly, which
  * gives us some L1 cache locality.
@@ -588,12 +591,15 @@
 static void
 bfreekva(struct buf * bp)
 {
+	int col;
+
 	GIANT_REQUIRED;
 
 	if (bp->b_kvasize) {
 		++buffreekvacnt;
 		bufspace -= bp->b_kvasize;
-		vm_map_delete(buffer_map,
+		col = ((vm_offset_t) bp->b_kvabase >> PAGE_SHIFT) % NVCOLORS;
+		vm_map_delete(buffer_map[col],
		    (vm_offset_t) bp->b_kvabase,
		    (vm_offset_t) bp->b_kvabase + bp->b_kvasize
		);
@@ -676,6 +682,8 @@
 
 	*bpp = bp = getblk(vp, blkno, size, 0, 0);
 
+	if (bp->b_flags & B_VMIO)
+		vmiobufstart(bp, bp->b_pages, bp->b_npages, BIO_READ);
 	/* if not found in cache, do some I/O */
 	if ((bp->b_flags & B_CACHE) == 0) {
 		if (curthread != PCPU_GET(idlethread))
@@ -686,8 +694,14 @@
 		if (bp->b_rcred == NOCRED && cred != NOCRED)
 			bp->b_rcred = crhold(cred);
 		vfs_busy_pages(bp, 0);
+		if (bp->b_flags & B_VMIO)
+			vmiobufstart(bp, bp->b_pages, bp->b_npages, BIO_READ);
 		VOP_STRATEGY(vp, bp);
 		++readwait;
+	} else if (bp->b_flags & B_VMIO) {
+		/* No i/o needed, but cache flushing needs to be done anyway. */
+		vmiobufstart(bp, bp->b_pages, bp->b_npages, BIO_READ);
+		vmiobufdone(bp, bp->b_pages, 0, bp->b_npages, BIO_READ);
 	}
 
 	for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
@@ -705,6 +719,10 @@
 			if (rabp->b_rcred == NOCRED && cred != NOCRED)
 				rabp->b_rcred = crhold(cred);
 			vfs_busy_pages(rabp, 0);
+			if (rabp->b_flags & B_VMIO) {
+				vmiobufstart(rabp, rabp->b_pages,
+				    rabp->b_npages, BIO_READ);
+			}
 			BUF_KERNPROC(rabp);
 			VOP_STRATEGY(vp, rabp);
 		} else {
@@ -821,6 +839,8 @@
 	bp->b_vp->v_numoutput++;
 	vfs_busy_pages(bp, 1);
+	if (bp->b_flags & B_VMIO)
+		vmiobufstart(bp, bp->b_pages, bp->b_npages, BIO_WRITE);
 
 	/*
 	 * Normal bwrites pipeline writes
@@ -1629,12 +1649,13 @@
  */
 
 static struct buf *
-getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
+getnewbuf(int slpflag, int slptimeo, int size, int maxsize, int prefcol)
 {
 	struct buf *bp;
 	struct buf *nbp;
 	int defrag = 0;
 	int nqindex;
+	int col, scol, rv;
 	static int flushingbufs;
 
 	GIANT_REQUIRED;
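buffer_map becomes an array of NVCOLORS maps, and the getnewbuf() hunk
below walks them starting at the preferred (or round-robin) color.  The
allocation strategy, extracted into a standalone sketch for review;
vm_map_findspace_color() is from the diff, while treating any value
other than KERN_SUCCESS as "map full" is an assumption:

	static int
	find_buffer_space(vm_size_t maxsize, int prefcol, vm_offset_t *addrp)
	{
		static int next_col;	/* like buf_map_idx above */
		int col, scol;

		if (prefcol == -1) {
			/* No preference: spread the load round-robin. */
			scol = next_col;
			next_col = (next_col + 1) % NVCOLORS;
		} else
			scol = prefcol;
		col = scol;
		do {
			/* Ask for space whose offset matches the color. */
			if (vm_map_findspace_color(buffer_map[col],
			    vm_map_min(buffer_map[col]), maxsize, addrp,
			    (vm_offset_t)col << PAGE_SHIFT, VCOLBOUND) ==
			    KERN_SUCCESS)
				return (col);	/* insert into this map */
			col = (col + 1) % NVCOLORS;
		} while (col != scol);
		return (-1);		/* caller must defragment */
	}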
@@ -1881,8 +1902,27 @@
 
 		bfreekva(bp);
 
-		if (vm_map_findspace(buffer_map,
-		    vm_map_min(buffer_map), maxsize, &addr)) {
+		/*
+		 * Try to use the preferred color, but if the map
+		 * is full, cycle through all maps to find space.  If
+		 * no color is specified, use the maps in a round-robin
+		 * fashion.
+		 */
+		if (prefcol == -1) {
+			scol = buf_map_idx;
+			buf_map_idx = (buf_map_idx + 1) % NVCOLORS;
+		} else
+			scol = prefcol;
+		col = scol;
+		do {
+			rv = vm_map_findspace_color(buffer_map[col],
+			    vm_map_min(buffer_map[col]), maxsize, &addr,
+			    (vm_offset_t)col << PAGE_SHIFT, VCOLBOUND);
+			if (rv == KERN_SUCCESS)
+				break;
+			col = (col + 1) % NVCOLORS;
+		} while (col != scol);
+		if (rv != KERN_SUCCESS) {
 			/*
 			 * Uh oh.  Buffer map is too fragmented.  We
 			 * must defragment the map.
@@ -1894,7 +1934,7 @@
 			goto restart;
 		}
 		if (addr) {
-			vm_map_insert(buffer_map, NULL, 0,
+			vm_map_insert(buffer_map[col], NULL, 0,
			    addr, addr + maxsize,
			    VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
@@ -2030,6 +2070,17 @@
 				bp = TAILQ_FIRST(&bufqueues[QUEUE_DIRTY]);
 				continue;
 			}
+			/*
+			 * Turn off background writing, since it can lead to a
+			 * deadlock when bwrite() is trying to allocate a buffer
+			 * to execute a background write in a buffer shortage.
+			 * That is fatal if we haven't reached hidirtybuffers
+			 * yet, but there are no usable buffers on the queues
+			 * anyway (due to buffers not being on any queue, and
+			 * not reaching lodirtybuffers when those buffers get
+			 * requeued).
+			 */
+			bp->b_xflags &= ~BX_BKGRDWRITE;
 			vfs_bio_awrite(bp);
 			++r;
 			break;
@@ -2359,7 +2410,8 @@
 	 * returned by getnewbuf() is locked.  Note that the returned
 	 * buffer is also considered valid (not marked B_INVAL).
 	 */
-	int bsize, maxsize, vmio;
+	int bsize, maxsize, vmio, col;
+	vm_object_t obj;
 	off_t offset;
 
 	if (vn_isdisk(vp, NULL))
@@ -2372,11 +2424,16 @@
 		bsize = size;
 
 	offset = (off_t)blkno * bsize;
-	vmio = (VOP_GETVOBJECT(vp, NULL) == 0) && (vp->v_flag & VOBJBUF);
+	vmio = (VOP_GETVOBJECT(vp, &obj) == 0) && (vp->v_flag & VOBJBUF);
 	maxsize = vmio ? size + (offset & PAGE_MASK) : size;
 	maxsize = imax(maxsize, bsize);
 
-	if ((bp = getnewbuf(slpflag, slptimeo, size, maxsize)) == NULL) {
+	if (vmio) {
+		col = vm_mdpg_pref_vcol(obj, offset, NULL,
+		    (maxsize + PAGE_MASK) >> PAGE_SHIFT);
+	} else
+		col = -1;
+	if ((bp = getnewbuf(slpflag, slptimeo, size, maxsize, col)) == NULL) {
		if (slpflag || slptimeo) {
			splx(s);
			return NULL;
@@ -2451,7 +2508,7 @@
 	maxsize = (size + BKVAMASK) & ~BKVAMASK;
 
 	s = splbio();
-	while ((bp = getnewbuf(0, 0, size, maxsize)) == 0);
+	while ((bp = getnewbuf(0, 0, size, maxsize, -1)) == 0);
 	splx(s);
 	allocbuf(bp, size);
 	bp->b_flags |= B_INVAL;	/* b_dep cleared by getnewbuf() */
@@ -2752,7 +2809,7 @@
 			bp->b_pages,
 			bp->b_npages
 		);
-		
+
 		bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
 		    (vm_offset_t)(bp->b_offset & PAGE_MASK));
 	}
@@ -2827,7 +2884,7 @@
 void
 bufdone(struct buf *bp)
 {
-	int s, error;
+	int s, error, resid;
 	void	(*biodone)(struct buf *);
 
 	GIANT_REQUIRED;
@@ -2899,22 +2956,8 @@
 		}
 #endif
 
-		/*
-		 * Set B_CACHE if the op was a normal read and no error
-		 * occured.  B_CACHE is set for writes in the b*write()
-		 * routines.
-		 */
 		iosize = bp->b_bcount - bp->b_resid;
-		if (bp->b_iocmd == BIO_READ &&
-		    !(bp->b_flags & (B_INVAL|B_NOCACHE)) &&
-		    !(bp->b_ioflags & BIO_ERROR)) {
-			bp->b_flags |= B_CACHE;
-		}
-
 		for (i = 0; i < bp->b_npages; i++) {
-			int bogusflag = 0;
-			int resid;
-
 			resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff;
 			if (resid > iosize)
 				resid = iosize;
@@ -2924,13 +2967,43 @@
 			 */
 			m = bp->b_pages[i];
 			if (m == bogus_page) {
-				bogusflag = 1;
 				m = vm_page_lookup(obj, OFF_TO_IDX(foff));
 				if (m == NULL)
 					panic("biodone: page disappeared!");
 				bp->b_pages[i] = m;
 				pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
 				    bp->b_pages, bp->b_npages);
+			} else {
+				/*
+				 * In the write case, the valid and clean bits
+				 * are already changed correctly (see
+				 * bdwrite()), so we only need to do this here
+				 * in the read case.
+				 */
+				if ((bp->b_iocmd == BIO_READ) && resid > 0)
+					vfs_page_set_valid(bp, foff, i, m);
 			}
+			vm_page_flag_clear(m, PG_ZERO);
+			foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
+		}
+		vmiobufdone(bp, bp->b_pages, 0, bp->b_npages, bp->b_iocmd);
+		/*
+		 * Set B_CACHE if the op was a normal read and no error
+		 * occurred.  B_CACHE is set for writes in the b*write()
+		 * routines.
+		 */
+		if (bp->b_iocmd == BIO_READ &&
+		    !(bp->b_flags & (B_INVAL|B_NOCACHE)) &&
+		    !(bp->b_ioflags & BIO_ERROR)) {
+			bp->b_flags |= B_CACHE;
+		}
+
+		foff = bp->b_offset;
+		for (i = 0; i < bp->b_npages; i++) {
+			resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff;
+			if (resid > iosize)
+				resid = iosize;
+			m = bp->b_pages[i];
+
 #if defined(VFS_BIO_DEBUG)
 			if (OFF_TO_IDX(foff) != m->pindex) {
 				printf(
@@ -2940,16 +3013,6 @@
 #endif
 
 			/*
-			 * In the write case, the valid and clean bits are
-			 * already changed correctly ( see bdwrite() ), so we
-			 * only need to do this here in the read case.
-			 */
-			if ((bp->b_iocmd == BIO_READ) && !bogusflag && resid > 0) {
-				vfs_page_set_valid(bp, foff, i, m);
-			}
-			vm_page_flag_clear(m, PG_ZERO);
-
-			/*
 			 * when debugging new filesystems or buffer I/O methods, this
 			 * is the most common error that pops up.  if you see this, you
 			 * have not set the page busy flag correctly!!!
@@ -3364,6 +3427,30 @@
 	bp->b_npages = newnpages;
 }
 
+/*
+ * Map a buffer into kva. This may change the buffer virtual address by at
+ * most VCOLPAD bytes to achieve the preferred color.
+ * This can e.g. be used with pbufs.
+ */
+void
+vmiomapbuf(struct buf *bp, struct vm_object *obj, vm_ooffset_t offs,
+    vm_page_t *m, int count, int remap)
+{
+	vm_offset_t kva;
+	int color;
+
+	kva = trunc_page((vm_offset_t)bp->b_data);
+	color = vm_mdpg_pref_vcol(obj, offs, m, count);
+	if (color != -1) {
+		kva = vm_roundcolor2(kva, (vm_offset_t)color << PAGE_SHIFT,
+		    VCOLBOUND);
+	}
+	if (remap)
+		pmap_qremove((vm_offset_t)bp->b_data, count);
+	pmap_qenter(kva, m, count);
+	bp->b_data = (caddr_t)kva;
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
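The vmiobufstart()/vmiobufdone() pairs threaded through bread(),
bwrite() and bufdone() above bracket every VMIO transfer.  Their bodies
are not in the mailed diff; one plausible reading, with hypothetical
dcache helpers standing in for whatever the sparc64 pmap really does
(which may instead flush aliases of other colors), is direction-dependent
cache maintenance over the buffer's kva:

	/* Hedged sketch only; the helper names are hypothetical. */
	static void
	vmiobufstart_sketch(struct buf *bp, int iocmd)
	{
		vm_offset_t kva = trunc_page((vm_offset_t)bp->b_data);
		vm_size_t len = (vm_size_t)bp->b_npages << PAGE_SHIFT;

		if (iocmd == BIO_WRITE)
			dcache_writeback(kva, len);	/* device sees data */
		else
			dcache_invalidate(kva, len);	/* CPU rereads it */
	}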
==== //depot/projects/sparc64/sys/kern/vfs_cluster.c#9 (text+ko) ====

@@ -135,6 +135,10 @@
 	 * back-off on prospective read-aheads.
 	 */
 	if (bp->b_flags & B_CACHE) {
+		if (bp->b_flags & B_VMIO) {
+			vmiobufstart(bp, bp->b_pages, bp->b_npages, BIO_READ);
+			vmiobufdone(bp, bp->b_pages, 0, bp->b_npages, BIO_READ);
+		}
 		if (!seqcount) {
 			return 0;
 		} else if ((bp->b_flags & B_RAM) == 0) {
@@ -258,6 +262,8 @@
 		if ((bp->b_flags & B_CLUSTER) == 0) {
 			vfs_busy_pages(bp, 0);
 		}
+		if (bp->b_flags & B_VMIO)
+			vmiobufstart(bp, bp->b_pages, bp->b_npages, BIO_READ);
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
 		if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL)
@@ -295,6 +301,10 @@
 		if ((rbp->b_flags & B_CLUSTER) == 0) {
 			vfs_busy_pages(rbp, 0);
 		}
+		if (rbp->b_flags & B_VMIO) {
+			vmiobufstart(rbp, rbp->b_pages, rbp->b_npages,
+			    BIO_READ);
+		}
 		rbp->b_flags &= ~B_INVAL;
 		rbp->b_ioflags &= ~BIO_ERROR;
 		if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL)
@@ -325,6 +335,7 @@
 	struct buf *fbp;
 {
 	struct buf *bp, *tbp;
+	vm_object_t obj;
 	daddr_t bn;
 	int i, inc, j;
@@ -497,6 +508,8 @@
 				printf("warning: tbp->b_bcount wrong %ld vs %ld\n", tbp->b_bcount, size);
 			if (tbp->b_bufsize != size)
 				printf("warning: tbp->b_bufsize wrong %ld vs %ld\n", tbp->b_bufsize, size);
+			/* XXX: vmiobufstart() can be avoided in the read-ahead case. */
+			vmiobufstart(tbp, tbp->b_pages, tbp->b_npages, BIO_READ);
 			bp->b_bcount += size;
 			bp->b_bufsize += size;
 		}
@@ -516,8 +529,9 @@
 		bp->b_bufsize, bp->b_kvasize);
 	bp->b_kvasize = bp->b_bufsize;
 
-	pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
-		(vm_page_t *)bp->b_pages, bp->b_npages);
+	VOP_GETVOBJECT(vp, &obj);
+	vmiomapbuf(bp, obj, bp->b_offset, bp->b_pages,
+	    bp->b_npages, BIO_READ);
 	return (bp);
 }
@@ -542,7 +556,8 @@
 	if (bp->b_ioflags & BIO_ERROR)
 		error = bp->b_error;
 
-	pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+	KASSERT((bp->b_flags & B_VMIO) != 0, ("cluster_callback: non-VMIO"));
+	unmappbuf(bp, bp->b_pages, bp->b_npages, bp->b_iocmd);
 	/*
 	 * Move memory from the large cluster buffer into the component
 	 * buffers and mark IO as done on these.
@@ -949,6 +964,14 @@
 			tbp->b_ioflags &= ~BIO_ERROR;
 			tbp->b_flags |= B_ASYNC;
 			tbp->b_iocmd = BIO_WRITE;
+			/*
+			 * XXX: vmiobufstart() can be omitted, since the write
+			 * is performed from the cluster buffer; however, this
+			 * requires another flag to tell bufdone() not to call
+			 * vmiobufdone().

>>> TRUNCATED FOR MAIL (1000 lines) <<<