From: Attilio Rao <attilio@FreeBSD.org>
Date: Sun, 5 Feb 2012 17:37:27 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r231027 - in user/attilio/vmcontention/sys: kern vm

Author: attilio
Date: Sun Feb 5 17:37:26 2012
New Revision: 231027
URL: http://svn.freebsd.org/changeset/base/231027

Log:
  Remove the panic from vm_radix_insert() and propagate the error to the
  callers of vm_page_insert().  The default action for every caller is to
  unwind the operation; the exception is vm_page_rename(), where unwinding
  has proven impossible to do.  In that case, the function simply spins
  until a page can be inserted.  However, since vm_page_rename() is called
  rarely (and this panic has never been hit in the past), the retry is
  expected to be a very seldom event and not a performance factor.

  The patch has been tested by making the zone allocator return NULL once
  every 100000 allocations (driven by an atomic counter).  Via printf I
  verified that a typical buildkernel triggers this about 30 times.  The
  patch survived 2 hours of repeated buildkernel/buildworld runs.

  Several technical notes:
  - vm_page_insert() is moved, in several callers, closer to the failure
    points.  This part could be committed separately, before vmcontention
    hits the tree, just to verify that -CURRENT is happy with it.
  - vm_page_rename() no longer requires the page lock in its callers: it
    hides the lock as an implementation detail and does the locking
    internally.
  - vm_page_insert() now returns an int, with 0 meaning success, so this
    patch breaks the KPI.
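To make the new contract concrete: vm_page_insert() now fails with an
errno-style return instead of panicking, and each caller picks one of two
recovery policies, unwind the whole operation, or (for vm_page_rename()
only) retry until the insert succeeds.  The userland sketch below models
both policies; every name in it (object_insert, caller_with_unwind,
caller_with_retry) is an illustrative stand-in, not part of the kernel KPI:

	#include <errno.h>
	#include <sched.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Stand-in for vm_page_insert(): may fail with ENOMEM. */
	static int
	object_insert(void **slot, void *page)
	{
		void *node = malloc(64);	/* models the radix-node allocation */

		if (node == NULL)
			return (ENOMEM);	/* propagated instead of panicking */
		free(node);
		*slot = page;
		return (0);
	}

	/* Default caller policy: undo the operation and fail upward. */
	static int
	caller_with_unwind(void **slot, void *page)
	{
		if (object_insert(slot, page) != 0) {
			/* ... undo any state set up before the insert ... */
			return (ENOMEM);
		}
		return (0);
	}

	/* vm_page_rename()-style policy: unwinding is impossible, so retry. */
	static void
	caller_with_retry(void **slot, void *page)
	{
		while (object_insert(slot, page) != 0)
			sched_yield();	/* models pagedaemon_wakeup() + spin */
	}

	int
	main(void)
	{
		void *slot = NULL;
		int page;

		if (caller_with_unwind(&slot, &page) != 0)
			fprintf(stderr, "insert failed, operation unwound\n");
		caller_with_retry(&slot, &page);
		printf("page inserted at %p\n", slot);
		return (0);
	}

The point of the split is that only the caller knows how to recover: most
call sites can undo their setup and return an error, while the rename path
has already removed the page from its old object and, per the log above,
cannot be unwound, so retrying in place is its only option.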
Modified:
  user/attilio/vmcontention/sys/kern/subr_uio.c
  user/attilio/vmcontention/sys/vm/device_pager.c
  user/attilio/vmcontention/sys/vm/sg_pager.c
  user/attilio/vmcontention/sys/vm/vm_fault.c
  user/attilio/vmcontention/sys/vm/vm_object.c
  user/attilio/vmcontention/sys/vm/vm_page.c
  user/attilio/vmcontention/sys/vm/vm_page.h
  user/attilio/vmcontention/sys/vm/vm_radix.c

Modified: user/attilio/vmcontention/sys/kern/subr_uio.c
==============================================================================
--- user/attilio/vmcontention/sys/kern/subr_uio.c	Sun Feb 5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/kern/subr_uio.c	Sun Feb 5 17:37:26 2012	(r231027)
@@ -85,6 +85,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t k
 	vm_map_entry_t entry;
 	vm_pindex_t upindex;
 	vm_prot_t prot;
+	vm_page_bits_t vbits;
 	boolean_t wired;
 
 	KASSERT((uaddr & PAGE_MASK) == 0,
@@ -95,6 +96,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t k
 	 * unwired in sf_buf_mext().
 	 */
 	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
+	vbits = kern_pg->valid;
 	kern_pg->valid = VM_PAGE_BITS_ALL;
 	KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
 	    ("vm_pgmoveco: kern_pg is not correctly wired"));
@@ -105,6 +107,13 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t k
 		return(EFAULT);
 	}
 	VM_OBJECT_LOCK(uobject);
+	if (vm_page_insert(kern_pg, uobject, upindex) != 0) {
+		kern_pg->valid = vbits;
+		VM_OBJECT_UNLOCK(uobject);
+		vm_map_lookup_done(map, entry);
+		return(ENOMEM);
+	}
+	vm_page_dirty(kern_pg);
 retry:
 	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
 		if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"))
@@ -122,8 +131,6 @@ retry:
 		if (uobject->backing_object != NULL)
 			pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
 	}
-	vm_page_insert(kern_pg, uobject, upindex);
-	vm_page_dirty(kern_pg);
 	VM_OBJECT_UNLOCK(uobject);
 	vm_map_lookup_done(map, entry);
 	return(KERN_SUCCESS);

Modified: user/attilio/vmcontention/sys/vm/device_pager.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/device_pager.c	Sun Feb 5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/device_pager.c	Sun Feb 5 17:37:26 2012	(r231027)
@@ -307,11 +307,14 @@ old_dev_pager_fault(vm_object_t object,
 		 */
 		page = vm_page_getfake(paddr, memattr);
 		VM_OBJECT_LOCK(object);
+		if (vm_page_insert(page, object, offset) != 0) {
+			vm_page_putfake(page);
+			return (VM_PAGER_FAIL);
+		}
 		vm_page_lock(*mres);
 		vm_page_free(*mres);
 		vm_page_unlock(*mres);
 		*mres = page;
-		vm_page_insert(page, object, pidx);
 	}
 	page->valid = VM_PAGE_BITS_ALL;
 	return (VM_PAGER_OK);

Modified: user/attilio/vmcontention/sys/vm/sg_pager.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/sg_pager.c	Sun Feb 5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/sg_pager.c	Sun Feb 5 17:37:26 2012	(r231027)
@@ -179,6 +179,10 @@ sg_pager_getpages(vm_object_t object, vm
 	/* Construct a new fake page. */
 	page = vm_page_getfake(paddr, memattr);
 	VM_OBJECT_LOCK(object);
+	if (vm_page_insert(page, object, offset) != 0) {
+		vm_page_putfake(page);
+		return (VM_PAGER_FAIL);
+	}
 	TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq);
 
 	/* Free the original pages and insert this fake page into the object. */
@@ -187,7 +191,6 @@ sg_pager_getpages(vm_object_t object, vm
 		vm_page_free(m[i]);
 		vm_page_unlock(m[i]);
 	}
-	vm_page_insert(page, object, offset);
 	m[reqpage] = page;
 	page->valid = VM_PAGE_BITS_ALL;
 

Modified: user/attilio/vmcontention/sys/vm/vm_fault.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_fault.c	Sun Feb 5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_fault.c	Sun Feb 5 17:37:26 2012	(r231027)
@@ -769,9 +769,7 @@ vnode_locked:
 				 * process'es object.  The page is
 				 * automatically made dirty.
 				 */
-				vm_page_lock(fs.m);
 				vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
-				vm_page_unlock(fs.m);
 				vm_page_busy(fs.m);
 				fs.first_m = fs.m;
 				fs.m = NULL;

Modified: user/attilio/vmcontention/sys/vm/vm_object.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_object.c	Sun Feb 5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_object.c	Sun Feb 5 17:37:26 2012	(r231027)
@@ -1397,9 +1397,7 @@ retry:
 			vm_reserv_rename(m, new_object, orig_object,
 			    offidxstart);
 #endif
-		vm_page_lock(m);
 		vm_page_rename(m, new_object, idx);
-		vm_page_unlock(m);
 		/*
 		 * page automatically made dirty by rename and
 		 * cache handled
@@ -1654,9 +1652,7 @@ restart:
 			 * If the page was mapped to a process, it can remain
 			 * mapped through the rename.
 			 */
-			vm_page_lock(p);
 			vm_page_rename(p, object, new_pindex);
-			vm_page_unlock(p);
 			/* page automatically made dirty by rename */
 		}
 	}

Modified: user/attilio/vmcontention/sys/vm/vm_page.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_page.c	Sun Feb 5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_page.c	Sun Feb 5 17:37:26 2012	(r231027)
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
+#include
 #include
 
 /*
@@ -777,13 +778,15 @@ vm_page_dirty(vm_page_t m)
 *	The object and page must be locked.
 *	This routine may not block.
 */
-void
+int
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
 	vm_page_t neighbor;
+	vm_pindex_t cpindex;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	if (m->object != NULL)
 		panic("vm_page_insert: page already inserted");
+	cpindex = m->pindex;
 
 	/*
 	 * Record the object/offset pair in this page
@@ -804,8 +807,13 @@ vm_page_insert(vm_page_t m, vm_object_t
 		} else
 			TAILQ_INSERT_TAIL(&object->memq, m, listq);
 	}
-	if (vm_radix_insert(&object->rtree, pindex, m) != 0)
-		panic("vm_page_insert: unable to insert the new page");
+
+	if (vm_radix_insert(&object->rtree, pindex, m) != 0) {
+		TAILQ_REMOVE(&object->memq, m, listq);
+		m->object = NULL;
+		m->pindex = cpindex;
+		return (ENOMEM);
+	}
 
 	/*
 	 * show that the object has one more resident page.
@@ -823,6 +831,7 @@ vm_page_insert(vm_page_t m, vm_object_t
 	 */
 	if (m->aflags & PGA_WRITEABLE)
 		vm_object_set_writeable_dirty(object);
+	return (0);
 }
 
 /*
@@ -967,9 +976,20 @@ vm_page_prev(vm_page_t m)
 void
 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
 {
+	u_int i;
+
+	MPASS(m->object != NULL);
+	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+	VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
 
+	vm_page_lock(m);
 	vm_page_remove(m);
-	vm_page_insert(m, new_object, new_pindex);
+	vm_page_unlock(m);
+	while (vm_page_insert(m, new_object, new_pindex) != 0) {
+		pagedaemon_wakeup();
+		for (i = 0; i < 10000000; i++)
+			cpu_spinwait();
+	}
 	vm_page_dirty(m);
 }
 
@@ -1250,7 +1270,19 @@ vm_page_alloc(vm_object_t object, vm_pin
 		if (object->memattr != VM_MEMATTR_DEFAULT &&
 		    object->type != OBJT_DEVICE && object->type != OBJT_SG)
 			pmap_page_set_memattr(m, object->memattr);
-		vm_page_insert(m, object, pindex);
+		if (vm_page_insert(m, object, pindex) != 0) {
+
+			/* See the comment below about hold count handling. */
+			if (vp != NULL)
+				vdrop(vp);
+			vm_page_lock(m);
+			if (req & VM_ALLOC_WIRED)
+				vm_page_unwire(m, 0);
+			vm_page_free(m);
+			vm_page_unlock(m);
+			pagedaemon_wakeup();
+			return (NULL);
+		}
 	} else
 		m->pindex = pindex;
 
@@ -1317,6 +1349,7 @@ vm_page_alloc_contig(vm_object_t object,
 {
 	struct vnode *drop;
 	vm_page_t deferred_vdrop_list, m, m_ret;
+	vm_pindex_t cpindex;
 	u_int flags, oflags;
 	int req_class;
@@ -1403,6 +1436,7 @@ retry:
 	    memattr == VM_MEMATTR_DEFAULT)
 		memattr = object->memattr;
 	}
+	cpindex = pindex;
 	for (m = m_ret; m < &m_ret[npages]; m++) {
 		m->aflags = 0;
 		m->flags &= flags;
@@ -1412,12 +1446,30 @@ retry:
 		m->oflags = oflags;
 		if (memattr != VM_MEMATTR_DEFAULT)
 			pmap_page_set_memattr(m, memattr);
-		if (object != NULL)
-			vm_page_insert(m, object, pindex);
-		else
-			m->pindex = pindex;
 		pindex++;
 	}
+	for (m = m_ret; m < &m_ret[npages]; m++) {
+		if (object != NULL) {
+			if (vm_page_insert(m, object, cpindex) != 0) {
+				while (deferred_vdrop_list != NULL) {
+					vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
+					deferred_vdrop_list =
+					    deferred_vdrop_list->pageq.tqe_next;
+				}
+				for (m = m_ret; m < &m_ret[npages]; m++) {
+					vm_page_lock(m);
+					if (req & VM_ALLOC_WIRED)
+						vm_page_unwire(m, 0);
+					vm_page_free(m);
+					vm_page_unlock(m);
+				}
+				pagedaemon_wakeup();
+				return (NULL);
+			}
+		} else
+			m->pindex = cpindex;
+		cpindex++;
+	}
 	while (deferred_vdrop_list != NULL) {
 		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
 		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
 	}
@@ -2642,11 +2694,8 @@ vm_page_cowfault(vm_page_t m)
 	pindex = m->pindex;
 
 retry_alloc:
-	pmap_remove_all(m);
-	vm_page_remove(m);
 	mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
 	if (mnew == NULL) {
-		vm_page_insert(m, object, pindex);
 		vm_page_unlock(m);
 		VM_OBJECT_UNLOCK(object);
 		VM_WAIT;
@@ -2672,8 +2721,9 @@ vm_page_cowfault(vm_page_t m)
 		vm_page_lock(mnew);
 		vm_page_free(mnew);
 		vm_page_unlock(mnew);
-		vm_page_insert(m, object, pindex);
 	} else { /* clear COW & copy page */
+		pmap_remove_all(m);
+		vm_page_remove(m);
 		if (!so_zerocp_fullpage)
 			pmap_copy_page(m, mnew);
 		mnew->valid = VM_PAGE_BITS_ALL;

Modified: user/attilio/vmcontention/sys/vm/vm_page.h
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_page.h	Sun Feb 5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_page.h	Sun Feb 5 17:37:26 2012	(r231027)
@@ -386,7 +386,7 @@ void vm_page_dontneed(vm_page_t);
 void vm_page_deactivate (vm_page_t);
 vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
 vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
-void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
+int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
 vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
 vm_page_t vm_page_next(vm_page_t m);
 int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);

Modified: user/attilio/vmcontention/sys/vm/vm_radix.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_radix.c	Sun Feb 5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_radix.c	Sun Feb 5 17:37:26 2012	(r231027)
@@ -55,6 +55,7 @@
 #include
 
 CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
+CTASSERT((sizeof(u_int) * NBBY) >= VM_RADIX_LIMIT);
 
 static uma_zone_t vm_radix_node_zone;
@@ -211,6 +212,24 @@ vm_radix_setroot(struct vm_radix *rtree,
 	rtree->rt_root = root;
 }
 
+static inline void
+vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root,
+    struct vm_radix_node *iroot, int ilevel)
+{
+	struct vm_radix_node *rnode;
+
+	CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d",
+	    rtree, root, iroot, ilevel);
+	while (iroot != root && root != NULL) {
+		rnode = root;
+		MPASS(rnode->rn_count == 0 || rnode->rn_count == 1);
+		rnode->rn_count = 0;
+		root = rnode->rn_child[0];
+		vm_radix_node_put(rnode);
+	}
+	vm_radix_setroot(rtree, iroot, ilevel);
+}
+
 static inline void *
 vm_radix_match(void *child, int color)
 {
@@ -262,10 +281,9 @@ vm_radix_reclaim_allnodes_internal(struc
 int
 vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
 {
-	struct vm_radix_node *rnode;
-	struct vm_radix_node *root;
-	int level;
-	int slot;
+	struct vm_radix_node *iroot, *rnode, *root;
+	u_int allocmsk;
+	int clev, ilevel, level, slot;
 
 	CTR3(KTR_VM, "insert: tree %p, index %ju, val %p",
 	    rtree, (uintmax_t)index, val);
@@ -276,6 +294,8 @@ vm_radix_insert(struct vm_radix *rtree,
 	 * Increase the height by adding nodes at the root until
 	 * there is sufficient space.
 	 */
+	ilevel = level;
+	iroot = root;
 	while (level == 0 || index > VM_RADIX_MAX(level)) {
 		CTR3(KTR_VM, "insert: expanding %ju > %ju height %d",
 		    (uintmax_t)index, (uintmax_t)VM_RADIX_MAX(level), level);
@@ -292,6 +312,8 @@ vm_radix_insert(struct vm_radix *rtree,
 			CTR4(KTR_VM,
 			    "insert: tree %p, root %p, index: %ju, level: %d ENOMEM",
 			    rtree, root, (uintmax_t)index, level);
+			vm_radix_unwind_heightup(rtree, root, iroot,
+			    ilevel);
 			return (ENOMEM);
 		}
 		/*
@@ -309,6 +331,8 @@ vm_radix_insert(struct vm_radix *rtree,
 	}
 
 	/* Now that the tree is tall enough, fill in the path to the index. */
+	allocmsk = 0;
+	clev = level;
 	rnode = root;
 	for (level = level - 1; level > 0; level--) {
 		slot = vm_radix_slot(index, level);
@@ -324,9 +348,35 @@
 			    "insert: tree %p, rnode %p, child %p, count %u ENOMEM",
 			    rtree, rnode, rnode->rn_child[slot],
 			    rnode->rn_count);
+			MPASS(level != clev || allocmsk == 0);
+			while (allocmsk != 0) {
+				rnode = root;
+				level = clev;
+				level--;
+				CTR4(KTR_VM,
+				    "insert: unwind root %p, level %d, slot %d, allocmsk: 0x%x",
+				    root, level, slot, allocmsk);
+				slot = vm_radix_slot(index, level);
+				MPASS(level >= (ffs(allocmsk) - 1));
+				while (level > (ffs(allocmsk) - 1)) {
+					MPASS(level > 0);
+					slot = vm_radix_slot(index,
+					    level);
+					rnode = rnode->rn_child[slot];
+					level--;
+				}
+				MPASS((allocmsk & (1 << level)) != 0);
+				allocmsk &= ~(1 << level);
+				rnode->rn_count--;
+				vm_radix_node_put(rnode->rn_child[slot]);
+				rnode->rn_child[slot] = NULL;
+			}
+			vm_radix_unwind_heightup(rtree, root, iroot,
+			    ilevel);
 			return (ENOMEM);
 		}
 		rnode->rn_count++;
+		allocmsk |= (1 << level);
 	}
 	CTR5(KTR_VM,
 	    "insert: tree %p, index %ju, level %d, slot %d, rnode %p",
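A note on the allocmsk bookkeeping above: vm_radix_insert() records, one
bit per tree level, every node the current call allocated (the new
CTASSERT checks that u_int is wide enough to hold VM_RADIX_LIMIT such
bits).  On ENOMEM the error path uses ffs() to locate the lowest recorded
level, frees exactly the nodes this call created, and then
vm_radix_unwind_heightup() strips the root nodes added while growing the
tree, leaving the trie as it was found.  Below is a minimal self-contained
model of mask-driven unwinding; the names (alloc_node, insert_path,
unwind_path, NLEVELS) are hypothetical stand-ins, not the kernel's:

	#include <sys/types.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <strings.h>	/* ffs() */

	#define	NLEVELS	8	/* stand-in for VM_RADIX_LIMIT */

	/* One node pointer per level touched by the current insertion. */
	static void *nodes[NLEVELS];

	static void *
	alloc_node(void)
	{
		return (malloc(64));	/* may return NULL */
	}

	static int
	insert_path(u_int *allocmsk)
	{
		void *node;
		int level;

		for (level = NLEVELS - 1; level > 0; level--) {
			node = alloc_node();
			if (node == NULL)
				return (level);	/* caller must unwind */
			nodes[level] = node;
			*allocmsk |= 1u << level;	/* record this allocation */
		}
		return (0);
	}

	static void
	unwind_path(u_int *allocmsk)
	{
		int level;

		/* Free exactly the recorded nodes, lowest level first. */
		while (*allocmsk != 0) {
			level = ffs((int)*allocmsk) - 1;
			free(nodes[level]);
			nodes[level] = NULL;
			*allocmsk &= ~(1u << level);
		}
	}

	int
	main(void)
	{
		u_int allocmsk = 0;

		if (insert_path(&allocmsk) != 0)
			printf("allocation failed, unwinding\n");
		else
			printf("path built, mask 0x%x\n", allocmsk);
		unwind_path(&allocmsk);	/* frees only what was recorded */
		return (0);
	}

The property the kernel relies on is the same one this sketch shows: a
failed insertion releases only the nodes it allocated itself, so the tree
is left exactly as the caller found it.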