Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 5 Feb 2012 17:37:27 +0000 (UTC)
From:      Attilio Rao <attilio@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r231027 - in user/attilio/vmcontention/sys: kern vm
Message-ID:  <201202051737.q15HbR4a093494@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: attilio
Date: Sun Feb  5 17:37:26 2012
New Revision: 231027
URL: http://svn.freebsd.org/changeset/base/231027

Log:
  Remove the panic from vm_radix_insert() and propagate the error to the
  callers of vm_page_insert().
  
  The default action for every caller is to unwind the operation,
  except for vm_page_rename() where this has proven to be impossible to do.
  For that case, it just spins until the insertion succeeds. However,
  since vm_page_rename() is mostly rare (and this panic has never been
  hit in the past) it is thought to be a very seldom event and not a
  possible performance factor.
  
  The patch has been tested with an atomic counter returning NULL from
  the zone allocator every 1/100000 allocations. Via printf instrumentation,
  I've verified that a typical buildkernel could trigger this 30 times.
  The patch survived 2 hours of repeated buildkernel/world.
  
  Several technical notes:
  - The vm_page_insert() is moved, in several callers, closer to failure
    points.  This could be committed separately before vmcontention hits
    the tree just to verify -CURRENT is happy with it.
  - vm_page_rename() does not need to have the page lock held by the callers,
    as it hides that as an implementation detail. Do the locking internally.
  - now vm_page_insert() returns an int, with 0 meaning everything was ok,
    thus KPI is broken by this patch.

Modified:
  user/attilio/vmcontention/sys/kern/subr_uio.c
  user/attilio/vmcontention/sys/vm/device_pager.c
  user/attilio/vmcontention/sys/vm/sg_pager.c
  user/attilio/vmcontention/sys/vm/vm_fault.c
  user/attilio/vmcontention/sys/vm/vm_object.c
  user/attilio/vmcontention/sys/vm/vm_page.c
  user/attilio/vmcontention/sys/vm/vm_page.h
  user/attilio/vmcontention/sys/vm/vm_radix.c

Modified: user/attilio/vmcontention/sys/kern/subr_uio.c
==============================================================================
--- user/attilio/vmcontention/sys/kern/subr_uio.c	Sun Feb  5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/kern/subr_uio.c	Sun Feb  5 17:37:26 2012	(r231027)
@@ -85,6 +85,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t k
 	vm_map_entry_t entry;
 	vm_pindex_t upindex;
 	vm_prot_t prot;
+	vm_page_bits_t vbits;
 	boolean_t wired;
 
 	KASSERT((uaddr & PAGE_MASK) == 0,
@@ -95,6 +96,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t k
 	 * unwired in sf_buf_mext().
 	 */
 	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
+	vbits = kern_pg->valid;
 	kern_pg->valid = VM_PAGE_BITS_ALL;
 	KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
 	    ("vm_pgmoveco: kern_pg is not correctly wired"));
@@ -105,6 +107,13 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t k
 		return(EFAULT);
 	}
 	VM_OBJECT_LOCK(uobject);
+	if (vm_page_insert(kern_pg, uobject, upindex) != 0) {
+		kern_pg->valid = vbits;
+		VM_OBJECT_UNLOCK(uobject);
+		vm_map_lookup_done(map, entry);
+		return(ENOMEM);
+	}
+	vm_page_dirty(kern_pg);
 retry:
 	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
 		if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"))
@@ -122,8 +131,6 @@ retry:
 		if (uobject->backing_object != NULL)
 			pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
 	}
-	vm_page_insert(kern_pg, uobject, upindex);
-	vm_page_dirty(kern_pg);
 	VM_OBJECT_UNLOCK(uobject);
 	vm_map_lookup_done(map, entry);
 	return(KERN_SUCCESS);

Modified: user/attilio/vmcontention/sys/vm/device_pager.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/device_pager.c	Sun Feb  5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/device_pager.c	Sun Feb  5 17:37:26 2012	(r231027)
@@ -307,11 +307,14 @@ old_dev_pager_fault(vm_object_t object, 
 		 */
 		page = vm_page_getfake(paddr, memattr);
 		VM_OBJECT_LOCK(object);
+		if (vm_page_insert(page, object, offset) != 0) {
+			vm_page_putfake(page);
+			return (VM_PAGER_FAIL);
+		}
 		vm_page_lock(*mres);
 		vm_page_free(*mres);
 		vm_page_unlock(*mres);
 		*mres = page;
-		vm_page_insert(page, object, pidx);
 	}
 	page->valid = VM_PAGE_BITS_ALL;
 	return (VM_PAGER_OK);

Modified: user/attilio/vmcontention/sys/vm/sg_pager.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/sg_pager.c	Sun Feb  5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/sg_pager.c	Sun Feb  5 17:37:26 2012	(r231027)
@@ -179,6 +179,10 @@ sg_pager_getpages(vm_object_t object, vm
 	/* Construct a new fake page. */
 	page = vm_page_getfake(paddr, memattr);
 	VM_OBJECT_LOCK(object);
+	if (vm_page_insert(page, object, offset) != 0) {
+		vm_page_putfake(page);
+		return (VM_PAGER_FAIL);
+	}
 	TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq);
 
 	/* Free the original pages and insert this fake page into the object. */
@@ -187,7 +191,6 @@ sg_pager_getpages(vm_object_t object, vm
 		vm_page_free(m[i]);
 		vm_page_unlock(m[i]);
 	}
-	vm_page_insert(page, object, offset);
 	m[reqpage] = page;
 	page->valid = VM_PAGE_BITS_ALL;
 

Modified: user/attilio/vmcontention/sys/vm/vm_fault.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_fault.c	Sun Feb  5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_fault.c	Sun Feb  5 17:37:26 2012	(r231027)
@@ -769,9 +769,7 @@ vnode_locked:
 				 * process'es object.  The page is 
 				 * automatically made dirty.
 				 */
-				vm_page_lock(fs.m);
 				vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
-				vm_page_unlock(fs.m);
 				vm_page_busy(fs.m);
 				fs.first_m = fs.m;
 				fs.m = NULL;

Modified: user/attilio/vmcontention/sys/vm/vm_object.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_object.c	Sun Feb  5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_object.c	Sun Feb  5 17:37:26 2012	(r231027)
@@ -1397,9 +1397,7 @@ retry:
 			vm_reserv_rename(m, new_object, orig_object,
 			    offidxstart);
 #endif
-			vm_page_lock(m);
 			vm_page_rename(m, new_object, idx);
-			vm_page_unlock(m);
 			/*
 			 * page automatically made dirty by rename and
 			 * cache handled
@@ -1654,9 +1652,7 @@ restart:
 			 * If the page was mapped to a process, it can remain 
 			 * mapped through the rename.
 			 */
-			vm_page_lock(p);
 			vm_page_rename(p, object, new_pindex);
-			vm_page_unlock(p);
 			/* page automatically made dirty by rename */
 		}
 	}

Modified: user/attilio/vmcontention/sys/vm/vm_page.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_page.c	Sun Feb  5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_page.c	Sun Feb  5 17:37:26 2012	(r231027)
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 
+#include <machine/cpu.h>
 #include <machine/md_var.h>
 
 /*
@@ -777,13 +778,15 @@ vm_page_dirty(vm_page_t m)
  *	The object and page must be locked.
  *	This routine may not block.
  */
-void
+int
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
 	vm_page_t neighbor;
+	vm_pindex_t cpindex;
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	if (m->object != NULL)
 		panic("vm_page_insert: page already inserted");
+	cpindex = m->pindex;
 
 	/*
 	 * Record the object/offset pair in this page
@@ -804,8 +807,13 @@ vm_page_insert(vm_page_t m, vm_object_t 
 		} else 
 			TAILQ_INSERT_TAIL(&object->memq, m, listq);
 	}
-	if (vm_radix_insert(&object->rtree, pindex, m) != 0)
-		panic("vm_page_insert: unable to insert the new page");
+
+	if (vm_radix_insert(&object->rtree, pindex, m) != 0) {
+		TAILQ_REMOVE(&object->memq, m, listq);
+		m->object = NULL;
+		m->pindex = cpindex;
+		return (ENOMEM);
+	}
 
 	/*
 	 * show that the object has one more resident page.
@@ -823,6 +831,7 @@ vm_page_insert(vm_page_t m, vm_object_t 
 	 */
 	if (m->aflags & PGA_WRITEABLE)
 		vm_object_set_writeable_dirty(object);
+	return (0);
 }
 
 /*
@@ -967,9 +976,20 @@ vm_page_prev(vm_page_t m)
 void
 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
 {
+	u_int i;
+
+	MPASS(m->object != NULL);
+	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+	VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
 
+	vm_page_lock(m);
 	vm_page_remove(m);
-	vm_page_insert(m, new_object, new_pindex);
+	vm_page_unlock(m);
+	while (vm_page_insert(m, new_object, new_pindex) != 0) {
+		pagedaemon_wakeup();
+		for (i = 0; i < 10000000; i++)
+			cpu_spinwait();
+	}
 	vm_page_dirty(m);
 }
 
@@ -1250,7 +1270,19 @@ vm_page_alloc(vm_object_t object, vm_pin
 		if (object->memattr != VM_MEMATTR_DEFAULT &&
 		    object->type != OBJT_DEVICE && object->type != OBJT_SG)
 			pmap_page_set_memattr(m, object->memattr);
-		vm_page_insert(m, object, pindex);
+		if (vm_page_insert(m, object, pindex) != 0) {
+
+			/* See the comment below about hold count handling. */
+			if (vp != NULL)
+				vdrop(vp);
+			vm_page_lock(m);
+			if (req & VM_ALLOC_WIRED)
+				vm_page_unwire(m, 0);
+			vm_page_free(m);
+			vm_page_unlock(m);
+			pagedaemon_wakeup();
+			return (NULL);
+		}
 	} else
 		m->pindex = pindex;
 
@@ -1317,6 +1349,7 @@ vm_page_alloc_contig(vm_object_t object,
 {
 	struct vnode *drop;
 	vm_page_t deferred_vdrop_list, m, m_ret;
+	vm_pindex_t cpindex;
 	u_int flags, oflags;
 	int req_class;
 
@@ -1403,6 +1436,7 @@ retry:
 		    memattr == VM_MEMATTR_DEFAULT)
 			memattr = object->memattr;
 	}
+	cpindex = pindex;
 	for (m = m_ret; m < &m_ret[npages]; m++) {
 		m->aflags = 0;
 		m->flags &= flags;
@@ -1412,12 +1446,30 @@ retry:
 		m->oflags = oflags;
 		if (memattr != VM_MEMATTR_DEFAULT)
 			pmap_page_set_memattr(m, memattr);
-		if (object != NULL)
-			vm_page_insert(m, object, pindex);
-		else
-			m->pindex = pindex;
 		pindex++;
 	}
+	for (m = m_ret; m < &m_ret[npages]; m++) {
+		if (object != NULL) {
+			if (vm_page_insert(m, object, cpindex) != 0) {
+				while (deferred_vdrop_list != NULL) {
+		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
+					deferred_vdrop_list =
+					    deferred_vdrop_list->pageq.tqe_next;
+				}
+				for (m = m_ret; m < &m_ret[npages]; m++) {
+					vm_page_lock(m);
+					if (req & VM_ALLOC_WIRED)
+						vm_page_unwire(m, 0);
+					vm_page_free(m);
+					vm_page_unlock(m);
+				}
+				pagedaemon_wakeup();
+				return (NULL);
+			}
+		} else
+			m->pindex = cpindex;
+		cpindex++;
+	}
 	while (deferred_vdrop_list != NULL) {
 		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
 		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
@@ -2642,11 +2694,8 @@ vm_page_cowfault(vm_page_t m)
 	pindex = m->pindex;
 
  retry_alloc:
-	pmap_remove_all(m);
-	vm_page_remove(m);
 	mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
 	if (mnew == NULL) {
-		vm_page_insert(m, object, pindex);
 		vm_page_unlock(m);
 		VM_OBJECT_UNLOCK(object);
 		VM_WAIT;
@@ -2672,8 +2721,9 @@ vm_page_cowfault(vm_page_t m)
 		vm_page_lock(mnew);
 		vm_page_free(mnew);
 		vm_page_unlock(mnew);
-		vm_page_insert(m, object, pindex);
 	} else { /* clear COW & copy page */
+		pmap_remove_all(m);
+		vm_page_remove(m);
 		if (!so_zerocp_fullpage)
 			pmap_copy_page(m, mnew);
 		mnew->valid = VM_PAGE_BITS_ALL;

Modified: user/attilio/vmcontention/sys/vm/vm_page.h
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_page.h	Sun Feb  5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_page.h	Sun Feb  5 17:37:26 2012	(r231027)
@@ -386,7 +386,7 @@ void vm_page_dontneed(vm_page_t);
 void vm_page_deactivate (vm_page_t);
 vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
 vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
-void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
+int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
 vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
 vm_page_t vm_page_next(vm_page_t m);
 int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);

Modified: user/attilio/vmcontention/sys/vm/vm_radix.c
==============================================================================
--- user/attilio/vmcontention/sys/vm/vm_radix.c	Sun Feb  5 16:54:26 2012	(r231026)
+++ user/attilio/vmcontention/sys/vm/vm_radix.c	Sun Feb  5 17:37:26 2012	(r231027)
@@ -55,6 +55,7 @@
 #include <sys/kdb.h>
 
 CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
+CTASSERT((sizeof(u_int) * NBBY) >= VM_RADIX_LIMIT);
 
 static uma_zone_t vm_radix_node_zone;
 
@@ -211,6 +212,24 @@ vm_radix_setroot(struct vm_radix *rtree,
 	rtree->rt_root = root;
 }
 
+static inline void
+vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root,
+    struct vm_radix_node *iroot, int ilevel)
+{
+	struct vm_radix_node *rnode;
+
+	CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d",
+	    rtree, root, iroot, ilevel);
+	while (iroot != root && root != NULL) {
+		rnode = root;
+		MPASS(rnode->rn_count == 0 || rnode->rn_count == 1);
+		rnode->rn_count = 0;
+		root = rnode->rn_child[0];
+		vm_radix_node_put(rnode);
+	}
+	vm_radix_setroot(rtree, iroot, ilevel);
+}
+
 static inline void *
 vm_radix_match(void *child, int color)
 {
@@ -262,10 +281,9 @@ vm_radix_reclaim_allnodes_internal(struc
 int
 vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
 {
-	struct vm_radix_node *rnode;
-	struct vm_radix_node *root;
-	int level;
-	int slot;
+	struct vm_radix_node *iroot, *rnode, *root;
+	u_int allocmsk;
+	int clev, ilevel, level, slot;
 
 	CTR3(KTR_VM,
 	    "insert: tree %p, index %ju, val %p", rtree, (uintmax_t)index, val);
@@ -276,6 +294,8 @@ vm_radix_insert(struct vm_radix *rtree, 
 	 * Increase the height by adding nodes at the root until
 	 * there is sufficient space.
 	 */
+	ilevel = level;
+	iroot = root;
 	while (level == 0 || index > VM_RADIX_MAX(level)) {
 		CTR3(KTR_VM, "insert: expanding %ju > %ju height %d",
 		    (uintmax_t)index, (uintmax_t)VM_RADIX_MAX(level), level);
@@ -292,6 +312,8 @@ vm_radix_insert(struct vm_radix *rtree, 
 				CTR4(KTR_VM,
 		"insert: tree %p, root %p, index: %ju, level: %d ENOMEM",
 				    rtree, root, (uintmax_t)index, level);
+				vm_radix_unwind_heightup(rtree, root, iroot,
+				    ilevel);
 				return (ENOMEM);
 			}
 			/*
@@ -309,6 +331,8 @@ vm_radix_insert(struct vm_radix *rtree, 
 	}
 
 	/* Now that the tree is tall enough, fill in the path to the index. */
+	allocmsk = 0;
+	clev = level;
 	rnode = root;
 	for (level = level - 1; level > 0; level--) {
 		slot = vm_radix_slot(index, level);
@@ -324,9 +348,35 @@ vm_radix_insert(struct vm_radix *rtree, 
 			"insert: tree %p, rnode %p, child %p, count %u ENOMEM",
 		    		    rtree, rnode, rnode->rn_child[slot],
 				    rnode->rn_count);
+				MPASS(level != clev || allocmsk == 0);
+				while (allocmsk != 0) {
+					rnode = root;
+					level = clev;
+					level--;
+					CTR4(KTR_VM,
+		    "insert: unwind root %p, level %d, slot %d, allocmsk: 0x%x",
+					    root, level, slot, allocmsk);
+					slot = vm_radix_slot(index, level);
+					MPASS(level >= (ffs(allocmsk) - 1));
+					while (level > (ffs(allocmsk) - 1)) {
+						MPASS(level > 0);
+						slot = vm_radix_slot(index,
+						    level);
+						rnode = rnode->rn_child[slot];
+						level--;
+					}
+					MPASS((allocmsk & (1 << level)) != 0);
+					allocmsk &= ~(1 << level);
+					rnode->rn_count--;
+				vm_radix_node_put(rnode->rn_child[slot]);
+					rnode->rn_child[slot] = NULL;
+				}
+				vm_radix_unwind_heightup(rtree, root, iroot,
+				    ilevel);
 		    		return (ENOMEM);
 			}
 			rnode->rn_count++;
+			allocmsk |= (1 << level);
 	    	}
 		CTR5(KTR_VM,
 		    "insert: tree %p, index %ju, level %d, slot %d, rnode %p",



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201202051737.q15HbR4a093494>