Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 21 May 2001 11:12:05 -0700 (PDT)
From:      Matt Dillon <dillon@earth.backplane.com>
To:        "Brian F. Feldman" <green@FreeBSD.ORG>
Cc:        hackers@FreeBSD.ORG
Subject:   Re: vmspace leak (+ tentative fix)
Message-ID:  <200105211812.f4LIC5t03635@earth.backplane.com>
References:   <200105210205.f4L25x102099@green.bikeshed.org>

next in thread | previous in thread | raw e-mail | index | archive | help
    This could explain a few bug reports I've had over the years in regards
    to systems leaking swap space.  Good find!

    Hmmm.  May I suggest an alternative?

	* Keep the part that changes vm->vm_refcnt == 1 to
	  --vm->vm_refcnt == 0 when checking whether to free
	  the underlying resources or not.  This introduces one
	  extra ref count decrement.

	* Instead of breaking out the vmspace freeing code or adding
	  any check fields, simply change the way vmspace_free() operates
	  so instead of checking --vm->vm_refcnt == 0, it instead checks
	  for vm->vm_refcnt == -1.

    Old vmspace_free() code:

	if (vm->vm_refcnt == 0)
		panic(...)

	if (--vm->vm_refcnt == 0) {
		...
	}

    Suggested fix (pseudo code / incomplete):

	/*
	 * One extra refcnt decrement occurs before freeing.  The
	 * process that takes responsibility for releasing the 
	 * vmspace resources decrements refcnt to 0, then the vmspace
	 * is further decremented when released from cpu_wait().
	 */
	if (vm->vm_refcnt <= -1)
		panic(...)

	if (--vm->vm_refcnt == -1) {
		...
	}

    You might have to make other adjustments in the codebase, since there
    are a couple of other places where vm_refcnt is used 
    (fgrep vm_refcnt */*.c).  This is only a suggestion.  I have not
    tested it in any way.  We use a similar trick for the vnode ref count.

    I would be happy to review and test your final solution.

						-Matt

:...
:There's a certain issue that when several processes sharing a vmspace are 
:exiting at the same time, there is a race condition such that the shared 
:memory is going to be lost because the check for vm->vm_refcnt being the 
:check for the last decrement happening before the last decrement is
:actually performed, allowing for the possibility of Giant being dropped 
:(duh, during flushing of dirty pages), and all the trouble that entails...
:
:Anyway, here's what I currently have which seems to fix it.  Anyone want to 
:comment on its correctness?  The newly introduced vm_freer should be valid 
:to test against: it's only necessary to differentiate between multiple 
:holders of the same vmspace, so there shouldn't be any problem with recycled 
:proc pointers or anything.
:
:Index: kern/kern_exit.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/kern/kern_exit.c,v
:retrieving revision 1.123
:diff -u -r1.123 kern_exit.c
:--- kern/kern_exit.c	2001/03/28 11:52:53	1.123
:+++ kern/kern_exit.c	2001/04/29 23:47:36
:@@ -222,13 +222,14 @@
: 	 * Can't free the entire vmspace as the kernel stack
: 	 * may be mapped within that space also.
: 	 */
:-	if (vm->vm_refcnt == 1) {
:+	if (--vm->vm_refcnt == 0) {
: 		if (vm->vm_shm)
: 			shmexit(p);
: 		pmap_remove_pages(vmspace_pmap(vm), VM_MIN_ADDRESS,
: 		    VM_MAXUSER_ADDRESS);
: 		(void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
: 		    VM_MAXUSER_ADDRESS);
:+		vm->vm_freer = curproc;
: 	}
: 
: 	PROC_LOCK(p);
:@@ -379,7 +380,7 @@
: 	/*
: 	 * Finally, call machine-dependent code to release the remaining
: 	 * resources including address space, the kernel stack and pcb.
:-	 * The address space is released by "vmspace_free(p->p_vmspace)";
:+	 * The address space is released by "vmspace_exitfree(p)";
: 	 * This is machine-dependent, as we may have to change stacks
: 	 * or ensure that the current one isn't reallocated before we
: 	 * finish.  cpu_exit will end with a call to cpu_switch(), finishing
:Index: vm/vm_map.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/vm/vm_map.c,v
:retrieving revision 1.198
:diff -u -r1.198 vm_map.c
:--- vm/vm_map.c	2001/04/12 21:50:03	1.198
:+++ vm/vm_map.c	2001/04/29 23:44:09
:@@ -178,6 +178,7 @@
: 	vm->vm_map.pmap = vmspace_pmap(vm);		/* XXX */
: 	vm->vm_refcnt = 1;
: 	vm->vm_shm = NULL;
:+	vm->vm_freer = NULL;
: 	return (vm);
: }
: 
:@@ -194,6 +195,27 @@
: 	vm_object_init2();
: }
: 
:+static __inline void
:+vmspace_dofree(vm)
:+	struct vmspace *vm;
:+{
:+
:+	/*
:+	 * Lock the map, to wait out all other references to it.
:+	 * Delete all of the mappings and pages they hold, then call
:+	 * the pmap module to reclaim anything left.
:+	 */
:+	vm_map_lock(&vm->vm_map);
:+	(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
:+		vm->vm_map.max_offset);
:+	vm_map_unlock(&vm->vm_map);
:+
:+	pmap_release(vmspace_pmap(vm));
:+	vm_map_destroy(&vm->vm_map);
:+	zfree(vmspace_zone, vm);
:+}
:+	
:+
: void
: vmspace_free(vm)
: 	struct vmspace *vm;
:@@ -202,22 +224,17 @@
: 	if (vm->vm_refcnt == 0)
: 		panic("vmspace_free: attempt to free already freed vmspace");
: 
:-	if (--vm->vm_refcnt == 0) {
:+	if (--vm->vm_refcnt == 0)
:+		vmspace_dofree(vm);
:+}
: 
:-		/*
:-		 * Lock the map, to wait out all other references to it.
:-		 * Delete all of the mappings and pages they hold, then call
:-		 * the pmap module to reclaim anything left.
:-		 */
:-		vm_map_lock(&vm->vm_map);
:-		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
:-		    vm->vm_map.max_offset);
:-		vm_map_unlock(&vm->vm_map);
:+void
:+vmspace_exitfree(p)
:+	struct proc *p;
:+{
: 
:-		pmap_release(vmspace_pmap(vm));
:-		vm_map_destroy(&vm->vm_map);
:-		zfree(vmspace_zone, vm);
:-	}
:+	if (p == p->p_vmspace->vm_freer)
:+		vmspace_dofree(p->p_vmspace);
: }
: 
: /*
:@@ -2128,7 +2145,7 @@
: 
: 	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
: 	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
:-	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
:+	    (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
: 	new_map = &vm2->vm_map;	/* XXX */
: 	new_map->timestamp = 1;
: 
:Index: vm/vm_map.h
:===================================================================
:RCS file: /usr2/ncvs/src/sys/vm/vm_map.h,v
:retrieving revision 1.60
:diff -u -r1.60 vm_map.h
:--- vm/vm_map.h	2001/04/13 10:22:14	1.60
:+++ vm/vm_map.h	2001/04/29 23:26:50
:@@ -192,6 +192,8 @@
: 	caddr_t vm_daddr;	/* user virtual address of data XXX */
: 	caddr_t vm_maxsaddr;	/* user VA at max stack growth */
: 	caddr_t vm_minsaddr;	/* user VA at max stack growth */
:+#define	vm_endcopy vm_freer
:+	struct proc *vm_freer;	/* vm freed on whose behalf */
: };
: 
: /*
:Index: vm/vm_extern.h
:===================================================================
:RCS file: /usr2/ncvs/src/sys/vm/vm_extern.h,v
:retrieving revision 1.47
:diff -u -r1.47 vm_extern.h
:--- vm/vm_extern.h	2000/03/13 10:47:24	1.47
:+++ vm/vm_extern.h	2001/04/29 23:29:09
:@@ -90,6 +90,7 @@
: void vmspace_exec __P((struct proc *));
: void vmspace_unshare __P((struct proc *));
: void vmspace_free __P((struct vmspace *));
:+void vmspace_exitfree __P((struct proc *));
: void vnode_pager_setsize __P((struct vnode *, vm_ooffset_t));
: void vslock __P((caddr_t, u_int));
: void vsunlock __P((caddr_t, u_int));
:Index: alpha/alpha/vm_machdep.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/alpha/alpha/vm_machdep.c,v
:retrieving revision 1.47
:diff -u -r1.47 vm_machdep.c
:--- alpha/alpha/vm_machdep.c	2001/03/15 02:32:26	1.47
:+++ alpha/alpha/vm_machdep.c	2001/04/29 23:29:59
:@@ -273,7 +273,7 @@
: 	pmap_dispose_proc(p);
: 
: 	/* and clean-out the vmspace */
:-	vmspace_free(p->p_vmspace);
:+	vmspace_exitfree(p);
: }
: 
: /*
:Index: i386/i386/vm_machdep.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/i386/i386/vm_machdep.c,v
:retrieving revision 1.154
:diff -u -r1.154 vm_machdep.c
:--- i386/i386/vm_machdep.c	2001/03/07 03:20:14	1.154
:+++ i386/i386/vm_machdep.c	2001/04/29 23:30:06
:@@ -282,7 +282,7 @@
: 	pmap_dispose_proc(p);
: 
: 	/* and clean-out the vmspace */
:-	vmspace_free(p->p_vmspace);
:+	vmspace_exitfree(p);
: }
: 
: /*
:Index: ia64/ia64/vm_machdep.c
:===================================================================
:RCS file: /usr2/ncvs/src/sys/ia64/ia64/vm_machdep.c,v
:retrieving revision 1.16
:diff -u -r1.16 vm_machdep.c
:--- ia64/ia64/vm_machdep.c	2001/03/07 03:20:15	1.16
:+++ ia64/ia64/vm_machdep.c	2001/04/29 23:30:16
:@@ -324,7 +324,7 @@
: 	pmap_dispose_proc(p);
: 
: 	/* and clean-out the vmspace */
:-	vmspace_free(p->p_vmspace);
:+	vmspace_exitfree(p);
: }
: 
: /*
:
:
:-- 
: Brian Fundakowski Feldman           \  FreeBSD: The Power to Serve!  /
: green@FreeBSD.org                    `------------------------------'

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200105211812.f4LIC5t03635>