From owner-svn-src-stable-9@FreeBSD.ORG Mon Jan 14 10:58:56 2013 Return-Path: Delivered-To: svn-src-stable-9@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by hub.freebsd.org (Postfix) with ESMTP id C8ACDBE3; Mon, 14 Jan 2013 10:58:56 +0000 (UTC) (envelope-from zont@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id AB34830F; Mon, 14 Jan 2013 10:58:56 +0000 (UTC) Received: from svn.freebsd.org (svn.FreeBSD.org [8.8.178.70]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id r0EAwu7S044536; Mon, 14 Jan 2013 10:58:56 GMT (envelope-from zont@svn.freebsd.org) Received: (from zont@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id r0EAwtsA044527; Mon, 14 Jan 2013 10:58:55 GMT (envelope-from zont@svn.freebsd.org) Message-Id: <201301141058.r0EAwtsA044527@svn.freebsd.org> From: Andrey Zonov Date: Mon, 14 Jan 2013 10:58:55 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r245416 - in stable/9/sys: kern vm X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable-9@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for only the 9-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 14 Jan 2013 10:58:56 -0000 Author: zont Date: Mon Jan 14 10:58:55 2013 New Revision: 245416 URL: http://svnweb.freebsd.org/changeset/base/245416 Log: MFC r244384: - Fix locked memory accounting for maps with MAP_WIREFUTURE flag. - Add sysctl vm.old_mlock which may turn such accounting off. MFC r244385: - Add sysctl to allow unprivileged users to call mlock(2)-family system calls and turn it off for compatibility. - Do not allow to call them inside jail. Approved by: kib (mentor) Modified: stable/9/sys/kern/kern_priv.c stable/9/sys/vm/vm.h stable/9/sys/vm/vm_map.c stable/9/sys/vm/vm_mmap.c stable/9/sys/vm/vm_unix.c Directory Properties: stable/9/sys/ (props changed) Modified: stable/9/sys/kern/kern_priv.c ============================================================================== --- stable/9/sys/kern/kern_priv.c Mon Jan 14 10:58:20 2013 (r245415) +++ stable/9/sys/kern/kern_priv.c Mon Jan 14 10:58:55 2013 (r245416) @@ -59,6 +59,11 @@ SYSCTL_INT(_security_bsd, OID_AUTO, suse &suser_enabled, 0, "processes with uid 0 have privilege"); TUNABLE_INT("security.bsd.suser_enabled", &suser_enabled); +static int unprivileged_mlock = 1; +SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_mlock, CTLFLAG_RW|CTLFLAG_TUN, + &unprivileged_mlock, 0, "Allow non-root users to call mlock(2)"); +TUNABLE_INT("security.bsd.unprivileged_mlock", &unprivileged_mlock); + SDT_PROVIDER_DEFINE(priv); SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_ok, priv-ok, "int"); SDT_PROBE_DEFINE1(priv, kernel, priv_check, priv_err, priv-err, "int"); @@ -93,6 +98,19 @@ priv_check_cred(struct ucred *cred, int if (error) goto out; + if (unprivileged_mlock) { + /* + * Allow unprivileged users to call mlock(2)/munlock(2) and + * mlockall(2)/munlockall(2). + */ + switch (priv) { + case PRIV_VM_MLOCK: + case PRIV_VM_MUNLOCK: + error = 0; + goto out; + } + } + /* * Having determined if privilege is restricted by various policies, * now determine if privilege is granted. At this point, any policy Modified: stable/9/sys/vm/vm.h ============================================================================== --- stable/9/sys/vm/vm.h Mon Jan 14 10:58:20 2013 (r245415) +++ stable/9/sys/vm/vm.h Mon Jan 14 10:58:55 2013 (r245416) @@ -141,6 +141,8 @@ struct kva_md_info { extern struct kva_md_info kmi; extern void vm_ksubmap_init(struct kva_md_info *); +extern int old_mlock; + struct ucred; int swap_reserve(vm_ooffset_t incr); int swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred); Modified: stable/9/sys/vm/vm_map.c ============================================================================== --- stable/9/sys/vm/vm_map.c Mon Jan 14 10:58:20 2013 (r245415) +++ stable/9/sys/vm/vm_map.c Mon Jan 14 10:58:55 2013 (r245416) @@ -3247,7 +3247,7 @@ vm_map_stack(vm_map_t map, vm_offset_t a vm_offset_t bot, top; vm_size_t growsize, init_ssize; int orient, rv; - rlim_t vmemlim; + rlim_t lmemlim, vmemlim; /* * The stack orientation is piggybacked with the cow argument. @@ -3267,9 +3267,10 @@ vm_map_stack(vm_map_t map, vm_offset_t a growsize = sgrowsiz; init_ssize = (max_ssize < growsize) ? max_ssize : growsize; - PROC_LOCK(curthread->td_proc); - vmemlim = lim_cur(curthread->td_proc, RLIMIT_VMEM); - PROC_UNLOCK(curthread->td_proc); + PROC_LOCK(curproc); + lmemlim = lim_cur(curproc, RLIMIT_MEMLOCK); + vmemlim = lim_cur(curproc, RLIMIT_VMEM); + PROC_UNLOCK(curproc); vm_map_lock(map); @@ -3279,6 +3280,14 @@ vm_map_stack(vm_map_t map, vm_offset_t a return (KERN_NO_SPACE); } + if (!old_mlock && map->flags & MAP_WIREFUTURE) { + if (ptoa(vmspace_wired_count(curproc->p_vmspace)) + + init_ssize > lmemlim) { + vm_map_unlock(map); + return (KERN_NO_SPACE); + } + } + /* If we would blow our VMEM resource limit, no go */ if (map->size + init_ssize > vmemlim) { vm_map_unlock(map); @@ -3360,7 +3369,7 @@ vm_map_growstack(struct proc *p, vm_offs vm_offset_t end; vm_size_t growsize; size_t grow_amount, max_grow; - rlim_t stacklim, vmemlim; + rlim_t lmemlim, stacklim, vmemlim; int is_procstack, rv; struct ucred *cred; #ifdef notyet @@ -3372,6 +3381,7 @@ vm_map_growstack(struct proc *p, vm_offs Retry: PROC_LOCK(p); + lmemlim = lim_cur(p, RLIMIT_MEMLOCK); stacklim = lim_cur(p, RLIMIT_STACK); vmemlim = lim_cur(p, RLIMIT_VMEM); PROC_UNLOCK(p); @@ -3494,7 +3504,25 @@ Retry: if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit)) grow_amount = limit - ctob(vm->vm_ssize); #endif - + if (!old_mlock && map->flags & MAP_WIREFUTURE) { + if (ptoa(vmspace_wired_count(p->p_vmspace)) + grow_amount > + lmemlim) { + vm_map_unlock_read(map); + rv = KERN_NO_SPACE; + goto out; + } +#ifdef RACCT + PROC_LOCK(p); + if (racct_set(p, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(p->p_vmspace)) + grow_amount)) { + PROC_UNLOCK(p); + vm_map_unlock_read(map); + rv = KERN_NO_SPACE; + goto out; + } + PROC_UNLOCK(p); +#endif + } /* If we would blow our VMEM resource limit, no go */ if (map->size + grow_amount > vmemlim) { vm_map_unlock_read(map); @@ -3615,6 +3643,11 @@ out: PROC_LOCK(p); error = racct_set(p, RACCT_VMEM, map->size); KASSERT(error == 0, ("decreasing RACCT_VMEM failed")); + if (!old_mlock) { + error = racct_set(p, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(p->p_vmspace))); + KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed")); + } error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize)); KASSERT(error == 0, ("decreasing RACCT_STACK failed")); PROC_UNLOCK(p); Modified: stable/9/sys/vm/vm_mmap.c ============================================================================== --- stable/9/sys/vm/vm_mmap.c Mon Jan 14 10:58:20 2013 (r245415) +++ stable/9/sys/vm/vm_mmap.c Mon Jan 14 10:58:55 2013 (r245416) @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -87,6 +88,11 @@ __FBSDID("$FreeBSD$"); #include #endif +int old_mlock = 0; +SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RW | CTLFLAG_TUN, &old_mlock, 0, + "Do not apply RLIMIT_MEMLOCK on mlockall"); +TUNABLE_INT("vm.old_mlock", &old_mlock); + #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { int incr; @@ -1100,27 +1106,25 @@ sys_mlockall(td, uap) int error; map = &td->td_proc->p_vmspace->vm_map; - error = 0; + error = priv_check(td, PRIV_VM_MLOCK); + if (error) + return (error); if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) return (EINVAL); -#if 0 /* * If wiring all pages in the process would cause it to exceed * a hard resource limit, return ENOMEM. */ - PROC_LOCK(td->td_proc); - if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { + if (!old_mlock && uap->how & MCL_CURRENT) { + PROC_LOCK(td->td_proc); + if (map->size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { + PROC_UNLOCK(td->td_proc); + return (ENOMEM); + } PROC_UNLOCK(td->td_proc); - return (ENOMEM); } - PROC_UNLOCK(td->td_proc); -#else - error = priv_check(td, PRIV_VM_MLOCK); - if (error) - return (error); -#endif #ifdef RACCT PROC_LOCK(td->td_proc); error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size); @@ -1483,6 +1487,24 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, PROC_LOCK(td->td_proc); if (td->td_proc->p_vmspace->vm_map.size + size > lim_cur(td->td_proc, RLIMIT_VMEM)) { + if (!old_mlock && map->flags & MAP_WIREFUTURE) { + if (ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) + + size > lim_cur(td->td_proc, RLIMIT_MEMLOCK)) { + racct_set_force(td->td_proc, RACCT_VMEM, + map->size); + PROC_UNLOCK(td->td_proc); + return (ENOMEM); + } + error = racct_set(td->td_proc, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) + + size); + if (error != 0) { + racct_set_force(td->td_proc, RACCT_VMEM, + map->size); + PROC_UNLOCK(td->td_proc); + return (error); + } + } PROC_UNLOCK(td->td_proc); return (ENOMEM); } Modified: stable/9/sys/vm/vm_unix.c ============================================================================== --- stable/9/sys/vm/vm_unix.c Mon Jan 14 10:58:20 2013 (r245415) +++ stable/9/sys/vm/vm_unix.c Mon Jan 14 10:58:55 2013 (r245416) @@ -77,13 +77,14 @@ sys_obreak(td, uap) { struct vmspace *vm = td->td_proc->p_vmspace; vm_offset_t new, old, base; - rlim_t datalim, vmemlim; + rlim_t datalim, lmemlim, vmemlim; int prot, rv; int error = 0; boolean_t do_map_wirefuture; PROC_LOCK(td->td_proc); datalim = lim_cur(td->td_proc, RLIMIT_DATA); + lmemlim = lim_cur(td->td_proc, RLIMIT_MEMLOCK); vmemlim = lim_cur(td->td_proc, RLIMIT_VMEM); PROC_UNLOCK(td->td_proc); @@ -116,6 +117,13 @@ sys_obreak(td, uap) goto done; } if (new > old) { + if (!old_mlock && vm->vm_map.flags & MAP_WIREFUTURE) { + if (ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) + + (new - old) > lmemlim) { + error = ENOMEM; + goto done; + } + } if (vm->vm_map.size + (new - old) > vmemlim) { error = ENOMEM; goto done; @@ -136,6 +144,20 @@ sys_obreak(td, uap) error = ENOMEM; goto done; } + if (!old_mlock && vm->vm_map.flags & MAP_WIREFUTURE) { + error = racct_set(td->td_proc, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(td->td_proc->p_vmspace)) + + (new - old)); + if (error != 0) { + racct_set_force(td->td_proc, RACCT_DATA, + old - base); + racct_set_force(td->td_proc, RACCT_VMEM, + vm->vm_map.size); + PROC_UNLOCK(td->td_proc); + error = ENOMEM; + goto done; + } + } PROC_UNLOCK(td->td_proc); #endif prot = VM_PROT_RW; @@ -152,6 +174,11 @@ sys_obreak(td, uap) PROC_LOCK(td->td_proc); racct_set_force(td->td_proc, RACCT_DATA, old - base); racct_set_force(td->td_proc, RACCT_VMEM, vm->vm_map.size); + if (!old_mlock && vm->vm_map.flags & MAP_WIREFUTURE) { + racct_set_force(td->td_proc, RACCT_MEMLOCK, + ptoa(vmspace_wired_count( + td->td_proc->p_vmspace))); + } PROC_UNLOCK(td->td_proc); #endif error = ENOMEM; @@ -183,6 +210,10 @@ sys_obreak(td, uap) PROC_LOCK(td->td_proc); racct_set_force(td->td_proc, RACCT_DATA, new - base); racct_set_force(td->td_proc, RACCT_VMEM, vm->vm_map.size); + if (!old_mlock && vm->vm_map.flags & MAP_WIREFUTURE) { + racct_set_force(td->td_proc, RACCT_MEMLOCK, + ptoa(vmspace_wired_count(td->td_proc->p_vmspace))); + } PROC_UNLOCK(td->td_proc); #endif }