Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 5 Jan 2017 01:44:12 +0000 (UTC)
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r311346 - in head/sys: kern sys vm
Message-ID:  <201701050144.v051iCso008577@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: markj
Date: Thu Jan  5 01:44:12 2017
New Revision: 311346
URL: https://svnweb.freebsd.org/changeset/base/311346

Log:
  Add a small allocator for exec_map entries.
  
  Upon each execve, we allocate a KVA range for use in copying data to the
  new image. Pages must be faulted into the range, and when the range is
  freed, the backing pages are freed and their mappings are destroyed. This
  is a lot of needless overhead, and the exec_map management becomes a
  bottleneck when many CPUs are executing execve concurrently. Moreover, the
  number of available ranges is fixed at 16, which is insufficient on large
  systems and potentially excessive on 32-bit systems.
  
  The new allocator reduces overhead by making exec_map allocations
  persistent. When a range is freed, pages backing the range are marked clean
  and made easy to reclaim. With this change, the exec_map is sized based on
  the number of CPUs.
  
  Reviewed by:	kib
  MFC after:	1 month
  Differential Revision:	https://reviews.freebsd.org/D8921

Modified:
  head/sys/kern/kern_exec.c
  head/sys/sys/imgact.h
  head/sys/vm/vm_init.c
  head/sys/vm/vm_kern.c
  head/sys/vm/vm_kern.h

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c	Thu Jan  5 01:28:08 2017	(r311345)
+++ head/sys/kern/kern_exec.c	Thu Jan  5 01:44:12 2017	(r311346)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
+#include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
@@ -59,6 +60,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sf_buf.h>
 #include <sys/shm.h>
 #include <sys/signalvar.h>
+#include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
@@ -1315,17 +1317,80 @@ err_exit:
 	return (error);
 }
 
+struct exec_args_kva {
+	vm_offset_t addr;
+	SLIST_ENTRY(exec_args_kva) next;
+};
+
+static DPCPU_DEFINE(struct exec_args_kva *, exec_args_kva);
+
+static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist;
+static struct mtx exec_args_kva_mtx;
+
+static void
+exec_prealloc_args_kva(void *arg __unused)
+{
+	struct exec_args_kva *argkva;
+	u_int i;
+
+	SLIST_INIT(&exec_args_kva_freelist);
+	mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF);
+	for (i = 0; i < exec_map_entries; i++) {
+		argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK);
+		argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size);
+		SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
+	}
+}
+SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL);
+
+static vm_offset_t
+exec_alloc_args_kva(void **cookie)
+{
+	struct exec_args_kva *argkva;
+
+	argkva = (void *)atomic_readandclear_ptr(
+	    (uintptr_t *)DPCPU_PTR(exec_args_kva));
+	if (argkva == NULL) {
+		mtx_lock(&exec_args_kva_mtx);
+		while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL)
+			(void)mtx_sleep(&exec_args_kva_freelist,
+			    &exec_args_kva_mtx, 0, "execkva", 0);
+		SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next);
+		mtx_unlock(&exec_args_kva_mtx);
+	}
+	*(struct exec_args_kva **)cookie = argkva;
+	return (argkva->addr);
+}
+
+static void
+exec_free_args_kva(void *cookie)
+{
+	struct exec_args_kva *argkva;
+	vm_offset_t base;
+
+	argkva = cookie;
+	base = argkva->addr;
+
+	vm_map_madvise(exec_map, base, base + exec_map_entry_size, MADV_FREE);
+	if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva),
+	    (uintptr_t)NULL, (uintptr_t)argkva)) {
+		mtx_lock(&exec_args_kva_mtx);
+		SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
+		wakeup_one(&exec_args_kva_freelist);
+		mtx_unlock(&exec_args_kva_mtx);
+	}
+}
+
 /*
  * Allocate temporary demand-paged, zero-filled memory for the file name,
- * argument, and environment strings.  Returns zero if the allocation succeeds
- * and ENOMEM otherwise.
+ * argument, and environment strings.
  */
 int
 exec_alloc_args(struct image_args *args)
 {
 
-	args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
-	return (args->buf != NULL ? 0 : ENOMEM);
+	args->buf = (char *)exec_alloc_args_kva(&args->bufkva);
+	return (0);
 }
 
 void
@@ -1333,8 +1398,7 @@ exec_free_args(struct image_args *args)
 {
 
 	if (args->buf != NULL) {
-		kmap_free_wakeup(exec_map, (vm_offset_t)args->buf,
-		    PATH_MAX + ARG_MAX);
+		exec_free_args_kva(args->bufkva);
 		args->buf = NULL;
 	}
 	if (args->fname_buf != NULL) {

Modified: head/sys/sys/imgact.h
==============================================================================
--- head/sys/sys/imgact.h	Thu Jan  5 01:28:08 2017	(r311345)
+++ head/sys/sys/imgact.h	Thu Jan  5 01:44:12 2017	(r311346)
@@ -42,6 +42,7 @@ struct ucred;
 
 struct image_args {
 	char *buf;		/* pointer to string buffer */
+	void *bufkva;		/* cookie for string buffer KVA */
 	char *begin_argv;	/* beginning of argv in buf */
 	char *begin_envv;	/* beginning of envv in buf */
 	char *endp;		/* current `end' pointer of arg & env strings */

Modified: head/sys/vm/vm_init.c
==============================================================================
--- head/sys/vm/vm_init.c	Thu Jan  5 01:28:08 2017	(r311345)
+++ head/sys/vm/vm_init.c	Thu Jan  5 01:44:12 2017	(r311346)
@@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/libkern.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
@@ -91,10 +92,6 @@ __FBSDID("$FreeBSD$");
 
 long physmem;
 
-static int exec_map_entries = 16;
-SYSCTL_INT(_vm, OID_AUTO, exec_map_entries, CTLFLAG_RDTUN, &exec_map_entries, 0,
-    "Maximum number of simultaneous execs");
-
 /*
  * System initialization
  */
@@ -271,10 +268,19 @@ again:
 		panic("Clean map calculation incorrect");
 
 	/*
- 	 * Allocate the pageable submaps.
+	 * Allocate the pageable submaps.  We may cache an exec map entry per
+	 * CPU, so we therefore need to reserve space for at least ncpu+1
+	 * entries to avoid deadlock.  The exec map is also used by some image
+	 * activators, so we leave a fixed number of pages for their use.
 	 */
+#ifdef __LP64__
+	exec_map_entries = 8 * mp_ncpus;
+#else
+	exec_map_entries = min(8 * mp_ncpus, 2 * mp_ncpus + 4);
+#endif
+	exec_map_entry_size = round_page(PATH_MAX + ARG_MAX);
 	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
-	    exec_map_entries * round_page(PATH_MAX + ARG_MAX), FALSE);
+	    exec_map_entries * exec_map_entry_size + 64 * PAGE_SIZE, FALSE);
 	pipe_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, maxpipekva,
 	    FALSE);
 }

Modified: head/sys/vm/vm_kern.c
==============================================================================
--- head/sys/vm/vm_kern.c	Thu Jan  5 01:28:08 2017	(r311345)
+++ head/sys/vm/vm_kern.c	Thu Jan  5 01:44:12 2017	(r311346)
@@ -97,6 +97,9 @@ CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) 
 /* NB: Used by kernel debuggers. */
 const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS;
 
+u_int exec_map_entry_size;
+u_int exec_map_entries;
+
 SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD,
     SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address");
 

Modified: head/sys/vm/vm_kern.h
==============================================================================
--- head/sys/vm/vm_kern.h	Thu Jan  5 01:28:08 2017	(r311345)
+++ head/sys/vm/vm_kern.h	Thu Jan  5 01:44:12 2017	(r311346)
@@ -61,7 +61,7 @@
  */
 
 #ifndef _VM_VM_KERN_H_
-#define _VM_VM_KERN_H_ 1
+#define	_VM_VM_KERN_H_
 
 /* Kernel memory management definitions. */
 extern vm_map_t kernel_map;
@@ -74,5 +74,7 @@ extern struct vmem *transient_arena;
 extern struct vmem *memguard_arena;
 extern vm_offset_t swapbkva;
 extern u_long vm_kmem_size;
+extern u_int exec_map_entries;
+extern u_int exec_map_entry_size;
 
-#endif				/* _VM_VM_KERN_H_ */
+#endif /* _VM_VM_KERN_H_ */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201701050144.v051iCso008577>