Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 16 Aug 2013 21:13:55 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r254430 - in head: lib/libc/sys sys/sys sys/vm usr.bin/kdump usr.bin/truss
Message-ID:  <201308162113.r7GLDtYC062588@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Fri Aug 16 21:13:55 2013
New Revision: 254430
URL: http://svnweb.freebsd.org/changeset/base/254430

Log:
  Add new mmap(2) flags to permit applications to request specific virtual
  address alignment of mappings.
  - MAP_ALIGNED(n) requests a mapping aligned on a boundary of (1 << n).
    Requests fail with EINVAL if n is greater than or equal to the number
    of bits in a pointer, or if the requested alignment (1 << n) is smaller
    than a page.  This matches the API provided by NetBSD.
  - MAP_ALIGNED_SUPER is a special case of MAP_ALIGNED.  It can be used
    to optimize the chances of using large pages.  By default it will align
    the mapping on a large page boundary (the system is free to choose any
    large page size to align to that seems best for the mapping request).
    However, if the object being mapped is already using large pages, then
    it will align the virtual mapping to match the existing large pages in
    the object instead.
  - Internally, VMFS_ALIGNED_SPACE is now renamed to VMFS_SUPER_SPACE, and
    VMFS_ALIGNED_SPACE(n) is repurposed for specifying a specific alignment.
    MAP_ALIGNED(n) maps to using VMFS_ALIGNED_SPACE(n), while
    MAP_ALIGNED_SUPER maps to VMFS_SUPER_SPACE.
  - mmap() of a device object now uses VMFS_OPTIMAL_SPACE rather than
    explicitly using VMFS_SUPER_SPACE.  All device objects are forced to
    use a specific color on creation, so VMFS_OPTIMAL_SPACE is effectively
    equivalent.
  
  Reviewed by:	alc
  MFC after:	1 month

Modified:
  head/lib/libc/sys/mmap.2
  head/sys/sys/mman.h
  head/sys/vm/vm_init.c
  head/sys/vm/vm_kern.c
  head/sys/vm/vm_map.c
  head/sys/vm/vm_map.h
  head/sys/vm/vm_mmap.c
  head/usr.bin/kdump/mksubr
  head/usr.bin/truss/syscalls.c

Modified: head/lib/libc/sys/mmap.2
==============================================================================
--- head/lib/libc/sys/mmap.2	Fri Aug 16 21:04:58 2013	(r254429)
+++ head/lib/libc/sys/mmap.2	Fri Aug 16 21:13:55 2013	(r254430)
@@ -28,7 +28,7 @@
 .\"	@(#)mmap.2	8.4 (Berkeley) 5/11/95
 .\" $FreeBSD$
 .\"
-.Dd March 18, 2012
+.Dd August 16, 2013
 .Dt MMAP 2
 .Os
 .Sh NAME
@@ -97,7 +97,30 @@ Sharing, mapping type and options are sp
 argument by
 .Em or Ns 'ing
 the following values:
-.Bl -tag -width MAP_HASSEMAPHORE
+.Bl -tag -width MAP_PREFAULT_READ
+.It Dv MAP_ALIGNED Ns Pq Fa n
+Align the region on a requested boundary.
+If a suitable region cannot be found,
+.Fn mmap
+will fail.
+The
+.Fa n
+argument specifies the binary logarithm of the desired alignment.
+.It Dv MAP_ALIGNED_SUPER
+Align the region to maximize the potential use of large
+.Pq Dq super
+pages.
+If a suitable region cannot be found,
+.Fn mmap
+will fail.
+The system will choose a suitable page size based on the size of
+mapping.
+The page size used as well as the alignment of the region may both be
+affected by properties of the file being mapped.
+In particular,
+the physical address of existing pages of a file may require a specific
+alignment.
+The region is not guaranteed to be aligned on any specific boundary.
 .It Dv MAP_ANON
 Map anonymous memory not associated with any specific file.
 The file descriptor used for creating
@@ -274,6 +297,25 @@ Although this implementation does not im
 the
 .Fa offset
 argument, a portable program must only use page-aligned values.
+.Pp
+Large page mappings require that the pages backing an object be
+aligned in matching blocks in both the virtual address space and RAM.
+The system will automatically attempt to use large page mappings when
+mapping an object that is already backed by large pages in RAM by
+aligning the mapping request in the virtual address space to match the
+alignment of the large physical pages.
+The system may also use large page mappings when mapping portions of an
+object that are not yet backed by pages in RAM.
+The
+.Dv MAP_ALIGNED_SUPER
+flag is an optimization that will align the mapping request to the
+size of a large page similar to
+.Dv MAP_ALIGNED ,
+except that the system will override this alignment if an object already
+uses large pages so that the mapping will be consistent with the existing
+large pages.
+This flag is mostly useful for maximizing the use of large pages on the
+first mapping of objects that do not yet have pages present in RAM.
 .Sh RETURN VALUES
 Upon successful completion,
 .Fn mmap
@@ -325,6 +367,10 @@ The
 argument
 was equal to zero.
 .It Bq Er EINVAL
+.Dv MAP_ALIGNED
+was specified and the desired alignment was either larger than the
+virtual address size of the machine or smaller than a page.
+.It Bq Er EINVAL
 .Dv MAP_ANON
 was specified and the
 .Fa fd
@@ -356,7 +402,8 @@ was specified and insufficient memory wa
 .Xr msync 2 ,
 .Xr munlock 2 ,
 .Xr munmap 2 ,
-.Xr getpagesize 3
+.Xr getpagesize 3 ,
+.Xr getpagesizes 3
 .Sh BUGS
 The
 .Fa len

Modified: head/sys/sys/mman.h
==============================================================================
--- head/sys/sys/mman.h	Fri Aug 16 21:04:58 2013	(r254429)
+++ head/sys/sys/mman.h	Fri Aug 16 21:13:55 2013	(r254430)
@@ -91,6 +91,17 @@
  */
 #define	MAP_NOCORE	 0x00020000 /* dont include these pages in a coredump */
 #define	MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */
+
+/*
+ * Request specific alignment (n == log2 of the desired alignment).
+ *
+ * MAP_ALIGNED_SUPER requests optimal superpage alignment, but does
+ * not enforce a specific alignment.
+ */
+#define	MAP_ALIGNED(n)	 ((n) << MAP_ALIGNMENT_SHIFT)
+#define	MAP_ALIGNMENT_SHIFT	24
+#define	MAP_ALIGNMENT_MASK	MAP_ALIGNED(0xff)
+#define	MAP_ALIGNED_SUPER	MAP_ALIGNED(1) /* align on a superpage */
 #endif /* __BSD_VISIBLE */
 
 #if __POSIX_VISIBLE >= 199309

Modified: head/sys/vm/vm_init.c
==============================================================================
--- head/sys/vm/vm_init.c	Fri Aug 16 21:04:58 2013	(r254429)
+++ head/sys/vm/vm_init.c	Fri Aug 16 21:13:55 2013	(r254430)
@@ -112,7 +112,7 @@ kva_import(void *unused, vmem_size_t siz
  
 	addr = vm_map_min(kernel_map);
 	result = vm_map_find(kernel_map, NULL, 0, &addr, size,
-	    VMFS_ALIGNED_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
+	    VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
 	if (result != KERN_SUCCESS)
                 return (ENOMEM);
 

Modified: head/sys/vm/vm_kern.c
==============================================================================
--- head/sys/vm/vm_kern.c	Fri Aug 16 21:04:58 2013	(r254429)
+++ head/sys/vm/vm_kern.c	Fri Aug 16 21:13:55 2013	(r254430)
@@ -286,7 +286,7 @@ kmem_suballoc(vm_map_t parent, vm_offset
 
 	*min = vm_map_min(parent);
 	ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ?
-	    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
+	    VMFS_SUPER_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
 	    MAP_ACC_NO_CHARGE);
 	if (ret != KERN_SUCCESS)
 		panic("kmem_suballoc: bad status return of %d", ret);

Modified: head/sys/vm/vm_map.c
==============================================================================
--- head/sys/vm/vm_map.c	Fri Aug 16 21:04:58 2013	(r254429)
+++ head/sys/vm/vm_map.c	Fri Aug 16 21:13:55 2013	(r254430)
@@ -1434,12 +1434,17 @@ vm_map_find(vm_map_t map, vm_object_t ob
 	    vm_size_t length, int find_space, vm_prot_t prot,
 	    vm_prot_t max, int cow)
 {
-	vm_offset_t start, initial_addr;
+	vm_offset_t alignment, initial_addr, start;
 	int result;
 
 	if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL ||
 	    (object->flags & OBJ_COLORED) == 0))
-			find_space = VMFS_ANY_SPACE;
+		find_space = VMFS_ANY_SPACE;
+	if (find_space >> 8 != 0) {
+		KASSERT((find_space & 0xff) == 0, ("bad VMFS flags"));
+		alignment = (vm_offset_t)1 << (find_space >> 8);
+	} else
+		alignment = 0;
 	initial_addr = *addr;
 again:
 	start = initial_addr;
@@ -1455,12 +1460,18 @@ again:
 				return (KERN_NO_SPACE);
 			}
 			switch (find_space) {
-			case VMFS_ALIGNED_SPACE:
+			case VMFS_SUPER_SPACE:
 			case VMFS_OPTIMAL_SPACE:
 				pmap_align_superpage(object, offset, addr,
 				    length);
 				break;
+			case VMFS_ANY_SPACE:
+				break;
 			default:
+				if ((*addr & (alignment - 1)) != 0) {
+					*addr &= ~(alignment - 1);
+					*addr += alignment;
+				}
 				break;
 			}
 
@@ -1468,8 +1479,8 @@ again:
 		}
 		result = vm_map_insert(map, object, offset, start, start +
 		    length, prot, max, cow);
-	} while (result == KERN_NO_SPACE && (find_space == VMFS_ALIGNED_SPACE ||
-	    find_space == VMFS_OPTIMAL_SPACE));
+	} while (result == KERN_NO_SPACE && find_space != VMFS_NO_SPACE &&
+	    find_space != VMFS_ANY_SPACE);
 	vm_map_unlock(map);
 	return (result);
 }

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h	Fri Aug 16 21:04:58 2013	(r254429)
+++ head/sys/vm/vm_map.h	Fri Aug 16 21:13:55 2013	(r254430)
@@ -339,12 +339,16 @@ long vmspace_resident_count(struct vmspa
 #define	VM_FAULT_READ_AHEAD_MAX		min(atop(MAXPHYS) - 1, UINT8_MAX)
 
 /*
- * The following "find_space" options are supported by vm_map_find()
+ * The following "find_space" options are supported by vm_map_find().
+ *
+ * For VMFS_ALIGNED_SPACE, the desired alignment is specified to
+ * the macro argument as log base 2 of the desired alignment.
  */
 #define	VMFS_NO_SPACE		0	/* don't find; use the given range */
 #define	VMFS_ANY_SPACE		1	/* find a range with any alignment */
 #define	VMFS_OPTIMAL_SPACE	2	/* find a range with optimal alignment*/
-#define	VMFS_ALIGNED_SPACE	3	/* find a superpage-aligned range */
+#define	VMFS_SUPER_SPACE	3	/* find a superpage-aligned range */
+#define	VMFS_ALIGNED_SPACE(x)	((x) << 8) /* find a range with fixed alignment */
 
 /*
  * vm_map_wire and vm_map_unwire option flags

Modified: head/sys/vm/vm_mmap.c
==============================================================================
--- head/sys/vm/vm_mmap.c	Fri Aug 16 21:04:58 2013	(r254429)
+++ head/sys/vm/vm_mmap.c	Fri Aug 16 21:13:55 2013	(r254430)
@@ -201,7 +201,7 @@ sys_mmap(td, uap)
 	vm_prot_t cap_maxprot, prot, maxprot;
 	void *handle;
 	objtype_t handle_type;
-	int flags, error;
+	int align, error, flags;
 	off_t pos;
 	struct vmspace *vms = td->td_proc->p_vmspace;
 	cap_rights_t rights;
@@ -251,6 +251,13 @@ sys_mmap(td, uap)
 	size += pageoff;			/* low end... */
 	size = (vm_size_t) round_page(size);	/* hi end */
 
+	/* Ensure alignment is at least a page and fits in a pointer. */
+	align = flags & MAP_ALIGNMENT_MASK;
+	if (align != 0 && align != MAP_ALIGNED_SUPER &&
+	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
+	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
+		return (EINVAL);
+
 	/*
 	 * Check for illegal addresses.  Watch out for address wrap... Note
 	 * that VM_*_ADDRESS are not constants due to casts (argh).
@@ -1490,7 +1497,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
 	boolean_t fitit;
 	vm_object_t object = NULL;
 	struct thread *td = curthread;
-	int docow, error, rv;
+	int docow, error, findspace, rv;
 	boolean_t writecounted;
 
 	if (size == 0)
@@ -1605,12 +1612,17 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
 	if (flags & MAP_STACK)
 		rv = vm_map_stack(map, *addr, size, prot, maxprot,
 		    docow | MAP_STACK_GROWS_DOWN);
-	else if (fitit)
-		rv = vm_map_find(map, object, foff, addr, size,
-		    object != NULL && object->type == OBJT_DEVICE ?
-		    VMFS_ALIGNED_SPACE : VMFS_OPTIMAL_SPACE, prot, maxprot,
-		    docow);
-	else
+	else if (fitit) {
+		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
+			findspace = VMFS_SUPER_SPACE;
+		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
+			findspace = VMFS_ALIGNED_SPACE(flags >>
+			    MAP_ALIGNMENT_SHIFT);
+		else
+			findspace = VMFS_OPTIMAL_SPACE;
+		rv = vm_map_find(map, object, foff, addr, size, findspace,
+		    prot, maxprot, docow);
+	} else
 		rv = vm_map_fixed(map, object, foff, *addr, size,
 				 prot, maxprot, docow);
 

Modified: head/usr.bin/kdump/mksubr
==============================================================================
--- head/usr.bin/kdump/mksubr	Fri Aug 16 21:04:58 2013	(r254429)
+++ head/usr.bin/kdump/mksubr	Fri Aug 16 21:13:55 2013	(r254430)
@@ -385,7 +385,6 @@ auto_switch_type "lio_listioname"      "
 auto_switch_type "madvisebehavname"    "_?MADV_[A-Z]+[[:space:]]+[0-9]+"              "sys/mman.h"
 auto_switch_type "minheritname"        "INHERIT_[A-Z]+[[:space:]]+[0-9]+"             "sys/mman.h"
 auto_or_type     "mlockallname"        "MCL_[A-Z]+[[:space:]]+0x[0-9]+"               "sys/mman.h"
-auto_or_type     "mmapflagsname"       "MAP_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+"         "sys/mman.h"
 auto_or_type     "mmapprotname"        "PROT_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+"        "sys/mman.h"
 auto_or_type     "modename"            "S_[A-Z]+[[:space:]]+[0-6]{7}"                 "sys/stat.h"
 auto_or_type     "mountflagsname"      "MNT_[A-Z]+[[:space:]]+0x[0-9]+"               "sys/mount.h"
@@ -469,6 +468,40 @@ cat <<_EOF_
 /*
  * AUTO - Special
  *
+ * The MAP_ALIGNED flag requires special handling.
+ */
+void
+mmapflagsname(int flags)
+{
+	int align;
+	int or = 0;
+	printf("%#x<", flags);
+_EOF_
+egrep "^#[[:space:]]*define[[:space:]]+MAP_[A-Z_]+[[:space:]]+0x[0-9A-Fa-f]+[[:space:]]*" \
+	$include_dir/sys/mman.h | grep -v MAP_ALIGNED | \
+	awk '{ for (i = 1; i <= NF; i++) \
+		if ($i ~ /define/) \
+			break; \
+		++i; \
+		printf "\tif (!((flags > 0) ^ ((%s) > 0)))\n\t\tif_print_or(flags, %s, or);\n", $i, $i }'
+cat <<_EOF_
+	align = flags & MAP_ALIGNMENT_MASK;
+	if (align != 0) {
+		if (align == MAP_ALIGNED_SUPER)
+			print_or("MAP_ALIGNED_SUPER", or);
+		else {
+			print_or("MAP_ALIGNED", or);
+			printf("(%d)", align >> MAP_ALIGNMENT_SHIFT);
+		}
+	}
+	printf(">");
+	if (or == 0)
+		printf("<invalid>%d", flags);
+}
+
+/*
+ * AUTO - Special
+ *
  * The only reason this is not fully automated is due to the
  * grep -v RTP_PRIO statement. A better egrep line should
  * make this capable of being a auto_switch_type() function.

Modified: head/usr.bin/truss/syscalls.c
==============================================================================
--- head/usr.bin/truss/syscalls.c	Fri Aug 16 21:04:58 2013	(r254429)
+++ head/usr.bin/truss/syscalls.c	Fri Aug 16 21:13:55 2013	(r254430)
@@ -296,7 +296,7 @@ static struct xlat mmap_flags[] = {
 	X(MAP_SHARED) X(MAP_PRIVATE) X(MAP_FIXED) X(MAP_RENAME)
 	X(MAP_NORESERVE) X(MAP_RESERVED0080) X(MAP_RESERVED0100)
 	X(MAP_HASSEMAPHORE) X(MAP_STACK) X(MAP_NOSYNC) X(MAP_ANON)
-	X(MAP_NOCORE) XEND
+	X(MAP_NOCORE) X(MAP_PREFAULT_READ) XEND
 };
 
 static struct xlat mprot_flags[] = {
@@ -893,9 +893,41 @@ print_arg(struct syscall_args *sc, unsig
 	case Mprot:
 		tmp = strdup(xlookup_bits(mprot_flags, args[sc->offset]));
 		break;
-	case Mmapflags:
-		tmp = strdup(xlookup_bits(mmap_flags, args[sc->offset]));
+	case Mmapflags: {
+		const char *base, *alignstr;
+		int align, flags;
+
+		/*
+		 * MAP_ALIGNED can't be handled by xlookup_bits(), so
+		 * generate that string manually and prepend it to the
+		 * string from xlookup_bits().  Have to be careful to
+		 * avoid outputting MAP_ALIGNED|0 if MAP_ALIGNED is
+		 * the only flag.
+		 */
+		flags = args[sc->offset] & ~MAP_ALIGNMENT_MASK;
+		align = args[sc->offset] & MAP_ALIGNMENT_MASK;
+		if (align != 0) {
+			if (align == MAP_ALIGNED_SUPER)
+				alignstr = strdup("MAP_ALIGNED_SUPER");
+			else
+				asprintf(&alignstr, "MAP_ALIGNED(%d)",
+				    align >> MAP_ALIGNMENT_SHIFT);
+			if (flags == 0) {
+				tmp = alignstr;
+				break;
+			}
+		} else
+			alignstr = NULL;
+		base = strdup(xlookup_bits(mmap_flags, flags));
+		if (alignstr == NULL) {
+			tmp = base;
+			break;
+		}
+		asprintf(&tmp, "%s|%s", alignstr, base);
+		free(alignstr);
+		free(base);
 		break;
+	}
 	case Whence:
 		tmp = strdup(xlookup(whence_arg, args[sc->offset]));
 		break;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201308162113.r7GLDtYC062588>