Date:      Mon, 8 Apr 2013 19:10:45 +0000 (UTC)
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r249264 - in head: share/man/man9 sys/vm
Message-ID:  <201304081910.r38JAj9t087353@svn.freebsd.org>

Author: glebius
Date: Mon Apr  8 19:10:45 2013
New Revision: 249264
URL: http://svnweb.freebsd.org/changeset/base/249264

Log:
  Merge from projects/counters: UMA_ZONE_PCPU zones.
  
    These zones have slab size == sizeof(struct pcpu), but request enough
  pages from VM to fit (uk_slabsize * mp_ncpus). An item allocated from
  such a zone has a separate twin for each CPU in the system, and these
  twins are placed sizeof(struct pcpu) apart from each other. This magic
  distance value will allow us to make some optimizations later.
  
    To address a CPU's private item, simple pointer arithmetic is used:
  
    item = (type *)((char *)base + sizeof(struct pcpu) * curcpu)
  
    This arithmetic is available as the zpcpu_get() macro in pcpu.h.
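  
    For illustration, the macro amounts to roughly the following (its
  authoritative definition lives in pcpu.h):
  
    #define zpcpu_get(base)                                             \
            ((__typeof(base))((char *)(base) + sizeof(struct pcpu) * curcpu))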
  
    To introduce non-page-size slabs, a new field, uk_slabsize, has been
  added to uma_keg. This shifted some frequently used fields of uma_keg to
  the fourth cache line on amd64. To mitigate this pessimization, the
  uma_keg fields were rearranged slightly, and the least frequently used
  ones, uk_name and uk_link, were moved down to the fourth cache line. All
  other frequently dereferenced fields now fit into the first three cache
  lines.
  
  Sponsored by:	Nginx, Inc.

Modified:
  head/share/man/man9/zone.9
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/share/man/man9/zone.9
==============================================================================
--- head/share/man/man9/zone.9	Mon Apr  8 19:03:01 2013	(r249263)
+++ head/share/man/man9/zone.9	Mon Apr  8 19:10:45 2013	(r249264)
@@ -153,6 +153,23 @@ See
 .Fn uma_find_refcnt .
 .It Dv UMA_ZONE_NODUMP
 Pages belonging to the zone will not be included into mini-dumps.
+.It Dv UMA_ZONE_PCPU
+An allocation from the zone has
+.Va mp_ncpus
+shadow copies, which are privately assigned to CPUs.
+A CPU can address its private copy using the base allocation address plus
+the product of the current CPU id and
+.Fn sizeof "struct pcpu" :
+.Bd -literal -offset indent
+foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
+ ...
+foo_base = uma_zalloc(foo_zone, ...);
+ ...
+critical_enter();
+foo_pcpu = (foo_t *)zpcpu_get(foo_base);
+/* do something with foo_pcpu */
+critical_exit();
+.Ed
 .It Dv UMA_ZONE_OFFPAGE
 By default book-keeping of items within a slab is done in the slab page itself.
 This flag explicitly tells subsystem that book-keeping structure should be

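As a fuller illustration of the pattern documented in the man page above,
a hypothetical per-CPU 64-bit counter might look as follows. This is a
sketch only: the foo64_* names are invented, zeroing of each CPU's twin is
glossed over, and only uma_zcreate(), uma_zalloc(), zpcpu_get() and the
critical_enter()/critical_exit() pairing come from the interfaces shown in
this commit.

    /* Hypothetical sketch; not part of the commit. */
    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/malloc.h>
    #include <sys/pcpu.h>
    #include <vm/uma.h>

    static uma_zone_t foo64_zone;
    static uint64_t *foo64_base;

    static void
    foo64_init(void)
    {

            /* Item size must stay below sizeof(struct pcpu). */
            foo64_zone = uma_zcreate("foo64", sizeof(uint64_t),
                NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
            foo64_base = uma_zalloc(foo64_zone, M_WAITOK);
    }

    static void
    foo64_inc(void)
    {
            uint64_t *p;

            /* Stay on one CPU while touching its private twin. */
            critical_enter();
            p = zpcpu_get(foo64_base);
            (*p)++;
            critical_exit();
    }
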
Modified: head/sys/vm/uma.h
==============================================================================
--- head/sys/vm/uma.h	Mon Apr  8 19:03:01 2013	(r249263)
+++ head/sys/vm/uma.h	Mon Apr  8 19:10:45 2013	(r249264)
@@ -252,6 +252,10 @@ int uma_zsecond_add(uma_zone_t zone, uma
 					 * Zone's pages will not be included in
 					 * mini-dumps.
 					 */
+#define	UMA_ZONE_PCPU		0x8000	/*
+					 * Allocates mp_ncpus slabs sized to
+					 * sizeof(struct pcpu).
+					 */
 
 /*
  * These flags are shared between the keg and zone.  In zones wishing to add
@@ -260,7 +264,7 @@ int uma_zsecond_add(uma_zone_t zone, uma
  */
 #define	UMA_ZONE_INHERIT						\
     (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE |		\
-    UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
+    UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
 
 /* Definitions for align */
 #define UMA_ALIGN_PTR	(sizeof(void *) - 1)	/* Alignment fit for ptr */

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c	Mon Apr  8 19:03:01 2013	(r249263)
+++ head/sys/vm/uma_core.c	Mon Apr  8 19:10:45 2013	(r249264)
@@ -765,9 +765,9 @@ finished:
 			    SKIP_NONE, ZFREE_STATFREE);
 #ifdef UMA_DEBUG
 		printf("%s: Returning %d bytes.\n",
-		    keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
+		    keg->uk_name, PAGE_SIZE * keg->uk_ppera);
 #endif
-		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
+		keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
 	}
 }
 
@@ -865,7 +865,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
 		wait |= M_NODUMP;
 
 	/* zone is passed for legacy reasons. */
-	mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
+	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
 	if (mem == NULL) {
 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 			zone_free_item(keg->uk_slabzone, slab, NULL,
@@ -927,7 +927,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
 			if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 				zone_free_item(keg->uk_slabzone, slab,
 				    NULL, SKIP_NONE, ZFREE_STATFREE);
-			keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
+			keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera,
 			    flags);
 			KEG_LOCK(keg);
 			return (NULL);
@@ -1138,16 +1138,27 @@ keg_small_init(uma_keg_t keg)
 	u_int wastedspace;
 	u_int shsize;
 
-	KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
+	if (keg->uk_flags & UMA_ZONE_PCPU) {
+		keg->uk_slabsize = sizeof(struct pcpu);
+		keg->uk_ppera = howmany(mp_ncpus * sizeof(struct pcpu),
+		    PAGE_SIZE);
+	} else {
+		keg->uk_slabsize = UMA_SLAB_SIZE;
+		keg->uk_ppera = 1;
+	}
+
 	rsize = keg->uk_size;
 
-	if (rsize < UMA_SMALLEST_UNIT)
-		rsize = UMA_SMALLEST_UNIT;
 	if (rsize & keg->uk_align)
 		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
+	if (rsize < keg->uk_slabsize / 256)
+		rsize = keg->uk_slabsize / 256;
 
 	keg->uk_rsize = rsize;
-	keg->uk_ppera = 1;
+
+	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
+	    keg->uk_rsize < sizeof(struct pcpu),
+	    ("%s: size %u too large", __func__, keg->uk_rsize));
 
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		shsize = 0;
@@ -1159,10 +1170,12 @@ keg_small_init(uma_keg_t keg)
 		shsize = sizeof(struct uma_slab);
 	}
 
-	keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
-	KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
+	keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
+	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
+	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
+
 	memused = keg->uk_ipers * rsize + shsize;
-	wastedspace = UMA_SLAB_SIZE - memused;
+	wastedspace = keg->uk_slabsize - memused;
 
 	/*
 	 * We can't do OFFPAGE if we're internal or if we've been
@@ -1175,24 +1188,26 @@ keg_small_init(uma_keg_t keg)
 	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
 		return;
 
-	if ((wastedspace >= UMA_MAX_WASTE) &&
-	    (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
-		keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
-		KASSERT(keg->uk_ipers <= 255,
-		    ("keg_small_init: keg->uk_ipers too high!"));
+	if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
+	    (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
+		keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
+		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
+		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 #ifdef UMA_DEBUG
 		printf("UMA decided we need offpage slab headers for "
 		    "keg: %s, calculated wastedspace = %d, "
 		    "maximum wasted space allowed = %d, "
 		    "calculated ipers = %d, "
 		    "new wasted space = %d\n", keg->uk_name, wastedspace,
-		    UMA_MAX_WASTE, keg->uk_ipers,
-		    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
+		    keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
+		    keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
 #endif
 		keg->uk_flags |= UMA_ZONE_OFFPAGE;
-		if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
-			keg->uk_flags |= UMA_ZONE_HASH;
 	}
+
+	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
+	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
+		keg->uk_flags |= UMA_ZONE_HASH;
 }
 
 /*
@@ -1209,19 +1224,15 @@ keg_small_init(uma_keg_t keg)
 static void
 keg_large_init(uma_keg_t keg)
 {
-	int pages;
 
 	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
 	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
 	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
+	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
 
-	pages = keg->uk_size / UMA_SLAB_SIZE;
-
-	/* Account for remainder */
-	if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
-		pages++;
-
-	keg->uk_ppera = pages;
+	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
+	keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
 	keg->uk_ipers = 1;
 	keg->uk_rsize = keg->uk_size;
 
@@ -1242,6 +1253,9 @@ keg_cachespread_init(uma_keg_t keg)
 	int pages;
 	int rsize;
 
+	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
+
 	alignsize = keg->uk_align + 1;
 	rsize = keg->uk_size;
 	/*
@@ -1259,6 +1273,7 @@ keg_cachespread_init(uma_keg_t keg)
 	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
 	keg->uk_rsize = rsize;
 	keg->uk_ppera = pages;
+	keg->uk_slabsize = UMA_SLAB_SIZE;
 	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
 	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
 	KASSERT(keg->uk_ipers <= uma_max_ipers,
@@ -1308,6 +1323,13 @@ keg_ctor(void *mem, int size, void *udat
 	if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
 		keg->uk_flags |= UMA_ZONE_VTOSLAB;
 
+	if (arg->flags & UMA_ZONE_PCPU)
+#ifdef SMP
+		keg->uk_flags |= UMA_ZONE_OFFPAGE;
+#else
+		keg->uk_flags &= ~UMA_ZONE_PCPU;
+#endif
+
 	/*
 	 * The +UMA_FRITM_SZ added to uk_size is to account for the
 	 * linkage that is added to the size in keg_small_init().  If
@@ -1385,7 +1407,7 @@ keg_ctor(void *mem, int size, void *udat
 		if (totsize & UMA_ALIGN_PTR)
 			totsize = (totsize & ~UMA_ALIGN_PTR) +
 			    (UMA_ALIGN_PTR + 1);
-		keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
+		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
 
 		if (keg->uk_flags & UMA_ZONE_REFCNT)
 			totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
@@ -1401,7 +1423,7 @@ keg_ctor(void *mem, int size, void *udat
 		 * mathematically possible for all cases, so we make
 		 * sure here anyway.
 		 */
-		if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
+		if (totsize > PAGE_SIZE * keg->uk_ppera) {
 			printf("zone %s ipers %d rsize %d size %d\n",
 			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
 			    keg->uk_size);
@@ -1676,7 +1698,8 @@ uma_startup(void *bootmem, int boot_page
 	 * that we need to go to offpage slab headers.  Or, if we do,
 	 * then we trap that condition below and panic in the INVARIANTS case.
 	 */
-	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
+	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) -
+	    (UMA_SLAB_SIZE / UMA_MAX_WASTE);
 	totsize = wsize;
 	objsize = UMA_SMALLEST_UNIT;
 	while (totsize >= wsize) {
@@ -1689,7 +1712,8 @@ uma_startup(void *bootmem, int boot_page
 		objsize--;
 	uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
 
-	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
+	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
+	    (UMA_SLAB_SIZE / UMA_MAX_WASTE);
 	totsize = wsize;
 	objsize = UMA_SMALLEST_UNIT;
 	while (totsize >= wsize) {

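To make the new keg_small_init() sizing concrete, take hypothetical
numbers: a 4 KB page system with mp_ncpus == 16 and, say,
sizeof(struct pcpu) == 512. A UMA_ZONE_PCPU keg then comes out as:

    uk_slabsize = sizeof(struct pcpu)        = 512
    uk_ppera    = howmany(16 * 512, 4096)    = 2
    uk_ipers    = (512 - shsize) / uk_rsize  (shsize == 0, since keg_ctor()
                                              forces OFFPAGE on SMP)

so one backend allocation spans two pages, while items are still carved
out of a 512-byte per-CPU slab window.
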
Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h	Mon Apr  8 19:03:01 2013	(r249263)
+++ head/sys/vm/uma_int.h	Mon Apr  8 19:10:45 2013	(r249264)
@@ -120,8 +120,8 @@
 
 #define UMA_BOOT_PAGES		64	/* Pages allocated for startup */
 
-/* Max waste before going to off page slab management */
-#define UMA_MAX_WASTE	(UMA_SLAB_SIZE / 10)
+/* Max waste percentage before going to off page slab management */
+#define UMA_MAX_WASTE	10
 
 /*
  * I doubt there will be many cases where this is exceeded. This is the initial
@@ -197,12 +197,9 @@ typedef struct uma_cache * uma_cache_t;
  *
  */
 struct uma_keg {
-	LIST_ENTRY(uma_keg)	uk_link;	/* List of all kegs */
-
 	struct mtx	uk_lock;	/* Lock for the keg */
 	struct uma_hash	uk_hash;
 
-	const char	*uk_name;		/* Name of creating zone. */
 	LIST_HEAD(,uma_zone)	uk_zones;	/* Keg's zones */
 	LIST_HEAD(,uma_slab)	uk_part_slab;	/* partially allocated slabs */
 	LIST_HEAD(,uma_slab)	uk_free_slab;	/* empty slab list */
@@ -225,10 +222,15 @@ struct uma_keg {
 	vm_offset_t	uk_kva;		/* Zone base KVA */
 	uma_zone_t	uk_slabzone;	/* Slab zone backing us, if OFFPAGE */
 
+	u_int16_t	uk_slabsize;	/* Slab size for this keg */
 	u_int16_t	uk_pgoff;	/* Offset to uma_slab struct */
 	u_int16_t	uk_ppera;	/* pages per allocation from backend */
 	u_int16_t	uk_ipers;	/* Items per slab */
 	u_int32_t	uk_flags;	/* Internal flags */
+
+	/* Least used fields go to the last cache line. */
+	const char	*uk_name;		/* Name of creating zone. */
+	LIST_ENTRY(uma_keg)	uk_link;	/* List of all kegs */
 };
 typedef struct uma_keg	* uma_keg_t;
 

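Note the changed meaning of UMA_MAX_WASTE in uma_int.h above: it is now a
divisor rather than an absolute byte count, so the off-page threshold
becomes uk_slabsize / UMA_MAX_WASTE (10% of the slab) instead of the old
fixed UMA_SLAB_SIZE / 10 bytes. Where UMA_SLAB_SIZE is a 4 KB page the two
forms agree (409 bytes); for a hypothetical 512-byte pcpu slab the allowed
waste scales down to 51 bytes.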

