Date:      Tue, 21 Nov 2017 21:18:04 +0000 (UTC)
From:      Jeff Roberson <jeff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r326077 - in user/jeff/numa: lib/libmemstat sys/amd64/amd64 sys/arm64/arm64 sys/i386/i386 sys/kern sys/mips/mips sys/powerpc/aim sys/powerpc/powerpc sys/riscv/riscv sys/sparc64/sparc64 ...
Message-ID:  <201711212118.vALLI4sU082050@repo.freebsd.org>

Author: jeff
Date: Tue Nov 21 21:18:04 2017
New Revision: 326077
URL: https://svnweb.freebsd.org/changeset/base/326077

Log:
  NUMAify UMA
  
  This is a slightly refactored version of the work from the original
  projects/numa branch, with better iterators and cleaner integration with the
  vm_page_alloc and kmem_*alloc layers.
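  
  The core of the change is that every backend page supplier (uma_alloc) and
  cache zone import hook (uma_import) now receives a preferred NUMA domain.
  As a rough sketch, modeled on the amd64 uma_small_alloc hunk below (the
  function name is illustrative and the dump-page bookkeeping of the real
  allocator is omitted), a machine-dependent allocator adapted to the new
  signature looks like:
  
	#include <sys/param.h>
	#include <sys/malloc.h>
	#include <vm/vm.h>
	#include <vm/vm_page.h>
	#include <vm/uma.h>
	#include <machine/vmparam.h>
  
	void *
	example_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
	    u_int8_t *flags, int wait)
	{
		vm_page_t m;
  
		*flags = UMA_SLAB_PRIV;
		/* Ask the page allocator for a page from the preferred domain. */
		m = vm_page_alloc_domain(NULL, 0, domain,
		    malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
		if (m == NULL)
			return (NULL);
		/* Hand back the direct-mapped address of the page. */
		return ((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
	}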

Modified:
  user/jeff/numa/lib/libmemstat/memstat_uma.c
  user/jeff/numa/sys/amd64/amd64/uma_machdep.c
  user/jeff/numa/sys/arm64/arm64/uma_machdep.c
  user/jeff/numa/sys/i386/i386/pmap.c
  user/jeff/numa/sys/kern/kern_mbuf.c
  user/jeff/numa/sys/kern/subr_busdma_bufalloc.c
  user/jeff/numa/sys/kern/subr_vmem.c
  user/jeff/numa/sys/kern/vfs_bio.c
  user/jeff/numa/sys/mips/mips/uma_machdep.c
  user/jeff/numa/sys/powerpc/aim/mmu_oea64.c
  user/jeff/numa/sys/powerpc/aim/slb.c
  user/jeff/numa/sys/powerpc/powerpc/uma_machdep.c
  user/jeff/numa/sys/riscv/riscv/uma_machdep.c
  user/jeff/numa/sys/sparc64/sparc64/vm_machdep.c
  user/jeff/numa/sys/sys/busdma_bufalloc.h
  user/jeff/numa/sys/vm/uma.h
  user/jeff/numa/sys/vm/uma_core.c
  user/jeff/numa/sys/vm/uma_int.h

Modified: user/jeff/numa/lib/libmemstat/memstat_uma.c
==============================================================================
--- user/jeff/numa/lib/libmemstat/memstat_uma.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/lib/libmemstat/memstat_uma.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -53,6 +53,8 @@ static struct nlist namelist[] = {
 	{ .n_name = "_mp_maxid" },
 #define	X_ALL_CPUS	2
 	{ .n_name = "_all_cpus" },
+#define	X_VM_NDOMAINS	3
+	{ .n_name = "_vm_ndomains" },
 	{ .n_name = "" },
 };
 
@@ -295,11 +297,12 @@ memstat_kvm_uma(struct memory_type_list *list, void *k
 {
 	LIST_HEAD(, uma_keg) uma_kegs;
 	struct memory_type *mtp;
+	struct uma_zone_domain uzd;
 	struct uma_bucket *ubp, ub;
 	struct uma_cache *ucp, *ucp_array;
 	struct uma_zone *uzp, uz;
 	struct uma_keg *kzp, kz;
-	int hint_dontsearch, i, mp_maxid, ret;
+	int hint_dontsearch, i, mp_maxid, ndomains, ret;
 	char name[MEMTYPE_MAXNAME];
 	cpuset_t all_cpus;
 	long cpusetsize;
@@ -321,6 +324,12 @@ memstat_kvm_uma(struct memory_type_list *list, void *k
 		list->mtl_error = ret;
 		return (-1);
 	}
+	ret = kread_symbol(kvm, X_VM_NDOMAINS, &ndomains,
+	    sizeof(ndomains), 0);
+	if (ret != 0) {
+		list->mtl_error = ret;
+		return (-1);
+	}
 	ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0);
 	if (ret != 0) {
 		list->mtl_error = ret;
@@ -445,10 +454,17 @@ skip_percpu:
 				    kz.uk_ipers;
 			mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size;
 			mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
-			for (ubp = LIST_FIRST(&uz.uz_buckets); ubp !=
-			    NULL; ubp = LIST_NEXT(&ub, ub_link)) {
-				ret = kread(kvm, ubp, &ub, sizeof(ub), 0);
-				mtp->mt_zonefree += ub.ub_cnt;
+			for (i = 0; i < ndomains; i++) {
+				ret = kread(kvm, &uz.uz_domain[i], &uzd,
+				   sizeof(uzd), 0);
+				for (ubp =
+				    LIST_FIRST(&uzd.uzd_buckets);
+				    ubp != NULL;
+				    ubp = LIST_NEXT(&ub, ub_link)) {
+					ret = kread(kvm, ubp, &ub,
+					   sizeof(ub), 0);
+					mtp->mt_zonefree += ub.ub_cnt;
+				}
 			}
 			if (!((kz.uk_flags & UMA_ZONE_SECONDARY) &&
 			    LIST_FIRST(&kz.uk_zones) != uzp)) {

Modified: user/jeff/numa/sys/amd64/amd64/uma_machdep.c
==============================================================================
--- user/jeff/numa/sys/amd64/amd64/uma_machdep.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/amd64/amd64/uma_machdep.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -42,14 +42,15 @@ __FBSDID("$FreeBSD$");
 #include <machine/vmparam.h>
 
 void *
-uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags,
+    int wait)
 {
 	vm_page_t m;
 	vm_paddr_t pa;
 	void *va;
 
 	*flags = UMA_SLAB_PRIV;
-	m = vm_page_alloc(NULL, 0,
+	m = vm_page_alloc_domain(NULL, 0, domain,
 	    malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
 	if (m == NULL)
 		return (NULL);

Modified: user/jeff/numa/sys/arm64/arm64/uma_machdep.c
==============================================================================
--- user/jeff/numa/sys/arm64/arm64/uma_machdep.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/arm64/arm64/uma_machdep.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -42,14 +42,15 @@ __FBSDID("$FreeBSD$");
 #include <machine/vmparam.h>
 
 void *
-uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags,
+    int wait)
 {
 	vm_page_t m;
 	vm_paddr_t pa;
 	void *va;
 
 	*flags = UMA_SLAB_PRIV;
-	m = vm_page_alloc(NULL, 0,
+	m = vm_page_alloc_domain(NULL, 0, domain,
 	    malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
 	if (m == NULL)
 		return (NULL);

Modified: user/jeff/numa/sys/i386/i386/pmap.c
==============================================================================
--- user/jeff/numa/sys/i386/i386/pmap.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/i386/i386/pmap.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -342,8 +342,8 @@ static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offs
 static void pmap_pte_release(pt_entry_t *pte);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *);
 #if defined(PAE) || defined(PAE_TABLES)
-static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags,
-    int wait);
+static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain,
+    uint8_t *flags, int wait);
 #endif
 static void pmap_set_pg(void);
 
@@ -703,12 +703,13 @@ pmap_page_init(vm_page_t m)
 
 #if defined(PAE) || defined(PAE_TABLES)
 static void *
-pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
+pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
+    int wait)
 {
 
 	/* Inform UMA that this allocator uses kernel_map/object. */
 	*flags = UMA_SLAB_KERNEL;
-	return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 0x0ULL,
+	return ((void *)kmem_alloc_contig_domain(domain, bytes, wait, 0x0ULL,
 	    0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT));
 }
 #endif

Modified: user/jeff/numa/sys/kern/kern_mbuf.c
==============================================================================
--- user/jeff/numa/sys/kern/kern_mbuf.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/kern/kern_mbuf.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -281,7 +281,7 @@ static void	mb_dtor_pack(void *, int, void *);
 static int	mb_zinit_pack(void *, int, int);
 static void	mb_zfini_pack(void *, int);
 static void	mb_reclaim(uma_zone_t, int);
-static void    *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
+static void    *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 
 /* Ensure that MSIZE is a power of 2. */
 CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
@@ -384,12 +384,13 @@ SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, 
  * pages.
  */
 static void *
-mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
+mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
+    int wait)
 {
 
 	/* Inform UMA that this allocator uses kernel_map/object. */
 	*flags = UMA_SLAB_KERNEL;
-	return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait,
+	return ((void *)kmem_alloc_contig_domain(domain, bytes, wait,
 	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
 }
 

Modified: user/jeff/numa/sys/kern/subr_busdma_bufalloc.c
==============================================================================
--- user/jeff/numa/sys/kern/subr_busdma_bufalloc.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/kern/subr_busdma_bufalloc.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -147,7 +147,7 @@ busdma_bufalloc_findzone(busdma_bufalloc_t ba, bus_siz
 }
 
 void *
-busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size,
+busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size, int domain,
     uint8_t *pflag, int wait)
 {
 #ifdef VM_MEMATTR_UNCACHEABLE
@@ -155,7 +155,7 @@ busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_
 	/* Inform UMA that this allocator uses kernel_arena/object. */
 	*pflag = UMA_SLAB_KERNEL;
 
-	return ((void *)kmem_alloc_attr(kernel_arena, size, wait, 0,
+	return ((void *)kmem_alloc_attr_domain(domain, size, wait, 0,
 	    BUS_SPACE_MAXADDR, VM_MEMATTR_UNCACHEABLE));
 
 #else

Modified: user/jeff/numa/sys/kern/subr_vmem.c
==============================================================================
--- user/jeff/numa/sys/kern/subr_vmem.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/kern/subr_vmem.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -497,7 +497,7 @@ bt_insfree(vmem_t *vm, bt_t *bt)
  * Import from the arena into the quantum cache in UMA.
  */
 static int
-qc_import(void *arg, void **store, int cnt, int flags)
+qc_import(void *arg, void **store, int cnt, int domain, int flags)
 {
 	qcache_t *qc;
 	vmem_addr_t addr;
@@ -611,7 +611,8 @@ static struct mtx_padalign __exclusive_cache_line vmem
  * we are really out of KVA.
  */
 static void *
-vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
+vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
+    int wait)
 {
 	vmem_addr_t addr;
 
@@ -625,7 +626,7 @@ vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, uint8_
 	if (vmem_xalloc(kernel_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
 	    VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
 	    &addr) == 0) {
-		if (kmem_back(kernel_object, addr, bytes,
+		if (kmem_back_domain(domain, kernel_object, addr, bytes,
 		    M_NOWAIT | M_USE_RESERVE) == 0) {
 			mtx_unlock(&vmem_bt_lock);
 			return ((void *)addr);

Modified: user/jeff/numa/sys/kern/vfs_bio.c
==============================================================================
--- user/jeff/numa/sys/kern/vfs_bio.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/kern/vfs_bio.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -131,7 +131,7 @@ static __inline void bd_wakeup(void);
 static int sysctl_runningspace(SYSCTL_HANDLER_ARGS);
 static void bufkva_reclaim(vmem_t *, int);
 static void bufkva_free(struct buf *);
-static int buf_import(void *, void **, int, int);
+static int buf_import(void *, void **, int, int, int);
 static void buf_release(void *, void **, int);
 static void maxbcachebuf_adjust(void);
 
@@ -1417,7 +1417,7 @@ buf_free(struct buf *bp)
  *	only as a per-cpu cache of bufs still maintained on a global list.
  */
 static int
-buf_import(void *arg, void **store, int cnt, int flags)
+buf_import(void *arg, void **store, int cnt, int domain, int flags)
 {
 	struct buf *bp;
 	int i;

Modified: user/jeff/numa/sys/mips/mips/uma_machdep.c
==============================================================================
--- user/jeff/numa/sys/mips/mips/uma_machdep.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/mips/mips/uma_machdep.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -42,7 +42,8 @@ __FBSDID("$FreeBSD$");
 #include <machine/vmparam.h>
 
 void *
-uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags,
+    int wait)
 {
 	vm_paddr_t pa;
 	vm_page_t m;
@@ -57,7 +58,8 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_in
 #endif
 
 	for (;;) {
-		m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, pflags);
+		m = vm_page_alloc_freelist_domain(domain, VM_FREELIST_DIRECT,
+		    pflags);
 #ifndef __mips_n64
 		if (m == NULL && vm_page_reclaim_contig(pflags, 1,
 		    0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0))

Modified: user/jeff/numa/sys/powerpc/aim/mmu_oea64.c
==============================================================================
--- user/jeff/numa/sys/powerpc/aim/mmu_oea64.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/powerpc/aim/mmu_oea64.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -1500,8 +1500,8 @@ retry:
 static mmu_t installed_mmu;
 
 static void *
-moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags,
-    int wait)
+moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
+    uint8_t *flags, int wait)
 {
 	struct pvo_entry *pvo;
         vm_offset_t va;
@@ -1518,7 +1518,7 @@ moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes
 	*flags = UMA_SLAB_PRIV;
 	needed_lock = !PMAP_LOCKED(kernel_pmap);
 
-	m = vm_page_alloc(NULL, 0,
+	m = vm_page_alloc_domain(NULL, 0, domain,
 	    malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
 	if (m == NULL)
 		return (NULL);

Modified: user/jeff/numa/sys/powerpc/aim/slb.c
==============================================================================
--- user/jeff/numa/sys/powerpc/aim/slb.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/powerpc/aim/slb.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -478,7 +478,8 @@ slb_insert_user(pmap_t pm, struct slb *slb)
 }
 
 static void *
-slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
+slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
+    u_int8_t *flags, int wait)
 {
 	static vm_offset_t realmax = 0;
 	void *va;
@@ -488,7 +489,7 @@ slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, u
 		realmax = platform_real_maxaddr();
 
 	*flags = UMA_SLAB_PRIV;
-	m = vm_page_alloc_contig(NULL, 0,
+	m = vm_page_alloc_contig_domain(NULL, 0, domain,
 	    malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED,
 	    1, 0, realmax, PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT);
 	if (m == NULL)

Modified: user/jeff/numa/sys/powerpc/powerpc/uma_machdep.c
==============================================================================
--- user/jeff/numa/sys/powerpc/powerpc/uma_machdep.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/powerpc/powerpc/uma_machdep.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -51,7 +51,8 @@ SYSCTL_INT(_hw, OID_AUTO, uma_mdpages, CTLFLAG_RD, &hw
 	   "UMA MD pages in use");
 
 void *
-uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags,
+    int wait)
 {
 	void *va;
 	vm_paddr_t pa;
@@ -59,7 +60,7 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_in
 	
 	*flags = UMA_SLAB_PRIV;
 
-	m = vm_page_alloc(NULL, 0,
+	m = vm_page_alloc_domain(NULL, 0, domain,
 	    malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
 	if (m == NULL) 
 		return (NULL);

Modified: user/jeff/numa/sys/riscv/riscv/uma_machdep.c
==============================================================================
--- user/jeff/numa/sys/riscv/riscv/uma_machdep.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/riscv/riscv/uma_machdep.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -41,7 +41,8 @@ __FBSDID("$FreeBSD$");
 #include <machine/vmparam.h>
 
 void *
-uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags,
+    int wait)
 {
 
 	panic("uma_small_alloc");

Modified: user/jeff/numa/sys/sparc64/sparc64/vm_machdep.c
==============================================================================
--- user/jeff/numa/sys/sparc64/sparc64/vm_machdep.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/sparc64/sparc64/vm_machdep.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -390,7 +390,8 @@ swi_vm(void *v)
 }
 
 void *
-uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
+uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags,
+    int wait)
 {
 	vm_paddr_t pa;
 	vm_page_t m;
@@ -400,7 +401,7 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_in
 
 	*flags = UMA_SLAB_PRIV;
 
-	m = vm_page_alloc(NULL, 0, 
+	m = vm_page_alloc_domain(NULL, 0, domain,
 	    malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
 	if (m == NULL)
 		return (NULL);

Modified: user/jeff/numa/sys/sys/busdma_bufalloc.h
==============================================================================
--- user/jeff/numa/sys/sys/busdma_bufalloc.h	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/sys/busdma_bufalloc.h	Tue Nov 21 21:18:04 2017	(r326077)
@@ -111,7 +111,7 @@ struct busdma_bufzone * busdma_bufalloc_findzone(busdm
  * you can probably use these when you need uncacheable buffers.
  */
 void * busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size,
-    uint8_t *pflag, int wait);
+    int domain, uint8_t *pflag, int wait);
 void  busdma_bufalloc_free_uncacheable(void *item, vm_size_t size,
     uint8_t pflag);
 

Modified: user/jeff/numa/sys/vm/uma.h
==============================================================================
--- user/jeff/numa/sys/vm/uma.h	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/vm/uma.h	Tue Nov 21 21:18:04 2017	(r326077)
@@ -45,6 +45,7 @@
 /* Types and type defs */
 
 struct uma_zone;
+struct vm_domain_iterator;
 /* Opaque type used as a handle to the zone */
 typedef struct uma_zone * uma_zone_t;
 
@@ -126,7 +127,8 @@ typedef void (*uma_fini)(void *mem, int size);
 /*
  * Import new memory into a cache zone.
  */
-typedef int (*uma_import)(void *arg, void **store, int count, int flags);
+typedef int (*uma_import)(void *arg, void **store, int count, int domain,
+    int flags);
 
 /*
  * Free memory from a cache zone.
@@ -279,6 +281,10 @@ uma_zone_t uma_zcache_create(char *name, int size, uma
 					 * Allocates mp_maxid + 1 slabs sized to
 					 * sizeof(struct pcpu).
 					 */
+#define	UMA_ZONE_NUMA		0x10000	/*
+					 * NUMA aware Zone.  Implements a best
+					 * effort first-touch policy.
+					 */
 
 /*
  * These flags are shared between the keg and zone.  In zones wishing to add
@@ -371,25 +377,21 @@ uma_zfree(uma_zone_t zone, void *item)
 void uma_zwait(uma_zone_t zone);
 
 /*
- * XXX The rest of the prototypes in this header are h0h0 magic for the VM.
- * If you think you need to use it for a normal zone you're probably incorrect.
- */
-
-/*
  * Backend page supplier routines
  *
  * Arguments:
  *	zone  The zone that is requesting pages.
  *	size  The number of bytes being requested.
  *	pflag Flags for these memory pages, see below.
+ *	domain The NUMA domain that we prefer for this allocation.
  *	wait  Indicates our willingness to block.
  *
  * Returns:
  *	A pointer to the allocated memory or NULL on failure.
  */
 
-typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, uint8_t *pflag,
-    int wait);
+typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
+    uint8_t *pflag, int wait);
 
 /*
  * Backend page free routines
@@ -403,8 +405,6 @@ typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t 
  *	None
  */
 typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
-
-
 
 /*
  * Sets up the uma allocator. (Called by vm_mem_init)
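
  For context, a consumer that wants the best-effort first-touch behaviour only
  has to pass the new UMA_ZONE_NUMA flag at zone creation time; the zone's
  bucket cache is then consulted per domain and allocations prefer the calling
  CPU's domain.  The names below (struct foo, foo_zone) are illustrative, not
  part of this commit:

	#include <sys/param.h>
	#include <sys/malloc.h>
	#include <vm/uma.h>

	struct foo {				/* hypothetical consumer type */
		int	f_value;
	};

	static uma_zone_t foo_zone;

	static void
	foo_zone_init(void)
	{

		foo_zone = uma_zcreate("foo", sizeof(struct foo),
		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NUMA);
	}

	static struct foo *
	foo_alloc(void)
	{

		/*
		 * With UMA_ZONE_NUMA the allocation prefers buckets and slabs
		 * from the calling CPU's domain (best effort first-touch).
		 */
		return (uma_zalloc(foo_zone, M_WAITOK));
	}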

Modified: user/jeff/numa/sys/vm/uma_core.c
==============================================================================
--- user/jeff/numa/sys/vm/uma_core.c	Tue Nov 21 21:16:48 2017	(r326076)
+++ user/jeff/numa/sys/vm/uma_core.c	Tue Nov 21 21:18:04 2017	(r326077)
@@ -76,10 +76,12 @@ __FBSDID("$FreeBSD$");
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
+#include <vm/vm_domain.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_param.h>
+#include <vm/vm_phys.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
@@ -94,17 +96,12 @@ __FBSDID("$FreeBSD$");
 #endif
 
 /*
- * This is the zone and keg from which all zones are spawned.  The idea is that
- * even the zone & keg heads are allocated from the allocator, so we use the
- * bss section to bootstrap us.
+ * This is the zone and keg from which all zones are spawned.
  */
-static struct uma_keg masterkeg;
-static struct uma_zone masterzone_k;
-static struct uma_zone masterzone_z;
-static uma_zone_t kegs = &masterzone_k;
-static uma_zone_t zones = &masterzone_z;
+static uma_zone_t kegs;
+static uma_zone_t zones;
 
-/* This is the zone from which all of uma_slab_t's are allocated. */
+/* This is the zone from which all offpage uma_slab_ts are allocated. */
 static uma_zone_t slabzone;
 
 /*
@@ -225,11 +222,11 @@ enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI }
 
 /* Prototypes.. */
 
-static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
-static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
-static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
+static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
+static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
+static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void page_free(void *, vm_size_t, uint8_t);
-static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
+static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int);
 static void cache_drain(uma_zone_t);
 static void bucket_drain(uma_zone_t, uma_bucket_t);
 static void bucket_cache_drain(uma_zone_t zone);
@@ -247,23 +244,23 @@ static int hash_expand(struct uma_hash *, struct uma_h
 static void hash_free(struct uma_hash *hash);
 static void uma_timeout(void *);
 static void uma_startup3(void);
-static void *zone_alloc_item(uma_zone_t, void *, int);
+static void *zone_alloc_item(uma_zone_t, void *, int, int);
 static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
 static void bucket_enable(void);
 static void bucket_init(void);
 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
 static void bucket_zone_drain(void);
-static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
-static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
-static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
+static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
+static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int);
+static uma_slab_t zone_fetch_slab_multi(uma_zone_t, uma_keg_t, int, int);
 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
 static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
 static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
     uma_fini fini, int align, uint32_t flags);
-static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
-static void zone_release(uma_zone_t zone, void **bucket, int cnt);
-static void uma_zero_item(void *item, uma_zone_t zone);
+static int zone_import(uma_zone_t, void **, int, int, int);
+static void zone_release(uma_zone_t, void **, int);
+static void uma_zero_item(void *, uma_zone_t);
 
 void uma_print_zone(uma_zone_t);
 void uma_print_stats(void);
@@ -329,7 +326,7 @@ bucket_init(void)
 		size += sizeof(void *) * ubz->ubz_entries;
 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
-		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
+		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
 	}
 }
 
@@ -566,7 +563,7 @@ hash_alloc(struct uma_hash *hash)
 		    M_UMAHASH, M_NOWAIT);
 	} else {
 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
-		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
+		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL, 0,
 		    M_WAITOK);
 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
 	}
@@ -733,6 +730,7 @@ cache_drain_safe_cpu(uma_zone_t zone)
 {
 	uma_cache_t cache;
 	uma_bucket_t b1, b2;
+	int domain;
 
 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
 		return;
@@ -740,10 +738,14 @@ cache_drain_safe_cpu(uma_zone_t zone)
 	b1 = b2 = NULL;
 	ZONE_LOCK(zone);
 	critical_enter();
+	if (zone->uz_flags & UMA_ZONE_NUMA)
+		domain = PCPU_GET(domain);
+	else
+		domain = 0;
 	cache = &zone->uz_cpu[curcpu];
 	if (cache->uc_allocbucket) {
 		if (cache->uc_allocbucket->ub_cnt != 0)
-			LIST_INSERT_HEAD(&zone->uz_buckets,
+			LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
 			    cache->uc_allocbucket, ub_link);
 		else
 			b1 = cache->uc_allocbucket;
@@ -751,7 +753,7 @@ cache_drain_safe_cpu(uma_zone_t zone)
 	}
 	if (cache->uc_freebucket) {
 		if (cache->uc_freebucket->ub_cnt != 0)
-			LIST_INSERT_HEAD(&zone->uz_buckets,
+			LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
 			    cache->uc_freebucket, ub_link);
 		else
 			b2 = cache->uc_freebucket;
@@ -806,18 +808,22 @@ cache_drain_safe(uma_zone_t zone)
 static void
 bucket_cache_drain(uma_zone_t zone)
 {
+	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
+	int i;
 
 	/*
-	 * Drain the bucket queues and free the buckets, we just keep two per
-	 * cpu (alloc/free).
+	 * Drain the bucket queues and free the buckets.
 	 */
-	while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
-		LIST_REMOVE(bucket, ub_link);
-		ZONE_UNLOCK(zone);
-		bucket_drain(zone, bucket);
-		bucket_free(zone, bucket, NULL);
-		ZONE_LOCK(zone);
+	for (i = 0; i < vm_ndomains; i++) {
+		zdom = &zone->uz_domain[i];
+		while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
+			LIST_REMOVE(bucket, ub_link);
+			ZONE_UNLOCK(zone);
+			bucket_drain(zone, bucket);
+			bucket_free(zone, bucket, NULL);
+			ZONE_LOCK(zone);
+		}
 	}
 
 	/*
@@ -862,7 +868,9 @@ static void
 keg_drain(uma_keg_t keg)
 {
 	struct slabhead freeslabs = { 0 };
+	uma_domain_t dom;
 	uma_slab_t slab, tmp;
+	int i;
 
 	/*
 	 * We don't want to take pages from statically allocated kegs at this
@@ -877,20 +885,25 @@ keg_drain(uma_keg_t keg)
 	if (keg->uk_free == 0)
 		goto finished;
 
-	LIST_FOREACH_SAFE(slab, &keg->uk_free_slab, us_link, tmp) {
-		/* We have nowhere to free these to. */
-		if (slab->us_flags & UMA_SLAB_BOOT)
-			continue;
+	for (i = 0; i < vm_ndomains; i++) {
+		dom = &keg->uk_domain[i];
+		LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
+			/* We have nowhere to free these to. */
+			if (slab->us_flags & UMA_SLAB_BOOT)
+				continue;
 
-		LIST_REMOVE(slab, us_link);
-		keg->uk_pages -= keg->uk_ppera;
-		keg->uk_free -= keg->uk_ipers;
+			LIST_REMOVE(slab, us_link);
+			keg->uk_pages -= keg->uk_ppera;
+			keg->uk_free -= keg->uk_ipers;
 
-		if (keg->uk_flags & UMA_ZONE_HASH)
-			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
+			if (keg->uk_flags & UMA_ZONE_HASH)
+				UMA_HASH_REMOVE(&keg->uk_hash, slab,
+				    slab->us_data);
 
-		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
+			SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
+		}
 	}
+
 finished:
 	KEG_UNLOCK(keg);
 
@@ -950,7 +963,7 @@ zone_drain(uma_zone_t zone)
  *	caller specified M_NOWAIT.
  */
 static uma_slab_t
-keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
+keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
 {
 	uma_alloc allocf;
 	uma_slab_t slab;
@@ -959,6 +972,8 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wai
 	uint8_t flags;
 	int i;
 
+	KASSERT(domain >= 0 && domain < vm_ndomains,
+	    ("keg_alloc_slab: domain %d out of range", domain));
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	slab = NULL;
 	mem = NULL;
@@ -968,7 +983,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wai
 	size = keg->uk_ppera * PAGE_SIZE;
 
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
-		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
+		slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, wait);
 		if (slab == NULL)
 			goto out;
 	}
@@ -989,7 +1004,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wai
 		wait |= M_NODUMP;
 
 	/* zone is passed for legacy reasons. */
-	mem = allocf(zone, size, &flags, wait);
+	mem = allocf(zone, size, domain, &flags, wait);
 	if (mem == NULL) {
 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
@@ -1010,6 +1025,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wai
 	slab->us_data = mem;
 	slab->us_freecount = keg->uk_ipers;
 	slab->us_flags = flags;
+	slab->us_domain = domain;
 	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
 #ifdef INVARIANTS
 	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
@@ -1049,7 +1065,8 @@ out:
  * the VM is ready.
  */
 static void *
-startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
+startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
+    int wait)
 {
 	uma_keg_t keg;
 	void *mem;
@@ -1082,7 +1099,7 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_
 #else
 	keg->uk_allocf = page_alloc;
 #endif
-	return keg->uk_allocf(zone, bytes, pflag, wait);
+	return keg->uk_allocf(zone, bytes, domain, pflag, wait);
 }
 
 /*
@@ -1097,12 +1114,13 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_
  *	NULL if M_NOWAIT is set.
  */
 static void *
-page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
+page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
+    int wait)
 {
 	void *p;	/* Returned page */
 
 	*pflag = UMA_SLAB_KERNEL;
-	p = (void *) kmem_malloc(kernel_arena, bytes, wait);
+	p = (void *) kmem_malloc_domain(domain, bytes, wait);
 
 	return (p);
 }
@@ -1119,7 +1137,8 @@ page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *
  *	NULL if M_NOWAIT is set.
  */
 static void *
-noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
+noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
+    int wait)
 {
 	TAILQ_HEAD(, vm_page) alloctail;
 	u_long npages;
@@ -1132,7 +1151,7 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t 
 
 	npages = howmany(bytes, PAGE_SIZE);
 	while (npages > 0) {
-		p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
+		p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT |
 		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
 		    ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
 		    VM_ALLOC_NOWAIT));
@@ -1404,6 +1423,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 	keg->uk_init = arg->uminit;
 	keg->uk_fini = arg->fini;
 	keg->uk_align = arg->align;
+	keg->uk_cursor = 0;
 	keg->uk_free = 0;
 	keg->uk_reserve = 0;
 	keg->uk_pages = 0;
@@ -1545,6 +1565,8 @@ zone_ctor(void *mem, int size, void *udata, int flags)
 	zone->uz_count_min = 0;
 	zone->uz_flags = 0;
 	zone->uz_warning = NULL;
+	/* The domain structures follow the cpu structures. */
+	zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
 	timevalclear(&zone->uz_ratecheck);
 	keg = arg->keg;
 
@@ -1750,22 +1772,43 @@ void
 uma_startup(void *mem, int npages)
 {
 	struct uma_zctor_args args;
+	uma_keg_t masterkeg;
+	uintptr_t m;
+	int zsize;
+	int ksize;
 
 	rw_init(&uma_rwlock, "UMA lock");
 
+	ksize = sizeof(struct uma_keg) +
+	    (sizeof(struct uma_domain) * vm_ndomains);
+	zsize = sizeof(struct uma_zone) +
+	    (sizeof(struct uma_cache) * mp_ncpus) +
+	    (sizeof(struct uma_zone_domain) * vm_ndomains);
+
+	/* Use bootpages memory for the zone of zones and zone of kegs. */
+	m = (uintptr_t)mem;
+	zones = (uma_zone_t)m;
+	m += roundup(zsize, CACHE_LINE_SIZE);
+	kegs = (uma_zone_t)m;
+	m += roundup(zsize, CACHE_LINE_SIZE);
+	masterkeg = (uma_keg_t)m;
+	m += roundup(ksize, CACHE_LINE_SIZE);
+	m = roundup(m, PAGE_SIZE);
+	npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
+	mem = (void *)m;
+
 	/* "manually" create the initial zone */
 	memset(&args, 0, sizeof(args));
 	args.name = "UMA Kegs";
-	args.size = sizeof(struct uma_keg);
+	args.size = ksize;
 	args.ctor = keg_ctor;
 	args.dtor = keg_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
-	args.keg = &masterkeg;
+	args.keg = masterkeg;
 	args.align = 32 - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
-	/* The initial zone has no Per cpu queues so it's smaller */
-	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
+	zone_ctor(kegs, zsize, &args, M_WAITOK);
 
 	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
 	bootmem = mem;
@@ -1773,7 +1816,8 @@ uma_startup(void *mem, int npages)
 
 	args.name = "UMA Zones";
 	args.size = sizeof(struct uma_zone) +
-	    (sizeof(struct uma_cache) * (mp_maxid + 1));
+	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
+	    (sizeof(struct uma_zone_domain) * vm_ndomains);
 	args.ctor = zone_ctor;
 	args.dtor = zone_dtor;
 	args.uminit = zero_init;
@@ -1781,8 +1825,7 @@ uma_startup(void *mem, int npages)
 	args.keg = NULL;
 	args.align = 32 - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
-	/* The initial zone has no Per cpu queues so it's smaller */
-	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
+	zone_ctor(zones, zsize, &args, M_WAITOK);
 
 	/* Now make a zone for slab headers */
 	slabzone = uma_zcreate("UMA Slabs",
@@ -1834,7 +1877,7 @@ uma_kcreate(uma_zone_t zone, size_t size, uma_init umi
 	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
 	args.flags = flags;
 	args.zone = zone;
-	return (zone_alloc_item(kegs, &args, M_WAITOK));
+	return (zone_alloc_item(kegs, &args, 0, M_WAITOK));
 }
 
 /* See uma.h */
@@ -1891,7 +1934,7 @@ uma_zcreate(const char *name, size_t size, uma_ctor ct
 		sx_slock(&uma_drain_lock);
 		locked = true;
 	}
-	res = zone_alloc_item(zones, &args, M_WAITOK);
+	res = zone_alloc_item(zones, &args, 0, M_WAITOK);
 	if (locked)
 		sx_sunlock(&uma_drain_lock);
 	return (res);
@@ -1926,7 +1969,7 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor
 		locked = true;
 	}
 	/* XXX Attaches only one keg of potentially many. */
-	res = zone_alloc_item(zones, &args, M_WAITOK);
+	res = zone_alloc_item(zones, &args, 0, M_WAITOK);
 	if (locked)
 		sx_sunlock(&uma_drain_lock);
 	return (res);
@@ -1953,7 +1996,7 @@ uma_zcache_create(char *name, int size, uma_ctor ctor,
 	args.align = 0;
 	args.flags = flags;
 
-	return (zone_alloc_item(zones, &args, M_WAITOK));
+	return (zone_alloc_item(zones, &args, 0, M_WAITOK));
 }
 
 static void
@@ -2058,11 +2101,11 @@ uma_zwait(uma_zone_t zone)
 void *
 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 {
-	void *item;
-	uma_cache_t cache;
+	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
-	int lockfail;
-	int cpu;
+	uma_cache_t cache;
+	void *item;
+	int cpu, domain, lockfail;
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
@@ -2159,6 +2202,11 @@ zalloc_start:
 	if (bucket != NULL)
 		bucket_free(zone, bucket, udata);
 
+	if (zone->uz_flags & UMA_ZONE_NUMA)
+		domain = PCPU_GET(domain);
+	else
+		domain = 0;
+
 	/* Short-circuit for zones without buckets and low memory. */
 	if (zone->uz_count == 0 || bucketdisable)
 		goto zalloc_item;
@@ -2199,7 +2247,8 @@ zalloc_start:
 	/*
 	 * Check the zone's cache of buckets.
 	 */
-	if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
+	zdom = &zone->uz_domain[domain];
+	if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zalloc_arg: Returning an empty bucket."));
 
@@ -2224,7 +2273,7 @@ zalloc_start:
 	 * works we'll restart the allocation from the beginning and it
 	 * will use the just filled bucket.
 	 */
-	bucket = zone_alloc_bucket(zone, udata, flags);
+	bucket = zone_alloc_bucket(zone, udata, domain, flags);
 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
 	    zone->uz_name, zone, bucket);
 	if (bucket != NULL) {
@@ -2237,10 +2286,12 @@ zalloc_start:
 		 * initialized bucket to make this less likely or claim
 		 * the memory directly.
 		 */
-		if (cache->uc_allocbucket == NULL)
-			cache->uc_allocbucket = bucket;
+		if (cache->uc_allocbucket != NULL ||
+		    (zone->uz_flags & UMA_ZONE_NUMA &&
+		    domain != PCPU_GET(domain)))
+			LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
 		else
-			LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
+			cache->uc_allocbucket = bucket;
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
@@ -2249,38 +2300,79 @@ zalloc_start:
 	 * We may not be able to get a bucket so return an actual item.
 	 */
 zalloc_item:
-	item = zone_alloc_item(zone, udata, flags);
+	item = zone_alloc_item(zone, udata, domain, flags);
 
 	return (item);
 }
 
+/*
+ * Find a slab with some space.  Prefer slabs that are partially used over those
+ * that are totally full.  This helps to reduce fragmentation.
+ *
+ * If 'rr' is 1, search all domains starting from 'domain'.  Otherwise check
+ * only 'domain'.
+ */
 static uma_slab_t
-keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
+keg_first_slab(uma_keg_t keg, int domain, int rr)
 {
+	uma_domain_t dom;
 	uma_slab_t slab;
-	int reserve;
+	int start;
 
+	KASSERT(domain >= 0 && domain < vm_ndomains,
+	    ("keg_first_slab: domain %d out of range", domain));
+
+	slab = NULL;
+	start = domain;
+	do {
+		dom = &keg->uk_domain[domain];
+		if (!LIST_EMPTY(&dom->ud_part_slab))
+			return (LIST_FIRST(&dom->ud_part_slab));
+		if (!LIST_EMPTY(&dom->ud_free_slab)) {
+			slab = LIST_FIRST(&dom->ud_free_slab);
+			LIST_REMOVE(slab, us_link);
+			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
+			return (slab);
+		}
+		if (rr)
+			domain = (domain + 1) % vm_ndomains;
+	} while (domain != start);
+
+	return (NULL);
+}
+
+static uma_slab_t
+keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
+{
+	uma_domain_t dom;
+	uma_slab_t slab;
+	int allocflags, domain, reserve, rr, start;
+
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	slab = NULL;
 	reserve = 0;
+	allocflags = flags;
 	if ((flags & M_USE_RESERVE) == 0)
 		reserve = keg->uk_reserve;
 
-	for (;;) {
-		/*
-		 * Find a slab with some space.  Prefer slabs that are partially
-		 * used over those that are totally full.  This helps to reduce
-		 * fragmentation.
-		 */
-		if (keg->uk_free > reserve) {
-			if (!LIST_EMPTY(&keg->uk_part_slab)) {
-				slab = LIST_FIRST(&keg->uk_part_slab);
-			} else {
-				slab = LIST_FIRST(&keg->uk_free_slab);
-				LIST_REMOVE(slab, us_link);
-				LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
-				    us_link);
-			}
+	/*
+	 * Round-robin for non first-touch zones when there is more than one
+	 * domain.
+	 */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


