Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 9 Mar 2018 05:46:49 +0000 (UTC)
From:      Jeff Roberson <jeff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r330683 - in user/jeff/numa: sys/kern sys/sys sys/vm usr.bin/cpuset
Message-ID:  <201803090546.w295knrF047040@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jeff
Date: Fri Mar  9 05:46:48 2018
New Revision: 330683
URL: https://svnweb.freebsd.org/changeset/base/330683

Log:
  Add an interleave policy that picks the domain based on the pindex.
  Restructure the domainsets to include an array of domains in order to
  make this iterator work quickly and eliminate bit searches on alloc.
  
  Fix a bug in first touch that would prevent the iterator from visiting
  other domains in the set.
  
  Fix a bug in the policy inheritance that gave user threads the kernel
  cpuset.

Modified:
  user/jeff/numa/sys/kern/kern_cpuset.c
  user/jeff/numa/sys/sys/domainset.h
  user/jeff/numa/sys/vm/vm_domainset.c
  user/jeff/numa/sys/vm/vm_domainset.h
  user/jeff/numa/sys/vm/vm_object.c
  user/jeff/numa/sys/vm/vm_page.c
  user/jeff/numa/usr.bin/cpuset/cpuset.c

Modified: user/jeff/numa/sys/kern/kern_cpuset.c
==============================================================================
--- user/jeff/numa/sys/kern/kern_cpuset.c	Fri Mar  9 05:44:05 2018	(r330682)
+++ user/jeff/numa/sys/kern/kern_cpuset.c	Fri Mar  9 05:46:48 2018	(r330683)
@@ -445,6 +445,7 @@ static struct domainset *
 _domainset_create(struct domainset *domain, struct domainlist *freelist)
 {
 	struct domainset *ndomain;
+	int i, j, max;
 
 	mtx_lock_spin(&cpuset_lock);
 	LIST_FOREACH(ndomain, &cpuset_domains, ds_link)
@@ -457,7 +458,10 @@ _domainset_create(struct domainset *domain, struct dom
 	if (ndomain == NULL) {
 		LIST_INSERT_HEAD(&cpuset_domains, domain, ds_link);
 		domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask);
-		domain->ds_max = DOMAINSET_FLS(&domain->ds_mask) + 1;
+		max = DOMAINSET_FLS(&domain->ds_mask) + 1;
+		for (i = 0, j = 0; i < max; i++)
+			if (DOMAINSET_ISSET(i, &domain->ds_mask))
+				domain->ds_order[j++] = i;
 	}
 	mtx_unlock_spin(&cpuset_lock);
 	if (ndomain == NULL)
@@ -1269,10 +1273,9 @@ domainset_zero(void)
 	dset->ds_policy = DOMAINSET_POLICY_FIRSTTOUCH;
 	dset->ds_prefer = -1;
 	curthread->td_domain.dr_policy = _domainset_create(dset, NULL);
-	kernel_object->domain.dr_policy = curthread->td_domain.dr_policy;
 
 	domainset_copy(dset, &domainset2);
-	domainset2.ds_policy = DOMAINSET_POLICY_ROUNDROBIN;
+	domainset2.ds_policy = DOMAINSET_POLICY_INTERLEAVE;
 	kernel_object->domain.dr_policy = _domainset_create(&domainset2, NULL);
 }
 
@@ -1297,9 +1300,9 @@ cpuset_thread0(void)
 	int error;
 
 	cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL,
-	    NULL, NULL, UMA_ALIGN_PTR, 0);
+	    NULL, NULL, UMA_ALIGN_CACHE, 0);
 	domainset_zone = uma_zcreate("domainset", sizeof(struct domainset),
-	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
 
 	/*
 	 * Create the root system set (0) for the whole machine.  Doesn't use
@@ -1336,7 +1339,7 @@ cpuset_thread0(void)
 	 */
 	cpuset_unr = new_unrhdr(2, INT_MAX, NULL);
 
-	return (set);
+	return (cpuset_default);
 }
 
 void
@@ -2133,9 +2136,8 @@ DB_SHOW_COMMAND(domainsets, db_show_domainsets)
 	struct domainset *set;
 
 	LIST_FOREACH(set, &cpuset_domains, ds_link) {
-		db_printf("set=%p policy %d prefer %d cnt %d max %d\n",
-		    set, set->ds_policy, set->ds_prefer, set->ds_cnt,
-		    set->ds_max);
+		db_printf("set=%p policy %d prefer %d cnt %d\n",
+		    set, set->ds_policy, set->ds_prefer, set->ds_cnt);
 		db_printf("  mask =");
 		ddb_display_domainset(&set->ds_mask);
 		db_printf("\n");

Modified: user/jeff/numa/sys/sys/domainset.h
==============================================================================
--- user/jeff/numa/sys/sys/domainset.h	Fri Mar  9 05:44:05 2018	(r330682)
+++ user/jeff/numa/sys/sys/domainset.h	Fri Mar  9 05:46:48 2018	(r330683)
@@ -73,19 +73,26 @@
 #define	DOMAINSET_POLICY_ROUNDROBIN	1
 #define	DOMAINSET_POLICY_FIRSTTOUCH	2
 #define	DOMAINSET_POLICY_PREFER		3
-#define	DOMAINSET_POLICY_MAX		DOMAINSET_POLICY_PREFER
+#define	DOMAINSET_POLICY_INTERLEAVE	4
+#define	DOMAINSET_POLICY_MAX		DOMAINSET_POLICY_INTERLEAVE
 
 #ifdef _KERNEL
 #include <sys/queue.h>
 LIST_HEAD(domainlist, domainset);
 
+#if MAXMEMDOM < 256
+typedef	uint8_t		domainid_t;
+#else
+typedef uint16_t	domainid_t;
+#endif
+
 struct domainset {
 	LIST_ENTRY(domainset)	ds_link;
 	domainset_t	ds_mask;	/* allowed domains. */
 	uint16_t	ds_policy;	/* Policy type. */
-	int16_t		ds_prefer;	/* Preferred domain or -1. */
-	uint16_t	ds_cnt;		/* popcnt from above. */
-	uint16_t	ds_max;		/* Maximum domain in set. */
+	domainid_t	ds_prefer;	/* Preferred domain or -1. */
+	domainid_t	ds_cnt;		/* popcnt from above. */
+	domainid_t	ds_order[MAXMEMDOM];  /* nth domain table. */
 };
 
 void domainset_zero(void);

Modified: user/jeff/numa/sys/vm/vm_domainset.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_domainset.c	Fri Mar  9 05:44:05 2018	(r330682)
+++ user/jeff/numa/sys/vm/vm_domainset.c	Fri Mar  9 05:46:48 2018	(r330683)
@@ -56,11 +56,14 @@ __FBSDID("$FreeBSD$");
  * assumed that most allocations are successful.
  */
 
+static int vm_domainset_default_stride = 64;
+
 /*
  * Determine which policy is to be used for this allocation.
  */
 static void
-vm_domainset_iter_domain(struct vm_domainset_iter *di, struct vm_object *obj)
+vm_domainset_iter_init(struct vm_domainset_iter *di, struct vm_object *obj,
+    vm_pindex_t pindex)
 {
 	struct domainset *domain;
 
@@ -72,22 +75,26 @@ vm_domainset_iter_domain(struct vm_domainset_iter *di,
 	if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) {
 		di->di_domain = domain;
 		di->di_iter = &obj->domain.dr_iterator;
+		if (vm_object_reserv(obj))
+			di->di_stride = 1 << VM_LEVEL_0_ORDER;
+		else if (obj->iosize)
+			di->di_stride = obj->iosize / PAGE_SIZE;
+		else
+			di->di_stride = vm_domainset_default_stride;
 	} else {
 		di->di_domain = curthread->td_domain.dr_policy;
 		di->di_iter = &curthread->td_domain.dr_iterator;
+		di->di_stride = vm_domainset_default_stride;
 	}
+	di->di_policy = di->di_domain->ds_policy;
+	di->di_pindex = pindex;
 }
 
 static void
 vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
 {
-	int d;
 
-	d = *di->di_iter;
-	do {
-		d = (d + 1) % di->di_domain->ds_max;
-	} while (!DOMAINSET_ISSET(d, &di->di_domain->ds_mask));
-	*di->di_iter = *domain = d;
+	*domain = di->di_domain->ds_order[++(*di->di_iter) % di->di_domain->ds_cnt];
 }
 
 static void
@@ -95,27 +102,37 @@ vm_domainset_iter_prefer(struct vm_domainset_iter *di,
 {
 	int d;
 
-	d = *di->di_iter;
 	do {
-		d = (d + 1) % di->di_domain->ds_max;
-	} while (!DOMAINSET_ISSET(d, &di->di_domain->ds_mask) || 
-	    d == di->di_domain->ds_prefer);
-	*di->di_iter = *domain = d;
+		d = di->di_domain->ds_order[
+		    ++(*di->di_iter) % di->di_domain->ds_cnt];
+	} while (d == di->di_domain->ds_prefer);
+	*domain = d;
 }
 
 static void
+vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
+{
+	int d;
+
+	d = (di->di_pindex / di->di_stride) % di->di_domain->ds_cnt;
+	*domain = di->di_domain->ds_order[d];
+}
+
+static void
 vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
 {
 
 	KASSERT(di->di_n > 0,
 	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
-	switch (di->di_domain->ds_policy) {
+	switch (di->di_policy) {
 	case DOMAINSET_POLICY_FIRSTTOUCH:
 		/*
 		 * To prevent impossible allocations we convert an invalid
 		 * first-touch to round-robin.
 		 */
 		/* FALLTHROUGH */
+	case DOMAINSET_POLICY_INTERLEAVE:
+		/* FALLTHROUGH */
 	case DOMAINSET_POLICY_ROUNDROBIN:
 		vm_domainset_iter_rr(di, domain);
 		break;
@@ -124,7 +141,7 @@ vm_domainset_iter_next(struct vm_domainset_iter *di, i
 		break;
 	default:
 		panic("vm_domainset_iter_first: Unknown policy %d",
-		    di->di_domain->ds_policy);
+		    di->di_policy);
 	}
 	KASSERT(*domain < vm_ndomains,
 	    ("vm_domainset_iter_next: Invalid domain %d", *domain));
@@ -134,11 +151,11 @@ static void
 vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
 {
 
-	switch (di->di_domain->ds_policy) {
+	switch (di->di_policy) {
 	case DOMAINSET_POLICY_FIRSTTOUCH:
 		*domain = PCPU_GET(domain);
 		if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) {
-			di->di_n = 1;
+			di->di_n = di->di_domain->ds_cnt;
 			break;
 		}
 		/*
@@ -154,9 +171,13 @@ vm_domainset_iter_first(struct vm_domainset_iter *di, 
 		*domain = di->di_domain->ds_prefer;
 		di->di_n = di->di_domain->ds_cnt;
 		break;
+	case DOMAINSET_POLICY_INTERLEAVE:
+		vm_domainset_iter_interleave(di, domain);
+		di->di_n = di->di_domain->ds_cnt;
+		break;
 	default:
 		panic("vm_domainset_iter_first: Unknown policy %d",
-		    di->di_domain->ds_policy);
+		    di->di_policy);
 	}
 	KASSERT(di->di_n > 0,
 	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
@@ -166,10 +187,10 @@ vm_domainset_iter_first(struct vm_domainset_iter *di, 
 
 void
 vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
-    int *domain, int *req)
+    vm_pindex_t pindex, int *domain, int *req)
 {
 
-	vm_domainset_iter_domain(di, obj);
+	vm_domainset_iter_init(di, obj, pindex);
 	di->di_flags = *req;
 	*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
 	    VM_ALLOC_NOWAIT;
@@ -213,7 +234,9 @@ vm_domainset_iter_malloc_init(struct vm_domainset_iter
     struct vm_object *obj, int *domain, int *flags)
 {
 
-	vm_domainset_iter_domain(di, obj);
+	vm_domainset_iter_init(di, obj, 0);
+	if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE)
+		di->di_policy = DOMAINSET_POLICY_ROUNDROBIN;
 	di->di_flags = *flags;
 	*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
 	vm_domainset_iter_first(di, domain);
@@ -253,7 +276,7 @@ vm_domainset_iter_page(struct vm_domainset_iter *di, i
 
 void
 vm_domainset_iter_page_init(struct vm_domainset_iter *di,
-            struct vm_object *obj, int *domain, int *flags)
+            struct vm_object *obj, vm_pindex_t pindex, int *domain, int *flags)
 {
 
 	*domain = 0;

Modified: user/jeff/numa/sys/vm/vm_domainset.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_domainset.h	Fri Mar  9 05:44:05 2018	(r330682)
+++ user/jeff/numa/sys/vm/vm_domainset.h	Fri Mar  9 05:46:48 2018	(r330683)
@@ -33,13 +33,16 @@
 struct vm_domainset_iter {
 	struct domainset	*di_domain;
 	int			*di_iter;
+	vm_pindex_t		di_pindex;
+	int			di_policy;
 	int			di_flags;
+	int			di_stride;
 	int			di_n;
 };
 
 int	vm_domainset_iter_page(struct vm_domainset_iter *, int *, int *);
 void	vm_domainset_iter_page_init(struct vm_domainset_iter *,
-	    struct vm_object *, int *, int *);
+	    struct vm_object *, vm_pindex_t, int *, int *);
 int	vm_domainset_iter_malloc(struct vm_domainset_iter *, int *, int *);
 void	vm_domainset_iter_malloc_init(struct vm_domainset_iter *,
 	    struct vm_object *, int *, int *);

Modified: user/jeff/numa/sys/vm/vm_object.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_object.c	Fri Mar  9 05:44:05 2018	(r330682)
+++ user/jeff/numa/sys/vm/vm_object.c	Fri Mar  9 05:46:48 2018	(r330683)
@@ -282,7 +282,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, 
 	object->handle = NULL;
 	object->backing_object = NULL;
 	object->backing_object_offset = (vm_ooffset_t) 0;
-	object->iosize = PAGE_SIZE;
+	object->iosize = 0;
 #if VM_NRESERVLEVEL > 0
 	LIST_INIT(&object->rvq);
 #endif

Modified: user/jeff/numa/sys/vm/vm_page.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_page.c	Fri Mar  9 05:44:05 2018	(r330682)
+++ user/jeff/numa/sys/vm/vm_page.c	Fri Mar  9 05:46:48 2018	(r330683)
@@ -1696,7 +1696,7 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pi
 	vm_page_t m;
 	int domain;
 
-	vm_domainset_iter_page_init(&di, object, &domain, &req);
+	vm_domainset_iter_page_init(&di, object, pindex, &domain, &req);
 	do {
 		m = vm_page_alloc_domain_after(object, pindex, domain, req,
 		    mpred);
@@ -1933,7 +1933,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t p
 	vm_page_t m;
 	int domain;
 
-	vm_domainset_iter_page_init(&di, object, &domain, &req);
+	vm_domainset_iter_page_init(&di, object, pindex, &domain, &req);
 	do {
 		m = vm_page_alloc_contig_domain(object, pindex, domain, req,
 		    npages, low, high, alignment, boundary, memattr);
@@ -2133,7 +2133,7 @@ vm_page_alloc_freelist(int freelist, int req)
 	vm_page_t m;
 	int domain;
 
-	vm_domainset_iter_page_init(&di, kernel_object, &domain, &req);
+	vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req);
 	do {
 		m = vm_page_alloc_freelist_domain(domain, freelist, req);
 		if (m != NULL)
@@ -2777,7 +2777,7 @@ vm_page_reclaim_contig(int req, u_long npages, vm_padd
 	int domain;
 	bool ret;
 
-	vm_domainset_iter_page_init(&di, kernel_object, &domain, &req);
+	vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req);
 	do {
 		ret = vm_page_reclaim_contig_domain(domain, req, npages, low,
 		    high, alignment, boundary);

Modified: user/jeff/numa/usr.bin/cpuset/cpuset.c
==============================================================================
--- user/jeff/numa/usr.bin/cpuset/cpuset.c	Fri Mar  9 05:44:05 2018	(r330682)
+++ user/jeff/numa/usr.bin/cpuset/cpuset.c	Fri Mar  9 05:46:48 2018	(r330683)
@@ -79,6 +79,8 @@ static struct numa_policy policies[] = {
 	{ "first-touch", DOMAINSET_POLICY_FIRSTTOUCH },
 	{ "ft", DOMAINSET_POLICY_FIRSTTOUCH },
 	{ "prefer", DOMAINSET_POLICY_PREFER },
+	{ "interleave", DOMAINSET_POLICY_INTERLEAVE},
+	{ "il", DOMAINSET_POLICY_INTERLEAVE},
 	{ NULL, DOMAINSET_POLICY_INVALID }
 };
 
@@ -237,7 +239,7 @@ static const char *whichnames[] = { NULL, "tid", "pid"
 				    "domain" };
 static const char *levelnames[] = { NULL, " root", " cpuset", "" };
 static const char *policynames[] = { "invalid", "round-robin", "first-touch",
-				    "prefer" };
+				    "prefer", "interleave" };
 
 static void
 printaffinity(void)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201803090546.w295knrF047040>