Date:      Sat, 28 Aug 2021 02:21:30 GMT
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 18bc112f63b6 - stable/13 - sched_ule(4): Reduce duplicate search for load.
Message-ID:  <202108280221.17S2LUPf052163@gitrepo.freebsd.org>

The branch stable/13 has been updated by mav:

URL: https://cgit.FreeBSD.org/src/commit/?id=18bc112f63b61c6f912419cb613bb92959216dab

commit 18bc112f63b61c6f912419cb613bb92959216dab
Author:     Alexander Motin <mav@FreeBSD.org>
AuthorDate: 2021-08-02 02:07:51 +0000
Commit:     Alexander Motin <mav@FreeBSD.org>
CommitDate: 2021-08-28 02:17:55 +0000

    sched_ule(4): Reduce duplicate search for load.
    
    When sched_highest() called for some CPU group returns nothing, the
    idle thread calls it for the parent CPU group.  But the parent CPU
    group also includes the CPU group we've just searched, and unless a
    race is going on, it is unlikely we will find anything new this time.
    
    Avoid the double search when the parent group has only two sub-groups
    (the most common case).  Instead of escalating to the parent group,
    run the next search over the sibling subgroup and escalate two levels
    up afterwards if that fails too.  With more than two siblings the
    difference is less significant, while searching the parent group can
    result in a better decision if we find several candidate CPUs.
    
    On a 2-socket 40-core Xeon system I measured a ~25% reduction of the
    CPU time spent inside cpu_search_highest() in both SMT (2x20x2) and
    non-SMT (2x20) cases.
    
    MFC after:      1 month
    
    (cherry picked from commit 2668bb2add8d11c56524ce9014b510412f8f6aa9)
---
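A minimal stand-alone sketch of the escalation step this change adds,
for readers skimming the diff below.  The struct is stripped down from
the real struct cpu_group in sys/smp.h, and next_group() is a
hypothetical helper that only restates the selection logic; in the
actual patch the same logic sits inline in tdq_idled() and
tdq_trysteal() around the calls to sched_highest().

#include <stddef.h>

struct cpu_group {
        struct cpu_group *cg_parent;    /* Our parent group. */
        struct cpu_group *cg_child;     /* Array of child groups. */
        int cg_children;                /* Number of child groups. */
};

/*
 * Pick the next group to search after a failed search of 'cg'.  If the
 * parent has exactly two children, search the sibling instead of the
 * parent (which would re-scan 'cg') and remember via *goup to skip one
 * extra level on the next failure.  Otherwise fall back to the parent.
 * Returns NULL once the root of the topology has been searched.
 */
static struct cpu_group *
next_group(struct cpu_group *cg, int *goup)
{
        struct cpu_group *parent;

        if (*goup) {
                /* Both siblings were searched; move past their parent. */
                cg = cg->cg_parent;
                *goup = 0;
        }
        parent = cg->cg_parent;
        if (parent == NULL)
                return (NULL);
        if (parent->cg_children == 2) {
                /* Search the one sibling instead of re-searching 'cg'. */
                cg = (cg == &parent->cg_child[0]) ?
                    &parent->cg_child[1] : &parent->cg_child[0];
                *goup = 1;
        } else
                cg = parent;
        return (cg);
}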
 sys/kern/sched_ule.c | 63 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 52 insertions(+), 11 deletions(-)

diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 6e9ef73efe99..55af36f40bc1 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -933,10 +933,10 @@ tdq_move(struct tdq *from, struct tdq *to)
 static int
 tdq_idled(struct tdq *tdq)
 {
-	struct cpu_group *cg;
+	struct cpu_group *cg, *parent;
 	struct tdq *steal;
 	cpuset_t mask;
-	int cpu, switchcnt;
+	int cpu, switchcnt, goup;
 
 	if (smp_started == 0 || steal_idle == 0 || tdq->tdq_cg == NULL)
 		return (1);
@@ -944,7 +944,7 @@ tdq_idled(struct tdq *tdq)
 	CPU_CLR(PCPU_GET(cpuid), &mask);
     restart:
 	switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
-	for (cg = tdq->tdq_cg; ; ) {
+	for (cg = tdq->tdq_cg, goup = 0; ; ) {
 		cpu = sched_highest(cg, &mask, steal_thresh);
 		/*
 		 * We were assigned a thread but not preempted.  Returning
@@ -952,10 +952,29 @@ tdq_idled(struct tdq *tdq)
 		 */
 		if (tdq->tdq_load)
 			return (0);
+
+		/*
+		 * We found no CPU to steal from in this group.  Escalate to
+		 * the parent and repeat.  But if parent has only two children
+		 * groups we can avoid searching this group again by searching
+		 * the other one specifically and then escalating two levels.
+		 */
 		if (cpu == -1) {
-			cg = cg->cg_parent;
-			if (cg == NULL)
+			if (goup) {
+				cg = cg->cg_parent;
+				goup = 0;
+			}
+			parent = cg->cg_parent;
+			if (parent == NULL)
 				return (1);
+			if (parent->cg_children == 2) {
+				if (cg == &parent->cg_child[0])
+					cg = &parent->cg_child[1];
+				else
+					cg = &parent->cg_child[0];
+				goup = 1;
+			} else
+				cg = parent;
 			continue;
 		}
 		steal = TDQ_CPU(cpu);
@@ -1868,10 +1887,10 @@ lend:
 static void
 tdq_trysteal(struct tdq *tdq)
 {
-	struct cpu_group *cg;
+	struct cpu_group *cg, *parent;
 	struct tdq *steal;
 	cpuset_t mask;
-	int cpu, i;
+	int cpu, i, goup;
 
 	if (smp_started == 0 || trysteal_limit == 0 || tdq->tdq_cg == NULL)
 		return;
@@ -1880,7 +1899,7 @@ tdq_trysteal(struct tdq *tdq)
 	/* We don't want to be preempted while we're iterating. */
 	spinlock_enter();
 	TDQ_UNLOCK(tdq);
-	for (i = 1, cg = tdq->tdq_cg; ; ) {
+	for (i = 1, cg = tdq->tdq_cg, goup = 0; ; ) {
 		cpu = sched_highest(cg, &mask, steal_thresh);
 		/*
 		 * If a thread was added while interrupts were disabled don't
@@ -1890,13 +1909,35 @@ tdq_trysteal(struct tdq *tdq)
 			TDQ_LOCK(tdq);
 			break;
 		}
+
+		/*
+		 * We found no CPU to steal from in this group.  Escalate to
+		 * the parent and repeat.  But if parent has only two children
+		 * groups we can avoid searching this group again by searching
+		 * the other one specifically and then escalating two levels.
+		 */
 		if (cpu == -1) {
-			i++;
-			cg = cg->cg_parent;
-			if (cg == NULL || i > trysteal_limit) {
+			if (goup) {
+				cg = cg->cg_parent;
+				goup = 0;
+			}
+			if (++i > trysteal_limit) {
+				TDQ_LOCK(tdq);
+				break;
+			}
+			parent = cg->cg_parent;
+			if (parent == NULL) {
 				TDQ_LOCK(tdq);
 				break;
 			}
+			if (parent->cg_children == 2) {
+				if (cg == &parent->cg_child[0])
+					cg = &parent->cg_child[1];
+				else
+					cg = &parent->cg_child[0];
+				goup = 1;
+			} else
+				cg = parent;
 			continue;
 		}
 		steal = TDQ_CPU(cpu);


