Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 2 Jul 2001 00:32:13 -0500
From:      Alfred Perlstein <bright@sneakerz.org>
To:        smp@freebsd.org
Subject:   per cpu runqueues, cpu affinity and cpu binding.
Message-ID:  <20010702003213.I84523@sneakerz.org>

next in thread | raw e-mail | index | archive | help

--2oS5YaxWCcQjTEyO
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

I've been playing around with using per-cpu run queues to 
achieve processor affinity as well as give the ability to
bind a process to a particular cpu.

Attached to this mail you'll find the diff to do this
along with a program that binds processes to a cpu.

There's a couple of things I want to note about this
work.

) The cpu affinity seems to actually buy performance, I've seen
seconds taken off user/sys time when doing kernel compiles with
this.  Of course if people were to provide their own micro-benchmarks
it would assist in determining the utility of this work.

) The binding is not very flexible.  You can only bind to one cpu,
not a group of cpus, nor can you prohibit a process from running
on any particular cpu.  Suggestions would be appreciated.

) It somewhat butchers the nice functional interface that Jake did
because it accesses a global, namely the per-cpu queues are a
global.  I plan on fixing this.

) Input on how affinity/binding could be improved (along with code
examples) would be appreciated.  Please don't say "I would do it
this way" unless your mail happens to contain an algorithm that
clearly maps to some code. :)

The current way it is implemented is that for unbound processes
there is a double linkage, basically an unbound process will be on
both the cpu it last ran on and the global queue.  A certain weight
is assigned to tip the scales in favor of running a process that
last ran on a particular cpu, basically 4 * RQ_PPQ (see the mod to
runq_choose()), this could be adjusted in order to give either
higher priority processes a boost, or a process that last ran on
the cpu pulling it off the runqueue a boost.

Bound processes only exist on the per-cpu queue that they are bound
to.

What I'd actually prefer is no global queue, when schedcpu() is
called it would balance out the processes amongst the per-cpu
queues, or if a particular cpu realized it was stuck with a lot of
high or low priority processes while another cpu is occupied with
the opposite it would attempt to migrate or steal depending on the
type of imbalance going on.  Suggestions on how to do this would
also be appreciated. :)

The attached bindcpu.c program will need sys/pioctl.h installed to
compile, once compiled and the kernel is rebuilt (don't forget
modules as the size of proc has changed) you can use it to bind
processes like so:

./bindcpu <curproc|pid> 1  # bind curproc/pid to cpu 1
./bindcpu <curproc|pid> -1 # unbind

have fun.

-- 
-Alfred Perlstein [alfred@freebsd.org]
Ok, who wrote this damn function called '??'?
And why do my programs keep crashing in it?

--2oS5YaxWCcQjTEyO
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="runq.diff"

Index: fs/procfs/procfs_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/fs/procfs/procfs_vnops.c,v
retrieving revision 1.98
diff -u -r1.98 procfs_vnops.c
--- fs/procfs/procfs_vnops.c	2001/05/25 16:59:04	1.98
+++ fs/procfs/procfs_vnops.c	2001/07/01 16:48:51
@@ -57,6 +57,7 @@
 #include <sys/proc.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
+#include <sys/smp.h>
 #include <sys/sx.h>
 #include <sys/time.h>
 #include <sys/uio.h>
@@ -68,6 +69,12 @@
 
 #include <fs/procfs/procfs.h>
 
+#ifdef	SMP
+#define	NCPU_PRESENT	mp_ncpus
+#else
+#define	NCPU_PRESENT	1
+#endif
+
 static int	procfs_access __P((struct vop_access_args *));
 static int	procfs_badop __P((void));
 static int	procfs_close __P((struct vop_close_args *));
@@ -231,6 +238,7 @@
 {
 	struct pfsnode *pfs = VTOPFS(ap->a_vp);
 	struct proc *procp, *p;
+	int cpu, srun;
 	int error;
 	int signo;
 	struct procfs_status *psp;
@@ -248,6 +256,32 @@
 	}
 
 	switch (ap->a_command) {
+	case PIOCBIND:
+		cpu = *(int *)ap->a_data;
+		if (cpu < -1 || cpu >= NCPU_PRESENT) {
+			PROC_UNLOCK(procp);
+			return (EINVAL);
+		}
+		mtx_lock_spin(&sched_lock);
+		srun =  (procp != curproc &&
+#ifdef SMP
+		     procp->p_oncpu == NOCPU && 	/* idle */
+#endif
+		     procp->p_stat == SRUN);
+		
+		if (srun)
+			remrunqueue(procp);
+		if (cpu == -1) {
+			procp->p_sflag &= ~PS_BOUND;
+		} else {
+			procp->p_sflag |= PS_BOUND;
+			procp->p_rqcpu = cpu;
+		}
+		if (srun)
+			setrunqueue(procp);
+		mtx_unlock_spin(&sched_lock);
+printf("srun == %d, cpu == %d\n", srun, cpu);
+		break;
 	case PIOCBIS:
 	  procp->p_stops |= *(unsigned int*)ap->a_data;
 	  break;
Index: kern/kern_switch.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_switch.c,v
retrieving revision 1.15
diff -u -r1.15 kern_switch.c
--- kern/kern_switch.c	2001/03/28 09:17:54	1.15
+++ kern/kern_switch.c	2001/07/01 16:48:51
@@ -32,14 +32,18 @@
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
+#include <sys/smp.h>
 
 /*
  * Global run queue.
  */
 static struct runq runq;
-SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)
+static struct runq *runqcpu;
+SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq);
+SYSINIT(runqcpu, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runqcpu_init, &runqcpu);
 
 /*
  * Wrappers which implement old interface; act on global run queue.
@@ -54,12 +58,14 @@
 int
 procrunnable(void)
 {
-	return runq_check(&runq);
+	
+	return (runq_check(&runqcpu[PCPU_GET(cpuid)]) || runq_check(&runq));
 }
 
 void
 remrunqueue(struct proc *p)
 {
+
 	runq_remove(&runq, p);
 }
 
@@ -154,7 +160,7 @@
 runq_add(struct runq *rq, struct proc *p)
 {
 	struct rqhead *rqh;
-	int pri;
+	int pri, cpu;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	KASSERT(p->p_stat == SRUN, ("runq_add: proc %p (%s) not SRUN",
@@ -163,11 +169,33 @@
 	    ("runq_add: proc %p (%s) already in run queue", p, p->p_comm));
 	pri = p->p_pri.pri_level / RQ_PPQ;
 	p->p_rqindex = pri;
+
+	if ((p->p_sflag & PS_BOUND) == 0) {
+		cpu = p->p_lastcpu;
+		if (cpu < 0 || cpu >= mp_ncpus)
+			cpu = PCPU_GET(cpuid);
+		p->p_rqcpu = cpu;
+		runq_setbit(rq, pri);
+		rqh = &rq->rq_queues[pri];
+		CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
+		    p, p->p_pri.pri_level, pri, rqh);
+		TAILQ_INSERT_TAIL(rqh, p, p_procq);
+	} else {
+		CTR2(KTR_RUNQ, "runq_add: proc %p bound to cpu %d",
+		    p, (int)p->p_rqcpu);
+		cpu = p->p_rqcpu;
+	}
+
+	rq = &runqcpu[cpu];
+	KASSERT(runq_find(rq, p) == 0,
+	    ("runq_add: proc %p (%s) already in cpu (%d) run queue",
+	     p, p->p_comm, cpu));
 	runq_setbit(rq, pri);
+
 	rqh = &rq->rq_queues[pri];
-	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
-	    p, p->p_pri.pri_level, pri, rqh);
-	TAILQ_INSERT_TAIL(rqh, p, p_procq);
+	CTR5(KTR_RUNQ, "runq_cpu_add: p=%p pri=%d %d rqh=%p cpu=%d",
+	    p, p->p_pri.pri_level, pri, rqh, cpu);
+	TAILQ_INSERT_TAIL(rqh, p, p_proccpuq);
 }
 
 /*
@@ -203,29 +231,53 @@
 {
 	struct rqhead *rqh;
 	struct proc *p;
-	int pri;
+	int pri, pricpu, cpu;
 
 	mtx_assert(&sched_lock, MA_OWNED);
-	if ((pri = runq_findbit(rq)) != -1) {
+	cpu = PCPU_GET(cpuid);
+	pricpu = runq_findbit(&runqcpu[cpu]);
+	pri = runq_findbit(rq);
+	CTR2(KTR_RUNQ, "runq_choose: pri=%d cpupri=%d", pri, pricpu);
+	if (pricpu != -1 && (pricpu <= pri + 4 * RQ_PPQ || pri == -1)) {
+		pri = pricpu;
+		rqh = &runqcpu[cpu].rq_queues[pri];
+	} else if (pri != -1) {
+		rqh = &rq->rq_queues[pri];
+	} else {
+		CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
+		return (PCPU_GET(idleproc));
+	}
+	p = TAILQ_FIRST(rqh);
+	KASSERT(p != NULL, ("runq_choose: no proc on busy queue"));
+	KASSERT(p->p_stat == SRUN,
+	    ("runq_chose: process %d(%s) in state %d", p->p_pid,
+	    p->p_comm, p->p_stat));
+	CTR3(KTR_RUNQ, "runq_choose: pri=%d p=%p rqh=%p", pri, p, rqh);
+	
+	if ((p->p_sflag & PS_BOUND) == 0) {
 		rqh = &rq->rq_queues[pri];
-		p = TAILQ_FIRST(rqh);
-		KASSERT(p != NULL, ("runq_choose: no proc on busy queue"));
-		KASSERT(p->p_stat == SRUN,
-		    ("runq_chose: process %d(%s) in state %d", p->p_pid,
-		    p->p_comm, p->p_stat));
-		CTR3(KTR_RUNQ, "runq_choose: pri=%d p=%p rqh=%p", pri, p, rqh);
 		TAILQ_REMOVE(rqh, p, p_procq);
 		if (TAILQ_EMPTY(rqh)) {
 			CTR0(KTR_RUNQ, "runq_choose: empty");
 			runq_clrbit(rq, pri);
 		}
-		return (p);
+	} else {
+		CTR2(KTR_RUNQ, "runq_choose: proc %p bound to cpu %d",
+		    p, (int)p->p_rqcpu);
 	}
-	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
-
-	return (PCPU_GET(idleproc));
+	cpu = p->p_rqcpu;
+	rq = &runqcpu[cpu];
+	rqh = &rq->rq_queues[pri];
+	TAILQ_REMOVE(rqh, p, p_proccpuq);
+	if (TAILQ_EMPTY(rqh)) {
+		CTR0(KTR_RUNQ, "runq_choose: cpu empty");
+		runq_clrbit(rq, pri);
+	}
+	return (p);
 }
 
+MALLOC_DEFINE(M_RUNQ, "runqueues", "Run queues");
+
 /*
  * Initialize a run structure.
  */
@@ -239,6 +291,19 @@
 		TAILQ_INIT(&rq->rq_queues[i]);
 }
 
+void
+runqcpu_init(struct runq **rqp)
+{
+	struct runq *rq;
+	int i;
+	
+	MALLOC(rq, struct runq *, sizeof(*runqcpu) * mp_ncpus, M_RUNQ,
+	    M_WAITOK);
+	*rqp = rq;
+	for (i = 0; i < mp_ncpus; i++)
+		runq_init(&rq[i]);
+}
+
 /*
  * Remove the process from the queue specified by its priority, and clear the
  * corresponding status bit if the queue becomes empty.
@@ -247,10 +312,25 @@
 runq_remove(struct runq *rq, struct proc *p)
 {
 	struct rqhead *rqh;
+	struct runq *rqcpu;
 	int pri;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	pri = p->p_rqindex;
+	rqcpu = &runqcpu[p->p_rqcpu];
+	rqh = &rqcpu->rq_queues[pri];
+	CTR4(KTR_RUNQ, "runq_cpu_remove: p=%p pri=%d %d rqh=%p",
+	    p, p->p_pri.pri_level, pri, rqh);
+	TAILQ_REMOVE(rqh, p, p_proccpuq);
+	if (TAILQ_EMPTY(rqh)) {
+		CTR0(KTR_RUNQ, "runq_cpu_remove: empty");
+		runq_clrbit(rqcpu, pri);
+	}
+	if ((p->p_sflag & PS_BOUND) != 0) {
+		CTR2(KTR_RUNQ, "runq_cpu_remove: bound p=%p cpu=%d",
+		    p, p->p_rqcpu);
+		return;
+	}
 	rqh = &rq->rq_queues[pri];
 	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
 	    p, p->p_pri.pri_level, pri, rqh);
Index: sys/pioctl.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/pioctl.h,v
retrieving revision 1.8
diff -u -r1.8 pioctl.h
--- sys/pioctl.h	1999/08/28 00:51:55	1.8
+++ sys/pioctl.h	2001/07/01 16:48:51
@@ -58,6 +58,7 @@
 			/* Get proc status */
 # define	PIOCSTATUS	_IOR('p', 6, struct procfs_status)
 # define	PIOCGFL	_IOR('p', 7, unsigned int)	/* Get flags */
+# define	PIOCBIND	_IOC(IOC_IN, 'p', 8, 0)	/* Bind cpu */
 
 # define S_EXEC	0x00000001	/* stop-on-exec */
 # define	S_SIG	0x00000002	/* stop-on-signal */
Index: sys/proc.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/proc.h,v
retrieving revision 1.166
diff -u -r1.166 proc.h
--- sys/proc.h	2001/06/11 23:00:35	1.166
+++ sys/proc.h	2001/07/01 16:48:51
@@ -152,6 +152,7 @@
 
 struct	proc {
 	TAILQ_ENTRY(proc) p_procq;	/* (j) Run/mutex queue. */
+	TAILQ_ENTRY(proc) p_proccpuq;	/* (j) Run/mutex queue (per-cpu). */
 	TAILQ_ENTRY(proc) p_slpq;	/* (j) Sleep queue. */
 	LIST_ENTRY(proc) p_list;	/* (d) List of all processes. */
 
@@ -218,6 +219,7 @@
 	char	p_lock;		/* (c) Process lock (prevent swap) count. */
 	u_char	p_oncpu;		/* (j) Which cpu we are on. */
 	u_char	p_lastcpu;		/* (j) Last cpu we were on. */
+	u_char	p_rqcpu;		/* (j) Cpu run queue we are on. */
 	char	p_rqindex;		/* (j) Run queue index. */
 
 	short	p_locks;	/* (*) DEBUG: lockmgr count of held locks */
@@ -329,6 +331,7 @@
 #define	PS_SWAPPING	0x00200	/* Process is being swapped. */
 #define	PS_ASTPENDING	0x00400	/* Process has a pending ast. */
 #define	PS_NEEDRESCHED	0x00800	/* Process needs to yield. */
+#define	PS_BOUND	0x01000	/* Process is bound to a cpu */
 
 #define	P_MAGIC		0xbeefface
 
Index: sys/runq.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/runq.h,v
retrieving revision 1.1
diff -u -r1.1 runq.h
--- sys/runq.h	2001/02/12 00:20:07	1.1
+++ sys/runq.h	2001/07/01 16:48:51
@@ -75,6 +75,7 @@
 int	runq_check(struct runq *);
 struct	proc *runq_choose(struct runq *);
 void	runq_init(struct runq *);
+void	runqcpu_init(struct runq **);
 void	runq_remove(struct runq *, struct proc *);
 
 #endif

--2oS5YaxWCcQjTEyO
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="bindcpu.c"

/*
 * Copyright 1997 Sean Eric Fagan
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Sean Eric Fagan
 * 4. Neither the name of the author may be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Revision-control identification string, compiled in unless "lint" is
 * defined.  Note that the string still names procctl.c, the program this
 * file was copied from; it has not been updated for bindcpu.
 */
#ifndef lint
static const char rcsid[] =
  "$FreeBSD: src/usr.sbin/procctl/procctl.c,v 1.6 2000/02/21 10:22:39 ru Exp $";
#endif /* not lint */

/*
 * bindcpu -- bind a process to a single cpu, or unbind it again, using
 * the PIOCBIND procfs ioctl.
 *
 * usage: bindcpu <curproc|pid> <cpu>	(a cpu of -1 unbinds the process)
 *
 * This program was derived from procctl, which is largely an example of
 * how to use the procfs interface.
 */

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/pioctl.h>

/*
 * bindcpu <curproc|pid> <cpu>
 *
 * Opens /proc/<av[1]>/mem and issues the PIOCBIND ioctl on it with the cpu
 * number given in av[2]; a cpu of -1 requests an unbind.  Exits with
 * status 1 on a usage error, an unparsable cpu number, or if the open or
 * the ioctl fails.  On success the program does not exit: it closes the
 * descriptor and then spins forever.
 */
int
main(int ac, char **av) {
	char buf[32];
	char *end;
	long val;
	int fd, cpu, n;

	/* Both the process and the cpu argument are required. */
	if (ac < 3) {
		fprintf(stderr, "usage: bindcpu <curproc|pid> <cpu>\n");
		exit(1);
	}

	/* Parse the cpu number strictly instead of letting garbage become 0. */
	errno = 0;
	val = strtol(av[2], &end, 10);
	cpu = (int)val;
	if (end == av[2] || *end != '\0' || errno == ERANGE ||
	    (long)cpu != val)
		errx(1, "invalid cpu number: %s", av[2]);

	/* Refuse to open a silently truncated path. */
	n = snprintf(buf, sizeof(buf), "/proc/%s/mem", av[1]);
	if (n < 0 || (size_t)n >= sizeof(buf))
		errx(1, "process name too long: %s", av[1]);

	fd = open(buf, O_RDWR);
	if (fd == -1) {
		warn("cannot open pid %s", av[1]);
		exit(1);
	}

	fprintf(stderr, "binding process %s to cpu %d\n", av[1], cpu);
	/*
	 * The cpu number is passed by value, not by pointer.
	 * NOTE(review): this relies on PIOCBIND being declared with a zero
	 * parameter length so the kernel hands the argument word itself to
	 * the handler -- confirm against the kernel side of the patch.
	 */
	if (ioctl(fd, PIOCBIND, cpu) == -1) {
		warn("cannot bind process %s to cpu %d", av[1], cpu);
		exit(1);
	}
	close(fd);

	/*
	 * Spin forever once the bind has been issued.
	 * NOTE(review): presumably so that a "curproc" self-bind leaves a
	 * busy process to observe on the chosen cpu -- confirm intent.
	 */
	for (;;)
		;
	return 0;
}

--2oS5YaxWCcQjTEyO--

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-smp" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20010702003213.I84523>