Date:      Sun, 6 Aug 2006 20:33:31 GMT
From:      John Birrell <jb@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 103359 for review
Message-ID:  <200608062033.k76KXVY8015423@repoman.freebsd.org>

http://perforce.freebsd.org/chv.cgi?CH=103359

Change 103359 by jb@jb_freebsd2 on 2006/08/06 20:32:38

	Merge in KSE again, but only if the KSE kernel option is defined.

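Editorial note (not part of the submitted change): the new code paths below are
compiled only when the kernel is configured with the KSE option, presumably via
a line along these lines in the kernel configuration file:

	options 	KSE

Kernels built without that option take the #else branches and use the plain
per-thread code.
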
Affected files ...

.. //depot/projects/dtrace/src/sys/i386/i386/machdep.c#7 edit
.. //depot/projects/dtrace/src/sys/kern/init_main.c#4 edit
.. //depot/projects/dtrace/src/sys/kern/kern_resource.c#4 edit
.. //depot/projects/dtrace/src/sys/kern/kern_switch.c#7 edit
.. //depot/projects/dtrace/src/sys/kern/kern_thr.c#8 edit
.. //depot/projects/dtrace/src/sys/kern/kern_thread.c#4 edit
.. //depot/projects/dtrace/src/sys/kern/sched_4bsd.c#11 edit
.. //depot/projects/dtrace/src/sys/kern/tty.c#3 edit
.. //depot/projects/dtrace/src/sys/posix4/ksched.c#7 edit

Differences ...

==== //depot/projects/dtrace/src/sys/i386/i386/machdep.c#7 (text+ko) ====

@@ -2079,7 +2079,11 @@
  	 * This may be done better later if it gets more high level
  	 * components in it. If so just link td->td_proc here.
 	 */
+#ifdef KSE
+	proc_linkup(&proc0, &ksegrp0, &thread0);
+#else
 	proc_linkup(&proc0, &thread0);
+#endif
 
 	metadata_missing = 0;
 	if (bootinfo.bi_modulep) {

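For reference, the prototype change implied by the call sites above (sketched
from the two calls; not quoted from sys/proc.h):

	#ifdef KSE
	void	proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td);
	#else
	void	proc_linkup(struct proc *p, struct thread *td);
	#endif
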
==== //depot/projects/dtrace/src/sys/kern/init_main.c#4 (text+ko) ====

@@ -95,6 +95,9 @@
 static struct pgrp pgrp0;
 struct	proc proc0;
 struct	thread thread0 __aligned(8);
+#ifdef KSE
+struct	ksegrp ksegrp0;
+#endif
 struct	vmspace vmspace0;
 struct	proc *initproc;
 
@@ -221,9 +224,6 @@
 
 		if ((*sipp)->subsystem == SI_SUB_DONE)
 			continue;
-#if 0
-		printf("\t*%p(%p)\n", (*sipp)->func, (*sipp)->udata);
-#endif
 #if defined(VERBOSE_SYSINIT)
 		if ((*sipp)->subsystem > last) {
 			verbose = 1;
@@ -274,10 +274,6 @@
 
 		/* Call function */
 		(*((*sipp)->func))((*sipp)->udata);
-#if 0
-		printf("mi_startup: pstate=0x%lx pil=0x%lx\n", 
-		       rdpr(pstate), rdpr(pil));
-#endif
 
 #if defined(VERBOSE_SYSINIT)
 		if (verbose)
@@ -392,17 +388,35 @@
 	struct proc *p;
 	unsigned i;
 	struct thread *td;
+#ifdef KSE
+	struct ksegrp *kg;
+#endif
 
 	GIANT_REQUIRED;
 	p = &proc0;
 	td = &thread0;
+#ifdef KSE
+	kg = &ksegrp0;
+#endif
 
 	/*
 	 * Initialize magic number.
 	 */
 	p->p_magic = P_MAGIC;
 
+#ifdef KSE
 	/*
+	 * Initialize thread, process and ksegrp structures.
+	 */
+	procinit();	/* set up proc zone */
+	threadinit();	/* set up thread, upcall and KSEGRP zones */
+
+	/*
+	 * Initialise scheduler resources.
+	 * Add scheduler specific parts to proc, ksegrp, thread as needed.
+	 */
+#else
+	/*
 	 * Initialize thread and process structures.
 	 */
 	procinit();	/* set up proc zone */
@@ -412,6 +426,7 @@
 	 * Initialise scheduler resources.
 	 * Add scheduler specific parts to proc, thread as needed.
 	 */
+#endif
 	schedinit();	/* scheduler gets its house in order */
 	/*
 	 * Initialize sleep queue hash table
@@ -447,8 +462,13 @@
 	STAILQ_INIT(&p->p_ktr);
 	p->p_nice = NZERO;
 	td->td_state = TDS_RUNNING;
+#ifdef KSE
+	kg->kg_pri_class = PRI_TIMESHARE;
+	kg->kg_user_pri = PUSER;
+#else
 	td->td_pri_class = PRI_TIMESHARE;
 	td->td_user_pri = PUSER;
+#endif
 	td->td_priority = PVM;
 	td->td_base_pri = PUSER;
 	td->td_oncpu = 0;
@@ -611,7 +631,7 @@
 	p = td->td_proc;
 
 	vfs_mountroot();
-	
+
 	/*
 	 * Need just enough stack to hold the faked-up "execve()" arguments.
 	 */
@@ -687,7 +707,6 @@
 		args.fname = arg0;
 		args.argv = uap;
 		args.envv = NULL;
-		
 
 		/*
 		 * Now try to exec the program.  If can't for any reason

==== //depot/projects/dtrace/src/sys/kern/kern_resource.c#4 (text+ko) ====

@@ -292,7 +292,11 @@
 {
 	struct proc *curp;
 	struct proc *p;
+#ifdef KSE
+	struct ksegrp *kg;
+#else
 	struct thread *tdp;
+#endif
 	struct rtprio rtp;
 	int cierror, error;
 
@@ -328,14 +332,23 @@
 		 * as leaving it zero.
 		 */
 		if (uap->pid == 0) {
+#ifdef KSE
+			pri_to_rtp(td->td_ksegrp, &rtp);
+#else
 			pri_to_rtp(td, &rtp);
+#endif
 		} else {
 			struct rtprio rtp2;
 
 			rtp.type = RTP_PRIO_IDLE;
 			rtp.prio = RTP_PRIO_MAX;
+#ifdef KSE
+			FOREACH_KSEGRP_IN_PROC(p, kg) {
+				pri_to_rtp(kg, &rtp2);
+#else
 			FOREACH_THREAD_IN_PROC(p, tdp) {
 				pri_to_rtp(tdp, &rtp2);
+#endif
 				if (rtp2.type <  rtp.type ||
 				    (rtp2.type == rtp.type &&
 				    rtp2.prio < rtp.prio)) {
@@ -376,19 +389,39 @@
 			}
 		}
 
+#ifdef KSE
+		/*
+		 * If we are setting our own priority, set just our
+		 * KSEGRP but if we are doing another process,
+		 * do all the groups on that process. If we
+		 * specify our own pid we do the latter.
+		 */
+#else
 		/*
 		 * If we are setting our own priority, set just our
 		 * thread but if we are doing another process,
 		 * do all the threads on that process. If we
 		 * specify our own pid we do the latter.
 		 */
+#endif
 		mtx_lock_spin(&sched_lock);
 		if (uap->pid == 0) {
+#ifdef KSE
+			error = rtp_to_pri(&rtp, td->td_ksegrp);
+#else
 			error = rtp_to_pri(&rtp, td);
+#endif
 		} else {
+#ifdef KSE
+			FOREACH_KSEGRP_IN_PROC(p, kg) {
+				if ((error = rtp_to_pri(&rtp, kg)) != 0) {
+					break;
+				}
+#else
 			FOREACH_THREAD_IN_PROC(p, td) {
 				if ((error = rtp_to_pri(&rtp, td)) != 0)
 					break;
+#endif
 			}
 		}
 		mtx_unlock_spin(&sched_lock);
@@ -402,7 +435,11 @@
 }
 
 int
+#ifdef KSE
+rtp_to_pri(struct rtprio *rtp, struct ksegrp *kg)
+#else
 rtp_to_pri(struct rtprio *rtp, struct thread *td)
+#endif
 {
 
 	mtx_assert(&sched_lock, MA_OWNED);
@@ -410,42 +447,85 @@
 		return (EINVAL);
 	switch (RTP_PRIO_BASE(rtp->type)) {
 	case RTP_PRIO_REALTIME:
+#ifdef KSE
+		kg->kg_user_pri = PRI_MIN_REALTIME + rtp->prio;
+#else
 		td->td_user_pri = PRI_MIN_REALTIME + rtp->prio;
+#endif
 		break;
 	case RTP_PRIO_NORMAL:
+#ifdef KSE
+		kg->kg_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
+#else
 		td->td_user_pri = PRI_MIN_TIMESHARE + rtp->prio;
+#endif
 		break;
 	case RTP_PRIO_IDLE:
+#ifdef KSE
+		kg->kg_user_pri = PRI_MIN_IDLE + rtp->prio;
+#else
 		td->td_user_pri = PRI_MIN_IDLE + rtp->prio;
+#endif
 		break;
 	default:
 		return (EINVAL);
 	}
+#ifdef KSE
+	sched_class(kg, rtp->type);
+	if (curthread->td_ksegrp == kg) {
+		sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */
+	}
+#else
 	sched_class(td, rtp->type);	/* XXX fix */
 	if (curthread == td)
 		sched_prio(curthread, td->td_user_pri); /* XXX dubious */
+#endif
 	return (0);
 }
 
 void
+#ifdef KSE
+pri_to_rtp(struct ksegrp *kg, struct rtprio *rtp)
+#else
 pri_to_rtp(struct thread *td, struct rtprio *rtp)
+#endif
 {
 
 	mtx_assert(&sched_lock, MA_OWNED);
+#ifdef KSE
+	switch (PRI_BASE(kg->kg_pri_class)) {
+#else
 	switch (PRI_BASE(td->td_pri_class)) {
+#endif
 	case PRI_REALTIME:
+#ifdef KSE
+		rtp->prio = kg->kg_user_pri - PRI_MIN_REALTIME;
+#else
 		rtp->prio = td->td_user_pri - PRI_MIN_REALTIME;
+#endif
 		break;
 	case PRI_TIMESHARE:
+#ifdef KSE
+		rtp->prio = kg->kg_user_pri - PRI_MIN_TIMESHARE;
+#else
 		rtp->prio = td->td_user_pri - PRI_MIN_TIMESHARE;
+#endif
 		break;
 	case PRI_IDLE:
+#ifdef KSE
+		rtp->prio = kg->kg_user_pri - PRI_MIN_IDLE;
+#else
 		rtp->prio = td->td_user_pri - PRI_MIN_IDLE;
+#endif
 		break;
 	default:
 		break;
 	}
+#ifdef KSE
+	rtp->type = kg->kg_pri_class;
+#else
 	rtp->type = td->td_pri_class;
+#endif
 }
 
 #if defined(COMPAT_43)

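For orientation, a minimal userland sketch of the rtprio(2) interface whose
kernel side the hunks above switch between per-thread and per-KSEGRP
priorities. This is illustrative only; it is not part of the change, and it
assumes nothing beyond the standard rtprio(2) syscall and <sys/rtprio.h>
constants already referenced in the diff:

	#include <sys/types.h>
	#include <sys/rtprio.h>

	#include <err.h>
	#include <stdio.h>

	int
	main(void)
	{
		struct rtprio rtp;

		/* Query the scheduling class and priority of this process. */
		if (rtprio(RTP_LOOKUP, 0, &rtp) == -1)
			err(1, "rtprio lookup");
		printf("type %d prio %d\n", rtp.type, rtp.prio);

		/* Move this process to the idle class at the weakest priority. */
		rtp.type = RTP_PRIO_IDLE;
		rtp.prio = RTP_PRIO_MAX;
		if (rtprio(RTP_SET, 0, &rtp) == -1)
			err(1, "rtprio set");
		return (0);
	}
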
==== //depot/projects/dtrace/src/sys/kern/kern_switch.c#7 (text+ko) ====

@@ -24,6 +24,69 @@
  * SUCH DAMAGE.
  */
 
+#ifdef KSE
+/***
+Here is the logic..
+
+If there are N processors, then there are at most N KSEs (kernel
+schedulable entities) working to process threads that belong to a
+KSEGROUP (kg). If there are X of these KSEs actually running at the
+moment in question, then there are at most M (= N - X) of these KSEs on
+the run queue, as running KSEs are not on the queue.
+
+Runnable threads are queued off the KSEGROUP in priority order.
+If there are M or more threads runnable, the top M threads
+(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
+their priority from those threads and are put on the run queue.
+
+The last thread that had a priority high enough to have a KSE associated
+with it, AND IS ON THE RUN QUEUE is pointed to by
+kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
+assigned as all the available KSEs are actively running, or because there
+are no threads queued, that pointer is NULL.
+
+When a KSE is removed from the run queue to become runnable, we know
+it was associated with the highest priority thread in the queue (at the head
+of the queue). If it is also the last assigned we know M was 1 and must
+now be 0. Since the thread is no longer queued that pointer must be
+removed from it. Since we know there were no more KSEs available,
+(M was 1 and is now 0) and since we are not FREEING our KSE
+but using it, we know there are STILL no more KSEs available, we can prove
+that the next thread in the ksegrp list will not have a KSE to assign to
+it, so we can show that the pointer must be made 'invalid' (NULL).
+
+The pointer exists so that when a new thread is made runnable, it can
+have its priority compared with the last assigned thread to see if
+it should 'steal' its KSE or not.. i.e. is it 'earlier'
+on the list than that thread or later.. If it's earlier, then the KSE is
+removed from the last assigned (which is now not assigned a KSE)
+and reassigned to the new thread, which is placed earlier in the list.
+The pointer is then backed up to the previous thread (which may or may not
+be the new thread).
+
+When a thread sleeps or is removed, the KSE becomes available and if there
+are queued threads that are not assigned KSEs, the highest priority one of
+them is assigned the KSE, which is then placed back on the run queue at
+the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
+to point to it.
+
+The following diagram shows 2 KSEs and 3 threads from a single process.
+
+ RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
+              \    \____
+               \        \
+    KSEGROUP---thread--thread--thread    (queued in priority order)
+        \                 /
+         \_______________/
+          (last_assigned)
+
+The result of this scheme is that the M available KSEs are always
+queued at the priorities they have inherited from the M highest priority
+threads for that KSEGROUP. If this situation changes, the KSEs are
+reassigned to keep this true.
+***/
+#endif
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/sys/kern/kern_switch.c,v 1.122 2006/06/13 13:12:56 davidxu Exp $");
 
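(Editorial worked example of the accounting in the comment block above, not
part of the change: on a 4-CPU machine N = 4; if X = 3 KSEs are currently
running, then at most M = N - X = 1 KSE sits on the run queue. With five
runnable threads in the ksegrp, only the single highest-priority thread is
preassigned to that KSE and appears on the system run queue;
kg->kg_last_assigned points at it, and the remaining four wait on the ksegrp
queue until a slot frees up.)
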
@@ -48,7 +111,6 @@
 #include <sys/sysctl.h>
 #endif
 
-
 /* Uncomment this to enable logging of critical_enter/exit. */
 #if 0
 #define	KTR_CRITICAL	KTR_SCHED
@@ -82,36 +144,79 @@
 /************************************************************************
  * Functions that manipulate runnability from a thread perspective.	*
  ************************************************************************/
+#ifdef KSE
 /*
+ * Select the KSE that will be run next.  From that find the thread, and
+ * remove it from the KSEGRP's run queue.  If there is thread clustering,
+ * this will be what does it.
+ */
+#else
+/*
  * Select the thread that will be run next.
  */
+#endif
 struct thread *
 choosethread(void)
 {
+#ifdef KSE
+	struct kse *ke;
+#endif
 	struct thread *td;
+#ifdef KSE
+	struct ksegrp *kg;
+#endif
 
 #if defined(SMP) && (defined(__i386__) || defined(__amd64__))
 	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
 		/* Shutting down, run idlethread on AP's */
 		td = PCPU_GET(idlethread);
+#ifdef KSE
+		ke = td->td_kse;
+#endif
 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
+#ifdef KSE
+		ke->ke_flags |= KEF_DIDRUN;
+#else
 		td->td_kse->ke_flags |= KEF_DIDRUN;
+#endif
 		TD_SET_RUNNING(td);
 		return (td);
 	}
 #endif
 
 retry:
+#ifdef KSE
+	ke = sched_choose();
+	if (ke) {
+		td = ke->ke_thread;
+		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
+		kg = ke->ke_ksegrp;
+		if (td->td_proc->p_flag & P_HADTHREADS) {
+			if (kg->kg_last_assigned == td) {
+				kg->kg_last_assigned = TAILQ_PREV(td,
+				    threadqueue, td_runq);
+			}
+			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+		}
+#else
 	td = sched_choose();
 	if (td) {
+#endif
 		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
 		    td, td->td_priority);
 	} else {
 		/* Simulate runq_choose() having returned the idle thread */
 		td = PCPU_GET(idlethread);
+#ifdef KSE
+		ke = td->td_kse;
+#endif
 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
 	}
+#ifdef KSE
+	ke->ke_flags |= KEF_DIDRUN;
+#else
 	td->td_kse->ke_flags |= KEF_DIDRUN;
+#endif
 
 	/*
 	 * If we are in panic, only allow system threads,
@@ -128,12 +233,105 @@
 	return (td);
 }
 
+#ifdef KSE
+/*
+ * Given a surplus system slot, try to assign a new runnable thread to it.
+ * Called from:
+ *  sched_thread_exit()  (local)
+ *  sched_switch()  (local)
+ *  sched_thread_exit()  (local)
+ *  remrunqueue()  (local)  (not at the moment)
+ */
+static void
+slot_fill(struct ksegrp *kg)
+{
+	struct thread *td;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	while (kg->kg_avail_opennings > 0) {
+		/*
+		 * Find the first unassigned thread
+		 */
+		if ((td = kg->kg_last_assigned) != NULL)
+			td = TAILQ_NEXT(td, td_runq);
+		else
+			td = TAILQ_FIRST(&kg->kg_runq);
+
+		/*
+		 * If we found one, send it to the system scheduler.
+		 */
+		if (td) {
+			kg->kg_last_assigned = td;
+			sched_add(td, SRQ_YIELDING);
+			CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg);
+		} else {
+			/* no threads to use up the slots. quit now */
+			break;
+		}
+	}
+}
+
+#ifdef	SCHED_4BSD
+/*
+ * Remove a thread from its KSEGRP's run queue.
+ * This in turn may remove it from a KSE if it was already assigned
+ * to one, possibly causing a new thread to be assigned to the KSE
+ * and the KSE getting a new priority.
+ */
+static void
+remrunqueue(struct thread *td)
+{
+	struct thread *td2, *td3;
+	struct ksegrp *kg;
+	struct kse *ke;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
+	kg = td->td_ksegrp;
+	ke = td->td_kse;
+	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
+	TD_SET_CAN_RUN(td);
+	/*
+	 * If it is not a threaded process, take the shortcut.
+	 */
+	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
+		/* remove from sys run queue and free up a slot */
+		sched_rem(td);
+		return;
+	}
+   	td3 = TAILQ_PREV(td, threadqueue, td_runq);
+	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+	if (ke->ke_state == KES_ONRUNQ) {
+		/*
+		 * This thread has been assigned to the system run queue.
+		 * We need to dissociate it and try assign the
+		 * KSE to the next available thread. Then, we should
+		 * see if we need to move the KSE in the run queues.
+		 */
+		sched_rem(td);
+		td2 = kg->kg_last_assigned;
+		KASSERT((td2 != NULL), ("last assigned has wrong value"));
+		if (td2 == td)
+			kg->kg_last_assigned = td3;
+		/* slot_fill(kg); */ /* will replace it with another */
+	}
+}
+#endif
+#endif
+
 /*
  * Change the priority of a thread that is on the run queue.
  */
 void
+#ifdef KSE
+adjustrunqueue( struct thread *td, int newpri)
+#else
 adjustrunqueue(struct thread *td, int newpri)
+#endif
 {
+#ifdef KSE
+	struct ksegrp *kg;
+#endif
 	struct kse *ke;
 
 	mtx_assert(&sched_lock, MA_OWNED);
@@ -141,6 +339,44 @@
 
 	ke = td->td_kse;
 	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
+#ifdef KSE
+	/*
+	 * If it is not a threaded process, take the shortcut.
+	 */
+	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
+		/* We only care about the kse in the run queue. */
+		td->td_priority = newpri;
+#ifndef SCHED_CORE
+		if (ke->ke_rqindex != (newpri / RQ_PPQ))
+#else
+		if (ke->ke_rqindex != newpri)
+#endif
+		{
+			sched_rem(td);
+			sched_add(td, SRQ_BORING);
+		}
+		return;
+	}
+
+	/* It is a threaded process */
+	kg = td->td_ksegrp;
+	if (ke->ke_state == KES_ONRUNQ
+#ifdef SCHED_ULE
+	 || ((ke->ke_flags & KEF_ASSIGNED) != 0 &&
+	     (ke->ke_flags & KEF_REMOVED) == 0)
+#endif
+	   ) {
+		if (kg->kg_last_assigned == td) {
+			kg->kg_last_assigned =
+			    TAILQ_PREV(td, threadqueue, td_runq);
+		}
+		sched_rem(td);
+	}
+	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+	TD_SET_CAN_RUN(td);
+	td->td_priority = newpri;
+	setrunqueue(td, SRQ_BORING);
+#else
 	/* We only care about the kse in the run queue. */
 	td->td_priority = newpri;
 #ifndef SCHED_CORE
@@ -152,14 +388,170 @@
 		sched_rem(td);
 		sched_add(td, SRQ_BORING);
 	}
+#endif
+}
+
+#ifdef KSE
+/*
+ * This function is called when a thread is about to be put on a
+ * ksegrp run queue because it has been made runnable or its
+ * priority has been adjusted and the ksegrp does not have a
+ * free kse slot.  It determines if a thread from the same ksegrp
+ * should be preempted.  If so, it tries to switch threads
+ * if the thread is on the same cpu or notifies another cpu that
+ * it should switch threads.
+ */
+
+static void
+maybe_preempt_in_ksegrp(struct thread *td)
+#if  !defined(SMP)
+{
+	struct thread *running_thread;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+	running_thread = curthread;
+
+	if (running_thread->td_ksegrp != td->td_ksegrp)
+		return;
+
+	if (td->td_priority >= running_thread->td_priority)
+		return;
+#ifdef PREEMPTION
+#ifndef FULL_PREEMPTION
+	if (td->td_priority > PRI_MAX_ITHD) {
+		running_thread->td_flags |= TDF_NEEDRESCHED;
+		return;
+	}
+#endif /* FULL_PREEMPTION */
+
+	if (running_thread->td_critnest > 1)
+		running_thread->td_owepreempt = 1;
+	 else
+		 mi_switch(SW_INVOL, NULL);
+
+#else /* PREEMPTION */
+	running_thread->td_flags |= TDF_NEEDRESCHED;
+#endif /* PREEMPTION */
+	return;
+}
+
+#else /* SMP */
+{
+	struct thread *running_thread;
+	int worst_pri;
+	struct ksegrp *kg;
+	cpumask_t cpumask,dontuse;
+	struct pcpu *pc;
+	struct pcpu *best_pcpu;
+	struct thread *cputhread;
+
+	mtx_assert(&sched_lock, MA_OWNED);
+
+	running_thread = curthread;
+
+#if !defined(KSEG_PEEMPT_BEST_CPU)
+	if (running_thread->td_ksegrp != td->td_ksegrp) {
+#endif
+		kg = td->td_ksegrp;
+
+		/* if someone is ahead of this thread, wait our turn */
+		if (td != TAILQ_FIRST(&kg->kg_runq))
+			return;
+
+		worst_pri = td->td_priority;
+		best_pcpu = NULL;
+		dontuse   = stopped_cpus | idle_cpus_mask;
+
+		/*
+		 * Find the cpu with the worst priority that is running a thread
+		 * from the same ksegrp - if several qualify, prefer the thread's
+		 * last-run cpu, then the current cpu.
+		 */
+
+		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
+			cpumask   = pc->pc_cpumask;
+			cputhread = pc->pc_curthread;
+
+			if ((cpumask & dontuse)  ||
+			    cputhread->td_ksegrp != kg)
+				continue;
+
+			if (cputhread->td_priority > worst_pri) {
+				worst_pri = cputhread->td_priority;
+				best_pcpu = pc;
+				continue;
+			}
+
+			if (cputhread->td_priority == worst_pri &&
+			    best_pcpu != NULL &&
+			    (td->td_lastcpu == pc->pc_cpuid ||
+				(PCPU_GET(cpumask) == cpumask &&
+				    td->td_lastcpu != best_pcpu->pc_cpuid)))
+			    best_pcpu = pc;
+		}
+
+		/* Check if we need to preempt someone */
+		if (best_pcpu == NULL)
+			return;
+
+#if defined(IPI_PREEMPTION) && defined(PREEMPTION)
+#if !defined(FULL_PREEMPTION)
+		if (td->td_priority <= PRI_MAX_ITHD)
+#endif /* ! FULL_PREEMPTION */
+			{
+				ipi_selected(best_pcpu->pc_cpumask, IPI_PREEMPT);
+				return;
+			}
+#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */
+
+		if (PCPU_GET(cpuid) != best_pcpu->pc_cpuid) {
+			best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
+			ipi_selected(best_pcpu->pc_cpumask, IPI_AST);
+			return;
+		}
+#if !defined(KSEG_PEEMPT_BEST_CPU)
+	}
+#endif
+
+	if (td->td_priority >= running_thread->td_priority)
+		return;
+#ifdef PREEMPTION
+
+#if !defined(FULL_PREEMPTION)
+	if (td->td_priority > PRI_MAX_ITHD) {
+		running_thread->td_flags |= TDF_NEEDRESCHED;
+	}
+#endif /* ! FULL_PREEMPTION */
+
+	if (running_thread->td_critnest > 1)
+		running_thread->td_owepreempt = 1;
+	 else
+		 mi_switch(SW_INVOL, NULL);
+
+#else /* PREEMPTION */
+	running_thread->td_flags |= TDF_NEEDRESCHED;
+#endif /* PREEMPTION */
+	return;
 }
+#endif /* !SMP */
 
+
+int limitcount;
+#endif
 void
 setrunqueue(struct thread *td, int flags)
 {
+#ifdef KSE
+	struct ksegrp *kg;
+	struct thread *td2;
+	struct thread *tda;
 
+	CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d",
+	    td, td->td_ksegrp, td->td_proc->p_pid);
+#else
 	CTR2(KTR_RUNQ, "setrunqueue: td:%p pid:%d",
 	    td, td->td_proc->p_pid);
+#endif
 	CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)",
             td, td->td_proc->p_comm, td->td_priority, curthread,
             curthread->td_proc->p_comm);
@@ -169,7 +561,101 @@
 	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
 	    ("setrunqueue: bad thread state"));
 	TD_SET_RUNQ(td);
+#ifdef KSE
+	kg = td->td_ksegrp;
+	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
+		/*
+		 * Common path optimisation: Only one of everything
+		 * and the KSE is always already attached.
+		 * Totally ignore the ksegrp run queue.
+		 */
+		if (kg->kg_avail_opennings != 1) {
+			if (limitcount < 1) {
+				limitcount++;
+				printf("pid %d: corrected slot count (%d->1)\n",
+				    td->td_proc->p_pid, kg->kg_avail_opennings);
+
+			}
+			kg->kg_avail_opennings = 1;
+		}
+		sched_add(td, flags);
+		return;
+	}
+
+	/*
+	 * If the concurrency has reduced, and we would go in the
+	 * assigned section, then keep removing entries from the
+	 * system run queue, until we are not in that section
+	 * or there is room for us to be put in that section.
+	 * What we MUST avoid is the case where there are threads of lower
+	 * priority than the new one scheduled, but the new one cannot
+	 * be scheduled itself. That would lead to a non-contiguous set
+	 * of scheduled threads, and everything would break.
+	 */
+	tda = kg->kg_last_assigned;
+	while ((kg->kg_avail_opennings <= 0) &&
+	    (tda && (tda->td_priority > td->td_priority))) {
+		/*
+		 * None free, but there is one we can commandeer.
+		 */
+		CTR2(KTR_RUNQ,
+		    "setrunqueue: kg:%p: take slot from td: %p", kg, tda);
+		sched_rem(tda);
+		tda = kg->kg_last_assigned =
+		    TAILQ_PREV(tda, threadqueue, td_runq);
+	}
+
+	/*
+	 * Add the thread to the ksegrp's run queue at
+	 * the appropriate place.
+	 */
+	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
+		if (td2->td_priority > td->td_priority) {
+			TAILQ_INSERT_BEFORE(td2, td, td_runq);
+			break;
+		}
+	}
+	if (td2 == NULL) {
+		/* We ran off the end of the TAILQ or it was empty. */
+		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
+	}
+
+	/*
+	 * If we have a slot to use, then put the thread on the system
+	 * run queue and if needed, readjust the last_assigned pointer.
+	 * It may be that we need to schedule something anyhow
+	 * even if the available slots are negative, so that
+	 * all the items < last_assigned are scheduled.
+	 */
+	if (kg->kg_avail_opennings > 0) {
+		if (tda == NULL) {
+			/*
+			 * No pre-existing last assigned so whoever is first
+			 * gets the slot.. (maybe us)
+			 */
+			td2 = TAILQ_FIRST(&kg->kg_runq);
+			kg->kg_last_assigned = td2;
+		} else if (tda->td_priority > td->td_priority) {
+			td2 = td;
+		} else {
+			/*
+			 * We are past last_assigned, so
+			 * give the next slot to whatever is next,
+			 * which may or may not be us.
+			 */
+			td2 = TAILQ_NEXT(tda, td_runq);
+			kg->kg_last_assigned = td2;
+		}
+		sched_add(td2, flags);
+	} else {
+		CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
+			td, td->td_ksegrp, td->td_proc->p_pid);
+		if ((flags & SRQ_YIELDING) == 0)
+			maybe_preempt_in_ksegrp(td);
+	}
+#else
 	sched_add(td, flags);
+#endif
 }
 
 /*
@@ -281,6 +767,24 @@
 	 */
 	MPASS(TD_ON_RUNQ(td));
 	MPASS(td->td_sched->ke_state != KES_ONRUNQ);
+#ifdef KSE
+	if (td->td_proc->p_flag & P_HADTHREADS) {
+		/*
+		 * If this is a threaded process we actually ARE on the
+		 * ksegrp run queue so take it off that first.
+		 * Also undo any damage done to the last_assigned pointer.
+		 * XXX Fix setrunqueue so this isn't needed
+		 */
+		struct ksegrp *kg;
+
+		kg = td->td_ksegrp;
+		if (kg->kg_last_assigned == td)
+			kg->kg_last_assigned =
+			    TAILQ_PREV(td, threadqueue, td_runq);
+		TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
+	}
+
+#endif
 	TD_SET_RUNNING(td);
 	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
 	    td->td_proc->p_pid, td->td_proc->p_comm);
@@ -395,10 +899,11 @@
 	rqh = &rq->rq_queues[pri];
 	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
-	if (flags & SRQ_PREEMPTED)
+	if (flags & SRQ_PREEMPTED) {
 		TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
-	else
+	} else {
 		TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
+	}
 }
 
 /*
@@ -485,7 +990,11 @@
 	struct rqhead *rqh;
 	int pri;
 
+#ifdef KSE
+	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
+#else
 	KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM,
+#endif
 		("runq_remove: process swapped out"));
 	pri = ke->ke_rqindex;
 	rqh = &rq->rq_queues[pri];
@@ -503,6 +1012,24 @@
 #include <vm/uma.h>
 extern struct mtx kse_zombie_lock;
 
+#ifdef KSE
+/*
+ *  Allocate scheduler specific per-process resources.
+ * The thread and ksegrp have already been linked in.
+ * In this case just set the default concurrency value.
+ *
+ * Called from:
+ *  proc_init() (UMA init method)
+ */
+void
+sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
+{
+
+	/* This can go in sched_fork */
+	sched_init_concurrency(kg);
+}
+#endif
+
 /*
  * thread is being either created or recycled.
  * Fix up the per-scheduler resources associated with it.
@@ -523,4 +1050,63 @@
 	ke->ke_state	= KES_THREAD;
 }
 
+#ifdef KSE
+/*
+ * Set up an initial concurrency of 1
+ * and set the given thread (if given) to be using that
+ * concurrency slot.
+ * May be used "offline"..before the ksegrp is attached to the world
+ * and thus wouldn't need schedlock in that case.
+ * Called from:
+ *  thr_create()
+ *  proc_init() (UMA) via sched_newproc()
+ */
+void
+sched_init_concurrency(struct ksegrp *kg)
+{
+
+	CTR1(KTR_RUNQ,"kg %p init slots and concurrency to 1", kg);
+	kg->kg_concurrency = 1;

>>> TRUNCATED FOR MAIL (1000 lines) <<<