Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 13 May 2011 12:39:38 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r221835 - in head/sys: i386/i386 i386/xen kern
Message-ID:  <201105131239.p4DCdccA071147@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Fri May 13 12:39:37 2011
New Revision: 221835
URL: http://svn.freebsd.org/changeset/base/221835

Log:
  Refactor Xen PV code to use the new event timers subsystem. It uses the
  one-shot Xen timer and time counter to provide one-shot and periodic time
  events.
  
  In my tests this reduces the idle interrupt rate to about 30Hz and,
  according to Xen VM Manager, reduces host CPU load by a factor of three
  compared to the previous periodic 100Hz clock. Also, it is now possible to
  increase the HZ rate when needed without uselessly burning CPU during idle
  periods.
  
  Now only ia64 and some ARM platforms are left unmigrated to the new event
  timers.

Modified:
  head/sys/i386/i386/machdep.c
  head/sys/i386/xen/clock.c
  head/sys/i386/xen/mp_machdep.c
  head/sys/kern/kern_clocksource.c

Modified: head/sys/i386/i386/machdep.c
==============================================================================
--- head/sys/i386/i386/machdep.c	Fri May 13 10:36:38 2011	(r221834)
+++ head/sys/i386/i386/machdep.c	Fri May 13 12:39:37 2011	(r221835)
@@ -1351,7 +1351,9 @@ void (*cpu_idle_fn)(int) = cpu_idle_acpi
 void
 cpu_idle(int busy)
 {
+#ifndef XEN
 	uint64_t msr;
+#endif
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
 	    busy, curcpu);
@@ -1367,34 +1369,32 @@ cpu_idle(int busy)
 			goto out;
 		}
 	}
+#endif
 
 	/* If we have time - switch timers into idle mode. */
 	if (!busy) {
 		critical_enter();
 		cpu_idleclock();
 	}
-#endif
 
-	/* Apply AMD APIC timer C1E workaround. */
-	if (cpu_ident_amdc1e
 #ifndef XEN
-	    && cpu_disable_deep_sleep
-#endif
-	    ) {
+	/* Apply AMD APIC timer C1E workaround. */
+	if (cpu_ident_amdc1e && cpu_disable_deep_sleep) {
 		msr = rdmsr(MSR_AMDK8_IPM);
 		if (msr & AMDK8_CMPHALT)
 			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
 	}
+#endif
 
 	/* Call main idle method. */
 	cpu_idle_fn(busy);
 
-#ifndef XEN
 	/* Switch timers mack into active mode. */
 	if (!busy) {
 		cpu_activeclock();
 		critical_exit();
 	}
+#ifndef XEN
 out:
 #endif
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",

Modified: head/sys/i386/xen/clock.c
==============================================================================
--- head/sys/i386/xen/clock.c	Fri May 13 10:36:38 2011	(r221834)
+++ head/sys/i386/xen/clock.c	Fri May 13 12:39:37 2011	(r221835)
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/time.h>
+#include <sys/timeet.h>
 #include <sys/timetc.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
@@ -301,38 +302,44 @@ static struct timecounter xen_timecounte
 	0			/* quality */
 };
 
+static struct eventtimer xen_et;
+
+struct xen_et_state {
+	int		mode;
+#define	MODE_STOP	0
+#define	MODE_PERIODIC	1
+#define	MODE_ONESHOT	2
+	int64_t		period;
+	int64_t		next;
+};
+
+static DPCPU_DEFINE(struct xen_et_state, et_state);
+
 static int
 clkintr(void *arg)
 {
-	int64_t delta_cpu, delta;
-	struct trapframe *frame = (struct trapframe *)arg;
+	int64_t now;
 	int cpu = smp_processor_id();
 	struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+	struct xen_et_state *state = DPCPU_PTR(et_state);
 
 	do {
 		__get_time_values_from_xen();
-		
-		delta = delta_cpu = 
-			shadow->system_timestamp + get_nsec_offset(shadow);
-		delta     -= processed_system_time;
-		delta_cpu -= per_cpu(processed_system_time, cpu);
-
+		now = shadow->system_timestamp + get_nsec_offset(shadow);
 	} while (!time_values_up_to_date(cpu));
-	
-	if (unlikely(delta < (int64_t)0) || unlikely(delta_cpu < (int64_t)0)) {
-		printf("Timer ISR: Time went backwards: %lld\n", delta);
-		return (FILTER_HANDLED);
-	}
-	
+
 	/* Process elapsed ticks since last call. */
-	while (delta >= NS_PER_TICK) {
-	        delta -= NS_PER_TICK;
-		processed_system_time += NS_PER_TICK;
-		per_cpu(processed_system_time, cpu) +=  NS_PER_TICK;
-		if (PCPU_GET(cpuid) == 0)
-		      hardclock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
-		else
-		      hardclock_cpu(TRAPF_USERMODE(frame));
+	processed_system_time = now;
+	if (state->mode == MODE_PERIODIC) {
+		while (now >= state->next) {
+		        state->next += state->period;
+			if (xen_et.et_active)
+				xen_et.et_event_cb(&xen_et, xen_et.et_arg);
+		}
+		HYPERVISOR_set_timer_op(state->next + 50000);
+	} else if (state->mode == MODE_ONESHOT) {
+		if (xen_et.et_active)
+			xen_et.et_event_cb(&xen_et, xen_et.et_arg);
 	}
 	/*
 	 * Take synchronised time from Xen once a minute if we're not
@@ -484,12 +491,14 @@ DELAY(int n)
 void
 timer_restore(void)
 {
+	struct xen_et_state *state = DPCPU_PTR(et_state);
+
 	/* Get timebases for new environment. */ 
 	__get_time_values_from_xen();
 
 	/* Reset our own concept of passage of system time. */
 	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
-	per_cpu(processed_system_time, 0) = processed_system_time;
+	state->next = processed_system_time;
 }
 
 void
@@ -503,7 +512,6 @@ startrtclock()
 	/* initialize xen values */
 	__get_time_values_from_xen();
 	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
-	per_cpu(processed_system_time, 0) = processed_system_time;
 
 	__cpu_khz = 1000000ULL << 32;
 	info = &HYPERVISOR_shared_info->vcpu_info[0].time;
@@ -759,7 +767,49 @@ resettodr()
 }
 #endif
 
-static struct vcpu_set_periodic_timer xen_set_periodic_tick;
+static int
+xen_et_start(struct eventtimer *et,
+    struct bintime *first, struct bintime *period)
+{
+	struct xen_et_state *state = DPCPU_PTR(et_state);
+	struct shadow_time_info *shadow;
+	int64_t fperiod;
+
+	__get_time_values_from_xen();
+
+	if (period != NULL) {
+		state->mode = MODE_PERIODIC;
+		state->period = (1000000000LL *
+		    (uint32_t)(period->frac >> 32)) >> 32;
+		if (period->sec != 0)
+			state->period += 1000000000LL * period->sec;
+	} else {
+		state->mode = MODE_ONESHOT;
+		state->period = 0;
+	}
+	if (first != NULL) {
+		fperiod = (1000000000LL * (uint32_t)(first->frac >> 32)) >> 32;
+		if (first->sec != 0)
+			fperiod += 1000000000LL * first->sec;
+	} else
+		fperiod = state->period;
+
+	shadow = &per_cpu(shadow_time, smp_processor_id());
+	state->next = shadow->system_timestamp + get_nsec_offset(shadow);
+	state->next += fperiod;
+	HYPERVISOR_set_timer_op(state->next + 50000);
+	return (0);
+}
+
+static int
+xen_et_stop(struct eventtimer *et)
+{
+	struct xen_et_state *state = DPCPU_PTR(et_state);
+
+	state->mode = MODE_STOP;
+	HYPERVISOR_set_timer_op(0);
+	return (0);
+}
 
 /*
  * Start clocks running.
@@ -770,56 +820,48 @@ cpu_initclocks(void)
 	unsigned int time_irq;
 	int error;
 
-	xen_set_periodic_tick.period_ns = NS_PER_TICK;
-	
-	HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0,
-			   &xen_set_periodic_tick);
-	
-        error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk", 
-	    clkintr, NULL, NULL,
-	    INTR_TYPE_CLK, &time_irq);
+	HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, 0, NULL);
+	error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "cpu0:timer",
+	    clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq);
 	if (error)
 		panic("failed to register clock interrupt\n");
 	/* should fast clock be enabled ? */
-	
+
+	bzero(&xen_et, sizeof(xen_et));
+	xen_et.et_name = "ixen";
+	xen_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
+	    ET_FLAGS_PERCPU;
+	xen_et.et_quality = 600;
+	xen_et.et_frequency = 0;
+	xen_et.et_min_period.sec = 0;
+	xen_et.et_min_period.frac = 0x00400000LL << 32;
+	xen_et.et_max_period.sec = 2;
+	xen_et.et_max_period.frac = 0;
+	xen_et.et_start = xen_et_start;
+	xen_et.et_stop = xen_et_stop;
+	xen_et.et_priv = NULL;
+	et_register(&xen_et);
+
+	cpu_initclocks_bsp();
 }
 
 int
 ap_cpu_initclocks(int cpu)
 {
+	char buf[MAXCOMLEN + 1];
 	unsigned int time_irq;
 	int error;
 
-	xen_set_periodic_tick.period_ns = NS_PER_TICK;
-
-	HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu,
-			   &xen_set_periodic_tick);
-        error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk", 
-	    clkintr, NULL, NULL,
-	    INTR_TYPE_CLK, &time_irq);
+	HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL);
+	snprintf(buf, sizeof(buf), "cpu%d:timer", cpu);
+	error = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, buf,
+	    clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq);
 	if (error)
 		panic("failed to register clock interrupt\n");
 
-
 	return (0);
 }
 
-
-void
-cpu_startprofclock(void)
-{
-
-    	printf("cpu_startprofclock: profiling clock is not supported\n");
-}
-
-void
-cpu_stopprofclock(void)
-{
-
-    	printf("cpu_stopprofclock: profiling clock is not supported\n");
-}
-#define NSEC_PER_USEC 1000
-
 static uint32_t
 xen_get_timecount(struct timecounter *tc)
 {	
@@ -842,45 +884,11 @@ get_system_time(int ticks)
     return processed_system_time + (ticks * NS_PER_TICK);
 }
 
-/*
- * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c
- */
-
-
-/* Convert jiffies to system time. */
-static uint64_t 
-ticks_to_system_time(int newticks)
-{
-	int delta;
-	uint64_t st;
-
-	delta = newticks - ticks;
-	if (delta < 1) {
-		/* Triggers in some wrap-around cases,
-		 * but that's okay:
-		 * we just end up with a shorter timeout. */
-		st = processed_system_time + NS_PER_TICK;
-	} else if (((unsigned int)delta >> (BITS_PER_LONG-3)) != 0) {
-		/* Very long timeout means there is no pending timer.
-		 * We indicate this to Xen by passing zero timeout. */
-		st = 0;
-	} else {
-		st = processed_system_time + delta * (uint64_t)NS_PER_TICK;
-	}
-
-	return (st);
-}
-
 void
 idle_block(void)
 {
-  uint64_t timeout;
-
-  timeout = ticks_to_system_time(ticks + 1) + NS_PER_TICK/2;
 
-  __get_time_values_from_xen();
-  PANIC_IF(HYPERVISOR_set_timer_op(timeout) != 0);
-  HYPERVISOR_sched_op(SCHEDOP_block, 0);
+	HYPERVISOR_sched_op(SCHEDOP_block, 0);
 }
 
 int
@@ -903,6 +911,3 @@ timer_spkr_setfreq(int freq)
 
 }
 
-
-	
-	

Modified: head/sys/i386/xen/mp_machdep.c
==============================================================================
--- head/sys/i386/xen/mp_machdep.c	Fri May 13 10:36:38 2011	(r221834)
+++ head/sys/i386/xen/mp_machdep.c	Fri May 13 12:39:37 2011	(r221835)
@@ -628,8 +628,11 @@ init_secondary(void)
 	while (smp_started == 0)
 		ia32_pause();
 
-	
 	PCPU_SET(curthread, PCPU_GET(idlethread));
+
+	/* Start per-CPU event timers. */
+	cpu_initclocks_ap();
+
 	/* enter the scheduler */
 	sched_throw(NULL);
 

Modified: head/sys/kern/kern_clocksource.c
==============================================================================
--- head/sys/kern/kern_clocksource.c	Fri May 13 10:36:38 2011	(r221834)
+++ head/sys/kern/kern_clocksource.c	Fri May 13 12:39:37 2011	(r221835)
@@ -31,9 +31,6 @@ __FBSDID("$FreeBSD$");
  * Common routines to manage event timers hardware.
  */
 
-/* XEN has own timer routines now. */
-#ifndef XEN
-
 #include "opt_device_polling.h"
 #include "opt_kdtrace.h"
 
@@ -899,5 +896,3 @@ sysctl_kern_eventtimer_periodic(SYSCTL_H
 SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode");
-
-#endif



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201105131239.p4DCdccA071147>