Skip site navigation (1) Skip section navigation (2)
Date:      Wed, 10 Apr 2013 05:59:07 +0000 (UTC)
From:      Neel Natu <neel@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r249324 - in head: sys/amd64/include sys/amd64/vmm sys/x86/x86 usr.sbin/bhyve
Message-ID:  <201304100559.r3A5x7PF004306@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: neel
Date: Wed Apr 10 05:59:07 2013
New Revision: 249324
URL: http://svnweb.freebsd.org/changeset/base/249324

Log:
  Unsynchronized TSCs on the host require special handling in bhyve:
  
  - use clock_gettime(2) as the time base for the emulated ACPI timer instead
    of directly using rdtsc().
  
  - don't advertise the invariant TSC capability to the guest to discourage it
    from using the TSC as its time base.
  
  Discussed with:	jhb@ (about making 'smp_tsc' a global)
  Reported by:	Dan Mack on freebsd-virtualization@
  Obtained from:	NetApp

Modified:
  head/sys/amd64/include/clock.h
  head/sys/amd64/vmm/x86.c
  head/sys/x86/x86/tsc.c
  head/usr.sbin/bhyve/pmtmr.c

Modified: head/sys/amd64/include/clock.h
==============================================================================
--- head/sys/amd64/include/clock.h	Wed Apr 10 02:40:03 2013	(r249323)
+++ head/sys/amd64/include/clock.h	Wed Apr 10 05:59:07 2013	(r249324)
@@ -20,6 +20,9 @@ extern int	i8254_max_count;
 extern uint64_t	tsc_freq;
 extern int	tsc_is_invariant;
 extern int	tsc_perf_stat;
+#ifdef SMP
+extern int	smp_tsc;
+#endif
 
 void	i8254_init(void);
 

Modified: head/sys/amd64/vmm/x86.c
==============================================================================
--- head/sys/amd64/vmm/x86.c	Wed Apr 10 02:40:03 2013	(r249323)
+++ head/sys/amd64/vmm/x86.c	Wed Apr 10 05:59:07 2013	(r249324)
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/cpuset.h>
 
+#include <machine/clock.h>
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
@@ -89,11 +90,27 @@ x86_emulate_cpuid(struct vm *vm, int vcp
 		case CPUID_8000_0003:
 		case CPUID_8000_0004:
 		case CPUID_8000_0006:
-		case CPUID_8000_0007:
 		case CPUID_8000_0008:
 			cpuid_count(*eax, *ecx, regs);
 			break;
 
+		case CPUID_8000_0007:
+			cpuid_count(*eax, *ecx, regs);
+			/*
+			 * If the host TSCs are not synchronized across
+			 * physical cpus then we cannot advertise an
+			 * invariant tsc to a vcpu.
+			 *
+			 * XXX This still falls short because the vcpu
+			 * can observe the TSC moving backwards as it
+			 * migrates across physical cpus. But at least
+			 * it should discourage the guest from using the
+			 * TSC to keep track of time.
+			 */
+			if (!smp_tsc)
+				regs[3] &= ~AMDPM_TSC_INVARIANT;
+			break;
+
 		case CPUID_0000_0001:
 			do_cpuid(1, regs);
 

Modified: head/sys/x86/x86/tsc.c
==============================================================================
--- head/sys/x86/x86/tsc.c	Wed Apr 10 02:40:03 2013	(r249323)
+++ head/sys/x86/x86/tsc.c	Wed Apr 10 05:59:07 2013	(r249324)
@@ -61,7 +61,7 @@ SYSCTL_INT(_kern_timecounter, OID_AUTO, 
 TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);
 
 #ifdef SMP
-static int	smp_tsc;
+int	smp_tsc;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
     "Indicates whether the TSC is safe to use in SMP mode");
 TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);

Modified: head/usr.sbin/bhyve/pmtmr.c
==============================================================================
--- head/usr.sbin/bhyve/pmtmr.c	Wed Apr 10 02:40:03 2013	(r249323)
+++ head/usr.sbin/bhyve/pmtmr.c	Wed Apr 10 05:59:07 2013	(r249324)
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpufunc.h>
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <time.h>
 #include <assert.h>
 #include <pthread.h>
@@ -53,35 +54,108 @@ __FBSDID("$FreeBSD$");
 #define PMTMR_FREQ	3579545  /* 3.579545MHz */
 
 static pthread_mutex_t pmtmr_mtx;
-static uint64_t	pmtmr_tscf;
+
 static uint64_t	pmtmr_old;
+
+static uint64_t	pmtmr_tscf;
 static uint64_t	pmtmr_tsc_old;
 
+static clockid_t clockid = CLOCK_UPTIME_FAST;
+static struct timespec pmtmr_uptime_old;
+
+#define	timespecsub(vvp, uvp)						\
+	do {								\
+		(vvp)->tv_sec -= (uvp)->tv_sec;				\
+		(vvp)->tv_nsec -= (uvp)->tv_nsec;			\
+		if ((vvp)->tv_nsec < 0) {				\
+			(vvp)->tv_sec--;				\
+			(vvp)->tv_nsec += 1000000000;			\
+		}							\
+	} while (0)
+
+static uint64_t
+timespec_to_pmtmr(const struct timespec *tsnew, const struct timespec *tsold)
+{
+	struct timespec tsdiff;
+	int64_t nsecs;
+
+	tsdiff = *tsnew;
+	timespecsub(&tsdiff, tsold);
+	nsecs = tsdiff.tv_sec * 1000000000 + tsdiff.tv_nsec;
+	assert(nsecs >= 0);
+
+	return (nsecs * PMTMR_FREQ / 1000000000 + pmtmr_old);
+}
+
+static uint64_t
+tsc_to_pmtmr(uint64_t tsc_new, uint64_t tsc_old)
+{
+
+	return ((tsc_new - tsc_old) * PMTMR_FREQ / pmtmr_tscf + pmtmr_old);
+}
+
+static void
+pmtmr_init(void)
+{
+	size_t len;
+	int smp_tsc, err;
+	struct timespec tsnew, tsold = { 0 };
+
+	len = sizeof(smp_tsc);
+	err = sysctlbyname("kern.timecounter.smp_tsc", &smp_tsc, &len, NULL, 0);
+	assert(err == 0);
+
+	if (smp_tsc) {
+		len = sizeof(pmtmr_tscf);
+		err = sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len,
+				   NULL, 0);
+		assert(err == 0);
+
+		pmtmr_tsc_old = rdtsc();
+		pmtmr_old = tsc_to_pmtmr(pmtmr_tsc_old, 0);
+	} else {
+		if (getenv("BHYVE_PMTMR_PRECISE") != NULL)
+			clockid = CLOCK_UPTIME;
+
+		err = clock_gettime(clockid, &tsnew);
+		assert(err == 0);
+
+		pmtmr_uptime_old = tsnew;
+		pmtmr_old = timespec_to_pmtmr(&tsnew, &tsold);
+	}
+}
+
 static uint32_t
 pmtmr_val(void)
 {
+	struct timespec	tsnew;
 	uint64_t	pmtmr_tsc_new;
 	uint64_t	pmtmr_new;
+	int		error;
+
 	static int	inited = 0;
 
 	if (!inited) {
-		size_t len;
-
-		inited = 1;
 		pthread_mutex_init(&pmtmr_mtx, NULL);
-		len = sizeof(pmtmr_tscf);
-		sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len,
-		    NULL, 0);
-		pmtmr_tsc_old = rdtsc();
-		pmtmr_old = pmtmr_tsc_old / pmtmr_tscf * PMTMR_FREQ;
+		pmtmr_init();
+		inited = 1;
 	}
 
 	pthread_mutex_lock(&pmtmr_mtx);
-	pmtmr_tsc_new = rdtsc();
-	pmtmr_new = (pmtmr_tsc_new - pmtmr_tsc_old) * PMTMR_FREQ / pmtmr_tscf +
-	    pmtmr_old;
+
+	if (pmtmr_tscf) {
+		pmtmr_tsc_new = rdtsc();
+		pmtmr_new = tsc_to_pmtmr(pmtmr_tsc_new, pmtmr_tsc_old);
+		pmtmr_tsc_old = pmtmr_tsc_new;
+	} else {
+		error = clock_gettime(clockid, &tsnew);
+		assert(error == 0);
+
+		pmtmr_new = timespec_to_pmtmr(&tsnew, &pmtmr_uptime_old);
+		pmtmr_uptime_old = tsnew;
+	}
 	pmtmr_old = pmtmr_new;
-	pmtmr_tsc_old = pmtmr_tsc_new;
+
 	pthread_mutex_unlock(&pmtmr_mtx);
 
 	return (pmtmr_new); 
@@ -102,4 +176,3 @@ pmtmr_handler(struct vmctx *ctx, int vcp
 }
 
 INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler);
-



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201304100559.r3A5x7PF004306>