Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 28 Jul 2012 19:02:02 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        amd64@freebsd.org
Cc:        Jim Harris <jim.harris@gmail.com>, Andriy Gapon <avg@freebsd.org>
Subject:   Use fences for kernel tsc reads.
Message-ID:  <20120728160202.GI2676@deviant.kiev.zoral.com.ua>

next in thread | raw e-mail | index | archive | help

--DZpZF4rn8ymM3zt1
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

Hi,

This was discussed on somewhat unwieldly thread on svn-src@ as a followup
to the commit r238755 which uncovered the problem in the first place.

Below is the commit candidate. It changes the fix in r238755 to use CPUID
instead of rmb(), since rmb() translates to locked operation on i386,
and experimentation shown that Nehalem's RDTSC can pass LOCK.

Also, I explicitely use LFENCE and MFENCE instead using rmb() and mb(),
for the same reason that on i386 implementation of *mb() is a locked
operation instead of fence.

Jim, could you please, recheck that on your machine there is no regression
in the TSC synchronization test ?

Handling of usermode will follow later.

diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 94d4133..881fcd2 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -290,6 +290,13 @@ popcntq(u_long mask)
 }
=20
 static __inline void
+lfence(void)
+{
+
+	__asm __volatile("lfence" : : : "memory");
+}
+
+static __inline void
 mfence(void)
 {
=20
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index 62d268d..7cd3663 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -155,6 +155,13 @@ cpu_mwait(u_long extensions, u_int hints)
 }
=20
 static __inline void
+lfence(void)
+{
+
+	__asm __volatile("lfence" : : : "memory");
+}
+
+static __inline void
 mfence(void)
 {
=20
diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c
index c253a96..101cbb3 100644
--- a/sys/x86/x86/tsc.c
+++ b/sys/x86/x86/tsc.c
@@ -83,6 +83,10 @@ static void tsc_freq_changing(void *arg, const struct cf=
_level *level,
     int *status);
 static unsigned tsc_get_timecount(struct timecounter *tc);
 static unsigned tsc_get_timecount_low(struct timecounter *tc);
+static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
+static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
+static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
+static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
 static void tsc_levels_changed(void *arg, int unit);
=20
 static struct timecounter tsc_timecounter =3D {
@@ -262,6 +266,10 @@ probe_tsc_freq(void)
 		    (vm_guest =3D=3D VM_GUEST_NO &&
 		    CPUID_TO_FAMILY(cpu_id) >=3D 0x10))
 			tsc_is_invariant =3D 1;
+		if (cpu_feature & CPUID_SSE2) {
+			tsc_timecounter.tc_get_timecount =3D
+			    tsc_get_timecount_mfence;
+		}
 		break;
 	case CPU_VENDOR_INTEL:
 		if ((amd_pminfo & AMDPM_TSC_INVARIANT) !=3D 0 ||
@@ -271,6 +279,10 @@ probe_tsc_freq(void)
 		    (CPUID_TO_FAMILY(cpu_id) =3D=3D 0xf &&
 		    CPUID_TO_MODEL(cpu_id) >=3D 0x3))))
 			tsc_is_invariant =3D 1;
+		if (cpu_feature & CPUID_SSE2) {
+			tsc_timecounter.tc_get_timecount =3D
+			    tsc_get_timecount_lfence;
+		}
 		break;
 	case CPU_VENDOR_CENTAUR:
 		if (vm_guest =3D=3D VM_GUEST_NO &&
@@ -278,6 +290,10 @@ probe_tsc_freq(void)
 		    CPUID_TO_MODEL(cpu_id) >=3D 0xf &&
 		    (rdmsr(0x1203) & 0x100000000ULL) =3D=3D 0)
 			tsc_is_invariant =3D 1;
+		if (cpu_feature & CPUID_SSE2) {
+			tsc_timecounter.tc_get_timecount =3D
+			    tsc_get_timecount_lfence;
+		}
 		break;
 	}
=20
@@ -328,15 +344,26 @@ init_TSC(void)
=20
 #ifdef SMP
=20
-/* rmb is required here because rdtsc is not a serializing instruction. */
+/*
+ * RDTSC is not a serializing instruction, so we need to drain
+ * instruction stream before executing it. It could be fixed by use of
+ * RDTSCP, except the instruction is not available everywhere.
+ *
+ * Use CPUID for draining. The timecounters use MFENCE for AMD CPUs,
+ * and LFENCE for others (Intel and VIA) when SSE2 is present, and
+ * nothing on older machines which also do not issue RDTSC
+ * prematurely. There, testing for SSE2 and vendor is too cumbersome,
+ * and we learn about TSC presence from CPUID.
+ */
 #define	TSC_READ(x)			\
 static void				\
 tsc_read_##x(void *arg)			\
 {					\
 	uint32_t *tsc =3D arg;		\
+	u_int p[4];			\
 	u_int cpu =3D PCPU_GET(cpuid);	\
 					\
-	rmb();				\
+	do_cpuid(0, p);			\
 	tsc[cpu * 3 + x] =3D rdtsc32();	\
 }
 TSC_READ(0)
@@ -487,7 +514,16 @@ init:
 	for (shift =3D 0; shift < 31 && (tsc_freq >> shift) > max_freq; shift++)
 		;
 	if (shift > 0) {
-		tsc_timecounter.tc_get_timecount =3D tsc_get_timecount_low;
+		if (cpu_feature & CPUID_SSE2) {
+			if (cpu_vendor_id =3D=3D CPU_VENDOR_AMD) {
+				tsc_timecounter.tc_get_timecount =3D
+				    tsc_get_timecount_low_mfence;
+			} else {
+				tsc_timecounter.tc_get_timecount =3D
+				    tsc_get_timecount_low_lfence;
+			}
+		} else
+			tsc_timecounter.tc_get_timecount =3D tsc_get_timecount_low;
 		tsc_timecounter.tc_name =3D "TSC-low";
 		if (bootverbose)
 			printf("TSC timecounter discards lower %d bit(s)\n",
@@ -592,23 +628,55 @@ sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
 SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
     0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");
=20
-static u_int
+static inline u_int
 tsc_get_timecount(struct timecounter *tc __unused)
 {
=20
 	return (rdtsc32());
 }
=20
-static u_int
+static inline u_int
 tsc_get_timecount_low(struct timecounter *tc)
 {
 	uint32_t rv;
=20
 	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
-	: "=3Da" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
+	    : "=3Da" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
 	return (rv);
 }
=20
+static inline u_int
+tsc_get_timecount_lfence(struct timecounter *tc __unused)
+{
+
+	lfence();
+	return (rdtsc32());
+}
+
+static inline u_int
+tsc_get_timecount_low_lfence(struct timecounter *tc)
+{
+
+	lfence();
+	return (tsc_get_timecount_low(tc));
+}
+
+static inline u_int
+tsc_get_timecount_mfence(struct timecounter *tc __unused)
+{
+
+	mfence();
+	return (rdtsc32());
+}
+
+static inline u_int
+tsc_get_timecount_low_mfence(struct timecounter *tc)
+{
+
+	mfence();
+	return (tsc_get_timecount_low(tc));
+}
+
 uint32_t
 cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th)
 {

--DZpZF4rn8ymM3zt1
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.12 (FreeBSD)

iEYEARECAAYFAlAUDPoACgkQC3+MBN1Mb4jzWQCeKjji4pIz/aFAvR2ZA4BPeYoR
0UsAmQHcbosSlJQ3DlgOfA9HFblw1Wsg
=Vlmf
-----END PGP SIGNATURE-----

--DZpZF4rn8ymM3zt1--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120728160202.GI2676>