Date:      Thu, 15 Aug 2013 00:03:17 +0000 (UTC)
From:      Jung-uk Kim <jkim@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r254343 - in projects/atomic64/sys/i386: i386 include
Message-ID:  <201308150003.r7F03Hdr033119@svn.freebsd.org>

Author: jkim
Date: Thu Aug 15 00:03:17 2013
New Revision: 254343
URL: http://svnweb.freebsd.org/changeset/base/254343

Log:
  Try to improve performance on i486-class CPUs.
  
  - Partially revert r253876. [1]
  - Use conditional jumps instead of function pointers. [2]  Note that this
    change may break the KPI, i.e., atomic_load_acq_64() and
    atomic_store_rel_64() are now real functions.
  
  Note that these changes are not tested because I do not have i486-class CPUs.
  
  Objected by:		attilio [1]
  Suggested by:		bde [2]
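
  [Editor's note] A minimal standalone sketch (not part of the committed
  diff) of the conditional-jump dispatch described above; the *_demo names
  and the stub bodies are hypothetical stand-ins for the kernel code:

	#include <stdint.h>
	#include <stdio.h>

	/* Would be set once at boot by a probe such as cpu_probe_cmpxchg8b(). */
	static int cpu_has_cx8;

	static uint64_t
	atomic_load_acq_64_i386_demo(volatile uint64_t *p)
	{

		return (*p);	/* stub: the real i486 path disables interrupts */
	}

	static uint64_t
	atomic_load_acq_64_i586_demo(volatile uint64_t *p)
	{

		return (*p);	/* stub: the real i586 path uses CMPXCHG8B */
	}

	uint64_t
	atomic_load_acq_64_demo(volatile uint64_t *p)
	{

		/* A conditional jump on a cached flag replaces the
		   indirect call through a function pointer. */
		if (cpu_has_cx8)
			return (atomic_load_acq_64_i586_demo(p));
		else
			return (atomic_load_acq_64_i386_demo(p));
	}

	int
	main(void)
	{
		volatile uint64_t x = 42;

		cpu_has_cx8 = 1;	/* pretend CPUID advertised CX8 */
		printf("%llu\n",
		    (unsigned long long)atomic_load_acq_64_demo(&x));
		return (0);
	}

  The idea, per the log above, is that testing a cached flag and making a
  direct call is typically cheaper and easier for the compiler to optimize
  than an indirect call through a function pointer.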

Modified:
  projects/atomic64/sys/i386/i386/machdep.c
  projects/atomic64/sys/i386/include/atomic.h

Modified: projects/atomic64/sys/i386/i386/machdep.c
==============================================================================
--- projects/atomic64/sys/i386/i386/machdep.c	Wed Aug 14 23:36:33 2013	(r254342)
+++ projects/atomic64/sys/i386/i386/machdep.c	Thu Aug 15 00:03:17 2013	(r254343)
@@ -1548,29 +1548,55 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
 SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
     idle_sysctl, "A", "currently selected idle function");
 
-int (*atomic_cmpset_64)(volatile uint64_t *, uint64_t, uint64_t) =
-    atomic_cmpset_64_i386;
-uint64_t (*atomic_load_acq_64)(volatile uint64_t *) =
-    atomic_load_acq_64_i386;
-void (*atomic_store_rel_64)(volatile uint64_t *, uint64_t) =
-    atomic_store_rel_64_i386;
-uint64_t (*atomic_swap_64)(volatile uint64_t *, uint64_t) =
-    atomic_swap_64_i386;
-int (*atomic_testandset_64)(volatile uint64_t *, int) =
-    atomic_testandset_64_i386;
+static int	cpu_ident_cmxchg8b = 0;
 
 static void
 cpu_probe_cmpxchg8b(void)
 {
 
 	if ((cpu_feature & CPUID_CX8) != 0 ||
-	    cpu_vendor_id == CPU_VENDOR_RISE) {
-		atomic_cmpset_64 = atomic_cmpset_64_i586;
-		atomic_load_acq_64 = atomic_load_acq_64_i586;
-		atomic_store_rel_64 = atomic_store_rel_64_i586;
-		atomic_swap_64 = atomic_swap_64_i586;
-		atomic_testandset_64 = atomic_testandset_64_i586;
-	}
+	    cpu_vendor_id == CPU_VENDOR_RISE)
+		cpu_ident_cmxchg8b = 1;
+}
+
+int
+atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
+{
+
+	if (cpu_ident_cmxchg8b)
+		return (atomic_cmpset_64_i586(dst, expect, src));
+	else
+		return (atomic_cmpset_64_i386(dst, expect, src));
+}
+
+uint64_t
+atomic_load_acq_64(volatile uint64_t *p)
+{
+
+	if (cpu_ident_cmxchg8b)
+		return (atomic_load_acq_64_i586(p));
+	else
+		return (atomic_load_acq_64_i386(p));
+}
+
+void
+atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
+{
+
+	if (cpu_ident_cmxchg8b)
+		atomic_store_rel_64_i586(p, v);
+	else
+		atomic_store_rel_64_i386(p, v);
+}
+
+uint64_t
+atomic_swap_64(volatile uint64_t *p, uint64_t v)
+{
+
+	if (cpu_ident_cmxchg8b)
+		return (atomic_swap_64_i586(p, v));
+	else
+		return (atomic_swap_64_i386(p, v));
 }
 
 /*

Modified: projects/atomic64/sys/i386/include/atomic.h
==============================================================================
--- projects/atomic64/sys/i386/include/atomic.h	Wed Aug 14 23:36:33 2013	(r254342)
+++ projects/atomic64/sys/i386/include/atomic.h	Thu Aug 15 00:03:17 2013	(r254343)
@@ -124,76 +124,101 @@ atomic_##NAME##_barr_##TYPE(volatile u_#
 }							\
 struct __hack
 
-#define	ATOMIC_LOCK_I386(f)		\
-    __asm __volatile("pushfl; popl %0; cli" : "=r" (f))
-#define	ATOMIC_UNLOCK_I386(f)	\
-    __asm __volatile("pushl %0; popfl" : : "r" (f))
-
 #if defined(_KERNEL) && !defined(WANT_FUNCTIONS)
 
 /* I486 does not support SMP or CMPXCHG8B. */
 static __inline int
 atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
 {
-	int res;
-	register_t lock;
+	volatile uint32_t *p;
 
-	res = 0;
-	ATOMIC_LOCK_I386(lock);
-	if (*dst == expect) {
-		*dst = src;
-		res = 1;
-	}
-	ATOMIC_UNLOCK_I386(lock);
-	return (res);
+	p = (volatile uint32_t *)dst;
+	__asm __volatile(
+	"	pushfl ;		"
+	"	cli ;			"
+	"	xorl	%1, %%eax ;	"
+	"	xorl	%2, %%edx ;	"
+	"	orl	%%edx, %%eax ;	"
+	"	jnz	1f ;		"
+	"	movl	%3, %1 ;	"
+	"	movl	%4, %2 ;	"
+	"	movl	$1, %%eax ;	"
+	"	jmp	2f ;		"
+	"1:				"
+	"	xorl	%%eax, %%eax ;	"
+	"2:				"
+	"	popfl"
+	: "+A" (expect),		/* 0 */
+	  "+m" (*p),			/* 1 */
+	  "+m" (*(p + 1))		/* 2 */
+	: "r" ((uint32_t)src),		/* 3 */
+	  "r" ((uint32_t)(src >> 32))	/* 4 */
+	: "memory", "cc");
+
+	return (expect);
 }
 
 static __inline uint64_t
 atomic_load_acq_64_i386(volatile uint64_t *p)
 {
+	volatile uint32_t *q;
 	uint64_t res;
-	register_t lock;
 
-	ATOMIC_LOCK_I386(lock);
-	res = *p;
-	ATOMIC_UNLOCK_I386(lock);
+	q = (volatile uint32_t *)p;
+	__asm __volatile(
+	"	pushfl ;		"
+	"	cli ;			"
+	"	movl	%1, %%eax ;	"
+	"	movl	%2, %%edx ;	"
+	"	popfl"
+	: "=&A" (res)			/* 0 */
+	: "m" (*q),			/* 1 */
+	  "m" (*(q + 1))		/* 2 */
+	: "memory");
+
 	return (res);
 }
 
 static __inline void
 atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
 {
-	register_t lock;
+	volatile uint32_t *q;
 
-	ATOMIC_LOCK_I386(lock);
-	*p = v;
-	ATOMIC_UNLOCK_I386(lock);
+	q = (volatile uint32_t *)p;
+	__asm __volatile(
+	"	pushfl ;		"
+	"	cli ;			"
+	"	movl	%%eax, %0 ;	"
+	"	movl	%%edx, %1 ;	"
+	"	popfl"
+	: "=m" (*q),			/* 0 */
+	  "=m" (*(q + 1))		/* 1 */
+	: "A" (v)			/* 2 */
+	: "memory");
 }
 
 static __inline uint64_t
 atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
 {
+	volatile uint32_t *q;
 	uint64_t res;
-	register_t lock;
 
-	ATOMIC_LOCK_I386(lock);
-	res = *p;
-	*p = v;
-	ATOMIC_UNLOCK_I386(lock);
-	return (res);
-}
-
-static __inline int
-atomic_testandset_64_i386(volatile uint64_t *p, u_int v)
-{
-	const uint64_t s = 1ULL << (v & 0x3f);
-	int res;
-	register_t lock;
+	q = (volatile uint32_t *)p;
+	__asm __volatile(
+	"	pushfl ;		"
+	"	cli ;			"
+	"	movl	%1, %%eax ;	"
+	"	movl	%2, %%edx ;	"
+	"	movl	%4, %2 ;	"
+	"	movl	%3, %1 ;	"
+	"	popfl"
+	: "=&A" (res),			/* 0 */
+	  "+m" (*q),			/* 1 */
+	  "+m" (*(q + 1))		/* 2 */
+	: "r" ((uint32_t)v),		/* 3 */
+	  "r" ((uint32_t)(v >> 32))	/* 4 */
+	: "memory");
 
-	ATOMIC_LOCK_I386(lock);
-	res = (*p & s) != 0;
-	*p |= s;
-	ATOMIC_UNLOCK_I386(lock);
 	return (res);
 }
 
@@ -264,18 +289,6 @@ atomic_swap_64_i586(volatile uint64_t *p
 	return (v);
 }
 
-static __inline int
-atomic_testandset_64_i586(volatile uint64_t *p, u_int v)
-{
-	const uint64_t s = 1ULL << (v & 0x3f);
-	uint64_t n;
-
-	do {
-		n = *p;
-	} while (!atomic_cmpset_64_i586(p, n, n | s));
-	return ((n & s) != 0);
-}
-
 #endif /* _KERNEL && !WANT_FUNCTIONS */
 
 /*
@@ -457,11 +470,10 @@ ATOMIC_STORE(long);
 #ifndef WANT_FUNCTIONS
 
 #ifdef _KERNEL
-extern int (*atomic_cmpset_64)(volatile uint64_t *, uint64_t, uint64_t);
-extern uint64_t (*atomic_load_acq_64)(volatile uint64_t *);
-extern void (*atomic_store_rel_64)(volatile uint64_t *, uint64_t);
-extern uint64_t (*atomic_swap_64)(volatile uint64_t *, uint64_t);
-extern int (*atomic_testandset_64)(volatile uint64_t *, u_int);
+int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
+uint64_t	atomic_load_acq_64(volatile uint64_t *);
+void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
+uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
 #endif
 
 static __inline int


