From: Attilio Rao
Date: Mon, 12 Oct 2009 16:11:25 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
Subject: svn commit: r197986 - in stable/7/sys: . amd64/include contrib/pf i386/include
Message-Id: <200910121611.n9CGBPdF075255@svn.freebsd.org>

Author: attilio
Date: Mon Oct 12 16:11:25 2009
New Revision: 197986
URL: http://svn.freebsd.org/changeset/base/197986

Log:
  MFC r197803, r197824, r197910:

  Per their definition, atomic instructions used in conjunction with memory
  barriers should also ensure that the compiler doesn't reorder the code
  paths where they are used.  GCC, however, reorders aggressively, even in
  the presence of volatile operands.  The most reliable way GCC offers to
  avoid such instruction reordering is clobbering "memory".  Right now, not
  all of our memory barriers clobber memory for GCC-like compilers.  Fix
  these cases.

Modified:
  stable/7/sys/   (props changed)
  stable/7/sys/amd64/include/atomic.h
  stable/7/sys/contrib/pf/   (props changed)
  stable/7/sys/i386/include/atomic.h

Modified: stable/7/sys/amd64/include/atomic.h
==============================================================================
--- stable/7/sys/amd64/include/atomic.h Mon Oct 12 16:05:31 2009 (r197985)
+++ stable/7/sys/amd64/include/atomic.h Mon Oct 12 16:11:25 2009 (r197986)
@@ -69,7 +69,8 @@
  */
 #if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
 #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \
-void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
+void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \
+void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src);
 int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src);
@@ -93,8 +94,9 @@ void atomic_store_rel_##TYPE(volatile u
 #endif
 
 /*
- * The assembly is volatilized to demark potential before-and-after side
- * effects if an interrupt or SMP collision were to occur.
+ * The assembly is volatilized to avoid code chunk removal by the compiler.
+ * GCC aggressively reorders operations and memory clobbering is necessary
+ * in order to avoid that for memory barriers.
  */
 #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \
 static __inline void \
@@ -104,6 +106,15 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE
         : "=m" (*p) \
         : CONS (V), "m" (*p)); \
 } \
+        \
+static __inline void \
+atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{ \
+        __asm __volatile(MPLOCKED OP \
+        : "=m" (*p) \
+        : CONS (V), "m" (*p) \
+        : "memory"); \
+} \
 struct __hack
 
 /*
@@ -201,18 +212,23 @@ atomic_fetchadd_long(volatile u_long *p,
  * PentiumPro or higher, reads may pass writes, so for that case we have
  * to use a serializing instruction (i.e. with LOCK) to do the load in
  * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we don't need any memory barriers.
+ * is always consistent, so we only need to take care of compiler.
  */
 #define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \
 static __inline u_##TYPE \
 atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
 { \
-        return (*p); \
+        u_##TYPE tmp; \
+        \
+        tmp = *p; \
+        __asm __volatile ("" : : : "memory"); \
+        return (tmp); \
 } \
         \
 static __inline void \
 atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 { \
+        __asm __volatile ("" : : : "memory"); \
         *p = v; \
 } \
 struct __hack
 
@@ -243,7 +259,8 @@ atomic_store_rel_##TYPE(volatile u_##TYP
         __asm __volatile(SOP \
         : "=m" (*p), /* 0 */ \
           "+r" (v) /* 1 */ \
-        : "m" (*p)); /* 2 */ \
+        : "m" (*p) /* 2 */ \
+        : "memory"); \
 } \
 struct __hack
 
@@ -323,44 +340,43 @@ u_long atomic_readandclear_long(volatile
 #endif /* __GNUCLIKE_ASM */
 
-/* Acquire and release variants are identical to the normal ones. */
-#define atomic_set_acq_char atomic_set_char
-#define atomic_set_rel_char atomic_set_char
-#define atomic_clear_acq_char atomic_clear_char
-#define atomic_clear_rel_char atomic_clear_char
-#define atomic_add_acq_char atomic_add_char
-#define atomic_add_rel_char atomic_add_char
-#define atomic_subtract_acq_char atomic_subtract_char
-#define atomic_subtract_rel_char atomic_subtract_char
-
-#define atomic_set_acq_short atomic_set_short
-#define atomic_set_rel_short atomic_set_short
-#define atomic_clear_acq_short atomic_clear_short
-#define atomic_clear_rel_short atomic_clear_short
-#define atomic_add_acq_short atomic_add_short
-#define atomic_add_rel_short atomic_add_short
-#define atomic_subtract_acq_short atomic_subtract_short
-#define atomic_subtract_rel_short atomic_subtract_short
-
-#define atomic_set_acq_int atomic_set_int
-#define atomic_set_rel_int atomic_set_int
-#define atomic_clear_acq_int atomic_clear_int
-#define atomic_clear_rel_int atomic_clear_int
-#define atomic_add_acq_int atomic_add_int
-#define atomic_add_rel_int atomic_add_int
-#define atomic_subtract_acq_int atomic_subtract_int
-#define atomic_subtract_rel_int atomic_subtract_int
+#define atomic_set_acq_char atomic_set_barr_char
+#define atomic_set_rel_char atomic_set_barr_char
+#define atomic_clear_acq_char atomic_clear_barr_char
+#define atomic_clear_rel_char atomic_clear_barr_char
+#define atomic_add_acq_char atomic_add_barr_char
+#define atomic_add_rel_char atomic_add_barr_char
+#define atomic_subtract_acq_char atomic_subtract_barr_char
+#define atomic_subtract_rel_char atomic_subtract_barr_char
+
+#define atomic_set_acq_short atomic_set_barr_short
+#define atomic_set_rel_short atomic_set_barr_short
+#define atomic_clear_acq_short atomic_clear_barr_short
+#define atomic_clear_rel_short atomic_clear_barr_short
+#define atomic_add_acq_short atomic_add_barr_short
+#define atomic_add_rel_short atomic_add_barr_short
+#define atomic_subtract_acq_short atomic_subtract_barr_short
+#define atomic_subtract_rel_short atomic_subtract_barr_short
+
+#define atomic_set_acq_int atomic_set_barr_int
+#define atomic_set_rel_int atomic_set_barr_int
+#define atomic_clear_acq_int atomic_clear_barr_int
+#define atomic_clear_rel_int atomic_clear_barr_int
+#define atomic_add_acq_int atomic_add_barr_int
+#define atomic_add_rel_int atomic_add_barr_int
+#define atomic_subtract_acq_int atomic_subtract_barr_int
+#define atomic_subtract_rel_int atomic_subtract_barr_int
 #define atomic_cmpset_acq_int atomic_cmpset_int
 #define atomic_cmpset_rel_int atomic_cmpset_int
 
-#define atomic_set_acq_long atomic_set_long
-#define atomic_set_rel_long atomic_set_long
-#define atomic_clear_acq_long atomic_clear_long
-#define atomic_clear_rel_long atomic_clear_long
-#define atomic_add_acq_long atomic_add_long
-#define atomic_add_rel_long atomic_add_long
-#define atomic_subtract_acq_long atomic_subtract_long
-#define atomic_subtract_rel_long atomic_subtract_long
+#define atomic_set_acq_long atomic_set_barr_long
+#define atomic_set_rel_long atomic_set_barr_long
+#define atomic_clear_acq_long atomic_clear_barr_long
+#define atomic_clear_rel_long atomic_clear_barr_long
+#define atomic_add_acq_long atomic_add_barr_long
+#define atomic_add_rel_long atomic_add_barr_long
+#define atomic_subtract_acq_long atomic_subtract_barr_long
+#define atomic_subtract_rel_long atomic_subtract_barr_long
 #define atomic_cmpset_acq_long atomic_cmpset_long
 #define atomic_cmpset_rel_long atomic_cmpset_long

Modified: stable/7/sys/i386/include/atomic.h
==============================================================================
--- stable/7/sys/i386/include/atomic.h Mon Oct 12 16:05:31 2009 (r197985)
+++ stable/7/sys/i386/include/atomic.h Mon Oct 12 16:11:25 2009 (r197986)
@@ -69,7 +69,8 @@
  */
 #if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
 #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \
-void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
+void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \
+void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src);
 u_int atomic_fetchadd_int(volatile u_int *p, u_int v);
@@ -91,8 +92,9 @@ void atomic_store_rel_##TYPE(volatile u
 #endif
 
 /*
- * The assembly is volatilized to demark potential before-and-after side
- * effects if an interrupt or SMP collision were to occur.
+ * The assembly is volatilized to avoid code chunk removal by the compiler.
+ * GCC aggressively reorders operations and memory clobbering is necessary
+ * in order to avoid that for memory barriers.
  */
 #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \
 static __inline void \
@@ -102,6 +104,15 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE
         : "=m" (*p) \
         : CONS (V), "m" (*p)); \
 } \
+        \
+static __inline void \
+atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{ \
+        __asm __volatile(MPLOCKED OP \
+        : "=m" (*p) \
+        : CONS (V), "m" (*p) \
+        : "memory"); \
+} \
 struct __hack
 
 /*
@@ -190,18 +201,23 @@ atomic_fetchadd_int(volatile u_int *p, u
  * PentiumPro or higher, reads may pass writes, so for that case we have
  * to use a serializing instruction (i.e. with LOCK) to do the load in
  * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we don't need any memory barriers.
+ * is always consistent, so we only need to take care of compiler.
  */
 #define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \
 static __inline u_##TYPE \
 atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
 { \
-        return (*p); \
+        u_##TYPE tmp; \
+        \
+        tmp = *p; \
+        __asm __volatile("" : : : "memory"); \
+        return (tmp); \
 } \
         \
 static __inline void \
 atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 { \
+        __asm __volatile("" : : : "memory"); \
         *p = v; \
 } \
 struct __hack
 
@@ -232,7 +248,8 @@ atomic_store_rel_##TYPE(volatile u_##TYP
         __asm __volatile(SOP \
         : "=m" (*p), /* 0 */ \
           "+r" (v) /* 1 */ \
-        : "m" (*p)); /* 2 */ \
+        : "m" (*p) /* 2 */ \
+        : "memory"); \
 } \
 struct __hack
 
@@ -327,44 +344,43 @@ u_long atomic_readandclear_long(volatile
 #endif /* __GNUCLIKE_ASM */
 
-/* Acquire and release variants are identical to the normal ones. */
-#define atomic_set_acq_char atomic_set_char
-#define atomic_set_rel_char atomic_set_char
-#define atomic_clear_acq_char atomic_clear_char
-#define atomic_clear_rel_char atomic_clear_char
-#define atomic_add_acq_char atomic_add_char
-#define atomic_add_rel_char atomic_add_char
-#define atomic_subtract_acq_char atomic_subtract_char
-#define atomic_subtract_rel_char atomic_subtract_char
-
-#define atomic_set_acq_short atomic_set_short
-#define atomic_set_rel_short atomic_set_short
-#define atomic_clear_acq_short atomic_clear_short
-#define atomic_clear_rel_short atomic_clear_short
-#define atomic_add_acq_short atomic_add_short
-#define atomic_add_rel_short atomic_add_short
-#define atomic_subtract_acq_short atomic_subtract_short
-#define atomic_subtract_rel_short atomic_subtract_short
-
-#define atomic_set_acq_int atomic_set_int
-#define atomic_set_rel_int atomic_set_int
-#define atomic_clear_acq_int atomic_clear_int
-#define atomic_clear_rel_int atomic_clear_int
-#define atomic_add_acq_int atomic_add_int
-#define atomic_add_rel_int atomic_add_int
-#define atomic_subtract_acq_int atomic_subtract_int
-#define atomic_subtract_rel_int atomic_subtract_int
+#define atomic_set_acq_char atomic_set_barr_char
+#define atomic_set_rel_char atomic_set_barr_char
+#define atomic_clear_acq_char atomic_clear_barr_char
+#define atomic_clear_rel_char atomic_clear_barr_char
+#define atomic_add_acq_char atomic_add_barr_char
+#define atomic_add_rel_char atomic_add_barr_char
+#define atomic_subtract_acq_char atomic_subtract_barr_char
+#define atomic_subtract_rel_char atomic_subtract_barr_char
+
+#define atomic_set_acq_short atomic_set_barr_short
+#define atomic_set_rel_short atomic_set_barr_short
+#define atomic_clear_acq_short atomic_clear_barr_short
+#define atomic_clear_rel_short atomic_clear_barr_short
+#define atomic_add_acq_short atomic_add_barr_short
+#define atomic_add_rel_short atomic_add_barr_short
+#define atomic_subtract_acq_short atomic_subtract_barr_short
+#define atomic_subtract_rel_short atomic_subtract_barr_short
+
+#define atomic_set_acq_int atomic_set_barr_int
+#define atomic_set_rel_int atomic_set_barr_int
+#define atomic_clear_acq_int atomic_clear_barr_int
+#define atomic_clear_rel_int atomic_clear_barr_int
+#define atomic_add_acq_int atomic_add_barr_int
+#define atomic_add_rel_int atomic_add_barr_int
+#define atomic_subtract_acq_int atomic_subtract_barr_int
+#define atomic_subtract_rel_int atomic_subtract_barr_int
 #define atomic_cmpset_acq_int atomic_cmpset_int
 #define atomic_cmpset_rel_int atomic_cmpset_int
 
-#define atomic_set_acq_long atomic_set_long
-#define atomic_set_rel_long atomic_set_long
-#define atomic_clear_acq_long atomic_clear_long
-#define atomic_clear_rel_long atomic_clear_long
-#define atomic_add_acq_long atomic_add_long
-#define atomic_add_rel_long atomic_add_long
-#define atomic_subtract_acq_long atomic_subtract_long
-#define atomic_subtract_rel_long atomic_subtract_long
+#define atomic_set_acq_long atomic_set_barr_long
+#define atomic_set_rel_long atomic_set_barr_long
+#define atomic_clear_acq_long atomic_clear_barr_long
+#define atomic_clear_rel_long atomic_clear_barr_long
+#define atomic_add_acq_long atomic_add_barr_long
+#define atomic_add_rel_long atomic_add_barr_long
+#define atomic_subtract_acq_long atomic_subtract_barr_long
+#define atomic_subtract_rel_long atomic_subtract_barr_long
 #define atomic_cmpset_acq_long atomic_cmpset_long
 #define atomic_cmpset_rel_long atomic_cmpset_long
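
The effect the new atomic_*_barr_*() variants rely on is that a lock-prefixed
instruction whose asm statement also clobbers "memory" doubles as a
compiler-level barrier: GCC will not move or cache other memory accesses
across it.  The following stand-alone sketch is illustrative only and is not
part of the commit; the name demo_atomic_add_barr_int and the main() driver
are invented for this example.

/*
 * Illustrative sketch (not from the commit): a lock-prefixed add with a
 * "memory" clobber, mirroring what ATOMIC_ASM() expands to for the new
 * atomic_*_barr_*() variants on i386/amd64.
 */
#include <stdio.h>

static __inline void
demo_atomic_add_barr_int(volatile unsigned int *p, unsigned int v)
{
        __asm __volatile("lock; addl %1,%0"
        : "=m" (*p)                     /* output: the memory operand */
        : "ir" (v), "m" (*p)            /* inputs: value to add, prior memory */
        : "memory");                    /* compiler barrier: no reordering */
}

int
main(void)
{
        static volatile unsigned int counter;

        demo_atomic_add_barr_int(&counter, 5);
        printf("counter = %u\n", counter);
        return (0);
}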
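
The other half of the change, in ATOMIC_STORE_LOAD(), uses an empty asm
statement with a "memory" clobber: it emits no instructions but keeps GCC
from moving memory accesses across it, which is all the UP case needs for
acquire loads and release stores.  A second stand-alone sketch, again
illustrative only and not part of the commit (demo_load_acq_int,
demo_store_rel_int, and the flag/data pair are invented names):

/*
 * Illustrative sketch (not from the commit): an empty asm with a "memory"
 * clobber is a compiler-only barrier, as used by the patched
 * atomic_load_acq_*() and atomic_store_rel_*() in the UP case.
 */
#include <stdio.h>

static volatile int data;
static volatile int flag;

static __inline void
demo_store_rel_int(volatile int *p, int v)
{
        __asm __volatile("" : : : "memory");    /* nothing emitted; blocks reordering */
        *p = v;
}

static __inline int
demo_load_acq_int(volatile int *p)
{
        int tmp;

        tmp = *p;
        __asm __volatile("" : : : "memory");    /* later accesses stay after the load */
        return (tmp);
}

int
main(void)
{
        data = 42;                              /* ordered before the flag store */
        demo_store_rel_int(&flag, 1);
        if (demo_load_acq_int(&flag) != 0)
                printf("data = %d\n", data);
        return (0);
}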