Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 2 Mar 2012 21:29:00 +0100
From:      Tijl Coosemans <tijl@freebsd.org>
To:        Bruce Evans <brde@optusnet.com.au>
Cc:        svn-src-head@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org, Dimitry Andric <dim@freebsd.org>
Subject:   Re: svn commit: r232266 - in head/sys: amd64/include i386/include pc98/include x86/include
Message-ID:  <201203022129.10849.tijl@freebsd.org>
In-Reply-To: <20120302223355.Q2543@besplex.bde.org>
References:  <201202281939.q1SJdtYr084858@svn.freebsd.org> <20120302130435.J929@besplex.bde.org> <20120302223355.Q2543@besplex.bde.org>

next in thread | previous in thread | raw e-mail | index | archive | help
--nextPart26093839.3xFXd3iPZv
Content-Type: Text/Plain;
  charset="iso-8859-1"
Content-Transfer-Encoding: quoted-printable

On Friday 02 March 2012 12:52:41 Bruce Evans wrote:
> On Fri, 2 Mar 2012, Bruce Evans wrote:
>=20
> > On Thu, 1 Mar 2012, Tijl Coosemans wrote:
> >
> >> On Wednesday 29 February 2012 08:44:54 Bruce Evans wrote:
> >>> ...
> >>> So any macro version must use gcc features to be safe.  The following
> >>> seems to work:
> >>>=20
> >>> #define	__bswap16(x)	__extension__ ({ __uint16_t __x =3D x;
> >>>  	(__uint16_t)(__x << 8 | __x >> 8); })
> >>>=20
> >>> clang now produces "rolw $8,x" when x is volatile.  This seems to
> >>> violate volatile semantics.  gcc produces load-rolw-store then.  Both
> >>> produce "rolw $8,x" when x is not volatile.
> >>=20
> >> I'll have a closer look at the patch tomorrow, but the Intel
> >> documentation for the bswap instruction recommends to use xchg for
> >> bswap16.
> >
> > That's what i386 used to do, using an asm (see for example the RELENG_4
> > version), but the asm was replaced by C code and the compiler apparently
> > knows better.  This might depend on -mtune.  I tested with the default,
> > ...
>=20
> Here is another version.  It has now been tested a bit at runtime.
> I had to restore almost all of the complications, so the following
> reduces to mostly style changes including comments about surprising
> details.  It has 1 fix for clang (the comitted version doesn't
> work for clang).
>=20
> % Index: endian.h
> % =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> % RCS file: /home/ncvs/src/sys/x86/include/endian.h,v
> % retrieving revision 1.1
> % diff -u -2 -r1.1 endian.h
> % --- endian.h	28 Feb 2012 19:39:54 -0000	1.1
> % +++ endian.h	2 Mar 2012 10:43:53 -0000
> % @@ -37,8 +37,4 @@
> %  #include <sys/_types.h>
> %=20
> % -#ifdef __cplusplus
> % -extern "C" {
> % -#endif
> % -
> %  /*
> %   * Define the order of 32-bit words in 64-bit words.
> % @@ -68,25 +64,17 @@
> %  #endif
> %=20
> % -#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P)
> % -
> % -#define	__bswap16_const(_x)	(__uint16_t)((_x) << 8 | (_x) >> 8)
> % +#define	__bswap16_gen(x)	(__uint16_t)((x) << 8 | (x) >> 8)
> % +#define	__bswap32_gen(x)		\
> % +	(((__uint32_t)__bswap16(x) << 16) | __bswap16((x) >> 16))
> % +#define	__bswap64_gen(x)		\
> % +	(((__uint64_t)__bswap32(x) << 32) | __bswap32((x) >> 32))
> %=20
> % -#define	__bswap16(_x)			\
> % -	(__builtin_constant_p(_x) ?	\
> % -	    __bswap16_const((__uint16_t)(_x)) : __bswap16_var(_x))
> % -
> % -#define	__bswap32_const(_x)		\
> % -	(((__uint32_t)__bswap16(_x) << 16) | __bswap16((_x) >> 16))
> % -
> % -#define	__bswap32(_x)			\
> % -	(__builtin_constant_p(_x) ?	\
> % -	    __bswap32_const((__uint32_t)(_x)) : __bswap32_var(_x))
> % +#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P)
> %=20
> % -#define	__bswap64_const(_x)		\
> % -	(((__uint64_t)__bswap32(_x) << 32) | __bswap32((_x) >> 32))
> % +/* The following mess is to avoid multiple evaluation of x. */
> %=20
> % -#define	__bswap64(_x)			\
> % -	(__builtin_constant_p(_x) ?	\
> % -	    __bswap64_const((__uint64_t)(_x)) : __bswap64_var(_x))
> % +#define	__bswap16(x)			\
> % +	(__builtin_constant_p(x) ?	\
> % +	    __bswap16_gen((__uint16_t)(x)) : __bswap16_var((__uint16_t)(x)))
>=20
> For clang, x must be cast in the `var' clause too, since when x is
> something like 0x12345678 (having been passed here without conversion
> by __bswap32()), clang parses the `var' clause and warns about the
> implicit truncation of 0x12345678 to 0x5678 in it.  This is a bug
> in clang -- when warning about fine details, it is important not to
> do it for non-problems.
>=20
> %=20
> %  static __inline __uint16_t
> % @@ -94,7 +82,19 @@
> %  {
> %=20
> % -	return (__bswap16_const(_x));
> % +	return (__bswap16_gen(_x));
> %  }
> %=20
> % +/*
> % + * The following messes are old optimizations for gcc.  The messes have
> % + * been reduced significantly, but I don't know how to implement the
> % + * preferred way of defining defaults above and overriding them cleanly
> % + * here.  Even clang needs help for the 64-bit case, so to keep the
> % + * ifdefs sane we use this for the 32-bit case with clang too.
> % + */
> % +
> % +#define	__bswap32(x)			\
> % +	(__builtin_constant_p(x) ?	\
> % +	    __bswap32_gen((__uint32_t)(x)) : __bswap32_var(x))
> % +
> %  static __inline __uint32_t
> %  __bswap32_var(__uint32_t _x)
> % @@ -105,34 +105,37 @@
> %  }
> %=20
> % +#define	__bswap64(x)			\
> % +	(__builtin_constant_p(x) ?	\
> % +	    __bswap64_gen((__uint64_t)(x)) : __bswap64_var(x))
> % +
> %  static __inline __uint64_t
> %  __bswap64_var(__uint64_t _x)
> %  {
> % -#ifdef	_LP64
> % +
> % +#ifdef _LP64
> %  	__asm ("bswap %0" : "+r" (_x));
> %  	return (_x);
> %  #else
> % -	return (__bswap64_const(_x));
> % +	/*
> % +	 * It is important for the optimizations that the following is not
> % +	 * really generic, but expands to 2 __bswap32_var()'s.
> % +	 */
> % +	return (__bswap64_gen(_x));
> %  #endif
> %  }
> %=20
> % -#define	__htonl(x)	__bswap32(x)
> % -#define	__htons(x)	__bswap16(x)
> % -#define	__ntohl(x)	__bswap32(x)
> % -#define	__ntohs(x)	__bswap16(x)
> % +#else /* !(__GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P */
> %=20
> % -#else /* !(__GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P) */
> % -
> % -/*
> % - * No optimizations are available for this compiler.  Fall back to
> % - * non-optimized functions by defining the constant usually used to pr=
event
> % - * redefinition.
> % - */
> % -#define	_BYTEORDER_FUNC_DEFINED
> % +/* XXX these are broken, since they evaluate x more than once. */

Maybe make them static inline functions. That won't be very efficient at
=2DO0 but it'll be correct and this implementation probably isn't very
efficient anyway. Otherwise the patch looks good to me.

> % +#define	__bswap16(x)	(__bswap16_gen((__uint16_t)(x)))
> % +#define	__bswap32(x)	(__bswap32_gen((__uint32_t)(x)))
> % +#define	__bswap64(x)	(__bswap64_gen((__uint64_t)(x)))
> %=20
> %  #endif /* __GNUCLIKE_ASM && __GNUCLIKE_BUILTIN_CONSTANT_P */
> %=20
> % -#ifdef __cplusplus
> % -}
> % -#endif
> % +#define	__htonl(x)	__bswap32(x)
> % +#define	__htons(x)	__bswap16(x)
> % +#define	__ntohl(x)	__bswap32(x)
> % +#define	__ntohs(x)	__bswap16(x)
> %=20
> %  #endif /* !_MACHINE_ENDIAN_H_ */


--nextPart26093839.3xFXd3iPZv
Content-Type: application/pgp-signature; name=signature.asc 
Content-Description: This is a digitally signed message part.

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.18 (FreeBSD)

iF4EABEIAAYFAk9RLZYACgkQfoCS2CCgtium0QD9Fj2tmyw4NZPOavHtHihtQsUV
4LcQFnwfHsHdhZg4L20A/RZYbbqqQtSQlQh91qF6fS0ycfua46zMGI/BN4E0jqUC
=uzCf
-----END PGP SIGNATURE-----

--nextPart26093839.3xFXd3iPZv--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201203022129.10849.tijl>