Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 20 Jul 2018 12:42:24 +0000 (UTC)
From:      Bruce Evans <bde@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r336545 - in head/lib/msun: ld128 ld80 src
Message-ID:  <201807201242.w6KCgORn048599@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bde
Date: Fri Jul 20 12:42:24 2018
New Revision: 336545
URL: https://svnweb.freebsd.org/changeset/base/336545

Log:
  Centralize the complications for special efficient rounding to integers.
  
  This was open-coded in range reduction for trig and exp functions.  Now
  there are 3 static inline functions rnint[fl]() that replace open-coded
  expressions, and type-generic irint() and i64rint() macros that hide the
  complications for efficiently using non-generic irint() and irintl()
  functions and casts.
  
  Special details:
  
  ld128/e_rem_pio2l.h needs to use i64rint() since it needs a 46-bit integer
  result.  Everything else only needs a (less than) 32-bit integer result so
  uses irint().
  
  Float and double cases now use float_t and double_t locally instead of
  STRICT_ASSIGN() to avoid bugs in extra precision.
  
  On amd64, inline asm is now only used for irint() on long doubles.  The SSE
  asm for irint() on amd64 only existed because the ifdef tangles made the
  correct method of simply casting to int for this case non-obvious.

Modified:
  head/lib/msun/ld128/e_rem_pio2l.h
  head/lib/msun/ld128/k_expl.h
  head/lib/msun/ld128/s_expl.c
  head/lib/msun/ld80/e_rem_pio2l.h
  head/lib/msun/ld80/k_expl.h
  head/lib/msun/ld80/s_expl.c
  head/lib/msun/src/e_rem_pio2.c
  head/lib/msun/src/e_rem_pio2f.c
  head/lib/msun/src/math_private.h

Modified: head/lib/msun/ld128/e_rem_pio2l.h
==============================================================================
--- head/lib/msun/ld128/e_rem_pio2l.h	Fri Jul 20 12:38:07 2018	(r336544)
+++ head/lib/msun/ld128/e_rem_pio2l.h	Fri Jul 20 12:42:24 2018	(r336545)
@@ -74,14 +74,9 @@ __ieee754_rem_pio2l(long double x, long double *y)
 	if (ex < BIAS + 45 || ex == BIAS + 45 &&
 	    u.bits.manh < 0x921fb54442d1LL) {
 	    /* |x| ~< 2^45*(pi/2), medium size */
-	    /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	    fn = x*invpio2+0x1.8p112;
-	    fn = fn-0x1.8p112;
-#ifdef HAVE_EFFICIENT_I64RINT
+	    /* TODO: use only double precision for fn, as in expl(). */
+	    fn = rnintl(x * invpio2);
 	    n  = i64rint(fn);
-#else
-	    n  = fn;
-#endif
 	    r  = x-fn*pio2_1;
 	    w  = fn*pio2_1t;	/* 1st round good to 180 bit */
 	    {

Modified: head/lib/msun/ld128/k_expl.h
==============================================================================
--- head/lib/msun/ld128/k_expl.h	Fri Jul 20 12:38:07 2018	(r336544)
+++ head/lib/msun/ld128/k_expl.h	Fri Jul 20 12:42:24 2018	(r336545)
@@ -244,16 +244,8 @@ __k_expl(long double x, long double *hip, long double 
 	int n, n2;
 
 	/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
-	/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	/* XXX assume no extra precision for the additions, as for trig fns. */
-	/* XXX this set of comments is now quadruplicated. */
-	/* XXX but see ../src/e_exp.c for a fix using double_t. */
-	fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52;
-#if defined(HAVE_EFFICIENT_IRINT)
+	fn = rnint((double)x * INV_L);
 	n = irint(fn);
-#else
-	n = (int)fn;
-#endif
 	n2 = (unsigned)n % INTERVALS;
 	/* Depend on the sign bit being propagated: */
 	*kp = n >> LOG2_INTERVALS;

Modified: head/lib/msun/ld128/s_expl.c
==============================================================================
--- head/lib/msun/ld128/s_expl.c	Fri Jul 20 12:38:07 2018	(r336544)
+++ head/lib/msun/ld128/s_expl.c	Fri Jul 20 12:42:24 2018	(r336545)
@@ -268,13 +268,8 @@ expm1l(long double x)
 	}
 
 	/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
-	/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52;
-#if defined(HAVE_EFFICIENT_IRINT)
+	fn = rnint((double)x * INV_L);
 	n = irint(fn);
-#else
-	n = (int)fn;
-#endif
 	n2 = (unsigned)n % INTERVALS;
 	k = n >> LOG2_INTERVALS;
 	r1 = x - fn * L1;

Modified: head/lib/msun/ld80/e_rem_pio2l.h
==============================================================================
--- head/lib/msun/ld80/e_rem_pio2l.h	Fri Jul 20 12:38:07 2018	(r336544)
+++ head/lib/msun/ld80/e_rem_pio2l.h	Fri Jul 20 12:42:24 2018	(r336545)
@@ -84,14 +84,8 @@ __ieee754_rem_pio2l(long double x, long double *y)
 	ex = expsign & 0x7fff;
 	if (ex < BIAS + 25 || (ex == BIAS + 25 && u.bits.manh < 0xc90fdaa2)) {
 	    /* |x| ~< 2^25*(pi/2), medium size */
-	    /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	    fn = x*invpio2+0x1.8p63;
-	    fn = fn-0x1.8p63;
-#ifdef HAVE_EFFICIENT_IRINT
+	    fn = rnintl(x*invpio2);
 	    n  = irint(fn);
-#else
-	    n  = fn;
-#endif
 	    r  = x-fn*pio2_1;
 	    w  = fn*pio2_1t;	/* 1st round good to 102 bit */
 	    {

Modified: head/lib/msun/ld80/k_expl.h
==============================================================================
--- head/lib/msun/ld80/k_expl.h	Fri Jul 20 12:38:07 2018	(r336544)
+++ head/lib/msun/ld80/k_expl.h	Fri Jul 20 12:42:24 2018	(r336545)
@@ -225,16 +225,9 @@ __k_expl(long double x, long double *hip, long double 
 	int n, n2;
 
 	/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
-	/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	fn = x * INV_L + 0x1.8p63 - 0x1.8p63;
+	fn = rnintl(x * INV_L);
 	r = x - fn * L1 - fn * L2;	/* r = r1 + r2 done independently. */
-#if defined(HAVE_EFFICIENT_IRINTL)
-	n = irintl(fn);
-#elif defined(HAVE_EFFICIENT_IRINT)
 	n = irint(fn);
-#else
-	n = (int)fn;
-#endif
 	n2 = (unsigned)n % INTERVALS;
 	/* Depend on the sign bit being propagated: */
 	*kp = n >> LOG2_INTERVALS;

Modified: head/lib/msun/ld80/s_expl.c
==============================================================================
--- head/lib/msun/ld80/s_expl.c	Fri Jul 20 12:38:07 2018	(r336544)
+++ head/lib/msun/ld80/s_expl.c	Fri Jul 20 12:42:24 2018	(r336545)
@@ -225,15 +225,8 @@ expm1l(long double x)
 	}
 
 	/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
-	/* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	fn = x * INV_L + 0x1.8p63 - 0x1.8p63;
-#if defined(HAVE_EFFICIENT_IRINTL)
-	n = irintl(fn);
-#elif defined(HAVE_EFFICIENT_IRINT)
+	fn = rnintl(x * INV_L);
 	n = irint(fn);
-#else
-	n = (int)fn;
-#endif
 	n2 = (unsigned)n % INTERVALS;
 	k = n >> LOG2_INTERVALS;
 	r1 = x - fn * L1;

Modified: head/lib/msun/src/e_rem_pio2.c
==============================================================================
--- head/lib/msun/src/e_rem_pio2.c	Fri Jul 20 12:38:07 2018	(r336544)
+++ head/lib/msun/src/e_rem_pio2.c	Fri Jul 20 12:42:24 2018	(r336545)
@@ -127,14 +127,8 @@ __ieee754_rem_pio2(double x, double *y)
 	}
 	if(ix<0x413921fb) {	/* |x| ~< 2^20*(pi/2), medium size */
 medium:
-	    /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	    STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52);
-	    fn = fn-0x1.8p52;
-#ifdef HAVE_EFFICIENT_IRINT
+	    fn = rnint((double_t)x*invpio2);
 	    n  = irint(fn);
-#else
-	    n  = (int32_t)fn;
-#endif
 	    r  = x-fn*pio2_1;
 	    w  = fn*pio2_1t;	/* 1st round good to 85 bit */
 	    {

Modified: head/lib/msun/src/e_rem_pio2f.c
==============================================================================
--- head/lib/msun/src/e_rem_pio2f.c	Fri Jul 20 12:38:07 2018	(r336544)
+++ head/lib/msun/src/e_rem_pio2f.c	Fri Jul 20 12:42:24 2018	(r336545)
@@ -55,14 +55,8 @@ __ieee754_rem_pio2f(float x, double *y)
 	ix = hx&0x7fffffff;
     /* 33+53 bit pi is good enough for medium size */
 	if(ix<0x4dc90fdb) {		/* |x| ~< 2^28*(pi/2), medium size */
-	    /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
-	    STRICT_ASSIGN(double,fn,x*invpio2+0x1.8p52);
-	    fn = fn-0x1.8p52;
-#ifdef HAVE_EFFICIENT_IRINT
+	    fn = rnint((float_t)x*invpio2);
 	    n  = irint(fn);
-#else
-	    n  = (int32_t)fn;
-#endif
 	    r  = x-fn*pio2_1;
 	    w  = fn*pio2_1t;
 	    *y = r-w;

Modified: head/lib/msun/src/math_private.h
==============================================================================
--- head/lib/msun/src/math_private.h	Fri Jul 20 12:38:07 2018	(r336544)
+++ head/lib/msun/src/math_private.h	Fri Jul 20 12:42:24 2018	(r336545)
@@ -571,47 +571,115 @@ CMPLXL(long double x, long double y)
 
 #endif /* _COMPLEX_H */
  
-#ifdef __GNUCLIKE_ASM
+/*
+ * The rnint() family rounds to the nearest integer for a restricted range
+ * range of args (up to about 2**MANT_DIG).  We assume that the current
+ * rounding mode is FE_TONEAREST so that this can be done efficiently.
+ * Extra precision causes more problems in practice, and we only centralize
+ * this here to reduce those problems, and have not solved the efficiency
+ * problems.  The exp2() family uses a more delicate version of this that
+ * requires extracting bits from the intermediate value, so it is not
+ * centralized here and should copy any solution of the efficiency problems.
+ */
 
-/* Asm versions of some functions. */
+static inline double
+rnint(__double_t x)
+{
+	/*
+	 * This casts to double to kill any extra precision.  This depends
+	 * on the cast being applied to a double_t to avoid compiler bugs
+	 * (this is a cleaner version of STRICT_ASSIGN()).  This is
+	 * inefficient if there actually is extra precision, but is hard
+	 * to improve on.  We use double_t in the API to minimise conversions
+	 * for just calling here.  Note that we cannot easily change the
+	 * magic number to the one that works directly with double_t, since
+	 * the rounding precision is variable at runtime on x86 so the
+	 * magic number would need to be variable.  Assuming that the
+	 * rounding precision is always the default is too fragile.  This
+	 * and many other complications will move when the default is
+	 * changed to FP_PE.
+	 */
+	return ((double)(x + 0x1.8p52) - 0x1.8p52);
+}
 
-#ifdef __amd64__
+static inline float
+rnintf(__float_t x)
+{
+	/*
+	 * As for rnint(), except we could just call that to handle the
+	 * extra precision case, usually without losing efficiency.
+	 */
+	return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
+}
+
+#ifdef LDBL_MANT_DIG
+/*
+ * The complications for extra precision are smaller for rnintl() since it
+ * can safely assume that the rounding precision has been increased from
+ * its default to FP_PE on x86.  We don't exploit that here to get small
+ * optimizations from limiting the rangle to double.  We just need it for
+ * the magic number to work with long doubles.  ld128 callers should use
+ * rnint() instead of this if possible.  ld80 callers should prefer
+ * rnintl() since for amd64 this avoids swapping the register set, while
+ * for i386 it makes no difference (assuming FP_PE), and for other arches
+ * it makes little difference.
+ */
+static inline long double
+rnintl(long double x)
+{
+	return (x + __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2 -
+	    __CONCAT(0x1.8p, LDBL_MANT_DIG) / 2);
+}
+#endif /* LDBL_MANT_DIG */
+
+/*
+ * irint() and i64rint() give the same result as casting to their integer
+ * return type provided their arg is a floating point integer.  They can
+ * sometimes be more efficient because no rounding is required.
+ */
+#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
+#define	irint(x)						\
+    (sizeof(x) == sizeof(float) &&				\
+    sizeof(__float_t) == sizeof(long double) ? irintf(x) :	\
+    sizeof(x) == sizeof(double) &&				\
+    sizeof(__double_t) == sizeof(long double) ? irintd(x) :	\
+    sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
+#else
+#define	irint(x)	((int)(x))
+#endif
+
+#define	i64rint(x)	((int64_t)(x))	/* only needed for ld128 so not opt. */
+
+#if defined(__i386__) && defined(__GNUCLIKE_ASM)
 static __inline int
-irint(double x)
+irintf(float x)
 {
 	int n;
 
-	asm("cvtsd2si %1,%0" : "=r" (n) : "x" (x));
+	__asm("fistl %0" : "=m" (n) : "t" (x));
 	return (n);
 }
-#define	HAVE_EFFICIENT_IRINT
-#endif
 
-#ifdef __i386__
 static __inline int
-irint(double x)
+irintd(double x)
 {
 	int n;
 
-	asm("fistl %0" : "=m" (n) : "t" (x));
+	__asm("fistl %0" : "=m" (n) : "t" (x));
 	return (n);
 }
-#define	HAVE_EFFICIENT_IRINT
 #endif
 
-#if defined(__amd64__) || defined(__i386__)
+#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
 static __inline int
 irintl(long double x)
 {
 	int n;
 
-	asm("fistl %0" : "=m" (n) : "t" (x));
+	__asm("fistl %0" : "=m" (n) : "t" (x));
 	return (n);
 }
-#define	HAVE_EFFICIENT_IRINTL
 #endif
-
-#endif /* __GNUCLIKE_ASM */
 
 #ifdef DEBUG
 #if defined(__amd64__) || defined(__i386__)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201807201242.w6KCgORn048599>